{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Math 157: Intro to Mathematical Software\n",
    "## UC San Diego, winter 2018"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "This script creates the final project groups based on the Google Form input. This input was sanitized to remove duplicate names and to insert students that did not fill out the Google Form."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "We set a random seed to make this calculation reproducible."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "import itertools\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random\n",
    "random.seed(2718281828)\n",
    "import os\n",
    "import shutil"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Import a CSV file retrieved from Google Docs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Your name (as shown in CoCalc)</th>\n",
       "      <th>First choice of topic</th>\n",
       "      <th>Second choice of topic</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>129</td>\n",
       "      <td>149</td>\n",
       "      <td>129</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>129</td>\n",
       "      <td>149</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>2/28/2018 14:06:51</td>\n",
       "      <td>Jiawei Zhou</td>\n",
       "      <td>Model selection for machine learning</td>\n",
       "      <td>Combinatorial designs</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>35</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 2,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poll = pd.read_csv(\"Math 157 final project - Sheet1.csv\")\n",
    "poll.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "We only need the name and the first choice. Let's use the name as the axis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Topic</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>149</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>149</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>Jiawei Zhou</td>\n",
       "      <td>Model selection for machine learning</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>1</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 3,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poll = poll[['Your name (as shown in CoCalc)', 'First choice of topic']]\n",
    "poll = poll.rename(columns={'Your name (as shown in CoCalc)': 'Name', 'First choice of topic': 'Topic'})\n",
    "poll.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Standardize capitalization and whitespace in the names.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Topic</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Aliaksandr Sumak</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>An-Vy Hoang</td>\n",
       "      <td>Numerical solution of ODEs</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Anastasia Guanio</td>\n",
       "      <td>Fourier transforms</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andrew Or</td>\n",
       "      <td>Model selection for machine learning</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ansh Sancheti</td>\n",
       "      <td>Numerical solution of ODEs</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 4,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poll['Name'] = poll['Name'].map(lambda x: \" \".join(x.split()).title())\n",
    "poll.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "To make sure that the input was sanitized correctly, compare with a CSV file of course grades generated by CoCalc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>149</td>\n",
       "      <td>149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>149</td>\n",
       "      <td>149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>Jiawei Zhou</td>\n",
       "      <td>jir021@ucsd.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 5,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster = pd.read_csv(\"export_math157_master.csv\")\n",
    "roster = roster[['Name', 'Email']]\n",
    "roster.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Standardize capitalization and whitespace in the names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Aliaksandr Sumak</td>\n",
       "      <td>asumak@ucsd.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>An-Vy Hoang</td>\n",
       "      <td>ath008@ucsd.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Anastasia Guanio</td>\n",
       "      <td>aguanio@ucsd.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andrew Or</td>\n",
       "      <td>awor@ucsd.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ansh Sancheti</td>\n",
       "      <td>asanchet@ucsd.edu</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 6,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster['Name'] = roster['Name'].map(lambda x: \" \".join(x.split()).title())\n",
    "roster.head()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Check that everything matches up. Make sure this is valid before using the results!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Names in final project poll not found in course roster\n",
      "76    Megan Chang\n",
      "Name: Name, dtype: object\n",
      "Names in course roster not found in final project poll\n",
      "144    Mec029@Ucsd.Edu Chang\n",
      "Name: Name, dtype: object\n"
     ]
    }
   ],
   "source": [
    "poll_names = poll['Name']\n",
    "roster_names = roster['Name']\n",
    "\n",
    "print(\"Names in final project poll not found in course roster\")\n",
    "print(poll_names[~poll_names.isin(roster_names)])\n",
    "\n",
    "print(\"Names in course roster not found in final project poll\")\n",
    "print(roster_names[~roster_names.isin(poll_names)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Convert the poll results into a dictionary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "d = poll.set_index('Name').to_dict()['Topic']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Remove predefined groups before processing the rest of the list."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "presets = [['Zhanning Gu', 'Conghan Wang', 'Junyi Li']]\n",
    "d2 = {}\n",
    "for i in presets:\n",
    "    for j in i:\n",
    "        d2[j] = d[j]\n",
    "        del d[j]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Separately, count the number of instances of each topic, including nan (blank)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Combinatorial designs': 17,\n",
       " 'Fourier transforms': 12,\n",
       " 'Linear feedback shift registers': 6,\n",
       " 'Model selection for machine learning': 35,\n",
       " 'Numerical solution of ODEs': 29,\n",
       " 'Permutation groups': 29,\n",
       " nan: 18}"
      ]
     },
     "execution_count": 10,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "topic_counts = {s:list(d.values()).count(s) for s in set(d.values())}\n",
    "topic_counts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "In order to make the groups evenly sized, we add a dummy topic. Just like for real topics, we want no two dummies in the same group."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "148 2 37\n"
     ]
    }
   ],
   "source": [
    "n = len(d.keys())\n",
    "group_size = 4\n",
    "num_dummies = (-n) % group_size\n",
    "topic_counts['Dummy topic'] = num_dummies\n",
    "n += num_dummies\n",
    "num_groups = n // group_size\n",
    "print(n, num_dummies, num_groups)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Model selection for machine learning',\n",
       " 'Numerical solution of ODEs',\n",
       " 'Permutation groups',\n",
       " 'Combinatorial designs',\n",
       " 'Fourier transforms',\n",
       " 'Linear feedback shift registers',\n",
       " 'Dummy topic']"
      ]
     },
     "execution_count": 12,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "topics = list(topic_counts.keys())\n",
    "topics.sort(key = lambda x: -topic_counts[x])\n",
    "topics.remove(np.nan)\n",
    "topics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Distribute topics among the groups, from most to least popular (skipping the blanks). No topic may be assigned more than once within a group. To assign a particular topic, we choose a random subset of groups where each group is weighted by the number of empty spaces it has.\n",
    "\n",
    "There is a probability of failure because we run out of empty groups. In case this occurs, we give up and try again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "while True:\n",
    "    l = [[] for _ in range(num_groups)]\n",
    "    try:\n",
    "        for i in topics:\n",
    "            m = []\n",
    "            for j in range(num_groups):\n",
    "                m += [j for _ in range(group_size - len(l[j]))]\n",
    "            for _ in range(topic_counts[i]):\n",
    "                k = random.choice(m)\n",
    "                l[k].append(i)\n",
    "                m = [k1 for k1 in m if k1 != k]\n",
    "    except ValueError:\n",
    "        print(\"Retrying\")\n",
    "    finally:\n",
    "        break    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Pad the remaining groups with blanks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "for g in l:\n",
    "    g += ['' for _ in range(group_size - len(g))]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Fill students in at random according to their chosen topics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "for g in l:\n",
    "    for i in range(len(g)-1,-1,-1):\n",
    "        if g[i] == \"Dummy topic\":\n",
    "            del g[i]\n",
    "            continue\n",
    "        if g[i] in topics:\n",
    "            candidates = [j for j in d if d[j] == g[i]]\n",
    "        elif g[i] == '':\n",
    "            candidates = [j for j in d if d[j] is np.nan]\n",
    "        else: continue\n",
    "        x = random.choice(candidates)\n",
    "        g[i] = x\n",
    "        d2[x] = d[x]\n",
    "        del d[x]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Add back the preassigned groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "l += presets\n",
    "d.update(d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "For each group, assign each student who did not choose a topic to a random choice."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "topics.remove(\"Dummy topic\")\n",
    "for g in l:\n",
    "    for i in g:\n",
    "        if d[i] is np.nan:\n",
    "            m = [j for j in topics if not j in [d[i1] for i1 in g]]\n",
    "            d[i] = random.choice(m)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Print the groups to a file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "f = open(\"final_project_groups.md\", \"w\")\n",
    "f.write(\"The final project groups are listed below. In the shared project, you will find a folder called `final_project_workspaces`, and within that a subfolder called `group_xx` where `xx` is the number of your group (padded to 2 digits if necessary). You may use this as a shared workspace to coordinate; in particular, I have created a chat room for you to use to discuss. Remember, please let me know if you fail to contact one of your group members.\\n\\n\")\n",
    "f.write(\"For those who answered the Google Form before Saturday 5:30pm, I have assigned you your first choice. For others, I have assigned you a topic at random.\\n\\n\")\n",
    "for i in range(len(l)):\n",
    "    f.write(\"Group \" + str(i+1) + \":\\n\")\n",
    "    for j in l[i]:\n",
    "        f.write(\"- \" + j + \": \" + (d[j] if d[j] is not np.nan else \"(no topic chosen)\") + \"\\n\")\n",
    "    f.write(\"\\n\")\n",
    "f.write(\"\\n\")\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Create a workspace for each project. Make sure to edit the template chat room as appropriate before doing this.\n",
    "\n",
    "After running this command, move the folder `final_project_workspaces` into the shared project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "if not os.path.exists('final_project_workspaces'):\n",
    "    os.makedirs('final_project_workspaces')\n",
    "for i in range(1, num_groups+1):\n",
    "    n = str(i).zfill(2) ## Assumes between 10 and 99 groups\n",
    "    s = 'final_project_workspaces/group_' + n\n",
    "    if not os.path.exists(s):\n",
    "        os.makedirs(s)\n",
    "    shutil.copyfile('template_chat.sage-chat', s + '/group_' + n + '_chat.sage-chat')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (Ubuntu Linux)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}