{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "import pandas as pd\n", "reviews = pd.read_csv(\"https://raw.githubusercontent.com/ra314ra/ml/master/ign.csv\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
00AmazingLittleBigPlanet PS Vita/games/littlebigplanet-vita/vita-98907PlayStation Vita9.0PlatformerY2012912
11AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E.../games/littlebigplanet-ps-vita-marvel-super-he...PlayStation Vita9.0PlatformerY2012912
22GreatSplice: Tree of Life/games/splice/ipad-141070iPad8.5PuzzleN2012912
33GreatNHL 13/games/nhl-13/xbox-360-128182Xbox 3608.5SportsN2012911
44GreatNHL 13/games/nhl-13/ps3-128181PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 2, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
00AmazingLittleBigPlanet PS Vita/games/littlebigplanet-vita/vita-98907PlayStation Vita9.0PlatformerY2012912
11AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E.../games/littlebigplanet-ps-vita-marvel-super-he...PlayStation Vita9.0PlatformerY2012912
22GreatSplice: Tree of Life/games/splice/ipad-141070iPad8.5PuzzleN2012912
33GreatNHL 13/games/nhl-13/xbox-360-128182Xbox 3608.5SportsN2012911
44GreatNHL 13/games/nhl-13/ps3-128181PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 4, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.iloc[0:5, :]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "reviews = reviews.iloc[:, 1:]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
0AmazingLittleBigPlanet PS Vita/games/littlebigplanet-vita/vita-98907PlayStation Vita9.0PlatformerY2012912
1AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E.../games/littlebigplanet-ps-vita-marvel-super-he...PlayStation Vita9.0PlatformerY2012912
2GreatSplice: Tree of Life/games/splice/ipad-141070iPad8.5PuzzleN2012912
3GreatNHL 13/games/nhl-13/xbox-360-128182Xbox 3608.5SportsN2012911
4GreatNHL 13/games/nhl-13/ps3-128181PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 6, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(18625, 10)" ] }, "execution_count": 8, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.shape" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "test_reviews_part1 = reviews.iloc[:,0:2]\n", "test_reviews_part2 = reviews.iloc[:,3:]\n", "test_reviews_dropped_column = pd.concat([test_reviews_part1, test_reviews_part2], axis=1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitle
0AmazingLittleBigPlanet PS Vita
1AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E...
2GreatSplice: Tree of Life
3GreatNHL 13
4GreatNHL 13
\n", "
" ] }, "execution_count": 18, "metadata": { }, "output_type": "execute_result" } ], "source": [ "test_reviews_part1.head()\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
platformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
0PlayStation Vita9.0PlatformerY2012912
1PlayStation Vita9.0PlatformerY2012912
2iPad8.5PuzzleN2012912
3Xbox 3608.5SportsN2012911
4PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 22, "metadata": { }, "output_type": "execute_result" } ], "source": [ "test_reviews_part2.head()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
platformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
0PlayStation Vita9.0PlatformerY2012912
1PlayStation Vita9.0PlatformerY2012912
2iPad8.5PuzzleN2012912
3Xbox 3608.5SportsN2012911
4PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 23, "metadata": { }, "output_type": "execute_result" } ], "source": [ "test_reviews_part2.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
0AmazingLittleBigPlanet PS VitaPlayStation Vita9.0PlatformerY2012912
1AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E...PlayStation Vita9.0PlatformerY2012912
2GreatSplice: Tree of LifeiPad8.5PuzzleN2012912
3GreatNHL 13Xbox 3608.5SportsN2012911
4GreatNHL 13PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 25, "metadata": { }, "output_type": "execute_result" } ], "source": [ "test_drop_column = reviews.drop(['url'], axis=1)\n", "test_drop_column.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
10GoodTekken Tag Tournament 2/games/tekken-tag-tournament-2/ps3-124584PlayStation 37.5FightingN2012911
11GoodTekken Tag Tournament 2/games/tekken-tag-tournament-2/xbox-360-124581Xbox 3607.5FightingN2012911
12GoodWild Blood/games/wild-blood/iphone-139363iPhone7.0NaNN2012910
13AmazingMark of the Ninja/games/mark-of-the-ninja-135615/xbox-360-129276Xbox 3609.0Action, AdventureY201297
14AmazingMark of the Ninja/games/mark-of-the-ninja-135615/pc-143761PC9.0Action, AdventureY201297
\n", "
" ] }, "execution_count": 27, "metadata": { }, "output_type": "execute_result" } ], "source": [ "some_reviews = reviews.loc[10:20,]\n", "some_reviews.head()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
18MediocreWay of the Samurai 4/games/way-of-the-samurai-4/ps3-23516PlayStation 35.5Action, AdventureN201293
19GoodJoJo's Bizarre Adventure HD/games/jojos-bizarre-adventure/xbox-360-137717Xbox 3607.0FightingN201293
20GoodJoJo's Bizarre Adventure HD/games/jojos-bizarre-adventure/ps3-137896PlayStation 37.0FightingN201293
\n", "
" ] }, "execution_count": 29, "metadata": { }, "output_type": "execute_result" } ], "source": [ "some_reviews.loc[18:24,:]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 9.0\n", "1 9.0\n", "2 8.5\n", "3 8.5\n", "4 8.5\n", "5 7.0\n", "Name: score, dtype: float64" ] }, "execution_count": 30, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.loc[:5, \"score\"]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scorerelease_year
09.02012
19.02012
28.52012
38.52012
48.52012
57.02012
\n", "
" ] }, "execution_count": 31, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.loc[:5, [\"score\", \"release_year\"]]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 32, "metadata": { }, "output_type": "execute_result" } ], "source": [ "type(reviews.loc[:5, [\"score\", \"release_year\"]])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scoreeditors_choice
09.0Y
19.0Y
28.5N
38.5N
48.5N
57.0N
63.0N
79.0Y
83.0N
97.0N
107.5N
117.5N
127.0N
139.0Y
149.0Y
156.5N
166.5N
178.0N
185.5N
197.0N
207.0N
217.5N
227.5N
237.5N
249.0Y
257.0N
269.0Y
277.5N
288.0N
296.5N
.........
185954.4N
185966.5N
185974.9N
185986.8N
185997.0N
186007.4N
186017.4N
186027.4N
186037.8N
186048.6N
186056.0N
186066.4N
186077.0N
186085.4N
186098.0N
186106.0N
186115.8N
186127.8N
186138.0N
186149.2Y
186159.2Y
186167.5N
186178.4N
186189.1Y
186197.9N
186207.6N
186219.0Y
186225.8N
1862310.0Y
1862410.0Y
\n", "

18625 rows × 2 columns

\n", "
" ] }, "execution_count": 33, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews[[\"score\", \"editors_choice\"]]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "6.950459060402666" ] }, "execution_count": 35, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews[\"score\"].mean()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 510.500\n", "1 510.500\n", "2 510.375\n", "3 510.125\n", "4 510.125\n", "5 509.750\n", "6 508.750\n", "7 510.250\n", "8 508.750\n", "9 509.750\n", "10 509.875\n", "11 509.875\n", "12 509.500\n", "13 509.250\n", "14 509.250\n", "15 508.375\n", "16 508.375\n", "17 508.500\n", "18 507.375\n", "19 507.750\n", "20 507.750\n", "21 514.625\n", "22 514.625\n", "23 514.625\n", "24 515.000\n", "25 514.250\n", "26 514.750\n", "27 514.125\n", "28 514.250\n", "29 513.625\n", " ... \n", "18595 510.850\n", "18596 510.875\n", "18597 510.225\n", "18598 510.700\n", "18599 510.750\n", "18600 512.600\n", "18601 512.600\n", "18602 512.600\n", "18603 512.450\n", "18604 512.400\n", "18605 511.500\n", "18606 508.600\n", "18607 510.750\n", "18608 510.350\n", "18609 510.750\n", "18610 510.250\n", "18611 508.700\n", "18612 509.200\n", "18613 508.000\n", "18614 515.050\n", "18615 515.050\n", "18616 508.375\n", "18617 508.600\n", "18618 515.025\n", "18619 514.725\n", "18620 514.650\n", "18621 515.000\n", "18622 513.950\n", "18623 515.000\n", "18624 515.000\n", "Length: 18625, dtype: float64" ] }, "execution_count": 36, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.mean(axis=1)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scorerelease_yearrelease_monthrelease_day
score1.0000000.0627160.0076320.020079
release_year0.0627161.000000-0.1155150.016867
release_month0.007632-0.1155151.000000-0.067964
release_day0.0200790.016867-0.0679641.000000
\n", "
" ] }, "execution_count": 37, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.corr()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 4.50\n", "1 4.50\n", "2 4.25\n", "3 4.25\n", "4 4.25\n", "5 3.50\n", "6 1.50\n", "7 4.50\n", "8 1.50\n", "9 3.50\n", "10 3.75\n", "11 3.75\n", "12 3.50\n", "13 4.50\n", "14 4.50\n", "15 3.25\n", "16 3.25\n", "17 4.00\n", "18 2.75\n", "19 3.50\n", "20 3.50\n", "21 3.75\n", "22 3.75\n", "23 3.75\n", "24 4.50\n", "25 3.50\n", "26 4.50\n", "27 3.75\n", "28 4.00\n", "29 3.25\n", " ... \n", "18595 2.20\n", "18596 3.25\n", "18597 2.45\n", "18598 3.40\n", "18599 3.50\n", "18600 3.70\n", "18601 3.70\n", "18602 3.70\n", "18603 3.90\n", "18604 4.30\n", "18605 3.00\n", "18606 3.20\n", "18607 3.50\n", "18608 2.70\n", "18609 4.00\n", "18610 3.00\n", "18611 2.90\n", "18612 3.90\n", "18613 4.00\n", "18614 4.60\n", "18615 4.60\n", "18616 3.75\n", "18617 4.20\n", "18618 4.55\n", "18619 3.95\n", "18620 3.80\n", "18621 4.50\n", "18622 2.90\n", "18623 5.00\n", "18624 5.00\n", "Name: score, Length: 18625, dtype: float64" ] }, "execution_count": 39, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews[\"score\"] /2\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 True\n", "2 True\n", "3 True\n", "4 True\n", "5 False\n", "6 False\n", "7 True\n", "8 False\n", "9 False\n", "10 True\n", "11 True\n", "12 False\n", "13 True\n", "14 True\n", "15 False\n", "16 False\n", "17 True\n", "18 False\n", "19 False\n", "20 False\n", "21 True\n", "22 True\n", "23 True\n", "24 True\n", "25 False\n", "26 True\n", "27 True\n", "28 True\n", "29 False\n", " ... \n", "18595 False\n", "18596 False\n", "18597 False\n", "18598 False\n", "18599 False\n", "18600 True\n", "18601 True\n", "18602 True\n", "18603 True\n", "18604 True\n", "18605 False\n", "18606 False\n", "18607 False\n", "18608 False\n", "18609 True\n", "18610 False\n", "18611 False\n", "18612 True\n", "18613 True\n", "18614 True\n", "18615 True\n", "18616 True\n", "18617 True\n", "18618 True\n", "18619 True\n", "18620 True\n", "18621 True\n", "18622 False\n", "18623 True\n", "18624 True\n", "Name: score, Length: 18625, dtype: bool" ] }, "execution_count": 40, "metadata": { }, "output_type": "execute_result" } ], "source": [ "score_filter = reviews[\"score\"] > 7\n", "score_filter" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
0AmazingLittleBigPlanet PS Vita/games/littlebigplanet-vita/vita-98907PlayStation Vita9.0PlatformerY2012912
1AmazingLittleBigPlanet PS Vita -- Marvel Super Hero E.../games/littlebigplanet-ps-vita-marvel-super-he...PlayStation Vita9.0PlatformerY2012912
2GreatSplice: Tree of Life/games/splice/ipad-141070iPad8.5PuzzleN2012912
3GreatNHL 13/games/nhl-13/xbox-360-128182Xbox 3608.5SportsN2012911
4GreatNHL 13/games/nhl-13/ps3-128181PlayStation 38.5SportsN2012911
\n", "
" ] }, "execution_count": 42, "metadata": { }, "output_type": "execute_result" } ], "source": [ "filtered_reviews = reviews.loc[score_filter].copy()\n", "filtered_reviews.head()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 1.0\n", "2 1.0\n", "3 1.0\n", "4 1.0\n", "5 1.0\n", "6 1.0\n", "7 1.0\n", "8 1.0\n", "9 1.0\n", "10 1.0\n", "11 1.0\n", "12 1.0\n", "13 1.0\n", "14 1.0\n", "15 1.0\n", "16 1.0\n", "17 1.0\n", "18 1.0\n", "19 1.0\n", "20 1.0\n", "21 1.0\n", "22 1.0\n", "23 1.0\n", "24 1.0\n", "25 1.0\n", "26 1.0\n", "27 1.0\n", "28 1.0\n", "29 1.0\n", " ... \n", "18595 1.0\n", "18596 1.0\n", "18597 1.0\n", "18598 1.0\n", "18599 1.0\n", "18600 1.0\n", "18601 1.0\n", "18602 1.0\n", "18603 1.0\n", "18604 1.0\n", "18605 1.0\n", "18606 1.0\n", "18607 1.0\n", "18608 1.0\n", "18609 1.0\n", "18610 1.0\n", "18611 1.0\n", "18612 1.0\n", "18613 1.0\n", "18614 1.0\n", "18615 1.0\n", "18616 1.0\n", "18617 1.0\n", "18618 1.0\n", "18619 1.0\n", "18620 1.0\n", "18621 1.0\n", "18622 1.0\n", "18623 1.0\n", "18624 1.0\n", "Name: score, Length: 18625, dtype: float64" ] }, "execution_count": 44, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews[\"score\"].divide(reviews[\"score\"])" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_phrasetitleurlplatformscoregenreeditors_choicerelease_yearrelease_monthrelease_day
17137AmazingGone Home/games/gone-home/xbox-one-20014361Xbox One9.5SimulationY2013815
17197AmazingRayman Legends/games/rayman-legends/xbox-one-20008449Xbox One9.5PlatformerY2013826
17295AmazingLEGO Marvel Super Heroes/games/lego-marvel-super-heroes/xbox-one-20000826Xbox One9.0ActionY20131022
17313GreatDead Rising 3/games/dead-rising-3/xbox-one-124306Xbox One8.3ActionN20131118
17317GreatKiller Instinct/games/killer-instinct-2013/xbox-one-20000538Xbox One8.4FightingN20131118
\n", "
" ] }, "execution_count": 45, "metadata": { }, "output_type": "execute_result" } ], "source": [ "xbox_one_filter = (reviews[\"score\"] > 7) & (reviews[\"platform\"] == \"Xbox One\")\n", "filtered_reviews = reviews[xbox_one_filter]\n", "filtered_reviews.head()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 46, "metadata": { }, "output_type": "execute_result" }, { "data": { "image/png": "c7f4a0aa9195c0217e4f6b43cfad50b1ac288090" }, "metadata": { "image/png": { "height": 250, "width": 384 } } } ], "source": [ "%matplotlib inline\n", "reviews[reviews[\"platform\"] == \"Xbox One\"][\"score\"].plot(kind=\"hist\")" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 47, "metadata": { }, "output_type": "execute_result" }, { "data": { "image/png": "6eb8b80c69eb2b4d2fd3ae67cdc84e756871044d" }, "metadata": { "image/png": { "height": 250, "width": 384 } } } ], "source": [ "reviews[reviews[\"platform\"] == \"PlayStation 4\"][\"score\"].plot(kind=\"hist\")" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scorerelease_yearrelease_monthrelease_day
count18625.00000018625.00000018625.0000018625.000000
mean6.9504592006.5153297.1384715.603866
std1.7117364.5875293.476718.690128
min0.5000001970.0000001.000001.000000
25%6.0000002003.0000004.000008.000000
50%7.3000002007.0000008.0000016.000000
75%8.2000002010.00000010.0000023.000000
max10.0000002016.00000012.0000031.000000
\n", "
" ] }, "execution_count": 48, "metadata": { }, "output_type": "execute_result" } ], "source": [ "reviews.describe()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RespondentIDDo you celebrate Thanksgiving?What is typically the main dish at your Thanksgiving dinner?What is typically the main dish at your Thanksgiving dinner? - Other (please specify)How is the main dish typically cooked?How is the main dish typically cooked? - Other (please specify)What kind of stuffing/dressing do you typically have?What kind of stuffing/dressing do you typically have? - Other (please specify)What type of cranberry saucedo you typically have?What type of cranberry saucedo you typically have? - Other (please specify)...Have you ever tried to meet up with hometown friends on Thanksgiving night?Have you ever attended a \"Friendsgiving?\"Will you shop any Black Friday sales on Thanksgiving Day?Do you work in retail?Will you employer make you work on Black Friday?How would you describe where you live?AgeWhat is your gender?How much total combined money did all members of your HOUSEHOLD earn last year?US Region
04337954960YesTurkeyNaNBakedNaNBread-basedNaNNoneNaN...YesNoNoNoNaNSuburban18 - 29Male$75,000 to $99,999Middle Atlantic
14337951949YesTurkeyNaNBakedNaNBread-basedNaNOther (please specify)Homemade cranberry gelatin ring...NoNoYesNoNaNRural18 - 29Female$50,000 to $74,999East South Central
24337935621YesTurkeyNaNRoastedNaNRice-basedNaNHomemadeNaN...YesYesYesNoNaNSuburban18 - 29Male$0 to $9,999Mountain
34337933040YesTurkeyNaNBakedNaNBread-basedNaNHomemadeNaN...YesNoNoNoNaNUrban30 - 44Male$200,000 and upPacific
44337931983YesTofurkeyNaNBakedNaNBread-basedNaNCannedNaN...YesNoNoNoNaNUrban30 - 44Male$100,000 to $124,999Pacific
\n", "

5 rows × 65 columns

\n", "
" ] }, "execution_count": 50, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv(\"https://raw.githubusercontent.com/ra314ra/ml/master/thanksgiving-2015-poll-data.csv\", encoding = 'latin-1')\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(1058, 65)" ] }, "execution_count": 51, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array(['Yes', 'No'], dtype=object)" ] }, "execution_count": 52, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"Do you celebrate Thanksgiving?\"].unique()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply. - Other (please specify).1',\n", " 'Do you typically pray before or after the Thanksgiving meal?',\n", " 'How far will you travel for Thanksgiving?',\n", " 'Will you watch any of the following programs on Thanksgiving? Please select all that apply. - Macy's Parade',\n", " 'What's the age cutoff at your \"kids' table\" at Thanksgiving?',\n", " 'Have you ever tried to meet up with hometown friends on Thanksgiving night?',\n", " 'Have you ever attended a \"Friendsgiving?\"',\n", " 'Will you shop any Black Friday sales on Thanksgiving Day?',\n", " 'Do you work in retail?',\n", " 'Will you employer make you work on Black Friday?',\n", " 'How would you describe where you live?', 'Age', 'What is your gender?',\n", " 'How much total combined money did all members of your HOUSEHOLD earn last year?',\n", " 'US Region'],\n", " dtype='object')" ] }, "execution_count": 53, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data.columns[50:]" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Female 544\n", "Male 481\n", "NaN 33\n", "Name: What is your gender?, dtype: int64" ] }, "execution_count": 54, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"What is your gender?\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "import math\n", "\n", "def gender_code(gender_string):\n", " if isinstance(gender_string, float) and math.isnan(gender_string):\n", " return gender_string\n", " return int(gender_string == \"Female\")" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ " 1.0 544\n", " 0.0 481\n", "NaN 33\n", "Name: gender, dtype: int64" ] }, "execution_count": 56, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"gender\"] = data[\"What is your gender?\"].apply(gender_code)\n", "data[\"gender\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "RespondentID object\n", "Do you celebrate Thanksgiving? object\n", "What is typically the main dish at your Thanksgiving dinner? object\n", "What is typically the main dish at your Thanksgiving dinner? - Other (please specify) object\n", "How is the main dish typically cooked? object\n", "dtype: object" ] }, "execution_count": 58, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data.apply(lambda x: x.dtype).head()" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "$25,000 to $49,999 180\n", "Prefer not to answer 136\n", "$50,000 to $74,999 135\n", "$75,000 to $99,999 133\n", "$100,000 to $124,999 111\n", "$200,000 and up 80\n", "$10,000 to $24,999 68\n", "$0 to $9,999 66\n", "$125,000 to $149,999 49\n", "$150,000 to $174,999 40\n", "NaN 33\n", "$175,000 to $199,999 27\n", "Name: How much total combined money did all members of your HOUSEHOLD earn last year?, dtype: int64" ] }, "execution_count": 59, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"How much total combined money did all members of your HOUSEHOLD earn last year?\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "import numpy as np\n", "\n", "def clean_income(value):\n", " if value == \"$200,000 and up\":\n", " return 200000\n", " elif value == \"Prefer not to answer\":\n", " return np.nan\n", " elif isinstance(value, float) and math.isnan(value):\n", " return np.nan\n", " value = value.replace(\",\", \"\").replace(\"$\", \"\")\n", " income_high, income_low = value.split(\" to \")\n", " return (int(income_high) + int(income_low)) / 2" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 87499.5\n", "1 62499.5\n", "2 4999.5\n", "3 200000.0\n", "4 112499.5\n", "Name: income, dtype: float64" ] }, "execution_count": 61, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"income\"] = data[\"How much total combined money did all members of your HOUSEHOLD earn last year?\"].apply(clean_income)\n", "data[\"income\"].head()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ " 37499.5 180\n", "NaN 169\n", " 62499.5 135\n", " 87499.5 133\n", " 112499.5 111\n", " 200000.0 80\n", " 17499.5 68\n", " 4999.5 66\n", " 137499.5 49\n", " 162499.5 40\n", " 187499.5 27\n", "Name: income, dtype: int64" ] }, "execution_count": 62, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"income\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Canned 502\n", "Homemade 301\n", "None 146\n", "Other (please specify) 25\n", "Name: What type of cranberry saucedo you typically have?, dtype: int64" ] }, "execution_count": 63, "metadata": { }, "output_type": "execute_result" } ], "source": [ "data[\"What type of cranberry saucedo you typically have?\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "homemade = data[data[\"What type of cranberry saucedo you typically have?\"] == \"Homemade\"]\n", "canned = data[data[\"What type of cranberry saucedo you typically have?\"] == \"Canned\"]" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "94878.1072874494\n", "83823.40340909091\n" ] } ], "source": [ "print(homemade[\"income\"].mean())\n", "print(canned[\"income\"].mean())" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 66, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped = data.groupby(\"What type of cranberry saucedo you typically have?\")\n", "grouped" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'Canned': Int64Index([ 4, 6, 8, 11, 12, 15, 18, 19, 26, 27,\n", " ...\n", " 1040, 1041, 1042, 1044, 1045, 1046, 1047, 1051, 1054, 1057],\n", " dtype='int64', length=502),\n", " 'Homemade': Int64Index([ 2, 3, 5, 7, 13, 14, 16, 20, 21, 23,\n", " ...\n", " 1016, 1017, 1025, 1027, 1030, 1034, 1048, 1049, 1053, 1056],\n", " dtype='int64', length=301),\n", " 'None': Int64Index([ 0, 17, 24, 29, 34, 36, 40, 47, 49, 51,\n", " ...\n", " 980, 981, 997, 1015, 1018, 1031, 1037, 1043, 1050, 1055],\n", " dtype='int64', length=146),\n", " 'Other (please specify)': Int64Index([ 1, 9, 154, 216, 221, 233, 249, 265, 301, 336, 380,\n", " 435, 444, 447, 513, 550, 749, 750, 784, 807, 860, 872,\n", " 905, 1000, 1007],\n", " dtype='int64')}" ] }, "execution_count": 67, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped.groups" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "What type of cranberry saucedo you typically have?\n", "Canned 502\n", "Homemade 301\n", "None 146\n", "Other (please specify) 25\n", "dtype: int64" ] }, "execution_count": 68, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped.size()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "What type of cranberry saucedo you typically have?\n", "Canned 83823.403409\n", "Homemade 94878.107287\n", "None 78886.084034\n", "Other (please specify) 86629.978261\n", "Name: income, dtype: float64" ] }, "execution_count": 69, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped[\"income\"].agg(np.mean)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RespondentIDgenderincome
What type of cranberry saucedo you typically have?
Canned4.336699e+090.55284683823.403409
Homemade4.336792e+090.53310194878.107287
None4.336765e+090.51748378886.084034
Other (please specify)4.336763e+090.64000086629.978261
\n", "
" ] }, "execution_count": 70, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped.agg(np.mean)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 71, "metadata": { }, "output_type": "execute_result" }, { "data": { "image/png": "8bf1c38f22f20f81ebe8d3e3dd5526f5bf742ec1" }, "metadata": { "image/png": { "height": 365, "width": 390 } } } ], "source": [ "%matplotlib inline\n", "\n", "sauce = grouped.agg(np.mean)\n", "sauce[\"income\"].plot(kind=\"bar\")" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RespondentIDgenderincome
What type of cranberry saucedo you typically have?What is typically the main dish at your Thanksgiving dinner?
CannedChicken4.336354e+090.33333380999.600000
Ham/Pork4.336757e+090.64285777499.535714
I don't know4.335987e+090.0000004999.500000
Other (please specify)4.336682e+091.00000053213.785714
Roast beef4.336254e+090.57142925499.500000
Tofurkey4.337157e+090.714286100713.857143
Turkey4.336705e+090.54444485242.682045
HomemadeChicken4.336540e+090.75000019999.500000
Ham/Pork4.337253e+090.25000096874.625000
I don't know4.336084e+091.000000NaN
Other (please specify)4.336863e+090.60000055356.642857
Roast beef4.336174e+090.00000033749.500000
Tofurkey4.336790e+090.66666757916.166667
Turducken4.337475e+090.500000200000.000000
Turkey4.336791e+090.53100897690.147982
NoneChicken4.336151e+090.50000011249.500000
Ham/Pork4.336680e+090.44444461249.500000
I don't know4.336412e+090.50000033749.500000
Other (please specify)4.336688e+090.600000119106.678571
Roast beef4.337424e+090.000000162499.500000
Tofurkey4.336950e+090.500000112499.500000
Turducken4.336739e+090.000000NaN
Turkey4.336784e+090.52336474606.275281
Other (please specify)Ham/Pork4.336465e+091.00000087499.500000
Other (please specify)4.337335e+090.000000124999.666667
Tofurkey4.336122e+091.00000037499.500000
Turkey4.336724e+090.70000082916.194444
\n", "
" ] }, "execution_count": 72, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped = data.groupby([\"What type of cranberry saucedo you typically have?\", \"What is typically the main dish at your Thanksgiving dinner?\"])\n", "grouped.agg(np.mean)" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meansumstd
What type of cranberry saucedo you typically have?What is typically the main dish at your Thanksgiving dinner?
CannedChicken80999.600000404998.075779.481062
Ham/Pork77499.5357141084993.556645.063944
I don't know4999.5000004999.5NaN
Other (please specify)53213.785714372496.529780.946290
Roast beef25499.500000127497.524584.039538
Tofurkey100713.857143704997.061351.484439
Turkey85242.68204534182315.555687.436102
HomemadeChicken19999.50000059998.516393.596311
Ham/Pork96874.625000387498.577308.452805
I don't knowNaN0.0NaN
\n", "
" ] }, "execution_count": 73, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped[\"income\"].agg([np.mean, np.sum, np.std]).head(10)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "What type of cranberry saucedo you typically have? What is typically the main dish at your Thanksgiving dinner?\n", "Canned Chicken 6\n", " Ham/Pork 15\n", " I don't know 2\n", " Other (please specify) 7\n", " Roast beef 7\n", " Tofurkey 7\n", " Turkey 458\n", "Homemade Chicken 4\n", " Ham/Pork 4\n", " I don't know 1\n", " Other (please specify) 10\n", " Roast beef 3\n", " Tofurkey 6\n", " Turducken 2\n", " Turkey 271\n", "None Chicken 2\n", " Ham/Pork 9\n", " I don't know 2\n", " Other (please specify) 15\n", " Roast beef 1\n", " Tofurkey 6\n", " Turducken 1\n", " Turkey 110\n", "Other (please specify) Ham/Pork 1\n", " Other (please specify) 3\n", " Tofurkey 1\n", " Turkey 20\n", "dtype: int64" ] }, "execution_count": 74, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped.size()" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "How would you describe where you live?\n", "Rural 216\n", "Suburban 496\n", "Urban 236\n", "Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64" ] }, "execution_count": 75, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped = data.groupby(\"How would you describe where you live?\")[\"What is typically the main dish at your Thanksgiving dinner?\"]\n", "grouped.size()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "How would you describe where you live? \n", "Rural Turkey 189\n", " Other (please specify) 9\n", " Ham/Pork 7\n", " I don't know 3\n", " Tofurkey 3\n", " Turducken 2\n", " Chicken 2\n", " Roast beef 1\n", "Suburban Turkey 449\n", " Ham/Pork 17\n", " Other (please specify) 13\n", " Tofurkey 9\n", " Roast beef 3\n", " Chicken 3\n", " Turducken 1\n", " I don't know 1\n", "Urban Turkey 198\n", " Other (please specify) 13\n", " Tofurkey 8\n", " Chicken 7\n", " Roast beef 6\n", " Ham/Pork 4\n", "Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64" ] }, "execution_count": 76, "metadata": { }, "output_type": "execute_result" } ], "source": [ "grouped.apply(lambda x:x.value_counts())" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (Anaconda)", "language": "python", "name": "anaconda3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 0 }