{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import pandas as pd\n",
"\n",
"# IGN game-review dataset, loaded straight from the hosted CSV.\n",
"IGN_REVIEWS_URL = \"https://raw.githubusercontent.com/ra314ra/ml/master/ign.csv\"\n",
"reviews = pd.read_csv(IGN_REVIEWS_URL)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
" /games/littlebigplanet-vita/vita-98907 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
" /games/littlebigplanet-ps-vita-marvel-super-he... | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
" /games/splice/ipad-141070 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/xbox-360-128182 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/ps3-128181 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 2,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
" /games/littlebigplanet-vita/vita-98907 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
" /games/littlebigplanet-ps-vita-marvel-super-he... | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
" /games/splice/ipad-141070 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/xbox-360-128182 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/ps3-128181 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 4,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.iloc[0:5, :]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Drop the leftover CSV row-counter column by name rather than by position.\n",
"# A positional `iloc[:, 1:]` strips one more real column every time the cell\n",
"# is re-run; dropping by name (ignoring it if already gone) is idempotent.\n",
"reviews = reviews.drop([\"Unnamed: 0\"], axis=1, errors=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
" /games/littlebigplanet-vita/vita-98907 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
" /games/littlebigplanet-ps-vita-marvel-super-he... | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
" /games/splice/ipad-141070 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/xbox-360-128182 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/ps3-128181 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 6,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(18625, 10)"
]
},
"execution_count": 8,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Remove the third column the manual way: take the columns on either side of\n",
"# it by position, then stitch the two pieces back together column-wise.\n",
"test_reviews_part1 = reviews.iloc[:, :2]\n",
"test_reviews_part2 = reviews.iloc[:, 3:]\n",
"test_reviews_dropped_column = pd.concat(\n",
"    [test_reviews_part1, test_reviews_part2],\n",
"    axis=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
"
\n",
" \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
"
\n",
" \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
"
\n",
" \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
"
\n",
" \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 18,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"test_reviews_part1.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 22,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"test_reviews_part2.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 23,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"test_reviews_part2.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 25,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Drop the \"url\" column by label in a single call; `reviews` itself is left untouched.\n",
"test_drop_column = reviews.drop(\"url\", axis=1)\n",
"test_drop_column.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 10 | \n",
" Good | \n",
" Tekken Tag Tournament 2 | \n",
" /games/tekken-tag-tournament-2/ps3-124584 | \n",
" PlayStation 3 | \n",
" 7.5 | \n",
" Fighting | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 11 | \n",
" Good | \n",
" Tekken Tag Tournament 2 | \n",
" /games/tekken-tag-tournament-2/xbox-360-124581 | \n",
" Xbox 360 | \n",
" 7.5 | \n",
" Fighting | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 12 | \n",
" Good | \n",
" Wild Blood | \n",
" /games/wild-blood/iphone-139363 | \n",
" iPhone | \n",
" 7.0 | \n",
" NaN | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 10 | \n",
"
\n",
" \n",
" 13 | \n",
" Amazing | \n",
" Mark of the Ninja | \n",
" /games/mark-of-the-ninja-135615/xbox-360-129276 | \n",
" Xbox 360 | \n",
" 9.0 | \n",
" Action, Adventure | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 7 | \n",
"
\n",
" \n",
" 14 | \n",
" Amazing | \n",
" Mark of the Ninja | \n",
" /games/mark-of-the-ninja-135615/pc-143761 | \n",
" PC | \n",
" 9.0 | \n",
" Action, Adventure | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 27,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# `.loc` slices by index label and includes both endpoints,\n",
"# so this keeps the rows labelled 10 through 20 (all columns).\n",
"some_reviews = reviews.loc[10:20, :]\n",
"some_reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 18 | \n",
" Mediocre | \n",
" Way of the Samurai 4 | \n",
" /games/way-of-the-samurai-4/ps3-23516 | \n",
" PlayStation 3 | \n",
" 5.5 | \n",
" Action, Adventure | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 3 | \n",
"
\n",
" \n",
" 19 | \n",
" Good | \n",
" JoJo's Bizarre Adventure HD | \n",
" /games/jojos-bizarre-adventure/xbox-360-137717 | \n",
" Xbox 360 | \n",
" 7.0 | \n",
" Fighting | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 3 | \n",
"
\n",
" \n",
" 20 | \n",
" Good | \n",
" JoJo's Bizarre Adventure HD | \n",
" /games/jojos-bizarre-adventure/ps3-137896 | \n",
" PlayStation 3 | \n",
" 7.0 | \n",
" Fighting | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 29,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"some_reviews.loc[18:24,:]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 9.0\n",
"1 9.0\n",
"2 8.5\n",
"3 8.5\n",
"4 8.5\n",
"5 7.0\n",
"Name: score, dtype: float64"
]
},
"execution_count": 30,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.loc[:5, \"score\"]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score | \n",
" release_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 9.0 | \n",
" 2012 | \n",
"
\n",
" \n",
" 1 | \n",
" 9.0 | \n",
" 2012 | \n",
"
\n",
" \n",
" 2 | \n",
" 8.5 | \n",
" 2012 | \n",
"
\n",
" \n",
" 3 | \n",
" 8.5 | \n",
" 2012 | \n",
"
\n",
" \n",
" 4 | \n",
" 8.5 | \n",
" 2012 | \n",
"
\n",
" \n",
" 5 | \n",
" 7.0 | \n",
" 2012 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 31,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.loc[:5, [\"score\", \"release_year\"]]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 32,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"type(reviews.loc[:5, [\"score\", \"release_year\"]])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score | \n",
" editors_choice | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 1 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 2 | \n",
" 8.5 | \n",
" N | \n",
"
\n",
" \n",
" 3 | \n",
" 8.5 | \n",
" N | \n",
"
\n",
" \n",
" 4 | \n",
" 8.5 | \n",
" N | \n",
"
\n",
" \n",
" 5 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 6 | \n",
" 3.0 | \n",
" N | \n",
"
\n",
" \n",
" 7 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 8 | \n",
" 3.0 | \n",
" N | \n",
"
\n",
" \n",
" 9 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 10 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 11 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 12 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 13 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 14 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 15 | \n",
" 6.5 | \n",
" N | \n",
"
\n",
" \n",
" 16 | \n",
" 6.5 | \n",
" N | \n",
"
\n",
" \n",
" 17 | \n",
" 8.0 | \n",
" N | \n",
"
\n",
" \n",
" 18 | \n",
" 5.5 | \n",
" N | \n",
"
\n",
" \n",
" 19 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 20 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 21 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 22 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 23 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 24 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 25 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 26 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 27 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 28 | \n",
" 8.0 | \n",
" N | \n",
"
\n",
" \n",
" 29 | \n",
" 6.5 | \n",
" N | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 18595 | \n",
" 4.4 | \n",
" N | \n",
"
\n",
" \n",
" 18596 | \n",
" 6.5 | \n",
" N | \n",
"
\n",
" \n",
" 18597 | \n",
" 4.9 | \n",
" N | \n",
"
\n",
" \n",
" 18598 | \n",
" 6.8 | \n",
" N | \n",
"
\n",
" \n",
" 18599 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 18600 | \n",
" 7.4 | \n",
" N | \n",
"
\n",
" \n",
" 18601 | \n",
" 7.4 | \n",
" N | \n",
"
\n",
" \n",
" 18602 | \n",
" 7.4 | \n",
" N | \n",
"
\n",
" \n",
" 18603 | \n",
" 7.8 | \n",
" N | \n",
"
\n",
" \n",
" 18604 | \n",
" 8.6 | \n",
" N | \n",
"
\n",
" \n",
" 18605 | \n",
" 6.0 | \n",
" N | \n",
"
\n",
" \n",
" 18606 | \n",
" 6.4 | \n",
" N | \n",
"
\n",
" \n",
" 18607 | \n",
" 7.0 | \n",
" N | \n",
"
\n",
" \n",
" 18608 | \n",
" 5.4 | \n",
" N | \n",
"
\n",
" \n",
" 18609 | \n",
" 8.0 | \n",
" N | \n",
"
\n",
" \n",
" 18610 | \n",
" 6.0 | \n",
" N | \n",
"
\n",
" \n",
" 18611 | \n",
" 5.8 | \n",
" N | \n",
"
\n",
" \n",
" 18612 | \n",
" 7.8 | \n",
" N | \n",
"
\n",
" \n",
" 18613 | \n",
" 8.0 | \n",
" N | \n",
"
\n",
" \n",
" 18614 | \n",
" 9.2 | \n",
" Y | \n",
"
\n",
" \n",
" 18615 | \n",
" 9.2 | \n",
" Y | \n",
"
\n",
" \n",
" 18616 | \n",
" 7.5 | \n",
" N | \n",
"
\n",
" \n",
" 18617 | \n",
" 8.4 | \n",
" N | \n",
"
\n",
" \n",
" 18618 | \n",
" 9.1 | \n",
" Y | \n",
"
\n",
" \n",
" 18619 | \n",
" 7.9 | \n",
" N | \n",
"
\n",
" \n",
" 18620 | \n",
" 7.6 | \n",
" N | \n",
"
\n",
" \n",
" 18621 | \n",
" 9.0 | \n",
" Y | \n",
"
\n",
" \n",
" 18622 | \n",
" 5.8 | \n",
" N | \n",
"
\n",
" \n",
" 18623 | \n",
" 10.0 | \n",
" Y | \n",
"
\n",
" \n",
" 18624 | \n",
" 10.0 | \n",
" Y | \n",
"
\n",
" \n",
"
\n",
"
18625 rows × 2 columns
\n",
"
"
]
},
"execution_count": 33,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews[[\"score\", \"editors_choice\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"6.950459060402666"
]
},
"execution_count": 35,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews[\"score\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 510.500\n",
"1 510.500\n",
"2 510.375\n",
"3 510.125\n",
"4 510.125\n",
"5 509.750\n",
"6 508.750\n",
"7 510.250\n",
"8 508.750\n",
"9 509.750\n",
"10 509.875\n",
"11 509.875\n",
"12 509.500\n",
"13 509.250\n",
"14 509.250\n",
"15 508.375\n",
"16 508.375\n",
"17 508.500\n",
"18 507.375\n",
"19 507.750\n",
"20 507.750\n",
"21 514.625\n",
"22 514.625\n",
"23 514.625\n",
"24 515.000\n",
"25 514.250\n",
"26 514.750\n",
"27 514.125\n",
"28 514.250\n",
"29 513.625\n",
" ... \n",
"18595 510.850\n",
"18596 510.875\n",
"18597 510.225\n",
"18598 510.700\n",
"18599 510.750\n",
"18600 512.600\n",
"18601 512.600\n",
"18602 512.600\n",
"18603 512.450\n",
"18604 512.400\n",
"18605 511.500\n",
"18606 508.600\n",
"18607 510.750\n",
"18608 510.350\n",
"18609 510.750\n",
"18610 510.250\n",
"18611 508.700\n",
"18612 509.200\n",
"18613 508.000\n",
"18614 515.050\n",
"18615 515.050\n",
"18616 508.375\n",
"18617 508.600\n",
"18618 515.025\n",
"18619 514.725\n",
"18620 514.650\n",
"18621 515.000\n",
"18622 513.950\n",
"18623 515.000\n",
"18624 515.000\n",
"Length: 18625, dtype: float64"
]
},
"execution_count": 36,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Row-wise mean over the numeric columns only. `numeric_only=True` is explicit\n",
"# because pandas >= 2.0 no longer skips string columns silently and would\n",
"# raise a TypeError on this mixed-dtype frame.\n",
"reviews.mean(axis=1, numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" score | \n",
" 1.000000 | \n",
" 0.062716 | \n",
" 0.007632 | \n",
" 0.020079 | \n",
"
\n",
" \n",
" release_year | \n",
" 0.062716 | \n",
" 1.000000 | \n",
" -0.115515 | \n",
" 0.016867 | \n",
"
\n",
" \n",
" release_month | \n",
" 0.007632 | \n",
" -0.115515 | \n",
" 1.000000 | \n",
" -0.067964 | \n",
"
\n",
" \n",
" release_day | \n",
" 0.020079 | \n",
" 0.016867 | \n",
" -0.067964 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 37,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Pairwise correlation of the numeric columns. The numeric columns are\n",
"# selected explicitly because pandas >= 2.0 raises on string columns instead\n",
"# of dropping them; `select_dtypes` behaves the same on old and new versions.\n",
"reviews.select_dtypes(include=[\"number\"]).corr()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 4.50\n",
"1 4.50\n",
"2 4.25\n",
"3 4.25\n",
"4 4.25\n",
"5 3.50\n",
"6 1.50\n",
"7 4.50\n",
"8 1.50\n",
"9 3.50\n",
"10 3.75\n",
"11 3.75\n",
"12 3.50\n",
"13 4.50\n",
"14 4.50\n",
"15 3.25\n",
"16 3.25\n",
"17 4.00\n",
"18 2.75\n",
"19 3.50\n",
"20 3.50\n",
"21 3.75\n",
"22 3.75\n",
"23 3.75\n",
"24 4.50\n",
"25 3.50\n",
"26 4.50\n",
"27 3.75\n",
"28 4.00\n",
"29 3.25\n",
" ... \n",
"18595 2.20\n",
"18596 3.25\n",
"18597 2.45\n",
"18598 3.40\n",
"18599 3.50\n",
"18600 3.70\n",
"18601 3.70\n",
"18602 3.70\n",
"18603 3.90\n",
"18604 4.30\n",
"18605 3.00\n",
"18606 3.20\n",
"18607 3.50\n",
"18608 2.70\n",
"18609 4.00\n",
"18610 3.00\n",
"18611 2.90\n",
"18612 3.90\n",
"18613 4.00\n",
"18614 4.60\n",
"18615 4.60\n",
"18616 3.75\n",
"18617 4.20\n",
"18618 4.55\n",
"18619 3.95\n",
"18620 3.80\n",
"18621 4.50\n",
"18622 2.90\n",
"18623 5.00\n",
"18624 5.00\n",
"Name: score, Length: 18625, dtype: float64"
]
},
"execution_count": 39,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews[\"score\"] /2\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 True\n",
"2 True\n",
"3 True\n",
"4 True\n",
"5 False\n",
"6 False\n",
"7 True\n",
"8 False\n",
"9 False\n",
"10 True\n",
"11 True\n",
"12 False\n",
"13 True\n",
"14 True\n",
"15 False\n",
"16 False\n",
"17 True\n",
"18 False\n",
"19 False\n",
"20 False\n",
"21 True\n",
"22 True\n",
"23 True\n",
"24 True\n",
"25 False\n",
"26 True\n",
"27 True\n",
"28 True\n",
"29 False\n",
" ... \n",
"18595 False\n",
"18596 False\n",
"18597 False\n",
"18598 False\n",
"18599 False\n",
"18600 True\n",
"18601 True\n",
"18602 True\n",
"18603 True\n",
"18604 True\n",
"18605 False\n",
"18606 False\n",
"18607 False\n",
"18608 False\n",
"18609 True\n",
"18610 False\n",
"18611 False\n",
"18612 True\n",
"18613 True\n",
"18614 True\n",
"18615 True\n",
"18616 True\n",
"18617 True\n",
"18618 True\n",
"18619 True\n",
"18620 True\n",
"18621 True\n",
"18622 False\n",
"18623 True\n",
"18624 True\n",
"Name: score, Length: 18625, dtype: bool"
]
},
"execution_count": 40,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Boolean mask: True for every review scored strictly above 7.\n",
"score_filter = reviews[\"score\"].gt(7)\n",
"score_filter"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita | \n",
" /games/littlebigplanet-vita/vita-98907 | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" Amazing | \n",
" LittleBigPlanet PS Vita -- Marvel Super Hero E... | \n",
" /games/littlebigplanet-ps-vita-marvel-super-he... | \n",
" PlayStation Vita | \n",
" 9.0 | \n",
" Platformer | \n",
" Y | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 2 | \n",
" Great | \n",
" Splice: Tree of Life | \n",
" /games/splice/ipad-141070 | \n",
" iPad | \n",
" 8.5 | \n",
" Puzzle | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/xbox-360-128182 | \n",
" Xbox 360 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" Great | \n",
" NHL 13 | \n",
" /games/nhl-13/ps3-128181 | \n",
" PlayStation 3 | \n",
" 8.5 | \n",
" Sports | \n",
" N | \n",
" 2012 | \n",
" 9 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 42,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows flagged by the mask; `.copy()` makes the result an\n",
"# independent frame rather than a selection tied to `reviews`.\n",
"filtered_reviews = reviews.loc[score_filter, :].copy()\n",
"filtered_reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 1.0\n",
"2 1.0\n",
"3 1.0\n",
"4 1.0\n",
"5 1.0\n",
"6 1.0\n",
"7 1.0\n",
"8 1.0\n",
"9 1.0\n",
"10 1.0\n",
"11 1.0\n",
"12 1.0\n",
"13 1.0\n",
"14 1.0\n",
"15 1.0\n",
"16 1.0\n",
"17 1.0\n",
"18 1.0\n",
"19 1.0\n",
"20 1.0\n",
"21 1.0\n",
"22 1.0\n",
"23 1.0\n",
"24 1.0\n",
"25 1.0\n",
"26 1.0\n",
"27 1.0\n",
"28 1.0\n",
"29 1.0\n",
" ... \n",
"18595 1.0\n",
"18596 1.0\n",
"18597 1.0\n",
"18598 1.0\n",
"18599 1.0\n",
"18600 1.0\n",
"18601 1.0\n",
"18602 1.0\n",
"18603 1.0\n",
"18604 1.0\n",
"18605 1.0\n",
"18606 1.0\n",
"18607 1.0\n",
"18608 1.0\n",
"18609 1.0\n",
"18610 1.0\n",
"18611 1.0\n",
"18612 1.0\n",
"18613 1.0\n",
"18614 1.0\n",
"18615 1.0\n",
"18616 1.0\n",
"18617 1.0\n",
"18618 1.0\n",
"18619 1.0\n",
"18620 1.0\n",
"18621 1.0\n",
"18622 1.0\n",
"18623 1.0\n",
"18624 1.0\n",
"Name: score, Length: 18625, dtype: float64"
]
},
"execution_count": 44,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews[\"score\"].divide(reviews[\"score\"])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score_phrase | \n",
" title | \n",
" url | \n",
" platform | \n",
" score | \n",
" genre | \n",
" editors_choice | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" 17137 | \n",
" Amazing | \n",
" Gone Home | \n",
" /games/gone-home/xbox-one-20014361 | \n",
" Xbox One | \n",
" 9.5 | \n",
" Simulation | \n",
" Y | \n",
" 2013 | \n",
" 8 | \n",
" 15 | \n",
"
\n",
" \n",
" 17197 | \n",
" Amazing | \n",
" Rayman Legends | \n",
" /games/rayman-legends/xbox-one-20008449 | \n",
" Xbox One | \n",
" 9.5 | \n",
" Platformer | \n",
" Y | \n",
" 2013 | \n",
" 8 | \n",
" 26 | \n",
"
\n",
" \n",
" 17295 | \n",
" Amazing | \n",
" LEGO Marvel Super Heroes | \n",
" /games/lego-marvel-super-heroes/xbox-one-20000826 | \n",
" Xbox One | \n",
" 9.0 | \n",
" Action | \n",
" Y | \n",
" 2013 | \n",
" 10 | \n",
" 22 | \n",
"
\n",
" \n",
" 17313 | \n",
" Great | \n",
" Dead Rising 3 | \n",
" /games/dead-rising-3/xbox-one-124306 | \n",
" Xbox One | \n",
" 8.3 | \n",
" Action | \n",
" N | \n",
" 2013 | \n",
" 11 | \n",
" 18 | \n",
"
\n",
" \n",
" 17317 | \n",
" Great | \n",
" Killer Instinct | \n",
" /games/killer-instinct-2013/xbox-one-20000538 | \n",
" Xbox One | \n",
" 8.4 | \n",
" Fighting | \n",
" N | \n",
" 2013 | \n",
" 11 | \n",
" 18 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 45,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Combine two row conditions with `&`: scored above 7 AND released on Xbox One.\n",
"high_score_mask = reviews[\"score\"] > 7\n",
"xbox_one_mask = reviews[\"platform\"] == \"Xbox One\"\n",
"xbox_one_filter = high_score_mask & xbox_one_mask\n",
"\n",
"filtered_reviews = reviews[xbox_one_filter]\n",
"filtered_reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 46,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "c7f4a0aa9195c0217e4f6b43cfad50b1ac288090"
},
"metadata": {
"image/png": {
"height": 250,
"width": 384
}
}
}
],
"source": [
"%matplotlib inline\n",
"# Histogram of review scores for Xbox One titles, selecting rows and the\n",
"# \"score\" column in one `.loc` lookup.\n",
"reviews.loc[reviews[\"platform\"] == \"Xbox One\", \"score\"].plot.hist()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 47,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "6eb8b80c69eb2b4d2fd3ae67cdc84e756871044d"
},
"metadata": {
"image/png": {
"height": 250,
"width": 384
}
}
}
],
"source": [
"# Histogram of review scores for PlayStation 4 titles, selecting rows and the\n",
"# \"score\" column in one `.loc` lookup.\n",
"reviews.loc[reviews[\"platform\"] == \"PlayStation 4\", \"score\"].plot.hist()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" score | \n",
" release_year | \n",
" release_month | \n",
" release_day | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 18625.000000 | \n",
" 18625.000000 | \n",
" 18625.00000 | \n",
" 18625.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 6.950459 | \n",
" 2006.515329 | \n",
" 7.13847 | \n",
" 15.603866 | \n",
"
\n",
" \n",
" std | \n",
" 1.711736 | \n",
" 4.587529 | \n",
" 3.47671 | \n",
" 8.690128 | \n",
"
\n",
" \n",
" min | \n",
" 0.500000 | \n",
" 1970.000000 | \n",
" 1.00000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.000000 | \n",
" 2003.000000 | \n",
" 4.00000 | \n",
" 8.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.300000 | \n",
" 2007.000000 | \n",
" 8.00000 | \n",
" 16.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 8.200000 | \n",
" 2010.000000 | \n",
" 10.00000 | \n",
" 23.000000 | \n",
"
\n",
" \n",
" max | \n",
" 10.000000 | \n",
" 2016.000000 | \n",
" 12.00000 | \n",
" 31.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 48,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"reviews.describe()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" RespondentID | \n",
" Do you celebrate Thanksgiving? | \n",
" What is typically the main dish at your Thanksgiving dinner? | \n",
" What is typically the main dish at your Thanksgiving dinner? - Other (please specify) | \n",
" How is the main dish typically cooked? | \n",
" How is the main dish typically cooked? - Other (please specify) | \n",
" What kind of stuffing/dressing do you typically have? | \n",
" What kind of stuffing/dressing do you typically have? - Other (please specify) | \n",
" What type of cranberry saucedo you typically have? | \n",
" What type of cranberry saucedo you typically have? - Other (please specify) | \n",
" ... | \n",
" Have you ever tried to meet up with hometown friends on Thanksgiving night? | \n",
" Have you ever attended a \"Friendsgiving?\" | \n",
" Will you shop any Black Friday sales on Thanksgiving Day? | \n",
" Do you work in retail? | \n",
" Will you employer make you work on Black Friday? | \n",
" How would you describe where you live? | \n",
" Age | \n",
" What is your gender? | \n",
" How much total combined money did all members of your HOUSEHOLD earn last year? | \n",
" US Region | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 4337954960 | \n",
" Yes | \n",
" Turkey | \n",
" NaN | \n",
" Baked | \n",
" NaN | \n",
" Bread-based | \n",
" NaN | \n",
" None | \n",
" NaN | \n",
" ... | \n",
" Yes | \n",
" No | \n",
" No | \n",
" No | \n",
" NaN | \n",
" Suburban | \n",
" 18 - 29 | \n",
" Male | \n",
" $75,000 to $99,999 | \n",
" Middle Atlantic | \n",
"
\n",
" \n",
" 1 | \n",
" 4337951949 | \n",
" Yes | \n",
" Turkey | \n",
" NaN | \n",
" Baked | \n",
" NaN | \n",
" Bread-based | \n",
" NaN | \n",
" Other (please specify) | \n",
" Homemade cranberry gelatin ring | \n",
" ... | \n",
" No | \n",
" No | \n",
" Yes | \n",
" No | \n",
" NaN | \n",
" Rural | \n",
" 18 - 29 | \n",
" Female | \n",
" $50,000 to $74,999 | \n",
" East South Central | \n",
"
\n",
" \n",
" 2 | \n",
" 4337935621 | \n",
" Yes | \n",
" Turkey | \n",
" NaN | \n",
" Roasted | \n",
" NaN | \n",
" Rice-based | \n",
" NaN | \n",
" Homemade | \n",
" NaN | \n",
" ... | \n",
" Yes | \n",
" Yes | \n",
" Yes | \n",
" No | \n",
" NaN | \n",
" Suburban | \n",
" 18 - 29 | \n",
" Male | \n",
" $0 to $9,999 | \n",
" Mountain | \n",
"
\n",
" \n",
" 3 | \n",
" 4337933040 | \n",
" Yes | \n",
" Turkey | \n",
" NaN | \n",
" Baked | \n",
" NaN | \n",
" Bread-based | \n",
" NaN | \n",
" Homemade | \n",
" NaN | \n",
" ... | \n",
" Yes | \n",
" No | \n",
" No | \n",
" No | \n",
" NaN | \n",
" Urban | \n",
" 30 - 44 | \n",
" Male | \n",
" $200,000 and up | \n",
" Pacific | \n",
"
\n",
" \n",
" 4 | \n",
" 4337931983 | \n",
" Yes | \n",
" Tofurkey | \n",
" NaN | \n",
" Baked | \n",
" NaN | \n",
" Bread-based | \n",
" NaN | \n",
" Canned | \n",
" NaN | \n",
" ... | \n",
" Yes | \n",
" No | \n",
" No | \n",
" No | \n",
" NaN | \n",
" Urban | \n",
" 30 - 44 | \n",
" Male | \n",
" $100,000 to $124,999 | \n",
" Pacific | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 65 columns
\n",
"
"
]
},
"execution_count": 50,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv(\"https://raw.githubusercontent.com/ra314ra/ml/master/thanksgiving-2015-poll-data.csv\", encoding = 'latin-1')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(1058, 65)"
]
},
"execution_count": 51,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array(['Yes', 'No'], dtype=object)"
]
},
"execution_count": 52,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"Do you celebrate Thanksgiving?\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply. - Other (please specify).1',\n",
" 'Do you typically pray before or after the Thanksgiving meal?',\n",
" 'How far will you travel for Thanksgiving?',\n",
" 'Will you watch any of the following programs on Thanksgiving? Please select all that apply. - Macy's Parade',\n",
" 'What's the age cutoff at your \"kids' table\" at Thanksgiving?',\n",
" 'Have you ever tried to meet up with hometown friends on Thanksgiving night?',\n",
" 'Have you ever attended a \"Friendsgiving?\"',\n",
" 'Will you shop any Black Friday sales on Thanksgiving Day?',\n",
" 'Do you work in retail?',\n",
" 'Will you employer make you work on Black Friday?',\n",
" 'How would you describe where you live?', 'Age', 'What is your gender?',\n",
" 'How much total combined money did all members of your HOUSEHOLD earn last year?',\n",
" 'US Region'],\n",
" dtype='object')"
]
},
"execution_count": 53,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.columns[50:]"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Female 544\n",
"Male 481\n",
"NaN 33\n",
"Name: What is your gender?, dtype: int64"
]
},
"execution_count": 54,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"What is your gender?\"].value_counts(dropna=False)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import math\n",
"\n",
"def gender_code(gender_string):\n",
" if isinstance(gender_string, float) and math.isnan(gender_string):\n",
" return gender_string\n",
" return int(gender_string == \"Female\")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
" 1.0 544\n",
" 0.0 481\n",
"NaN 33\n",
"Name: gender, dtype: int64"
]
},
"execution_count": 56,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"gender\"] = data[\"What is your gender?\"].apply(gender_code)\n",
"data[\"gender\"].value_counts(dropna=False)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"RespondentID object\n",
"Do you celebrate Thanksgiving? object\n",
"What is typically the main dish at your Thanksgiving dinner? object\n",
"What is typically the main dish at your Thanksgiving dinner? - Other (please specify) object\n",
"How is the main dish typically cooked? object\n",
"dtype: object"
]
},
"execution_count": 58,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.apply(lambda x: x.dtype).head()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"$25,000 to $49,999 180\n",
"Prefer not to answer 136\n",
"$50,000 to $74,999 135\n",
"$75,000 to $99,999 133\n",
"$100,000 to $124,999 111\n",
"$200,000 and up 80\n",
"$10,000 to $24,999 68\n",
"$0 to $9,999 66\n",
"$125,000 to $149,999 49\n",
"$150,000 to $174,999 40\n",
"NaN 33\n",
"$175,000 to $199,999 27\n",
"Name: How much total combined money did all members of your HOUSEHOLD earn last year?, dtype: int64"
]
},
"execution_count": 59,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"How much total combined money did all members of your HOUSEHOLD earn last year?\"].value_counts(dropna=False)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import numpy as np\n",
"\n",
"def clean_income(value):\n",
" if value == \"$200,000 and up\":\n",
" return 200000\n",
" elif value == \"Prefer not to answer\":\n",
" return np.nan\n",
" elif isinstance(value, float) and math.isnan(value):\n",
" return np.nan\n",
" value = value.replace(\",\", \"\").replace(\"$\", \"\")\n",
" income_high, income_low = value.split(\" to \")\n",
" return (int(income_high) + int(income_low)) / 2"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 87499.5\n",
"1 62499.5\n",
"2 4999.5\n",
"3 200000.0\n",
"4 112499.5\n",
"Name: income, dtype: float64"
]
},
"execution_count": 61,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"income\"] = data[\"How much total combined money did all members of your HOUSEHOLD earn last year?\"].apply(clean_income)\n",
"data[\"income\"].head()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
" 37499.5 180\n",
"NaN 169\n",
" 62499.5 135\n",
" 87499.5 133\n",
" 112499.5 111\n",
" 200000.0 80\n",
" 17499.5 68\n",
" 4999.5 66\n",
" 137499.5 49\n",
" 162499.5 40\n",
" 187499.5 27\n",
"Name: income, dtype: int64"
]
},
"execution_count": 62,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"income\"].value_counts(dropna=False)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Canned 502\n",
"Homemade 301\n",
"None 146\n",
"Other (please specify) 25\n",
"Name: What type of cranberry saucedo you typically have?, dtype: int64"
]
},
"execution_count": 63,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data[\"What type of cranberry saucedo you typically have?\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"homemade = data[data[\"What type of cranberry saucedo you typically have?\"] == \"Homemade\"]\n",
"canned = data[data[\"What type of cranberry saucedo you typically have?\"] == \"Canned\"]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"94878.1072874494\n",
"83823.40340909091\n"
]
}
],
"source": [
"print(homemade[\"income\"].mean())\n",
"print(canned[\"income\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 66,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped = data.groupby(\"What type of cranberry saucedo you typically have?\")\n",
"grouped"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'Canned': Int64Index([ 4, 6, 8, 11, 12, 15, 18, 19, 26, 27,\n",
" ...\n",
" 1040, 1041, 1042, 1044, 1045, 1046, 1047, 1051, 1054, 1057],\n",
" dtype='int64', length=502),\n",
" 'Homemade': Int64Index([ 2, 3, 5, 7, 13, 14, 16, 20, 21, 23,\n",
" ...\n",
" 1016, 1017, 1025, 1027, 1030, 1034, 1048, 1049, 1053, 1056],\n",
" dtype='int64', length=301),\n",
" 'None': Int64Index([ 0, 17, 24, 29, 34, 36, 40, 47, 49, 51,\n",
" ...\n",
" 980, 981, 997, 1015, 1018, 1031, 1037, 1043, 1050, 1055],\n",
" dtype='int64', length=146),\n",
" 'Other (please specify)': Int64Index([ 1, 9, 154, 216, 221, 233, 249, 265, 301, 336, 380,\n",
" 435, 444, 447, 513, 550, 749, 750, 784, 807, 860, 872,\n",
" 905, 1000, 1007],\n",
" dtype='int64')}"
]
},
"execution_count": 67,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped.groups"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"What type of cranberry saucedo you typically have?\n",
"Canned 502\n",
"Homemade 301\n",
"None 146\n",
"Other (please specify) 25\n",
"dtype: int64"
]
},
"execution_count": 68,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped.size()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"What type of cranberry saucedo you typically have?\n",
"Canned 83823.403409\n",
"Homemade 94878.107287\n",
"None 78886.084034\n",
"Other (please specify) 86629.978261\n",
"Name: income, dtype: float64"
]
},
"execution_count": 69,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped[\"income\"].agg(np.mean)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" RespondentID | \n",
" gender | \n",
" income | \n",
"
\n",
" \n",
" What type of cranberry saucedo you typically have? | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Canned | \n",
" 4.336699e+09 | \n",
" 0.552846 | \n",
" 83823.403409 | \n",
"
\n",
" \n",
" Homemade | \n",
" 4.336792e+09 | \n",
" 0.533101 | \n",
" 94878.107287 | \n",
"
\n",
" \n",
" None | \n",
" 4.336765e+09 | \n",
" 0.517483 | \n",
" 78886.084034 | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 4.336763e+09 | \n",
" 0.640000 | \n",
" 86629.978261 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 70,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped.agg(np.mean)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 71,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "8bf1c38f22f20f81ebe8d3e3dd5526f5bf742ec1"
},
"metadata": {
"image/png": {
"height": 365,
"width": 390
}
}
}
],
"source": [
"%matplotlib inline\n",
"\n",
"sauce = grouped.agg(np.mean)\n",
"sauce[\"income\"].plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" RespondentID | \n",
" gender | \n",
" income | \n",
"
\n",
" \n",
" What type of cranberry saucedo you typically have? | \n",
" What is typically the main dish at your Thanksgiving dinner? | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Canned | \n",
" Chicken | \n",
" 4.336354e+09 | \n",
" 0.333333 | \n",
" 80999.600000 | \n",
"
\n",
" \n",
" Ham/Pork | \n",
" 4.336757e+09 | \n",
" 0.642857 | \n",
" 77499.535714 | \n",
"
\n",
" \n",
" I don't know | \n",
" 4.335987e+09 | \n",
" 0.000000 | \n",
" 4999.500000 | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 4.336682e+09 | \n",
" 1.000000 | \n",
" 53213.785714 | \n",
"
\n",
" \n",
" Roast beef | \n",
" 4.336254e+09 | \n",
" 0.571429 | \n",
" 25499.500000 | \n",
"
\n",
" \n",
" Tofurkey | \n",
" 4.337157e+09 | \n",
" 0.714286 | \n",
" 100713.857143 | \n",
"
\n",
" \n",
" Turkey | \n",
" 4.336705e+09 | \n",
" 0.544444 | \n",
" 85242.682045 | \n",
"
\n",
" \n",
" Homemade | \n",
" Chicken | \n",
" 4.336540e+09 | \n",
" 0.750000 | \n",
" 19999.500000 | \n",
"
\n",
" \n",
" Ham/Pork | \n",
" 4.337253e+09 | \n",
" 0.250000 | \n",
" 96874.625000 | \n",
"
\n",
" \n",
" I don't know | \n",
" 4.336084e+09 | \n",
" 1.000000 | \n",
" NaN | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 4.336863e+09 | \n",
" 0.600000 | \n",
" 55356.642857 | \n",
"
\n",
" \n",
" Roast beef | \n",
" 4.336174e+09 | \n",
" 0.000000 | \n",
" 33749.500000 | \n",
"
\n",
" \n",
" Tofurkey | \n",
" 4.336790e+09 | \n",
" 0.666667 | \n",
" 57916.166667 | \n",
"
\n",
" \n",
" Turducken | \n",
" 4.337475e+09 | \n",
" 0.500000 | \n",
" 200000.000000 | \n",
"
\n",
" \n",
" Turkey | \n",
" 4.336791e+09 | \n",
" 0.531008 | \n",
" 97690.147982 | \n",
"
\n",
" \n",
" None | \n",
" Chicken | \n",
" 4.336151e+09 | \n",
" 0.500000 | \n",
" 11249.500000 | \n",
"
\n",
" \n",
" Ham/Pork | \n",
" 4.336680e+09 | \n",
" 0.444444 | \n",
" 61249.500000 | \n",
"
\n",
" \n",
" I don't know | \n",
" 4.336412e+09 | \n",
" 0.500000 | \n",
" 33749.500000 | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 4.336688e+09 | \n",
" 0.600000 | \n",
" 119106.678571 | \n",
"
\n",
" \n",
" Roast beef | \n",
" 4.337424e+09 | \n",
" 0.000000 | \n",
" 162499.500000 | \n",
"
\n",
" \n",
" Tofurkey | \n",
" 4.336950e+09 | \n",
" 0.500000 | \n",
" 112499.500000 | \n",
"
\n",
" \n",
" Turducken | \n",
" 4.336739e+09 | \n",
" 0.000000 | \n",
" NaN | \n",
"
\n",
" \n",
" Turkey | \n",
" 4.336784e+09 | \n",
" 0.523364 | \n",
" 74606.275281 | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" Ham/Pork | \n",
" 4.336465e+09 | \n",
" 1.000000 | \n",
" 87499.500000 | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 4.337335e+09 | \n",
" 0.000000 | \n",
" 124999.666667 | \n",
"
\n",
" \n",
" Tofurkey | \n",
" 4.336122e+09 | \n",
" 1.000000 | \n",
" 37499.500000 | \n",
"
\n",
" \n",
" Turkey | \n",
" 4.336724e+09 | \n",
" 0.700000 | \n",
" 82916.194444 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 72,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped = data.groupby([\"What type of cranberry saucedo you typically have?\", \"What is typically the main dish at your Thanksgiving dinner?\"])\n",
"grouped.agg(np.mean)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" mean | \n",
" sum | \n",
" std | \n",
"
\n",
" \n",
" What type of cranberry saucedo you typically have? | \n",
" What is typically the main dish at your Thanksgiving dinner? | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Canned | \n",
" Chicken | \n",
" 80999.600000 | \n",
" 404998.0 | \n",
" 75779.481062 | \n",
"
\n",
" \n",
" Ham/Pork | \n",
" 77499.535714 | \n",
" 1084993.5 | \n",
" 56645.063944 | \n",
"
\n",
" \n",
" I don't know | \n",
" 4999.500000 | \n",
" 4999.5 | \n",
" NaN | \n",
"
\n",
" \n",
" Other (please specify) | \n",
" 53213.785714 | \n",
" 372496.5 | \n",
" 29780.946290 | \n",
"
\n",
" \n",
" Roast beef | \n",
" 25499.500000 | \n",
" 127497.5 | \n",
" 24584.039538 | \n",
"
\n",
" \n",
" Tofurkey | \n",
" 100713.857143 | \n",
" 704997.0 | \n",
" 61351.484439 | \n",
"
\n",
" \n",
" Turkey | \n",
" 85242.682045 | \n",
" 34182315.5 | \n",
" 55687.436102 | \n",
"
\n",
" \n",
" Homemade | \n",
" Chicken | \n",
" 19999.500000 | \n",
" 59998.5 | \n",
" 16393.596311 | \n",
"
\n",
" \n",
" Ham/Pork | \n",
" 96874.625000 | \n",
" 387498.5 | \n",
" 77308.452805 | \n",
"
\n",
" \n",
" I don't know | \n",
" NaN | \n",
" 0.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 73,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped[\"income\"].agg([np.mean, np.sum, np.std]).head(10)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"What type of cranberry saucedo you typically have? What is typically the main dish at your Thanksgiving dinner?\n",
"Canned Chicken 6\n",
" Ham/Pork 15\n",
" I don't know 2\n",
" Other (please specify) 7\n",
" Roast beef 7\n",
" Tofurkey 7\n",
" Turkey 458\n",
"Homemade Chicken 4\n",
" Ham/Pork 4\n",
" I don't know 1\n",
" Other (please specify) 10\n",
" Roast beef 3\n",
" Tofurkey 6\n",
" Turducken 2\n",
" Turkey 271\n",
"None Chicken 2\n",
" Ham/Pork 9\n",
" I don't know 2\n",
" Other (please specify) 15\n",
" Roast beef 1\n",
" Tofurkey 6\n",
" Turducken 1\n",
" Turkey 110\n",
"Other (please specify) Ham/Pork 1\n",
" Other (please specify) 3\n",
" Tofurkey 1\n",
" Turkey 20\n",
"dtype: int64"
]
},
"execution_count": 74,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped.size()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"How would you describe where you live?\n",
"Rural 216\n",
"Suburban 496\n",
"Urban 236\n",
"Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64"
]
},
"execution_count": 75,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped = data.groupby(\"How would you describe where you live?\")[\"What is typically the main dish at your Thanksgiving dinner?\"]\n",
"grouped.size()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"How would you describe where you live? \n",
"Rural Turkey 189\n",
" Other (please specify) 9\n",
" Ham/Pork 7\n",
" I don't know 3\n",
" Tofurkey 3\n",
" Turducken 2\n",
" Chicken 2\n",
" Roast beef 1\n",
"Suburban Turkey 449\n",
" Ham/Pork 17\n",
" Other (please specify) 13\n",
" Tofurkey 9\n",
" Roast beef 3\n",
" Chicken 3\n",
" Turducken 1\n",
" I don't know 1\n",
"Urban Turkey 198\n",
" Other (please specify) 13\n",
" Tofurkey 8\n",
" Chicken 7\n",
" Roast beef 6\n",
" Ham/Pork 4\n",
"Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64"
]
},
"execution_count": 76,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"grouped.apply(lambda x:x.value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (Anaconda)",
"language": "python",
"name": "anaconda3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}