{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"# import matplotlib.pyplot as plt\n",
"# import seaborn as sns\n",
"\n",
"data_dir = './data'"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" eid | \n",
" first | \n",
" last | \n",
" mail | \n",
" seniority | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A0001 | \n",
" Tom | \n",
" Lee | \n",
" tom.lee@mail.com | \n",
" 7 | \n",
"
\n",
" \n",
" 1 | \n",
" A0003 | \n",
" John | \n",
" Huang | \n",
" john.huang@mail.com | \n",
" 7 | \n",
"
\n",
" \n",
" 2 | \n",
" A0030 | \n",
" Jennie | \n",
" Zhang | \n",
" hennie.zhang@mail.cim | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" A0070 | \n",
" Alex | \n",
" Huang | \n",
" alex.huang@mail.com | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" eid first last mail seniority\n",
"0 A0001 Tom Lee tom.lee@mail.com 7\n",
"1 A0003 John Huang john.huang@mail.com 7\n",
"2 A0030 Jennie Zhang hennie.zhang@mail.cim 3\n",
"3 A0070 Alex Huang alex.huang@mail.com 1"
]
},
"execution_count": 19,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"people = {\n",
" 'eid': ['A0001', 'A0003', 'A0030', 'A0070'],\n",
" 'first': ['Tom', 'John', 'Jennie', 'Alex'],\n",
" 'last': ['Lee', 'Huang', 'Zhang', 'Huang'],\n",
" 'mail': ['tom.lee@mail.com', 'john.huang@mail.com', 'hennie.zhang@mail.cim', 'alex.huang@mail.com'],\n",
" 'seniority': [7, 7, 3, 1]\n",
"}\n",
"\n",
"df = pd.DataFrame(people)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 4 entries, 0 to 3\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 eid 4 non-null object\n",
" 1 first 4 non-null object\n",
" 2 last 4 non-null object\n",
" 3 mail 4 non-null object\n",
" 4 seniority 4 non-null int64 \n",
"dtypes: int64(1), object(4)\n",
"memory usage: 288.0+ bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" seniority | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 4.0 | \n",
"
\n",
" \n",
" mean | \n",
" 4.5 | \n",
"
\n",
" \n",
" std | \n",
" 3.0 | \n",
"
\n",
" \n",
" min | \n",
" 1.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.5 | \n",
"
\n",
" \n",
" 50% | \n",
" 5.0 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.0 | \n",
"
\n",
" \n",
" max | \n",
" 7.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" seniority\n",
"count 4.0\n",
"mean 4.5\n",
"std 3.0\n",
"min 1.0\n",
"25% 2.5\n",
"50% 5.0\n",
"75% 7.0\n",
"max 7.0"
]
},
"execution_count": 21,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(pandas.core.series.Series, pandas.core.series.Series)"
]
},
"execution_count": 22,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"type(df['eid']) , type(df.iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" eid | \n",
" first | \n",
" last | \n",
" mail | \n",
" seniority | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" A0030 | \n",
" Jennie | \n",
" Zhang | \n",
" hennie.zhang@mail.cim | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" A0070 | \n",
" Alex | \n",
" Huang | \n",
" alex.huang@mail.com | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" eid first last mail seniority\n",
"2 A0030 Jennie Zhang hennie.zhang@mail.cim 3\n",
"3 A0070 Alex Huang alex.huang@mail.com 1"
]
},
"execution_count": 23,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df.seniority <= 4]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" last | \n",
" mail | \n",
" seniority | \n",
"
\n",
" \n",
" eid | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" A0001 | \n",
" Tom | \n",
" Lee | \n",
" tom.lee@mail.com | \n",
" 7 | \n",
"
\n",
" \n",
" A0003 | \n",
" John | \n",
" Huang | \n",
" john.huang@mail.com | \n",
" 7 | \n",
"
\n",
" \n",
" A0030 | \n",
" Jennie | \n",
" Zhang | \n",
" hennie.zhang@mail.cim | \n",
" 3 | \n",
"
\n",
" \n",
" A0070 | \n",
" Alex | \n",
" Huang | \n",
" alex.huang@mail.com | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first last mail seniority\n",
"eid \n",
"A0001 Tom Lee tom.lee@mail.com 7\n",
"A0003 John Huang john.huang@mail.com 7\n",
"A0030 Jennie Zhang hennie.zhang@mail.cim 3\n",
"A0070 Alex Huang alex.huang@mail.com 1"
]
},
"execution_count": 24,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.set_index('eid') # don't forget inplace"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" eid | \n",
" first | \n",
" last | \n",
" mail | \n",
" seniority | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" A0030 | \n",
" Jennie | \n",
" Zhang | \n",
" hennie.zhang@mail.cim | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" A0070 | \n",
" Alex | \n",
" Huang | \n",
" alex.huang@mail.com | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" eid first last mail seniority\n",
"2 A0030 Jennie Zhang hennie.zhang@mail.cim 3\n",
"3 A0070 Alex Huang alex.huang@mail.com 1"
]
},
"execution_count": 25,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df.seniority < 5]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'tom.lee@mail.com'"
]
},
"execution_count": 26,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[0, 3]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"del df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Car | \n",
" Color | \n",
" Price | \n",
" MPG | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Jerry | \n",
" Honda | \n",
" red | \n",
" 8000 | \n",
" 14 | \n",
"
\n",
" \n",
" 1 | \n",
" John | \n",
" Honda | \n",
" red | \n",
" 7500 | \n",
" 14 | \n",
"
\n",
" \n",
" 2 | \n",
" Nancy | \n",
" Honda | \n",
" blue | \n",
" 11000 | \n",
" 16 | \n",
"
\n",
" \n",
" 3 | \n",
" Greg | \n",
" Ford | \n",
" gray | \n",
" 7000 | \n",
" 16 | \n",
"
\n",
" \n",
" 4 | \n",
" Brian | \n",
" Ford | \n",
" white | \n",
" 8500 | \n",
" 15 | \n",
"
\n",
" \n",
" 5 | \n",
" Jen | \n",
" Ford | \n",
" black | \n",
" 12500 | \n",
" 20 | \n",
"
\n",
" \n",
" 6 | \n",
" Sandy | \n",
" Ford | \n",
" black | \n",
" 12500 | \n",
" 20 | \n",
"
\n",
" \n",
" 7 | \n",
" Gabriel | \n",
" Ford | \n",
" black | \n",
" 13950 | \n",
" 20 | \n",
"
\n",
" \n",
" 8 | \n",
" Will | \n",
" Toyota | \n",
" green | \n",
" 300 | \n",
" 10 | \n",
"
\n",
" \n",
" 9 | \n",
" Laura | \n",
" Toyota | \n",
" red | \n",
" 1000 | \n",
" 12 | \n",
"
\n",
" \n",
" 10 | \n",
" Mel | \n",
" BMW | \n",
" black | \n",
" 50000 | \n",
" 35 | \n",
"
\n",
" \n",
" 11 | \n",
" Steve | \n",
" Volvo | \n",
" blue | \n",
" 1000 | \n",
" 15 | \n",
"
\n",
" \n",
" 12 | \n",
" Chris | \n",
" Volvo | \n",
" blue | \n",
" 250 | \n",
" 15 | \n",
"
\n",
" \n",
" 13 | \n",
" Sandra | \n",
" Volvo | \n",
" brown | \n",
" 100 | \n",
" 12 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Car Color Price MPG\n",
"0 Jerry Honda red 8000 14\n",
"1 John Honda red 7500 14\n",
"2 Nancy Honda blue 11000 16\n",
"3 Greg Ford gray 7000 16\n",
"4 Brian Ford white 8500 15\n",
"5 Jen Ford black 12500 20\n",
"6 Sandy Ford black 12500 20\n",
"7 Gabriel Ford black 13950 20\n",
"8 Will Toyota green 300 10\n",
"9 Laura Toyota red 1000 12\n",
"10 Mel BMW black 50000 35\n",
"11 Steve Volvo blue 1000 15\n",
"12 Chris Volvo blue 250 15\n",
"13 Sandra Volvo brown 100 12"
]
},
"execution_count": 8,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data_source = os.path.join(data_dir, 'car_data.csv')\n",
"df = pd.read_csv(data_source)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" FileName | \n",
" Date | \n",
" SegFile | \n",
" b | \n",
" e | \n",
" LBE | \n",
" LB | \n",
" AC | \n",
" FM | \n",
" UC | \n",
" ... | \n",
" C | \n",
" D | \n",
" E | \n",
" AD | \n",
" DE | \n",
" LD | \n",
" FS | \n",
" SUSP | \n",
" CLASS | \n",
" NSP | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Variab10.txt | \n",
" 1996-12-01 | \n",
" CTG0001.txt | \n",
" 240.0 | \n",
" 357.0 | \n",
" 120.0 | \n",
" 120.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 9.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Fmcs_1.txt | \n",
" 1996-05-03 | \n",
" CTG0002.txt | \n",
" 5.0 | \n",
" 632.0 | \n",
" 132.0 | \n",
" 132.0 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Fmcs_1.txt | \n",
" 1996-05-03 | \n",
" CTG0003.txt | \n",
" 177.0 | \n",
" 779.0 | \n",
" 133.0 | \n",
" 133.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Fmcs_1.txt | \n",
" 1996-05-03 | \n",
" CTG0004.txt | \n",
" 411.0 | \n",
" 1192.0 | \n",
" 134.0 | \n",
" 134.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 6.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 5 | \n",
" Fmcs_1.txt | \n",
" 1996-05-03 | \n",
" CTG0005.txt | \n",
" 533.0 | \n",
" 1147.0 | \n",
" 132.0 | \n",
" 132.0 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 40 columns
\n",
"
"
],
"text/plain": [
" FileName Date SegFile b e LBE LB AC \\\n",
"1 Variab10.txt 1996-12-01 CTG0001.txt 240.0 357.0 120.0 120.0 0.0 \n",
"2 Fmcs_1.txt 1996-05-03 CTG0002.txt 5.0 632.0 132.0 132.0 4.0 \n",
"3 Fmcs_1.txt 1996-05-03 CTG0003.txt 177.0 779.0 133.0 133.0 2.0 \n",
"4 Fmcs_1.txt 1996-05-03 CTG0004.txt 411.0 1192.0 134.0 134.0 2.0 \n",
"5 Fmcs_1.txt 1996-05-03 CTG0005.txt 533.0 1147.0 132.0 132.0 4.0 \n",
"\n",
" FM UC ... C D E AD DE LD FS SUSP CLASS NSP \n",
"1 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 9.0 2.0 \n",
"2 0.0 4.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0 \n",
"3 0.0 5.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0 \n",
"4 0.0 6.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0 \n",
"5 0.0 5.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 1.0 \n",
"\n",
"[5 rows x 40 columns]"
]
},
"execution_count": 13,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"xls_source = 'data/CTG.xls'\n",
"df = pd.read_excel(xls_source, sheet_name='Raw Data')\n",
"df.dropna(inplace=True)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"df.to_excel(os.path.join(data_dir, 'wq_0121_exercise.xls'))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 14.1 s, sys: 69.3 ms, total: 14.1 s\n",
"Wall time: 14.7 s\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" order_id | \n",
" order_date | \n",
" ship_date | \n",
" ship_mode | \n",
" customer_name | \n",
" segment | \n",
" state | \n",
" country | \n",
" market | \n",
" region | \n",
" ... | \n",
" category | \n",
" sub_category | \n",
" product_name | \n",
" sales | \n",
" quantity | \n",
" discount | \n",
" profit | \n",
" shipping_cost | \n",
" order_priority | \n",
" year | \n",
"
\n",
" \n",
" \n",
" \n",
" 51285 | \n",
" CA-2014-115427 | \n",
" 2014-12-31 | \n",
" 2015-01-04 | \n",
" Standard Class | \n",
" Erica Bern | \n",
" Corporate | \n",
" California | \n",
" United States | \n",
" US | \n",
" West | \n",
" ... | \n",
" Office Supplies | \n",
" Binders | \n",
" Cardinal Slant-D Ring Binder, Heavy Gauge Vinyl | \n",
" 13.904 | \n",
" 2 | \n",
" 0.2 | \n",
" 4.5188 | \n",
" 0.890 | \n",
" Medium | \n",
" 2014 | \n",
"
\n",
" \n",
" 51286 | \n",
" MO-2014-2560 | \n",
" 2014-12-31 | \n",
" 2015-01-05 | \n",
" Standard Class | \n",
" Liz Preis | \n",
" Consumer | \n",
" Souss-Massa-Draâ | \n",
" Morocco | \n",
" Africa | \n",
" Africa | \n",
" ... | \n",
" Office Supplies | \n",
" Binders | \n",
" Wilson Jones Hole Reinforcements, Clear | \n",
" 3.990 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.4200 | \n",
" 0.490 | \n",
" Medium | \n",
" 2014 | \n",
"
\n",
" \n",
" 51287 | \n",
" MX-2014-110527 | \n",
" 2014-12-31 | \n",
" 2015-01-02 | \n",
" Second Class | \n",
" Charlotte Melton | \n",
" Consumer | \n",
" Managua | \n",
" Nicaragua | \n",
" LATAM | \n",
" Central | \n",
" ... | \n",
" Office Supplies | \n",
" Labels | \n",
" Hon Color Coded Labels, 5000 Label Set | \n",
" 26.400 | \n",
" 3 | \n",
" 0.0 | \n",
" 12.3600 | \n",
" 0.350 | \n",
" Medium | \n",
" 2014 | \n",
"
\n",
" \n",
" 51288 | \n",
" MX-2014-114783 | \n",
" 2014-12-31 | \n",
" 2015-01-06 | \n",
" Standard Class | \n",
" Tamara Dahlen | \n",
" Consumer | \n",
" Chihuahua | \n",
" Mexico | \n",
" LATAM | \n",
" North | \n",
" ... | \n",
" Office Supplies | \n",
" Labels | \n",
" Hon Legal Exhibit Labels, Alphabetical | \n",
" 7.120 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.5600 | \n",
" 0.199 | \n",
" Medium | \n",
" 2014 | \n",
"
\n",
" \n",
" 51289 | \n",
" CA-2014-156720 | \n",
" 2014-12-31 | \n",
" 2015-01-04 | \n",
" Standard Class | \n",
" Jill Matthias | \n",
" Consumer | \n",
" Colorado | \n",
" United States | \n",
" US | \n",
" West | \n",
" ... | \n",
" Office Supplies | \n",
" Fasteners | \n",
" Bagged Rubber Bands | \n",
" 3.024 | \n",
" 3 | \n",
" 0.2 | \n",
" -0.6048 | \n",
" 0.170 | \n",
" Medium | \n",
" 2014 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 21 columns
\n",
"
"
],
"text/plain": [
" order_id order_date ship_date ship_mode customer_name \\\n",
"51285 CA-2014-115427 2014-12-31 2015-01-04 Standard Class Erica Bern \n",
"51286 MO-2014-2560 2014-12-31 2015-01-05 Standard Class Liz Preis \n",
"51287 MX-2014-110527 2014-12-31 2015-01-02 Second Class Charlotte Melton \n",
"51288 MX-2014-114783 2014-12-31 2015-01-06 Standard Class Tamara Dahlen \n",
"51289 CA-2014-156720 2014-12-31 2015-01-04 Standard Class Jill Matthias \n",
"\n",
" segment state country market region ... \\\n",
"51285 Corporate California United States US West ... \n",
"51286 Consumer Souss-Massa-Draâ Morocco Africa Africa ... \n",
"51287 Consumer Managua Nicaragua LATAM Central ... \n",
"51288 Consumer Chihuahua Mexico LATAM North ... \n",
"51289 Consumer Colorado United States US West ... \n",
"\n",
" category sub_category \\\n",
"51285 Office Supplies Binders \n",
"51286 Office Supplies Binders \n",
"51287 Office Supplies Labels \n",
"51288 Office Supplies Labels \n",
"51289 Office Supplies Fasteners \n",
"\n",
" product_name sales quantity \\\n",
"51285 Cardinal Slant-D Ring Binder, Heavy Gauge Vinyl 13.904 2 \n",
"51286 Wilson Jones Hole Reinforcements, Clear 3.990 1 \n",
"51287 Hon Color Coded Labels, 5000 Label Set 26.400 3 \n",
"51288 Hon Legal Exhibit Labels, Alphabetical 7.120 1 \n",
"51289 Bagged Rubber Bands 3.024 3 \n",
"\n",
" discount profit shipping_cost order_priority year \n",
"51285 0.2 4.5188 0.890 Medium 2014 \n",
"51286 0.0 0.4200 0.490 Medium 2014 \n",
"51287 0.0 12.3600 0.350 Medium 2014 \n",
"51288 0.0 0.5600 0.199 Medium 2014 \n",
"51289 0.2 -0.6048 0.170 Medium 2014 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 15,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"data_source = 'data/superstore_sales.xlsx'\n",
"df = pd.read_excel(data_source, sheet_name='Orders')\n",
"# df = pd.read_excel(data_source, sheet_name='Orders', engine='openpyxl')\n",
"df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sales | \n",
" quantity | \n",
" discount | \n",
" profit | \n",
" shipping_cost | \n",
" year | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 51290.000000 | \n",
" 51290.000000 | \n",
" 51290.000000 | \n",
" 51290.000000 | \n",
" 51290.000000 | \n",
" 51290.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 246.490581 | \n",
" 3.476545 | \n",
" 0.142908 | \n",
" 28.641740 | \n",
" 26.375818 | \n",
" 2012.777208 | \n",
"
\n",
" \n",
" std | \n",
" 487.565361 | \n",
" 2.278766 | \n",
" 0.212280 | \n",
" 174.424113 | \n",
" 57.296810 | \n",
" 1.098931 | \n",
"
\n",
" \n",
" min | \n",
" 0.444000 | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" -6599.978000 | \n",
" 0.002000 | \n",
" 2011.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 30.758625 | \n",
" 2.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 2.610000 | \n",
" 2012.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 85.053000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
" 9.240000 | \n",
" 7.790000 | \n",
" 2013.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 251.053200 | \n",
" 5.000000 | \n",
" 0.200000 | \n",
" 36.810000 | \n",
" 24.450000 | \n",
" 2014.000000 | \n",
"
\n",
" \n",
" max | \n",
" 22638.480000 | \n",
" 14.000000 | \n",
" 0.850000 | \n",
" 8399.976000 | \n",
" 933.570000 | \n",
" 2014.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sales quantity discount profit shipping_cost \\\n",
"count 51290.000000 51290.000000 51290.000000 51290.000000 51290.000000 \n",
"mean 246.490581 3.476545 0.142908 28.641740 26.375818 \n",
"std 487.565361 2.278766 0.212280 174.424113 57.296810 \n",
"min 0.444000 1.000000 0.000000 -6599.978000 0.002000 \n",
"25% 30.758625 2.000000 0.000000 0.000000 2.610000 \n",
"50% 85.053000 3.000000 0.000000 9.240000 7.790000 \n",
"75% 251.053200 5.000000 0.200000 36.810000 24.450000 \n",
"max 22638.480000 14.000000 0.850000 8399.976000 933.570000 \n",
"\n",
" year \n",
"count 51290.000000 \n",
"mean 2012.777208 \n",
"std 1.098931 \n",
"min 2011.000000 \n",
"25% 2012.000000 \n",
"50% 2013.000000 \n",
"75% 2014.000000 \n",
"max 2014.000000 "
]
},
"execution_count": 16,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (system-wide)",
"language": "python",
"metadata": {
"cocalc": {
"description": "Python 3 programming language",
"priority": 100,
"url": "https://www.python.org/"
}
},
"name": "python3",
"resource_dir": "/ext/jupyter/kernels/python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}