Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 5313
Image: default
Kernel: Python 3 (system-wide)
import numpy as np import matplotlib.pyplot as plt import pandas as pd
# Read the chocolate-consumption CSV. The file has no header row, so the
# column names are supplied explicitly.
with open("Chocolate-Consumption.csv", 'r') as file:
    consumption = pd.read_csv(
        file,
        header=None,
        names=['Country', 'Consumption'],
    )
# Show the first 3 lines of the file consumption.head(3)
Country Consumption
0 Germany 11.1
1 Switzerland 10.3
2 Estonia 8.8
# Show the last 3 lines of the file consumption.tail(3)
Country Consumption
11 Italy 3.1
12 Greece 3.0
13 Hungary 2.9
# Sort by the country name consumption.sort_values(by = ['Country'])
Country Consumption
7 Belgium 6.4
6 Croatia 6.6
5 Denmark 6.9
2 Estonia 8.8
4 Finland 7.4
0 Germany 11.1
12 Greece 3.0
13 Hungary 2.9
11 Italy 3.1
8 Lithuania 4.7
10 Portugal 3.6
9 Spain 4.5
1 Switzerland 10.3
3 UK 8.1
# Bar chart of chocolate consumption, one bar per country.
consumption.plot.bar(x="Country", y="Consumption")
plt.show()
Image in a Jupyter notebook
# Read the raw Nobel table as a list of lines (each keeps its trailing
# newline). The data contains a non-ASCII em dash for unranked entities, so
# the encoding is stated explicitly rather than left to the platform default.
# NOTE(review): assumes nobels.txt is UTF-8 encoded - confirm.
with open("nobels.txt", "r", encoding="utf-8") as file:
    prizes_list = file.readlines()
prizes_list
['Rank\tEntity\tNobel\n', 'laureates[1]\tPopulation\n', '(2018)[2]\tLaureates/\n', '10 million\n', '—\t Faroe Islands\t1\t49,489\t202.065\n', '1\t Saint Lucia\t2\t179,667\t111.317\n', '2\t Luxembourg\t2\t590,321\t33.880\n', '3\t Switzerland\t28\t8,544,034\t32.771\n', '4\t Sweden\t30\t9,982,709\t30.052\n', '5\t Iceland\t1\t337,780\t29.605\n', '6\t Austria\t22\t8,751,820\t25.138\n', '7\t Denmark\t14\t5,754,356\t24.329\n', '8\t Norway\t13\t5,353,363\t24.284\n', '9\t United Kingdom\t133\t66,573,504\t19.429\n', '10\t East Timor\t2\t1,324,094\t15.105\n', '11\t Ireland\t7\t4,803,748\t14.572\n', '12\t Israel\t12\t8,452,841\t14.196\n', '13\t Hungary\t13\t9,688,847\t13.417\n', '13\t Germany\t108\t82,293,457\t13.245\n', '15\t United States\t383\t326,766,748\t11.721\n', '16\t Netherlands\t21\t17,084,459\t11.707\n', '17\t France\t70\t65,233,271\t10.664\n', '—\t European Union[3]\t378\t444,697,104\t8.005\n', '18\t Finland\t5\t5,542,517\t9.021\n', '19\t Belgium\t10\t11,498,519\t8.697\n', '20\t Cyprus\t1\t1,189,085\t8.410\n', '21\t Trinidad and Tobago\t1\t1,372,598\t7.285\n', '22\t Canada\t25\t36,953,765\t6.765\n', '23\t New Zealand\t3\t4,749,598\t6.316\n', '24\t Bosnia and Herzegovina\t2\t3,503,554\t5.708\n', '25\t Latvia\t1\t1,929,938\t5.182\n', '26\t Poland\t19\t38,104,832\t4.986\n', '27\t Australia\t12\t24,772,247\t4.844\n', '28\t Slovenia\t1\t2,081,260\t4.805\n', '29\t North Macedonia\t1\t2,085,051\t4.796\n', '30\t Czech Republic\t5\t10,625,250\t4.706\n', '31\t Liberia\t2\t4,853,516\t4.121\n', '32\t Lithuania\t1\t2,876,475\t3.476\n', '33\t Italy\t20\t59,290,969\t3.373\n', '—\t Tibet[4]\t1\t3,310,836\t3.020\n', '34\t Croatia\t1\t4,164,783\t2.401\n', '35\t Japan\t28\t127,185,332\t2.202\n', '36\t Belarus\t2\t9,452,113\t2.116\n', '37\t Romania\t4\t19,580,634\t2.043\n', '38\t Costa Rica\t1\t4,953,199\t2.019\n', '39\t Palestine\t1\t5,052,776\t1.979\n', '40\t Portugal\t2\t10,291,196\t1.943\n', '41\t Greece\t2\t11,142,161\t1.795\n', '42\t South Africa\t10\t57,398,421\t1.742\n', '43\t 
Spain\t8\t46,397,452\t1.724\n', '44\t Russia\t23\t143,964,709\t1.598\n', '45\t Bulgaria\t1\t7,036,848\t1.421\n', '—\t Hong Kong\t1\t7,428,887\t1.346\n', '—\t World[5]\t919\t7,632,819,325\t1.204\n', '46\t Guatemala\t2\t17,245,346\t1.160\n', '47\t Argentina\t5\t44,688,864\t1.119\n', '48\t Chile\t2\t18,197,209\t1.099\n', '49\t Azerbaijan\t1\t9,923,914\t1.008\n', '50\t Algeria\t2\t42,008,054\t0.476\n', '51\t Ukraine\t2\t44,009,214\t0.454\n', '52\t Taiwan\t1\t23,694,089\t0.422\n', '53\t Colombia\t2\t49,464,683\t0.404\n', '54\t Egypt\t4\t99,375,741\t0.403\n', '55\t South Korea\t2\t51,164,435\t0.391\n', '56\t Yemen\t1\t28,915,284\t0.346\n', '57\t Ghana\t1\t29,463,643\t0.339\n', '58\t Venezuela\t1\t32,381,221\t0.309\n', '59\t Peru\t1\t32,551,815\t0.307\n', '60\t Morocco\t1\t36,191,805\t0.276\n', '61\t Iraq\t1\t39,339,753\t0.254\n', '62\t Turkey\t2\t81,916,871\t0.244\n', '63\t Iran\t2\t82,011,735\t0.244\n', '64\t Mexico\t3\t130,759,074\t0.229\n', '65\t Kenya\t1\t50,950,879\t0.196\n', '66\t Myanmar\t1\t53,855,735\t0.186\n', '67\t DR Congo\t1\t84,004,989\t0.119\n', '68\t Vietnam\t1\t96,491,146\t0.104\n', '69\t Pakistan\t2\t200,813,818\t0.100\n', '70\t Ethiopia\t1\t109,224,410[6]\t0.092\n', '71\t India\t11\t1,354,051,854\t0.081\n', '72\t China\t9\t1,415,045,928\t0.064\n', '73\t Bangladesh\t1\t166,368,149\t0.060\n', '74\t Nigeria\t1\t195,875,237\t0.051']
print(*prizes_list)
Rank Entity Nobel laureates[1] Population (2018)[2] Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union[3] 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet[4] 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World[5] 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 
28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410[6] 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Reassemble the header line, step 1: the header is spread over the first
# four list entries. Strip trailing whitespace (the newline) from the first
# three; the fourth keeps its newline as the end of the header line.
prizes_list[:3] = [fragment.rstrip() for fragment in prizes_list[:3]]
prizes_list[:4]
['Rank\tEntity\tNobel', 'laureates[1]\tPopulation', '(2018)[2]\tLaureates/', '10 million\n']
# Reassemble the header line, step 2: join the four fragments with a space
# where the newlines used to be, followed by the untouched data lines.
prizes2 = [' '.join(prizes_list[:4]), *prizes_list[4:]]
print(*prizes2)
Rank Entity Nobel laureates[1] Population (2018)[2] Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union[3] 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet[4] 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World[5] 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 
28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410[6] 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Use a regular expression to remove the bracketed reference numbers
# (such as "[1]" or "[6]") from every line.
import re

# The pattern is a raw string: '\[' and '\d' are not valid string escape
# sequences, so a plain literal triggers an invalid-escape warning.
prizes3 = [re.sub(r'\[\d+\]', '', line) for line in prizes2]
print(*prizes3)
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 
Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# First method of dropping the aggregate rows: plain substring tests.
# A line is kept only if it mentions neither 'World' nor 'Europe'.
prizes4 = [
    line
    for line in prizes3
    if 'World' not in line and 'Europe' not in line
]
print(*prizes4)
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 
32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Second method: the same filter expressed as a regular expression.
prizes4 = [
    line
    for line in prizes3
    if re.search('(World)|(Europe)', line) is None
]
# Tidy the header: drop the space left after the slash when the header
# fragments were joined ("Laureates/ 10 million" -> "Laureates/10 million").
prizes4[0] = prizes4[0].replace('/ ', '/')
print(*prizes4)
Rank Entity Nobel laureates Population (2018) Laureates/10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 
32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Construct the DataFrame: the first line holds the tab-separated column
# headers, every remaining line is one tab-separated data row.
# The last field of each line (header included) still ends in a newline.
header, *rows = prizes4
prizes = pd.DataFrame(
    data=[row.split('\t') for row in rows],
    columns=header.split("\t"),
)
# head() displays the first 5 rows by default
prizes.head()
Rank Entity Nobel laureates Population (2018) Laureates/10 million\n
0 — Faroe Islands 1 49,489 202.065\n
1 1 Saint Lucia 2 179,667 111.317\n
2 2 Luxembourg 2 590,321 33.880\n
3 3 Switzerland 28 8,544,034 32.771\n
4 4 Sweden 30 9,982,709 30.052\n
# Get ("index, locate") item using only indices prizes.iloc[4, 1]
' Sweden'
# Get ("locate") item using category name prizes.loc[4, "Entity"]
' Sweden'
# Get all the data from that columns prizes.loc[:, "Entity"]
0 Faroe Islands 1 Saint Lucia 2 Luxembourg 3 Switzerland 4 Sweden ... 72 Ethiopia 73 India 74 China 75 Bangladesh 76 Nigeria Name: Entity, Length: 77, dtype: object
# Another way to get all data from Entity prizes['Entity']
0 Faroe Islands 1 Saint Lucia 2 Luxembourg 3 Switzerland 4 Sweden ... 72 Ethiopia 73 India 74 China 75 Bangladesh 76 Nigeria Name: Entity, Length: 77, dtype: object
# Same construction, but from prizes3, which still contains the aggregate
# rows (World, European Union) so they can be filtered with a mask instead.
prizes3a = pd.DataFrame(
    data=[line.split('\t') for line in prizes3[1:]],
    columns=prizes3[0].split("\t"),
)
prizes3a[16:20]
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million\n
16 16 Netherlands 21 17,084,459 11.707\n
17 17 France 70 65,233,271 10.664\n
18 — European Union 378 444,697,104 8.005\n
19 18 Finland 5 5,542,517 9.021\n
# Boolean mask that is True only for the European Union row. The entity
# names still carry their leading space at this point.
euro_mask = prizes3a["Entity"].eq(" European Union")
euro_mask[16:20]
16 False 17 False 18 True 19 False Name: Entity, dtype: bool
# Keep only the rows where the mask is True (just the European Union row).
prizes3b = prizes3a.loc[euro_mask]
prizes3b
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million\n
18 — European Union 378 444,697,104 8.005\n
# Invert the mask to keep every row except the European Union one.
prizes3c = prizes3a[~euro_mask]
prizes3c[16:20]
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million\n
16 16 Netherlands 21 17,084,459 11.707\n
17 17 France 70 65,233,271 10.664\n
19 18 Finland 5 5,542,517 9.021\n
20 19 Belgium 10 11,498,519 8.697\n
prizes.loc[4, "Entity"]
' Sweden'
# Working on a single item: strip the whitespace around one entity name.
prizes.at[4, "Entity"] = prizes.at[4, "Entity"].strip()
prizes.loc[4, "Entity"]
'Sweden'
# Working on the whole series: strip whitespace from every entity name.
prizes["Entity"] = prizes["Entity"].map(str.strip)
# The extra whitespace is now gone from the Entity column.
prizes.loc[6, "Entity"]
'Austria'
# Strip the trailing newline from every value in the last column.
# The column name itself still ends in a newline at this point.
prizes["Laureates/10 million\n"] = prizes["Laureates/10 million\n"].map(str.strip)
prizes.loc[5, "Laureates/10 million\n"]
'29.605'
prizes.head()
Rank Entity Nobel laureates Population (2018) Laureates/10 million\n
0 — Faroe Islands 1 49,489 202.065
1 1 Saint Lucia 2 179,667 111.317
2 2 Luxembourg 2 590,321 33.880
3 3 Switzerland 28 8,544,034 32.771
4 4 Sweden 30 9,982,709 30.052
prizes.columns[4]
'Laureates/10 million\n'
# Rename the fifth column to the same name without its trailing newline.
old_name = prizes.columns[4]
prizes.rename(columns={old_name: old_name.strip()}, inplace=True)
prizes.columns[4]
'Laureates/10 million'
prizes.head()
Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 — Faroe Islands 1 49,489 202.065
1 1 Saint Lucia 2 179,667 111.317
2 2 Luxembourg 2 590,321 33.880
3 3 Switzerland 28 8,544,034 32.771
4 4 Sweden 30 9,982,709 30.052
prizes.dtypes
Rank object Entity object Nobel laureates object Population (2018) object Laureates/10 million object dtype: object
# Work on a copy so the all-string DataFrame stays available, then convert
# the per-capita column from strings to numbers.
ratio_column = "Laureates/10 million"
prizesnums = prizes.copy()
prizesnums[ratio_column] = pd.to_numeric(prizesnums[ratio_column])
prizesnums.dtypes
Rank object Entity object Nobel laureates object Population (2018) object Laureates/10 million float64 dtype: object
# Remove the thousands separators from the population strings so they can
# later be converted to integers.
prizes["Population (2018)"] = prizes["Population (2018)"].map(
    lambda text: text.replace(',', '')
)
prizes["Population (2018)"]
0 49489 1 179667 2 590321 3 8544034 4 9982709 ... 72 109224410 73 1354051854 74 1415045928 75 166368149 76 195875237 Name: Population (2018), Length: 77, dtype: object
# prizesnums was copied from prizes before the thousands separators were
# removed, so its population strings still contain commas (e.g. "49,489")
# and pd.to_numeric raises ValueError on them. Strip the commas here first.
population_text = prizesnums["Population (2018)"].str.replace(",", "", regex=False)
prizesnums["Population (2018)"] = pd.to_numeric(population_text)

prizesnums.dtypes
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric() ValueError: Unable to parse string "49,489" During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-34-d44213833c91> in <module> ----> 1 prizesnums["Population (2018)"] = pd.to_numeric(prizesnums["Population (2018)"]) 2 3 prizesnums.dtypes /usr/local/lib/python3.8/dist-packages/pandas/core/tools/numeric.py in to_numeric(arg, errors, downcast) 150 coerce_numeric = errors not in ("ignore", "raise") 151 try: --> 152 values = lib.maybe_convert_numeric( 153 values, set(), coerce_numeric=coerce_numeric 154 ) pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric() ValueError: Unable to parse string "49,489" at position 0
# Convert all three numeric columns in one call. This starts from prizes,
# whose population strings already have their commas removed.
numeric_types = {
    "Nobel laureates": np.int64,
    "Population (2018)": np.int64,
    "Laureates/10 million": np.float64,
}
prizesnums = prizes.astype(numeric_types)
prizesnums.dtypes
Rank object Entity object Nobel laureates int64 Population (2018) int64 Laureates/10 million float64 dtype: object
# With no column arguments the bar plot tries to draw every numeric column.
prizesnums.plot(kind="bar")
plt.show()
Image in a Jupyter notebook
# One bar per entity showing its number of Nobel laureates.
prizesnums.plot.bar(x="Entity", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Order the entities from most to fewest laureates (in place), then plot.
prizesnums.sort_values(by="Nobel laureates", ascending=False, inplace=True)
prizesnums.plot.bar(x="Entity", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Mask out the smaller entries: keep only entities with more than 2 laureates.
more_than_two = prizesnums["Nobel laureates"] > 2
prizesnums[more_than_two].plot.bar(x="Entity", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Sort by the per-capita value (the orange bars in the chart below).
prizesnums.sort_values(by="Laureates/10 million", ascending=False, inplace=True)

# Compare two columns side by side, restricted to entities with more than
# 5 laureates per 10 million people.
above_five = prizesnums["Laureates/10 million"] > 5
prizesnums[above_five].plot.bar(
    x="Entity",
    y=["Nobel laureates", "Laureates/10 million"],
)
plt.show()
Image in a Jupyter notebook
# Scatter plot of laureate count against population for every entity.
prizesnums.plot.scatter(x="Population (2018)", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Same scatter plot, restricted to entities with fewer than 300 million people.
under_300_million = prizesnums["Population (2018)"] < 300000000
prizesnums[under_300_million].plot.scatter(x="Population (2018)", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Join the two tables on the country name. With the default join type only
# countries present in both tables are kept; validate="1:1" asks pandas to
# check that the join keys are unique on both sides.
merged = pd.merge(
    consumption,
    prizesnums,
    left_on="Country",
    right_on="Entity",
    validate="1:1",
)
merged
Country Consumption Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 Germany 11.1 13 Germany 108 82293457 13.245
1 Switzerland 10.3 3 Switzerland 28 8544034 32.771
2 Finland 7.4 18 Finland 5 5542517 9.021
3 Denmark 6.9 7 Denmark 14 5754356 24.329
4 Croatia 6.6 34 Croatia 1 4164783 2.401
5 Belgium 6.4 19 Belgium 10 11498519 8.697
6 Lithuania 4.7 32 Lithuania 1 2876475 3.476
7 Spain 4.5 43 Spain 8 46397452 1.724
8 Portugal 3.6 40 Portugal 2 10291196 1.943
9 Italy 3.1 33 Italy 20 59290969 3.373
10 Greece 3.0 41 Greece 2 11142161 1.795
11 Hungary 2.9 13 Hungary 13 9688847 13.417
# Compare the (rows, columns) shape of both inputs and of the merged result.
for frame in (prizesnums, consumption, merged):
    print(frame.shape)
(77, 5) (14, 2) (12, 7)
# Outer join: keep every row from both tables, filling the missing side with
# NaN, so that names failing to match can be spotted. sort=True orders the
# result by the join key.
merged = pd.merge(
    consumption,
    prizesnums,
    how="outer",
    left_on="Country",
    right_on="Entity",
    sort=True,
    validate="1:1",
)
merged
Country Consumption Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 NaN NaN 50 Algeria 2.0 42008054.0 0.476
1 NaN NaN 47 Argentina 5.0 44688864.0 1.119
2 NaN NaN 27 Australia 12.0 24772247.0 4.844
3 NaN NaN 6 Austria 22.0 8751820.0 25.138
4 NaN NaN 49 Azerbaijan 1.0 9923914.0 1.008
... ... ... ... ... ... ... ...
74 NaN NaN 9 United Kingdom 133.0 66573504.0 19.429
75 NaN NaN 15 United States 383.0 326766748.0 11.721
76 NaN NaN 58 Venezuela 1.0 32381221.0 0.309
77 NaN NaN 68 Vietnam 1.0 96491146.0 0.104
78 NaN NaN 56 Yemen 1.0 28915284.0 0.346

79 rows × 7 columns

merged.head(40)
Country Consumption Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 NaN NaN 50 Algeria 2.0 4.200805e+07 0.476
1 NaN NaN 47 Argentina 5.0 4.468886e+07 1.119
2 NaN NaN 27 Australia 12.0 2.477225e+07 4.844
3 NaN NaN 6 Austria 22.0 8.751820e+06 25.138
4 NaN NaN 49 Azerbaijan 1.0 9.923914e+06 1.008
5 NaN NaN 73 Bangladesh 1.0 1.663681e+08 0.060
6 NaN NaN 36 Belarus 2.0 9.452113e+06 2.116
7 Belgium 6.4 19 Belgium 10.0 1.149852e+07 8.697
8 NaN NaN 24 Bosnia and Herzegovina 2.0 3.503554e+06 5.708
9 NaN NaN 45 Bulgaria 1.0 7.036848e+06 1.421
10 NaN NaN 22 Canada 25.0 3.695376e+07 6.765
11 NaN NaN 48 Chile 2.0 1.819721e+07 1.099
12 NaN NaN 72 China 9.0 1.415046e+09 0.064
13 NaN NaN 53 Colombia 2.0 4.946468e+07 0.404
14 NaN NaN 38 Costa Rica 1.0 4.953199e+06 2.019
15 Croatia 6.6 34 Croatia 1.0 4.164783e+06 2.401
16 NaN NaN 20 Cyprus 1.0 1.189085e+06 8.410
17 NaN NaN 30 Czech Republic 5.0 1.062525e+07 4.706
18 NaN NaN 67 DR Congo 1.0 8.400499e+07 0.119
19 Denmark 6.9 7 Denmark 14.0 5.754356e+06 24.329
20 NaN NaN 10 East Timor 2.0 1.324094e+06 15.105
21 NaN NaN 54 Egypt 4.0 9.937574e+07 0.403
22 Estonia 8.8 NaN NaN NaN NaN NaN
23 NaN NaN 70 Ethiopia 1.0 1.092244e+08 0.092
24 NaN NaN — Faroe Islands 1.0 4.948900e+04 202.065
25 Finland 7.4 18 Finland 5.0 5.542517e+06 9.021
26 NaN NaN 17 France 70.0 6.523327e+07 10.664
27 Germany 11.1 13 Germany 108.0 8.229346e+07 13.245
28 NaN NaN 57 Ghana 1.0 2.946364e+07 0.339
29 Greece 3.0 41 Greece 2.0 1.114216e+07 1.795
30 NaN NaN 46 Guatemala 2.0 1.724535e+07 1.160
31 NaN NaN — Hong Kong 1.0 7.428887e+06 1.346
32 Hungary 2.9 13 Hungary 13.0 9.688847e+06 13.417
33 NaN NaN 5 Iceland 1.0 3.377800e+05 29.605
34 NaN NaN 71 India 11.0 1.354052e+09 0.081
35 NaN NaN 63 Iran 2.0 8.201174e+07 0.244
36 NaN NaN 61 Iraq 1.0 3.933975e+07 0.254
37 NaN NaN 11 Ireland 7.0 4.803748e+06 14.572
38 NaN NaN 12 Israel 12.0 8.452841e+06 14.196
39 Italy 3.1 33 Italy 20.0 5.929097e+07 3.373
merged.tail(40)
Country Consumption Rank Entity Nobel laureates Population (2018) Laureates/10 million
39 Italy 3.1 33 Italy 20.0 59290969.0 3.373
40 NaN NaN 35 Japan 28.0 127185332.0 2.202
41 NaN NaN 65 Kenya 1.0 50950879.0 0.196
42 NaN NaN 25 Latvia 1.0 1929938.0 5.182
43 NaN NaN 31 Liberia 2.0 4853516.0 4.121
44 Lithuania 4.7 32 Lithuania 1.0 2876475.0 3.476
45 NaN NaN 2 Luxembourg 2.0 590321.0 33.880
46 NaN NaN 64 Mexico 3.0 130759074.0 0.229
47 NaN NaN 60 Morocco 1.0 36191805.0 0.276
48 NaN NaN 66 Myanmar 1.0 53855735.0 0.186
49 NaN NaN 16 Netherlands 21.0 17084459.0 11.707
50 NaN NaN 23 New Zealand 3.0 4749598.0 6.316
51 NaN NaN 74 Nigeria 1.0 195875237.0 0.051
52 NaN NaN 29 North Macedonia 1.0 2085051.0 4.796
53 NaN NaN 8 Norway 13.0 5353363.0 24.284
54 NaN NaN 69 Pakistan 2.0 200813818.0 0.100
55 NaN NaN 39 Palestine 1.0 5052776.0 1.979
56 NaN NaN 59 Peru 1.0 32551815.0 0.307
57 NaN NaN 26 Poland 19.0 38104832.0 4.986
58 Portugal 3.6 40 Portugal 2.0 10291196.0 1.943
59 NaN NaN 37 Romania 4.0 19580634.0 2.043
60 NaN NaN 44 Russia 23.0 143964709.0 1.598
61 NaN NaN 1 Saint Lucia 2.0 179667.0 111.317
62 NaN NaN 28 Slovenia 1.0 2081260.0 4.805
63 NaN NaN 42 South Africa 10.0 57398421.0 1.742
64 NaN NaN 55 South Korea 2.0 51164435.0 0.391
65 Spain 4.5 43 Spain 8.0 46397452.0 1.724
66 NaN NaN 4 Sweden 30.0 9982709.0 30.052
67 Switzerland 10.3 3 Switzerland 28.0 8544034.0 32.771
68 NaN NaN 52 Taiwan 1.0 23694089.0 0.422
69 NaN NaN — Tibet 1.0 3310836.0 3.020
70 NaN NaN 21 Trinidad and Tobago 1.0 1372598.0 7.285
71 NaN NaN 62 Turkey 2.0 81916871.0 0.244
72 UK 8.1 NaN NaN NaN NaN NaN
73 NaN NaN 51 Ukraine 2.0 44009214.0 0.454
74 NaN NaN 9 United Kingdom 133.0 66573504.0 19.429
75 NaN NaN 15 United States 383.0 326766748.0 11.721
76 NaN NaN 58 Venezuela 1.0 32381221.0 0.309
77 NaN NaN 68 Vietnam 1.0 96491146.0 0.104
78 NaN NaN 56 Yemen 1.0 28915284.0 0.346
# The Nobel table spells the country out in full, so rename "UK" to match.
consumption.replace({"UK": "United Kingdom"}, inplace=True)
consumption
Country Consumption
0 Germany 11.1
1 Switzerland 10.3
2 Estonia 8.8
3 United Kingdom 8.1
4 Finland 7.4
5 Denmark 6.9
6 Croatia 6.6
7 Belgium 6.4
8 Lithuania 4.7
9 Spain 4.5
10 Portugal 3.6
11 Italy 3.1
12 Greece 3.0
13 Hungary 2.9
# Estonia has no row in the Nobel table, so add one with zero laureates.
# DataFrame.append is deprecated (removed in pandas 2.0); pd.concat with a
# one-row frame is the supported equivalent. Columns missing from the new
# row (Rank, Population) become NaN, so the population column becomes
# float64.
estonia_row = pd.DataFrame(
    [{"Entity": "Estonia", "Nobel laureates": 0, "Laureates/10 million": 0}]
)
prizesnums = pd.concat([prizesnums, estonia_row], ignore_index=True)
prizesnums
Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 — Faroe Islands 1 4.948900e+04 202.065
1 1 Saint Lucia 2 1.796670e+05 111.317
2 2 Luxembourg 2 5.903210e+05 33.880
3 3 Switzerland 28 8.544034e+06 32.771
4 4 Sweden 30 9.982709e+06 30.052
... ... ... ... ... ...
73 71 India 11 1.354052e+09 0.081
74 72 China 9 1.415046e+09 0.064
75 73 Bangladesh 1 1.663681e+08 0.060
76 74 Nigeria 1 1.958752e+08 0.051
77 NaN Estonia 0 NaN 0.000

78 rows × 5 columns

prizesnums.dtypes
Rank object Entity object Nobel laureates int64 Population (2018) float64 Laureates/10 million float64 dtype: object
# Redo the inner join now that the names match and Estonia has a Nobel row.
merged = pd.merge(
    consumption,
    prizesnums,
    left_on="Country",
    right_on="Entity",
    validate="1:1",
)
merged
Country Consumption Rank Entity Nobel laureates Population (2018) Laureates/10 million
0 Germany 11.1 13 Germany 108 82293457.0 13.245
1 Switzerland 10.3 3 Switzerland 28 8544034.0 32.771
2 Estonia 8.8 NaN Estonia 0 NaN 0.000
3 United Kingdom 8.1 9 United Kingdom 133 66573504.0 19.429
4 Finland 7.4 18 Finland 5 5542517.0 9.021
5 Denmark 6.9 7 Denmark 14 5754356.0 24.329
6 Croatia 6.6 34 Croatia 1 4164783.0 2.401
7 Belgium 6.4 19 Belgium 10 11498519.0 8.697
8 Lithuania 4.7 32 Lithuania 1 2876475.0 3.476
9 Spain 4.5 43 Spain 8 46397452.0 1.724
10 Portugal 3.6 40 Portugal 2 10291196.0 1.943
11 Italy 3.1 33 Italy 20 59290969.0 3.373
12 Greece 3.0 41 Greece 2 11142161.0 1.795
13 Hungary 2.9 13 Hungary 13 9688847.0 13.417
# Scatter plot of laureates per 10 million people against chocolate consumption.
merged.plot.scatter(x="Consumption", y="Laureates/10 million", linewidth=5)
plt.show()
Image in a Jupyter notebook
# Pairwise correlation of the numeric columns. merged also holds string
# columns (Country, Rank, Entity); newer pandas versions no longer drop those
# silently in corr(), so select the numeric columns explicitly.
merged.select_dtypes(include="number").corr()
Consumption Nobel laureates Population (2018) Laureates/10 million
Consumption 1.000000 0.521937 0.292850 0.534097
Nobel laureates 0.521937 1.000000 0.788817 0.433335
Population (2018) 0.292850 0.788817 1.000000 0.012810
Laureates/10 million 0.534097 0.433335 0.012810 1.000000
# Repeat the scatter plot and the correlation with the Estonia row left out.
wo_estonia = merged[merged["Entity"] != "Estonia"]
wo_estonia.plot.scatter("Consumption", "Laureates/10 million", linewidth=5)
plt.show()

# Select the numeric columns explicitly: newer pandas versions no longer
# drop the string columns (Country, Rank, Entity) silently in corr().
wo_estonia.select_dtypes(include="number").corr()
Image in a Jupyter notebook
Consumption Nobel laureates Population (2018) Laureates/10 million
Consumption 1.000000 0.598825 0.292850 0.660483
Nobel laureates 0.598825 1.000000 0.788817 0.407669
Population (2018) 0.292850 0.788817 1.000000 0.012810
Laureates/10 million 0.660483 0.407669 0.012810 1.000000
# Fit a straight line to laureates-per-10-million versus chocolate
# consumption and draw it over the scatter plot.
from scipy.stats import linregress

chocolate = merged["Consumption"]
laureate_rate = merged["Laureates/10 million"]

# linregress also returns the correlation coefficient r and the p value;
# the fifth result is not used here.
(slope, intercept, r, p, _) = linregress(chocolate, laureate_rate)

plt.figure(figsize=(10, 8))
plt.scatter(chocolate, laureate_rate, linewidth=5)
plt.plot(chocolate, slope * chocolate + intercept, "k", linewidth=3, label="Linear Model")
plt.xlabel("Chocolate Consumption (kg/year/person)")
# The plotted column is laureates per 10 million people, i.e. per capita
# times 10^7 (the label previously said 10^6).
plt.ylabel("Nobel Laureates per capita x 10^7")
plt.legend()
plt.xlim([0, 15])
plt.show()

print("r = ", round(r, 3))
# Report the comparison that actually holds instead of always printing "<".
comparison = "<" if p < 0.05 else ">="
print("p", comparison, "0.05 (", round(p, 3), ")")
Image in a Jupyter notebook
r = 0.534 p < 0.05 ( 0.049 )
slope
1.9518222295023373