Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 5313
Image: default
Kernel: Python 3 (system-wide)
# First, read the raw census data.
DATA = "2016Census_G59_AUS.csv"

# The context manager closes the file even if read() raises, which the
# manual open()/close() pair did not guarantee.
with open(DATA, "r") as file:
    # The whole file is loaded as one string.
    contents = file.read()

print(contents)
AUS_CODE_2016,One_method_Train_M,One_method_Train_F,One_method_Train_P,One_method_Bus_M,One_method_Bus_F,One_method_Bus_P,One_method_Ferry_M,One_method_Ferry_F,One_method_Ferry_P,One_met_Tram_incl_lt_rail_M,One_met_Tram_incl_lt_rail_F,One_met_Tram_incl_lt_rail_P,One_method_Taxi_M,One_method_Taxi_F,One_method_Taxi_P,One_method_Car_as_driver_M,One_method_Car_as_driver_F,One_method_Car_as_driver_P,One_method_Car_as_passenger_M,One_method_Car_as_passenger_F,One_method_Car_as_passenger_P,One_method_Truck_M,One_method_Truck_F,One_method_Truck_P,One_method_Motorbike_scootr_M,One_method_Motorbike_scootr_F,One_method_Motorbike_scootr_P,One_method_Bicycle_M,One_method_Bicycle_F,One_method_Bicycle_P,One_method_Other_M,One_method_Other_F,One_method_Other_P,One_method_Walked_only_M,One_method_Walked_only_F,One_method_Walked_only_P,One_method_Tot_one_method_M,One_method_Tot_one_method_F,One_method_Tot_one_method_P,Two_methods_Train_Bus_M,Two_methods_Train_Bus_F,Two_methods_Train_Bus_P,Two_methods_Train_Ferry_M,Two_methods_Train_Ferry_F,Two_methods_Train_Ferry_P,Two_mt_trn_Trm_incl_lt_rl_M,Two_mt_trn_Trm_incl_lt_rl_F,Two_mt_trn_Trm_incl_lt_rl_P,Two_methods_Trn_Car_as_drvr_M,Two_methods_Trn_Car_as_drvr_F,Two_methods_Trn_Car_as_drvr_P,Two_methods_Trn_Car_as_pass_M,Two_methods_Trn_Car_as_pass_F,Two_methods_Trn_Car_as_pass_P,Two_methods_Train_Other_M,Two_methods_Train_Other_F,Two_methods_Train_Other_P,Two_methods_Train_Tot_M,Two_methods_Train_Tot_F,Two_methods_Train_Tot_P,Two_methods_Bus_Ferry_M,Two_methods_Bus_Ferry_F,Two_methods_Bus_Ferry_P,Two_mth_Bu_Trm_inc_lt_rl_M,Two_mth_Bu_Trm_inc_lt_rl_F,Two_mth_Bu_Trm_inc_lt_rl_P,Two_methods_Bus_Car_as_drvr_M,Two_methods_Bus_Car_as_drvr_F,Two_methods_Bus_Car_as_drvr_P,Two_methods_Bus_Car_as_pass_M,Two_methods_Bus_Car_as_pass_F,Two_methods_Bus_Car_as_pass_P,Two_methods_Bus_Other_M,Two_methods_Bus_Other_F,Two_methods_Bus_Other_P,Two_methods_Bus_Tot_M,Two_methods_Bus_Tot_F,Two_methods_Bus_Tot_P,Two_methds_Othr_two_methds_M,Two_methds_Othr_two_me
thds_F,Two_methds_Othr_two_methds_P,Two_methods_Tot_two_methods_M,Two_methods_Tot_two_methods_F,Two_methods_Tot_two_methods_P,Three_meth_Trn_two_othr_met_M,Three_meth_Trn_two_othr_met_F,Three_meth_Trn_two_othr_met_P,Three_met_Bs_2_ot_met_ex_tr_M,Three_met_Bs_2_ot_met_ex_tr_F,Three_met_Bs_2_ot_met_ex_tr_P,Three_meth_Othr_three_meth_M,Three_meth_Othr_three_meth_F,Three_meth_Othr_three_meth_P,Three_meth_Tot_three_meth_M,Three_meth_Tot_three_meth_F,Three_meth_Tot_three_meth_P,Worked_home_M,Worked_home_F,Worked_home_P,Did_not_go_to_work_M,Did_not_go_to_work_F,Did_not_go_to_work_P,Method_travel_to_work_ns_M,Method_travel_to_work_ns_F,Method_travel_to_work_ns_P,Tot_M,Tot_F,Tot_P 036,252831,235186,488012,160344,162857,323201,6965,4893,11858,26528,32208,58736,11951,7774,19725,3598291,2976283,6574571,211930,277992,489922,83736,2159,85892,57396,7180,64580,79860,27899,107756,52201,21306,73512,186398,184029,370427,4728421,3939762,8668182,52499,51620,104122,934,805,1738,8974,9116,18088,47382,38623,86003,7429,12811,20239,6250,2485,8737,123467,115455,238926,1312,1013,2321,1742,2400,4145,9600,9905,19513,4250,8361,12611,2894,1954,4849,19806,23633,43440,44451,24608,69056,187721,163701,351423,21912,21913,43828,2995,2209,5198,2664,1109,3770,27571,25232,52803,217361,286225,503582,389563,614234,1003792,57145,46916,104061,5607777,5076066,10683842
# Break the raw text into its rows. The CSV holds two lines (the header row
# and one data row), so the result is a list with two elements.
lines = contents.splitlines(keepends=False)
lines
['AUS_CODE_2016,One_method_Train_M,One_method_Train_F,One_method_Train_P,One_method_Bus_M,One_method_Bus_F,One_method_Bus_P,One_method_Ferry_M,One_method_Ferry_F,One_method_Ferry_P,One_met_Tram_incl_lt_rail_M,One_met_Tram_incl_lt_rail_F,One_met_Tram_incl_lt_rail_P,One_method_Taxi_M,One_method_Taxi_F,One_method_Taxi_P,One_method_Car_as_driver_M,One_method_Car_as_driver_F,One_method_Car_as_driver_P,One_method_Car_as_passenger_M,One_method_Car_as_passenger_F,One_method_Car_as_passenger_P,One_method_Truck_M,One_method_Truck_F,One_method_Truck_P,One_method_Motorbike_scootr_M,One_method_Motorbike_scootr_F,One_method_Motorbike_scootr_P,One_method_Bicycle_M,One_method_Bicycle_F,One_method_Bicycle_P,One_method_Other_M,One_method_Other_F,One_method_Other_P,One_method_Walked_only_M,One_method_Walked_only_F,One_method_Walked_only_P,One_method_Tot_one_method_M,One_method_Tot_one_method_F,One_method_Tot_one_method_P,Two_methods_Train_Bus_M,Two_methods_Train_Bus_F,Two_methods_Train_Bus_P,Two_methods_Train_Ferry_M,Two_methods_Train_Ferry_F,Two_methods_Train_Ferry_P,Two_mt_trn_Trm_incl_lt_rl_M,Two_mt_trn_Trm_incl_lt_rl_F,Two_mt_trn_Trm_incl_lt_rl_P,Two_methods_Trn_Car_as_drvr_M,Two_methods_Trn_Car_as_drvr_F,Two_methods_Trn_Car_as_drvr_P,Two_methods_Trn_Car_as_pass_M,Two_methods_Trn_Car_as_pass_F,Two_methods_Trn_Car_as_pass_P,Two_methods_Train_Other_M,Two_methods_Train_Other_F,Two_methods_Train_Other_P,Two_methods_Train_Tot_M,Two_methods_Train_Tot_F,Two_methods_Train_Tot_P,Two_methods_Bus_Ferry_M,Two_methods_Bus_Ferry_F,Two_methods_Bus_Ferry_P,Two_mth_Bu_Trm_inc_lt_rl_M,Two_mth_Bu_Trm_inc_lt_rl_F,Two_mth_Bu_Trm_inc_lt_rl_P,Two_methods_Bus_Car_as_drvr_M,Two_methods_Bus_Car_as_drvr_F,Two_methods_Bus_Car_as_drvr_P,Two_methods_Bus_Car_as_pass_M,Two_methods_Bus_Car_as_pass_F,Two_methods_Bus_Car_as_pass_P,Two_methods_Bus_Other_M,Two_methods_Bus_Other_F,Two_methods_Bus_Other_P,Two_methods_Bus_Tot_M,Two_methods_Bus_Tot_F,Two_methods_Bus_Tot_P,Two_methds_Othr_two_methds_M,Two_methds_Othr_two_
methds_F,Two_methds_Othr_two_methds_P,Two_methods_Tot_two_methods_M,Two_methods_Tot_two_methods_F,Two_methods_Tot_two_methods_P,Three_meth_Trn_two_othr_met_M,Three_meth_Trn_two_othr_met_F,Three_meth_Trn_two_othr_met_P,Three_met_Bs_2_ot_met_ex_tr_M,Three_met_Bs_2_ot_met_ex_tr_F,Three_met_Bs_2_ot_met_ex_tr_P,Three_meth_Othr_three_meth_M,Three_meth_Othr_three_meth_F,Three_meth_Othr_three_meth_P,Three_meth_Tot_three_meth_M,Three_meth_Tot_three_meth_F,Three_meth_Tot_three_meth_P,Worked_home_M,Worked_home_F,Worked_home_P,Did_not_go_to_work_M,Did_not_go_to_work_F,Did_not_go_to_work_P,Method_travel_to_work_ns_M,Method_travel_to_work_ns_F,Method_travel_to_work_ns_P,Tot_M,Tot_F,Tot_P', '036,252831,235186,488012,160344,162857,323201,6965,4893,11858,26528,32208,58736,11951,7774,19725,3598291,2976283,6574571,211930,277992,489922,83736,2159,85892,57396,7180,64580,79860,27899,107756,52201,21306,73512,186398,184029,370427,4728421,3939762,8668182,52499,51620,104122,934,805,1738,8974,9116,18088,47382,38623,86003,7429,12811,20239,6250,2485,8737,123467,115455,238926,1312,1013,2321,1742,2400,4145,9600,9905,19513,4250,8361,12611,2894,1954,4849,19806,23633,43440,44451,24608,69056,187721,163701,351423,21912,21913,43828,2995,2209,5198,2664,1109,3770,27571,25232,52803,217361,286225,503582,389563,614234,1003792,57145,46916,104061,5607777,5076066,10683842']
# Split each row of the list into its comma-separated fields.
headings = lines[0].split(",")
numbers = lines[1].split(",")

# Jupyter echoes only the last bare expression of a cell, so the headings
# preview has to be printed explicitly or it is silently discarded.
print(headings[:10])
numbers[:10]
['036', '252831', '235186', '488012', '160344', '162857', '323201', '6965', '4893', '11858']
# Sanity check: the header row and the data row should contain the same
# number of fields.
print(f"{len(headings)} {len(numbers)}")
109 109
# The first field ("AUS_CODE_2016" in the header row) is not one of the
# travel counts, so drop it from both lists.
# The guard makes the cell safe to re-run: without it, every extra execution
# would strip one more (real) column.
ID_COLUMN = "AUS_CODE_2016"
if headings[:1] == [ID_COLUMN]:
    headings = headings[1:]
    numbers = numbers[1:]

# Double-check the lengths again.
print(len(headings), len(numbers))
108 108
# The fields were read as text; convert each one from str to int.
num_people = [int(field) for field in numbers]

# Confirm the conversion by inspecting the type of the first element.
type(num_people[0])
int
import matplotlib.pyplot as plt

# First look at the data: one bar per column, unfiltered.
plt.bar(x=headings, height=num_people)
plt.show()
Image in a Jupyter notebook
# Peek at the last three headings to see what the trailing columns are.
headings[-3:]
['Tot_M', 'Tot_F', 'Tot_P']
# Drop the three trailing grand-total columns from both lists.
# The guard makes the cell safe to re-run: without it, every extra execution
# would strip three more (real) columns.
TOTAL_COLUMNS = ["Tot_M", "Tot_F", "Tot_P"]
if headings[-len(TOTAL_COLUMNS):] == TOTAL_COLUMNS:
    headings = headings[:-len(TOTAL_COLUMNS)]
    num_people = num_people[:-len(TOTAL_COLUMNS)]

len(num_people)
105
# A reusable slice object: start at index 2 and take every third entry.
# Applied to the headings this selects the "_P" columns.
wanted = slice(2, len(headings), 3)
travel_headings, travel_people = headings[wanted], num_people[wanted]

print(f"{len(travel_headings)} {len(travel_people)}")
travel_headings
35 35
['One_method_Train_P', 'One_method_Bus_P', 'One_method_Ferry_P', 'One_met_Tram_incl_lt_rail_P', 'One_method_Taxi_P', 'One_method_Car_as_driver_P', 'One_method_Car_as_passenger_P', 'One_method_Truck_P', 'One_method_Motorbike_scootr_P', 'One_method_Bicycle_P', 'One_method_Other_P', 'One_method_Walked_only_P', 'One_method_Tot_one_method_P', 'Two_methods_Train_Bus_P', 'Two_methods_Train_Ferry_P', 'Two_mt_trn_Trm_incl_lt_rl_P', 'Two_methods_Trn_Car_as_drvr_P', 'Two_methods_Trn_Car_as_pass_P', 'Two_methods_Train_Other_P', 'Two_methods_Train_Tot_P', 'Two_methods_Bus_Ferry_P', 'Two_mth_Bu_Trm_inc_lt_rl_P', 'Two_methods_Bus_Car_as_drvr_P', 'Two_methods_Bus_Car_as_pass_P', 'Two_methods_Bus_Other_P', 'Two_methods_Bus_Tot_P', 'Two_methds_Othr_two_methds_P', 'Two_methods_Tot_two_methods_P', 'Three_meth_Trn_two_othr_met_P', 'Three_met_Bs_2_ot_met_ex_tr_P', 'Three_meth_Othr_three_meth_P', 'Three_meth_Tot_three_meth_P', 'Worked_home_P', 'Did_not_go_to_work_P', 'Method_travel_to_work_ns_P']
# Bar chart of the "_P" columns only.
plt.bar(x=travel_headings, height=travel_people)
plt.show()
Image in a Jupyter notebook
# Same chart on a 9 x 4 figure, with the x tick labels rotated by 90 degrees.
plt.figure(figsize=(9, 4))
plt.bar(x=travel_headings, height=travel_people)
plt.tick_params(axis="x", labelrotation=90)
plt.show()
Image in a Jupyter notebook
# Display the selected headings again before filtering them further.
travel_headings
['One_method_Train_P', 'One_method_Bus_P', 'One_method_Ferry_P', 'One_met_Tram_incl_lt_rail_P', 'One_method_Taxi_P', 'One_method_Car_as_driver_P', 'One_method_Car_as_passenger_P', 'One_method_Truck_P', 'One_method_Motorbike_scootr_P', 'One_method_Bicycle_P', 'One_method_Other_P', 'One_method_Walked_only_P', 'One_method_Tot_one_method_P', 'Two_methods_Train_Bus_P', 'Two_methods_Train_Ferry_P', 'Two_mt_trn_Trm_incl_lt_rl_P', 'Two_methods_Trn_Car_as_drvr_P', 'Two_methods_Trn_Car_as_pass_P', 'Two_methods_Train_Other_P', 'Two_methods_Train_Tot_P', 'Two_methods_Bus_Ferry_P', 'Two_mth_Bu_Trm_inc_lt_rl_P', 'Two_methods_Bus_Car_as_drvr_P', 'Two_methods_Bus_Car_as_pass_P', 'Two_methods_Bus_Other_P', 'Two_methods_Bus_Tot_P', 'Two_methds_Othr_two_methds_P', 'Two_methods_Tot_two_methods_P', 'Three_meth_Trn_two_othr_met_P', 'Three_met_Bs_2_ot_met_ex_tr_P', 'Three_meth_Othr_three_meth_P', 'Three_meth_Tot_three_meth_P', 'Worked_home_P', 'Did_not_go_to_work_P', 'Method_travel_to_work_ns_P']
# Build the final category/count lists: skip every "total" column and trim
# the trailing "_P", which no longer carries any meaning.
travel_categories, travellers = [], []
for heading, count in zip(travel_headings, travel_people):
    if "Tot_" in heading:
        continue  # sub-total column, not an individual category
    travel_categories.append(heading[:-2])  # drop the "_P" suffix
    travellers.append(count)

# Check
print(travel_categories, len(travel_categories))
print(travellers, len(travellers))
['One_method_Train', 'One_method_Bus', 'One_method_Ferry', 'One_met_Tram_incl_lt_rail', 'One_method_Taxi', 'One_method_Car_as_driver', 'One_method_Car_as_passenger', 'One_method_Truck', 'One_method_Motorbike_scootr', 'One_method_Bicycle', 'One_method_Other', 'One_method_Walked_only', 'Two_methods_Train_Bus', 'Two_methods_Train_Ferry', 'Two_mt_trn_Trm_incl_lt_rl', 'Two_methods_Trn_Car_as_drvr', 'Two_methods_Trn_Car_as_pass', 'Two_methods_Train_Other', 'Two_methods_Bus_Ferry', 'Two_mth_Bu_Trm_inc_lt_rl', 'Two_methods_Bus_Car_as_drvr', 'Two_methods_Bus_Car_as_pass', 'Two_methods_Bus_Other', 'Two_methds_Othr_two_methds', 'Three_meth_Trn_two_othr_met', 'Three_met_Bs_2_ot_met_ex_tr', 'Three_meth_Othr_three_meth', 'Worked_home', 'Did_not_go_to_work', 'Method_travel_to_work_ns'] 30 [488012, 323201, 11858, 58736, 19725, 6574571, 489922, 85892, 64580, 107756, 73512, 370427, 104122, 1738, 18088, 86003, 20239, 8737, 2321, 4145, 19513, 12611, 4849, 69056, 43828, 5198, 3770, 503582, 1003792, 104061] 30
# Rank the categories from most to fewest users. Doing this here keeps the
# cell runnable top-to-bottom: it previously relied on sorted_travel_cats and
# sorted_travellers, which are only assigned further down the notebook, so a
# fresh "run all" failed with a NameError.
ranked = sorted(zip(travellers, travel_categories), reverse=True)
sorted_travellers = [count for count, _ in ranked]
sorted_travel_cats = [category for _, category in ranked]

plt.figure(figsize=(9, 4))
plt.semilogy()  # logarithmic y axis
plt.bar(sorted_travel_cats, sorted_travellers)
plt.tick_params(axis="x", labelrotation=90)
plt.grid(axis="both", which="both", alpha=0.8)
plt.xlabel("Method(s) of transport")
plt.ylabel("Number of users")
plt.title("Method of Travel to work, 2016 census")

# Save the figure before showing it.
plt.savefig("Transport to Work Bar Chart.png", bbox_inches="tight")
plt.show()
Image in a Jupyter notebook
# Pie chart of every category, in the original (unsorted) order.
plt.pie(
    x=travellers,
    labels=travel_categories,
    rotatelabels=True,
)
plt.title("Methods of Travel")
plt.show()
Image in a Jupyter notebook
# Pair every count with its category. The count comes first, so the tuples
# compare by count when sorted.
pairs = list(zip(travellers, travel_categories))
print(pairs)

# Sorting is ascending by default, so request descending order explicitly.
pairs = sorted(pairs, reverse=True)
print(pairs)
[(488012, 'One_method_Train'), (323201, 'One_method_Bus'), (11858, 'One_method_Ferry'), (58736, 'One_met_Tram_incl_lt_rail'), (19725, 'One_method_Taxi'), (6574571, 'One_method_Car_as_driver'), (489922, 'One_method_Car_as_passenger'), (85892, 'One_method_Truck'), (64580, 'One_method_Motorbike_scootr'), (107756, 'One_method_Bicycle'), (73512, 'One_method_Other'), (370427, 'One_method_Walked_only'), (104122, 'Two_methods_Train_Bus'), (1738, 'Two_methods_Train_Ferry'), (18088, 'Two_mt_trn_Trm_incl_lt_rl'), (86003, 'Two_methods_Trn_Car_as_drvr'), (20239, 'Two_methods_Trn_Car_as_pass'), (8737, 'Two_methods_Train_Other'), (2321, 'Two_methods_Bus_Ferry'), (4145, 'Two_mth_Bu_Trm_inc_lt_rl'), (19513, 'Two_methods_Bus_Car_as_drvr'), (12611, 'Two_methods_Bus_Car_as_pass'), (4849, 'Two_methods_Bus_Other'), (69056, 'Two_methds_Othr_two_methds'), (43828, 'Three_meth_Trn_two_othr_met'), (5198, 'Three_met_Bs_2_ot_met_ex_tr'), (3770, 'Three_meth_Othr_three_meth'), (503582, 'Worked_home'), (1003792, 'Did_not_go_to_work'), (104061, 'Method_travel_to_work_ns')] [(6574571, 'One_method_Car_as_driver'), (1003792, 'Did_not_go_to_work'), (503582, 'Worked_home'), (489922, 'One_method_Car_as_passenger'), (488012, 'One_method_Train'), (370427, 'One_method_Walked_only'), (323201, 'One_method_Bus'), (107756, 'One_method_Bicycle'), (104122, 'Two_methods_Train_Bus'), (104061, 'Method_travel_to_work_ns'), (86003, 'Two_methods_Trn_Car_as_drvr'), (85892, 'One_method_Truck'), (73512, 'One_method_Other'), (69056, 'Two_methds_Othr_two_methds'), (64580, 'One_method_Motorbike_scootr'), (58736, 'One_met_Tram_incl_lt_rail'), (43828, 'Three_meth_Trn_two_othr_met'), (20239, 'Two_methods_Trn_Car_as_pass'), (19725, 'One_method_Taxi'), (19513, 'Two_methods_Bus_Car_as_drvr'), (18088, 'Two_mt_trn_Trm_incl_lt_rl'), (12611, 'Two_methods_Bus_Car_as_pass'), (11858, 'One_method_Ferry'), (8737, 'Two_methods_Train_Other'), (5198, 'Three_met_Bs_2_ot_met_ex_tr'), (4849, 'Two_methods_Bus_Other'), (4145, 
'Two_mth_Bu_Trm_inc_lt_rl'), (3770, 'Three_meth_Othr_three_meth'), (2321, 'Two_methods_Bus_Ferry'), (1738, 'Two_methods_Train_Ferry')]
# Unzip the sorted (count, category) pairs back into two parallel lists.
sorted_travel_cats = [category for _, category in pairs]
sorted_travellers = [count for count, _ in pairs]
# Pie chart of every category, now in sorted order.
plt.pie(
    x=sorted_travellers,
    labels=sorted_travel_cats,
    rotatelabels=True,
)
plt.title("Methods of Travel")
plt.show()
Image in a Jupyter notebook
from operator import itemgetter

# Zip the lists together the right way around (category at index 0, count at
# index 1) and sort on the count, largest first.
pairs1 = sorted(
    zip(travel_categories, travellers),
    key=itemgetter(1),
    reverse=True,
)
print(pairs1)
[('One_method_Car_as_driver', 6574571), ('Did_not_go_to_work', 1003792), ('Worked_home', 503582), ('One_method_Car_as_passenger', 489922), ('One_method_Train', 488012), ('One_method_Walked_only', 370427), ('One_method_Bus', 323201), ('One_method_Bicycle', 107756), ('Two_methods_Train_Bus', 104122), ('Method_travel_to_work_ns', 104061), ('Two_methods_Trn_Car_as_drvr', 86003), ('One_method_Truck', 85892), ('One_method_Other', 73512), ('Two_methds_Othr_two_methds', 69056), ('One_method_Motorbike_scootr', 64580), ('One_met_Tram_incl_lt_rail', 58736), ('Three_meth_Trn_two_othr_met', 43828), ('Two_methods_Trn_Car_as_pass', 20239), ('One_method_Taxi', 19725), ('Two_methods_Bus_Car_as_drvr', 19513), ('Two_mt_trn_Trm_incl_lt_rl', 18088), ('Two_methods_Bus_Car_as_pass', 12611), ('One_method_Ferry', 11858), ('Two_methods_Train_Other', 8737), ('Three_met_Bs_2_ot_met_ex_tr', 5198), ('Two_methods_Bus_Other', 4849), ('Two_mth_Bu_Trm_inc_lt_rl', 4145), ('Three_meth_Othr_three_meth', 3770), ('Two_methods_Bus_Ferry', 2321), ('Two_methods_Train_Ferry', 1738)]
# The pie gets PIECES_OF_PIE wedges: indices 0-7 are the leading entries of
# the sorted lists, and the last wedge (index 8) lumps everything else together.
PIECES_OF_PIE = 9
named = PIECES_OF_PIE - 1

# Sum of every category from index 8 to the end.
others = sum(sorted_travellers[named:])

wedge_sizes = sorted_travellers[:named] + [others]
wedge_labels = sorted_travel_cats[:named] + ["Other methods"]
plt.pie(wedge_sizes, labels=wedge_labels)
plt.show()
Image in a Jupyter notebook
# Final pie chart: the eight leading entries plus one combined "Other methods"
# wedge.
PIECES_OF_PIE = 9
others = sum(sorted_travellers[PIECES_OF_PIE - 1:])

# 270 degrees minus the angular span (in degrees) of the first entry.
# NOTE(review): presumably chosen to position the largest wedge - confirm
# against the rendered chart.
angle = 270 - sorted_travellers[0] / sum(sorted_travellers) * 360

plt.pie(
    sorted_travellers[:PIECES_OF_PIE - 1] + [others],
    labels=sorted_travel_cats[:PIECES_OF_PIE - 1] + ["Other methods"],
    counterclock=True,
    startangle=angle,
    radius=1.1,
)
# Title fixed: it previously read "Methodof travel ..." (missing space).
plt.title("Method of travel to work, 2016 Census")
plt.show()
Image in a Jupyter notebook