SharedNynke.ipynbOpen in CoCalc
Author: Glen Cripps
Views : 10
In [1]:
import pandas as pd df = pd.read_csv('trainingset.csv', sep=',', low_memory = False).head(200000) df = df.rename(columns={'bbl_id': 'building_id', "bldg_ctgy": "building_category", "tax_cls_p": "tax_class_present", "bldg_cls_p": "building_class_present", "res_unit": "residential_unit", "com_unit" : "community_unit", "tot_unit" : "total_units", "yr_built" : "year_built", "tax_cls_s": "tax_class_sale", "bldg_cls_s" : "building_class_sale"}) print(df.columns) id = df[['building_id', 'price']] df = df[df['price']>75000]
Index([u'Unnamed: 0', u'Unnamed: 0.1', u'Unnamed: 0_x', u'Sale_id_x', u'building_id', u'year', u'borough', u'building_category', u'tax_class_present', u'block', u'lot', u'easmnt', u'building_class_present', u'address', u'apt', u'zip', u'residential_unit', u'community_unit', u'total_units', u'land_sqft', u'tot_sqft', u'year_built', u'tax_class_sale', u'building_class_sale', u'sale_date', u'price', u'usable', u'long', u'lat', u'Unnamed: 0_y', u'Sale_id_y', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28', u'29', u'30', u'31'], dtype='object')
In [2]:
#sales_zip_dummies = significant_dummies(df['zip'],threshold=.01) #sales_cat_dummies = significant_dummies(df['building_category'],threshold=.01) # ============================================================================= # zip_dummies = pd.get_dummies(df["zip"], prefix=df["zip"].name) # df = df.drop(columns="zip") # df_zip = pd.concat([id, zip_dummies], axis=1) # #df_zip.to_csv('zips2.csv') # building_category_dummies = pd.get_dummies(df["building_category"], prefix=df["building_category"].name) # df = df.drop(columns="building_category") # df_build_cat = pd.concat([id, building_category_dummies], axis=1) # ============================================================================= import re df['address'] = df['address'].astype(str) df['address'] = df['address'].apply(lambda x: re.sub(r'(^[0-9]+\s)|N/A','', str(x))) address_dummies = pd.get_dummies(df["address"], prefix=df["address"].name) df = df.drop(columns="address") df_address = pd.concat([id, address_dummies], axis=1) print('YAY') #df_address.to_csv('addresses.csv') #sales_address_dummies = significant_dummies(df['address'],threshold=.01)
YAY
In [3]:
print(df[df['borough']==4]) df['borough'] = df["borough"].replace([1,2,3,4,5], ["Manhattan", "Bronx", 'Brooklyn','Queens','Staten Island']) #print(df[df['borough'] == 'Queens']) borough_dummies = pd.get_dummies(df["borough"], prefix=df["borough"].name) df_boroughs = pd.concat([id, borough_dummies], axis=1) #df_boroughs.to_csv('boroughs3.csv')
Unnamed: 0 Unnamed: 0.1 Unnamed: 0_x Sale_id_x building_id \ 95 95 4896054 77012 77013.0 44735129.0 132 132 4863345 70980 70981.0 0.0 723 723 4880381 58998 58999.0 41569928.0 791 791 4829161 56049 56050.0 0.0 885 885 4857415 66095 66096.0 0.0 972 972 4868891 73687 73688.0 0.0 1042 1042 4853067 66076 66077.0 0.0 1113 1113 4860443 66109 66110.0 0.0 1219 1219 4880244 58869 58870.0 4156815.0 1247 1247 4827692 56042 56043.0 0.0 1610 1610 4840970 65793 65794.0 0.0 1734 1734 4843207 65879 65880.0 0.0 1964 1964 4847227 65906 65907.0 0.0 2032 2032 4830690 56056 56057.0 0.0 2117 2117 4829635 56051 56052.0 0.0 2144 2144 4868930 73687 73688.0 0.0 2156 2156 4879810 58267 58268.0 428916.0 2183 2183 4865478 73637 73638.0 0.0 2317 2317 4867479 73680 73681.0 0.0 2458 2458 4829047 56048 56049.0 0.0 2569 2569 4865203 73636 73637.0 0.0 2613 2613 4835778 64921 64922.0 0.0 2634 2634 4852074 66071 66072.0 0.0 3005 3005 4863133 70979 70980.0 0.0 3163 3163 4859889 66106 66107.0 0.0 3190 3190 4839245 65785 65786.0 0.0 3292 3292 4865537 73638 73639.0 0.0 3337 3337 4833945 59110 59111.0 0.0 3446 3446 4890101 71302 71303.0 411137113.0 3588 3588 4886913 67603 67604.0 4993748.0 ... ... ... ... ... ... 197606 197606 4889903 71134 71135.0 41065183.0 197818 197818 4845597 65899 65900.0 0.0 197929 197929 4826180 53579 53580.0 41609143.0 197981 197981 4891213 72723 72724.0 4933767.0 198174 198174 4833522 59108 59109.0 0.0 198240 198240 4851410 66068 66069.0 0.0 198251 198251 4826944 53601 53602.0 0.0 198314 198314 4859904 66106 66107.0 0.0 198641 198641 4840682 65792 65793.0 0.0 198664 198664 4882774 61632 61633.0 453631.0 198743 198743 4826164 53565 53566.0 41605823.0 198751 198751 4880246 58871 58872.0 41568440.0 198785 198785 4852643 66074 66075.0 0.0 198883 198883 4836448 64924 64925.0 0.0 198913 198913 4853867 66079 66080.0 0.0 198920 198920 4890340 71507 71508.0 41110851.0 199019 199019 4892231 73646 73647.0 4161645.0 199081 199081 4868465 73685 73686.0 0.0 199088 199088 4846442 65903 65904.0 0.0 199160 199160 4831242 56058 56059.0 0.0 199245 199245 4880241 58866 58867.0 415663104.0 199318 199318 4896048 77006 77007.0 4472315.0 199367 199367 4855089 66085 66086.0 0.0 199428 199428 4851248 66067 66068.0 0.0 199660 199660 4894637 75686 75687.0 41307545.0 199807 199807 4875316 75914 75915.0 0.0 199830 199830 4850835 66065 66066.0 0.0 199850 199850 4846281 65902 65903.0 0.0 199914 199914 4874985 74055 74056.0 0.0 199917 199917 4853390 66077 66078.0 0.0 year borough building_category tax_class_present \ 95 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 132 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 723 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 791 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 885 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 972 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 1042 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 1113 2015.0 4.0 15 CONDOS - 2-10 UNIT RESIDENTIAL 2C 1219 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 1247 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 1610 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 1734 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 1964 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2032 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2117 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2144 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 2156 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 2183 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2317 2015.0 4.0 12 CONDOS - WALKUP APARTMENTS 2 2458 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2569 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 2613 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 2634 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 3005 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 3163 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 3190 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 3292 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 3337 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 3446 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 3588 2015.0 4.0 22 STORE BUILDINGS 4 ... ... ... ... ... 197606 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 197818 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 197929 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 197981 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 198174 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 198240 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 198251 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 198314 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 198641 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 198664 2015.0 4.0 29 COMMERCIAL GARAGES 4 198743 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 198751 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 198785 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 198883 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 198913 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 198920 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 199019 2015.0 4.0 07 RENTALS - WALKUP APARTMENTS 2 199081 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 199088 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 199160 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 199245 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 199318 2015.0 4.0 02 TWO FAMILY DWELLINGS 1 199367 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 199428 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 199660 2015.0 4.0 01 ONE FAMILY DWELLINGS 1 199807 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 199830 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 199850 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 199914 2015.0 4.0 04 TAX CLASS 1 CONDOS 1A 199917 2015.0 4.0 13 CONDOS - ELEVATOR APARTMENTS 2 block ... 22 23 24 25 26 \ 95 4735.0 ... 0.387927 -0.322232 -0.030267 0.000508 0.281295 132 11547.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 723 15699.0 ... 0.385935 -0.313424 -0.030138 -0.119021 0.267601 791 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 885 11431.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 972 16226.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1042 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1113 11426.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1219 15681.0 ... 0.426256 -0.346731 -0.032077 -0.097411 0.298292 1247 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1610 14248.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1734 14247.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 1964 13940.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2032 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2117 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2144 16226.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2156 2891.0 ... 0.560812 -0.464416 -0.042419 0.000808 0.403108 2183 16174.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2317 16173.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2458 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2569 16174.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2613 16112.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 2634 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3005 11544.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3163 11431.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3190 14243.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3292 16227.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3337 15768.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 3446 11137.0 ... 0.408205 -0.337107 -0.032118 -0.014619 0.293508 3588 9937.0 ... 0.408476 -0.336719 -0.031075 -0.013746 0.293169 ... ... ... ... ... ... ... ... 197606 10651.0 ... 0.410038 -0.340183 -0.029790 0.010097 0.295533 197818 11425.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 197929 16091.0 ... 0.551610 -0.460620 -0.043147 0.127454 0.405904 197981 9337.0 ... 0.494993 -0.409224 -0.037271 -0.037169 0.354415 198174 15768.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198240 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198251 15933.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198314 11431.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198641 14247.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198664 5363.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 198743 16058.0 ... 0.543680 -0.457424 -0.040990 0.166653 0.403033 198751 15684.0 ... 0.408004 -0.331908 -0.030712 -0.109610 0.284022 198785 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198883 16113.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198913 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 198920 11108.0 ... 0.427254 -0.353690 -0.031233 0.006198 0.307077 199019 16164.0 ... 0.625626 -0.515106 -0.048587 -0.050995 0.447258 199081 16226.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199088 11444.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199160 16234.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199245 15663.0 ... 0.440084 -0.354660 -0.034358 -0.205868 0.302985 199318 4723.0 ... 0.210929 -0.178074 -0.015426 -0.017193 0.153570 199367 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199428 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199660 13075.0 ... 0.380458 -0.314706 -0.030955 -0.015017 0.273836 199807 12572.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199830 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199850 11425.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199914 13682.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 199917 11417.0 ... 0.501195 -0.413724 -0.039621 -0.021067 0.360024 27 28 29 30 31 95 0.479920 -0.349796 -0.338143 0.235588 -0.065291 132 0.610382 -0.451644 -0.438753 0.303116 -0.085806 723 0.458685 -0.356530 -0.351164 0.253646 -0.081334 791 0.610382 -0.451644 -0.438753 0.303116 -0.085806 885 0.610382 -0.451644 -0.438753 0.303116 -0.085806 972 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1042 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1113 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1219 0.506828 -0.392578 -0.384352 0.274474 -0.084492 1247 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1610 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1734 0.610382 -0.451644 -0.438753 0.303116 -0.085806 1964 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2032 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2117 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2144 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2156 0.691306 -0.504016 -0.485738 0.339094 -0.094654 2183 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2317 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2458 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2569 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2613 0.610382 -0.451644 -0.438753 0.303116 -0.085806 2634 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3005 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3163 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3190 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3292 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3337 0.610382 -0.451644 -0.438753 0.303116 -0.085806 3446 0.495788 -0.367691 -0.357304 0.245389 -0.069261 3588 0.495251 -0.368265 -0.358123 0.245211 -0.068730 ... ... ... ... ... ... 197606 0.508910 -0.368444 -0.353686 0.248279 -0.068813 197818 0.610382 -0.451644 -0.438753 0.303116 -0.085806 197929 0.719680 -0.482383 -0.458197 0.321382 -0.081113 197981 0.584907 -0.449654 -0.435975 0.295537 -0.083338 198174 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198240 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198251 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198314 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198641 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198664 0.000000 0.000000 0.000000 0.000000 0.000000 198743 0.726663 -0.471201 -0.443774 0.313874 -0.076569 198751 0.485396 -0.376905 -0.369931 0.266218 -0.083527 198785 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198883 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198913 0.610382 -0.451644 -0.438753 0.303116 -0.085806 198920 0.530706 -0.383317 -0.368457 0.259120 -0.072215 199019 0.743344 -0.567611 -0.552715 0.376288 -0.106973 199081 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199088 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199160 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199245 0.458690 -0.417126 -0.418709 0.275103 -0.088279 199318 0.259426 -0.191587 -0.184952 0.129599 -0.037394 199367 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199428 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199660 0.465644 -0.342264 -0.332734 0.229576 -0.064901 199807 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199830 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199850 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199914 0.610382 -0.451644 -0.438753 0.303116 -0.085806 199917 0.610382 -0.451644 -0.438753 0.303116 -0.085806 [2407 rows x 62 columns]
In [4]:
# ============================================================================= # import matplotlib.pyplot as plt # dfborough = df.groupby('borough').mean().reset_index() # x = dfborough['borough'] # y = dfborough['price'] # plt.bar(x,y) # plt.show() # print('ok') # dfborough = df.groupby('borough').mean().reset_index() # x = dfborough['borough'] # y = dfborough['price'] # plt.bar(x,y) # plt.show() # print('ok') # # ============================================================================= #sales_bor_dummies = significant_dummies(df['borough'],threshold=.01) borough_dummies = pd.get_dummies(df["borough"], prefix=df["borough"].name) df = df.drop(columns="borough") df = pd.concat([df, borough_dummies], axis=1) import pandas as pd import numpy as np #df = pd.read_csv('addresses.csv') print(df.columns)
Index([u'Unnamed: 0', u'Unnamed: 0.1', u'Unnamed: 0_x', u'Sale_id_x', u'building_id', u'year', u'building_category', u'tax_class_present', u'block', u'lot', u'easmnt', u'building_class_present', u'apt', u'zip', u'residential_unit', u'community_unit', u'total_units', u'land_sqft', u'tot_sqft', u'year_built', u'tax_class_sale', u'building_class_sale', u'sale_date', u'price', u'usable', u'long', u'lat', u'Unnamed: 0_y', u'Sale_id_y', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28', u'29', u'30', u'31', u'borough_Bronx', u'borough_Brooklyn', u'borough_Manhattan', u'borough_Queens', u'borough_Staten Island'], dtype='object')
In [13]:
df2 = df_address print(df2.tail) df2.dropna(inplace=True) print(df2.tail()) X = df2.drop(columns=['price', 'building_id']) Y = df2.price Y = np.array(Y).reshape(-1)
<bound method DataFrame.tail of building_id price address_1 AVENUE \ 2 110061303.0 87193.0 0.0 3 110061302.0 128200.0 0.0 8 110061303.0 82233.0 0.0 9 110061303.0 142591.0 0.0 10 1100937.0 95428.0 0.0 17 110061302.0 88445.0 0.0 23 110061303.0 75135.0 0.0 25 110061302.0 79024.0 0.0 27 1100937.0 83645.0 0.0 28 110061302.0 78205.0 0.0 29 110061302.0 85000.0 0.0 31 1100937.0 81495.0 0.0 34 110061303.0 78305.0 0.0 37 110061303.0 90000.0 0.0 41 110061302.0 138330.0 0.0 45 1100937.0 83000.0 0.0 49 110061303.0 124647.0 0.0 57 1100937.0 96852.0 0.0 59 110061302.0 202787.0 0.0 60 1100937.0 161850.0 0.0 74 110061302.0 82894.0 0.0 80 1100937.0 97844.0 0.0 83 110061303.0 83695.0 0.0 85 110061303.0 98353.0 0.0 86 1100937.0 83000.0 0.0 87 110061302.0 123673.0 0.0 89 110061302.0 132318.0 0.0 90 110061303.0 100395.0 0.0 94 1100937.0 85000.0 0.0 95 44735129.0 775000.0 0.0 ... ... ... ... 199879 1100937.0 85000.0 0.0 199883 110061303.0 140734.0 0.0 199885 1100937.0 83000.0 0.0 199887 110061303.0 129744.0 0.0 199896 110061302.0 82894.0 0.0 199903 110061303.0 87995.0 0.0 199908 110061303.0 165345.0 0.0 199909 110061303.0 107100.0 0.0 199914 0.0 290000.0 0.0 199916 110061303.0 78495.0 0.0 199917 0.0 198000.0 0.0 199921 1100937.0 107800.0 0.0 199923 110061303.0 90000.0 0.0 199926 110061303.0 148552.0 0.0 199928 110061302.0 77179.0 0.0 199934 1100937.0 195651.0 0.0 199935 1100937.0 98010.0 0.0 199936 110061302.0 99515.0 0.0 199949 1100937.0 115657.0 0.0 199953 110061303.0 88016.0 0.0 199959 110061303.0 85939.0 0.0 199961 110061303.0 139444.0 0.0 199962 1100937.0 82882.0 0.0 199963 110061303.0 88016.0 0.0 199971 110061302.0 81185.0 0.0 199975 110061302.0 88835.0 0.0 199979 110061303.0 78738.0 0.0 199984 110061303.0 125477.0 0.0 199993 110061302.0 78606.0 0.0 199994 1100937.0 87532.0 0.0 address_1-76 BEACH 101 STREET address_10-11 NAMEOKE STREET \ 2 0.0 0.0 3 0.0 0.0 8 0.0 0.0 9 0.0 0.0 10 0.0 0.0 17 0.0 0.0 23 0.0 0.0 25 0.0 0.0 27 0.0 0.0 28 0.0 0.0 29 0.0 0.0 31 0.0 0.0 34 0.0 0.0 37 0.0 0.0 41 0.0 0.0 45 0.0 0.0 49 0.0 0.0 57 0.0 0.0 59 0.0 0.0 60 0.0 0.0 74 0.0 0.0 80 0.0 0.0 83 0.0 0.0 85 0.0 0.0 86 0.0 0.0 87 0.0 0.0 89 0.0 0.0 90 0.0 0.0 94 0.0 0.0 95 0.0 0.0 ... ... ... 199879 0.0 0.0 199883 0.0 0.0 199885 0.0 0.0 199887 0.0 0.0 199896 0.0 0.0 199903 0.0 0.0 199908 0.0 0.0 199909 0.0 0.0 199914 0.0 0.0 199916 0.0 0.0 199917 0.0 0.0 199921 0.0 0.0 199923 0.0 0.0 199926 0.0 0.0 199928 0.0 0.0 199934 0.0 0.0 199935 0.0 0.0 199936 0.0 0.0 199949 0.0 0.0 199953 0.0 0.0 199959 0.0 0.0 199961 0.0 0.0 199962 0.0 0.0 199963 0.0 0.0 199971 0.0 0.0 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_10-62 READS LANE address_100-03 ROCKAWAY BEACH BLVD \ 2 0.0 0.0 3 0.0 0.0 8 0.0 0.0 9 0.0 0.0 10 0.0 0.0 17 0.0 0.0 23 0.0 0.0 25 0.0 0.0 27 0.0 0.0 28 0.0 0.0 29 0.0 0.0 31 0.0 0.0 34 0.0 0.0 37 0.0 0.0 41 0.0 0.0 45 0.0 0.0 49 0.0 0.0 57 0.0 0.0 59 0.0 0.0 60 0.0 0.0 74 0.0 0.0 80 0.0 0.0 83 0.0 0.0 85 0.0 0.0 86 0.0 0.0 87 0.0 0.0 89 0.0 0.0 90 0.0 0.0 94 0.0 0.0 95 0.0 0.0 ... ... ... 199879 0.0 0.0 199883 0.0 0.0 199885 0.0 0.0 199887 0.0 0.0 199896 0.0 0.0 199903 0.0 0.0 199908 0.0 0.0 199909 0.0 0.0 199914 0.0 0.0 199916 0.0 0.0 199917 0.0 0.0 199921 0.0 0.0 199923 0.0 0.0 199926 0.0 0.0 199928 0.0 0.0 199934 0.0 0.0 199935 0.0 0.0 199936 0.0 0.0 199949 0.0 0.0 199953 0.0 0.0 199959 0.0 0.0 199961 0.0 0.0 199962 0.0 0.0 199963 0.0 0.0 199971 0.0 0.0 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_100-23 39 AVENUE address_101-13 97TH STREET \ 2 0.0 0.0 3 0.0 0.0 8 0.0 0.0 9 0.0 0.0 10 0.0 0.0 17 0.0 0.0 23 0.0 0.0 25 0.0 0.0 27 0.0 0.0 28 0.0 0.0 29 0.0 0.0 31 0.0 0.0 34 0.0 0.0 37 0.0 0.0 41 0.0 0.0 45 0.0 0.0 49 0.0 0.0 57 0.0 0.0 59 0.0 0.0 60 0.0 0.0 74 0.0 0.0 80 0.0 0.0 83 0.0 0.0 85 0.0 0.0 86 0.0 0.0 87 0.0 0.0 89 0.0 0.0 90 0.0 0.0 94 0.0 0.0 95 0.0 0.0 ... ... ... 199879 0.0 0.0 199883 0.0 0.0 199885 0.0 0.0 199887 0.0 0.0 199896 0.0 0.0 199903 0.0 0.0 199908 0.0 0.0 199909 0.0 0.0 199914 0.0 0.0 199916 0.0 0.0 199917 0.0 0.0 199921 0.0 0.0 199923 0.0 0.0 199926 0.0 0.0 199928 0.0 0.0 199934 0.0 0.0 199935 0.0 0.0 199936 0.0 0.0 199949 0.0 0.0 199953 0.0 0.0 199959 0.0 0.0 199961 0.0 0.0 199962 0.0 0.0 199963 0.0 0.0 199971 0.0 0.0 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_101-14 SHORE FRONT PARKWAY ... \ 2 0.0 ... 3 0.0 ... 8 0.0 ... 9 0.0 ... 10 0.0 ... 17 0.0 ... 23 0.0 ... 25 0.0 ... 27 0.0 ... 28 0.0 ... 29 0.0 ... 31 0.0 ... 34 0.0 ... 37 0.0 ... 41 0.0 ... 45 0.0 ... 49 0.0 ... 57 0.0 ... 59 0.0 ... 60 0.0 ... 74 0.0 ... 80 0.0 ... 83 0.0 ... 85 0.0 ... 86 0.0 ... 87 0.0 ... 89 0.0 ... 90 0.0 ... 94 0.0 ... 95 0.0 ... ... ... ... 199879 0.0 ... 199883 0.0 ... 199885 0.0 ... 199887 0.0 ... 199896 0.0 ... 199903 0.0 ... 199908 0.0 ... 199909 0.0 ... 199914 0.0 ... 199916 0.0 ... 199917 0.0 ... 199921 0.0 ... 199923 0.0 ... 199926 0.0 ... 199928 0.0 ... 199934 0.0 ... 199935 0.0 ... 199936 0.0 ... 199949 0.0 ... 199953 0.0 ... 199959 0.0 ... 199961 0.0 ... 199962 0.0 ... 199963 0.0 ... 199971 0.0 ... 199975 0.0 ... 199979 0.0 ... 199984 0.0 ... 199993 0.0 ... 199994 0.0 ... address_WINDHAM LOOP address_WINEGAR LANE address_WINTHROP STREET \ 2 0.0 0.0 0.0 3 0.0 0.0 0.0 8 0.0 0.0 0.0 9 0.0 0.0 0.0 10 0.0 0.0 0.0 17 0.0 0.0 0.0 23 0.0 0.0 0.0 25 0.0 0.0 0.0 27 0.0 0.0 0.0 28 0.0 0.0 0.0 29 0.0 0.0 0.0 31 0.0 0.0 0.0 34 0.0 0.0 0.0 37 0.0 0.0 0.0 41 0.0 0.0 0.0 45 0.0 0.0 0.0 49 0.0 0.0 0.0 57 0.0 0.0 0.0 59 0.0 0.0 0.0 60 0.0 0.0 0.0 74 0.0 0.0 0.0 80 0.0 0.0 0.0 83 0.0 0.0 0.0 85 0.0 0.0 0.0 86 0.0 0.0 0.0 87 0.0 0.0 0.0 89 0.0 0.0 0.0 90 0.0 0.0 0.0 94 0.0 0.0 0.0 95 0.0 0.0 0.0 ... ... ... ... 199879 0.0 0.0 0.0 199883 0.0 0.0 0.0 199885 0.0 0.0 0.0 199887 0.0 0.0 0.0 199896 0.0 0.0 0.0 199903 0.0 0.0 0.0 199908 0.0 0.0 0.0 199909 0.0 0.0 0.0 199914 0.0 0.0 0.0 199916 0.0 0.0 0.0 199917 0.0 0.0 0.0 199921 0.0 0.0 0.0 199923 0.0 0.0 0.0 199926 0.0 0.0 0.0 199928 0.0 0.0 0.0 199934 0.0 0.0 0.0 199935 0.0 0.0 0.0 199936 0.0 0.0 0.0 199949 0.0 0.0 0.0 199953 0.0 0.0 0.0 199959 0.0 0.0 0.0 199961 0.0 0.0 0.0 199962 0.0 0.0 0.0 199963 0.0 0.0 0.0 199971 0.0 0.0 0.0 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_WIRT AVENUE address_WOODBINE STREET address_WOODHULL AVENUE \ 2 0.0 0.0 0.0 3 0.0 0.0 0.0 8 0.0 0.0 0.0 9 0.0 0.0 0.0 10 0.0 0.0 0.0 17 0.0 0.0 0.0 23 0.0 0.0 0.0 25 0.0 0.0 0.0 27 0.0 0.0 0.0 28 0.0 0.0 0.0 29 0.0 0.0 0.0 31 0.0 0.0 0.0 34 0.0 0.0 0.0 37 0.0 0.0 0.0 41 0.0 0.0 0.0 45 0.0 0.0 0.0 49 0.0 0.0 0.0 57 0.0 0.0 0.0 59 0.0 0.0 0.0 60 0.0 0.0 0.0 74 0.0 0.0 0.0 80 0.0 0.0 0.0 83 0.0 0.0 0.0 85 0.0 0.0 0.0 86 0.0 0.0 0.0 87 0.0 0.0 0.0 89 0.0 0.0 0.0 90 0.0 0.0 0.0 94 0.0 0.0 0.0 95 0.0 0.0 0.0 ... ... ... ... 199879 0.0 0.0 0.0 199883 0.0 0.0 0.0 199885 0.0 0.0 0.0 199887 0.0 0.0 0.0 199896 0.0 0.0 0.0 199903 0.0 0.0 0.0 199908 0.0 0.0 0.0 199909 0.0 0.0 0.0 199914 0.0 0.0 0.0 199916 0.0 0.0 0.0 199917 0.0 0.0 0.0 199921 0.0 0.0 0.0 199923 0.0 0.0 0.0 199926 0.0 0.0 0.0 199928 0.0 0.0 0.0 199934 0.0 0.0 0.0 199935 0.0 0.0 0.0 199936 0.0 0.0 0.0 199949 0.0 0.0 0.0 199953 0.0 0.0 0.0 199959 0.0 0.0 0.0 199961 0.0 0.0 0.0 199962 0.0 0.0 0.0 199963 0.0 0.0 0.0 199971 0.0 0.0 0.0 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_WOODROW COURT address_WORTH STREET address_WYONA STREET \ 2 0.0 0.0 0.0 3 0.0 0.0 0.0 8 0.0 0.0 0.0 9 0.0 0.0 0.0 10 0.0 0.0 0.0 17 0.0 0.0 0.0 23 0.0 0.0 0.0 25 0.0 0.0 0.0 27 0.0 0.0 0.0 28 0.0 0.0 0.0 29 0.0 0.0 0.0 31 0.0 0.0 0.0 34 0.0 0.0 0.0 37 0.0 0.0 0.0 41 0.0 0.0 0.0 45 0.0 0.0 0.0 49 0.0 0.0 0.0 57 0.0 0.0 0.0 59 0.0 0.0 0.0 60 0.0 0.0 0.0 74 0.0 0.0 0.0 80 0.0 0.0 0.0 83 0.0 0.0 0.0 85 0.0 0.0 0.0 86 0.0 0.0 0.0 87 0.0 0.0 0.0 89 0.0 0.0 0.0 90 0.0 0.0 0.0 94 0.0 0.0 0.0 95 0.0 0.0 0.0 ... ... ... ... 199879 0.0 0.0 0.0 199883 0.0 0.0 0.0 199885 0.0 0.0 0.0 199887 0.0 0.0 0.0 199896 0.0 0.0 0.0 199903 0.0 0.0 0.0 199908 0.0 0.0 0.0 199909 0.0 0.0 0.0 199914 0.0 0.0 0.0 199916 0.0 0.0 0.0 199917 0.0 0.0 0.0 199921 0.0 0.0 0.0 199923 0.0 0.0 0.0 199926 0.0 0.0 0.0 199928 0.0 0.0 0.0 199934 0.0 0.0 0.0 199935 0.0 0.0 0.0 199936 0.0 0.0 0.0 199949 0.0 0.0 0.0 199953 0.0 0.0 0.0 199959 0.0 0.0 0.0 199961 0.0 0.0 0.0 199962 0.0 0.0 0.0 199963 0.0 0.0 0.0 199971 0.0 0.0 0.0 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_YOUNG AVENUE 2 0.0 3 0.0 8 0.0 9 0.0 10 0.0 17 0.0 23 0.0 25 0.0 27 0.0 28 0.0 29 0.0 31 0.0 34 0.0 37 0.0 41 0.0 45 0.0 49 0.0 57 0.0 59 0.0 60 0.0 74 0.0 80 0.0 83 0.0 85 0.0 86 0.0 87 0.0 89 0.0 90 0.0 94 0.0 95 0.0 ... ... 199879 0.0 199883 0.0 199885 0.0 199887 0.0 199896 0.0 199903 0.0 199908 0.0 199909 0.0 199914 0.0 199916 0.0 199917 0.0 199921 0.0 199923 0.0 199926 0.0 199928 0.0 199934 0.0 199935 0.0 199936 0.0 199949 0.0 199953 0.0 199959 0.0 199961 0.0 199962 0.0 199963 0.0 199971 0.0 199975 0.0 199979 0.0 199984 0.0 199993 0.0 199994 0.0 [57407 rows x 1818 columns]> building_id price address_1 AVENUE \ 199975 110061302.0 88835.0 0.0 199979 110061303.0 78738.0 0.0 199984 110061303.0 125477.0 0.0 199993 110061302.0 78606.0 0.0 199994 1100937.0 87532.0 0.0 address_1-76 BEACH 101 STREET address_10-11 NAMEOKE STREET \ 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_10-62 READS LANE address_100-03 ROCKAWAY BEACH BLVD \ 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_100-23 39 AVENUE address_101-13 97TH STREET \ 199975 0.0 0.0 199979 0.0 0.0 199984 0.0 0.0 199993 0.0 0.0 199994 0.0 0.0 address_101-14 SHORE FRONT PARKWAY ... \ 199975 0.0 ... 199979 0.0 ... 199984 0.0 ... 199993 0.0 ... 199994 0.0 ... address_WINDHAM LOOP address_WINEGAR LANE address_WINTHROP STREET \ 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_WIRT AVENUE address_WOODBINE STREET address_WOODHULL AVENUE \ 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_WOODROW COURT address_WORTH STREET address_WYONA STREET \ 199975 0.0 0.0 0.0 199979 0.0 0.0 0.0 199984 0.0 0.0 0.0 199993 0.0 0.0 0.0 199994 0.0 0.0 0.0 address_YOUNG AVENUE 199975 0.0 199979 0.0 199984 0.0 199993 0.0 199994 0.0 [5 rows x 1818 columns]
In [14]:
from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = .20, random_state = 40) X_train.shape, X_test.shape from sklearn.linear_model import LinearRegression lr = LinearRegression()
In [11]:
# Fitting the model lr.fit(X_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
In [12]:
# R^2 scores print('Train:', lr.score(X_train, y_train)) print('Validation:', lr.score(X_test, y_test)) from sklearn.metrics import mean_absolute_error print('MAE:', mean_absolute_error(y_test, lr.predict(X_test))) print('Random prediction:', abs(y_test - y_test.mean()).mean() )
('Train:', 0.48856354945998226) ('Validation:', -1.2107561430695182e+20) ('MAE:', 930308784373814.2) ('Random prediction:', 110117.39048009258)