CoCalc -- Nynke.ipynb

Kernel: Python 2 (system-wide)

In [1]:

import pandas as pd
df = pd.read_csv('trainingset.csv', sep=',', low_memory = False).head(200000)
df = df.rename(columns={'bbl_id': 'building_id', "bldg_ctgy": "building_category", "tax_cls_p": "tax_class_present", "bldg_cls_p": "building_class_present", "res_unit": "residential_unit", "com_unit" : "community_unit", "tot_unit" : "total_units", "yr_built" : "year_built", "tax_cls_s": "tax_class_sale", "bldg_cls_s" : "building_class_sale"})
print(df.columns)
id = df[['building_id', 'price']]
df = df[df['price']>75000]

Index([u'Unnamed: 0', u'Unnamed: 0.1', u'Unnamed: 0_x', u'Sale_id_x',
       u'building_id', u'year', u'borough', u'building_category',
       u'tax_class_present', u'block', u'lot', u'easmnt',
       u'building_class_present', u'address', u'apt', u'zip',
       u'residential_unit', u'community_unit', u'total_units', u'land_sqft',
       u'tot_sqft', u'year_built', u'tax_class_sale', u'building_class_sale',
       u'sale_date', u'price', u'usable', u'long', u'lat', u'Unnamed: 0_y',
       u'Sale_id_y', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8',
       u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18',
       u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28',
       u'29', u'30', u'31'],
      dtype='object')

In [2]:

#sales_zip_dummies = significant_dummies(df['zip'],threshold=.01)
#sales_cat_dummies = significant_dummies(df['building_category'],threshold=.01)
# =============================================================================
# zip_dummies = pd.get_dummies(df["zip"], prefix=df["zip"].name)
# df = df.drop(columns="zip")
# df_zip = pd.concat([id, zip_dummies], axis=1)
# #df_zip.to_csv('zips2.csv')
# building_category_dummies = pd.get_dummies(df["building_category"], prefix=df["building_category"].name)
# df = df.drop(columns="building_category")
# df_build_cat = pd.concat([id, building_category_dummies], axis=1)
# =============================================================================

import re
df['address'] = df['address'].astype(str)
df['address'] =  df['address'].apply(lambda x: re.sub(r'(^[0-9]+\s)|N/A','', str(x)))
address_dummies = pd.get_dummies(df["address"], prefix=df["address"].name)
df = df.drop(columns="address")
df_address = pd.concat([id, address_dummies], axis=1)
print('YAY')
#df_address.to_csv('addresses.csv')
#sales_address_dummies = significant_dummies(df['address'],threshold=.01)

YAY

In [3]:

print(df[df['borough']==4])
df['borough'] = df["borough"].replace([1,2,3,4,5], ["Manhattan", "Bronx", 'Brooklyn','Queens','Staten Island'])
#print(df[df['borough'] == 'Queens'])
borough_dummies = pd.get_dummies(df["borough"], prefix=df["borough"].name)
df_boroughs = pd.concat([id, borough_dummies], axis=1)
#df_boroughs.to_csv('boroughs3.csv')

        Unnamed: 0  Unnamed: 0.1  Unnamed: 0_x  Sale_id_x  building_id  \
95              95       4896054         77012    77013.0   44735129.0   
132            132       4863345         70980    70981.0          0.0   
723            723       4880381         58998    58999.0   41569928.0   
791            791       4829161         56049    56050.0          0.0   
885            885       4857415         66095    66096.0          0.0   
972            972       4868891         73687    73688.0          0.0   
1042          1042       4853067         66076    66077.0          0.0   
1113          1113       4860443         66109    66110.0          0.0   
1219          1219       4880244         58869    58870.0    4156815.0   
1247          1247       4827692         56042    56043.0          0.0   
1610          1610       4840970         65793    65794.0          0.0   
1734          1734       4843207         65879    65880.0          0.0   
1964          1964       4847227         65906    65907.0          0.0   
2032          2032       4830690         56056    56057.0          0.0   
2117          2117       4829635         56051    56052.0          0.0   
2144          2144       4868930         73687    73688.0          0.0   
2156          2156       4879810         58267    58268.0     428916.0   
2183          2183       4865478         73637    73638.0          0.0   
2317          2317       4867479         73680    73681.0          0.0   
2458          2458       4829047         56048    56049.0          0.0   
2569          2569       4865203         73636    73637.0          0.0   
2613          2613       4835778         64921    64922.0          0.0   
2634          2634       4852074         66071    66072.0          0.0   
3005          3005       4863133         70979    70980.0          0.0   
3163          3163       4859889         66106    66107.0          0.0   
3190          3190       4839245         65785    65786.0          0.0   
3292          3292       4865537         73638    73639.0          0.0   
3337          3337       4833945         59110    59111.0          0.0   
3446          3446       4890101         71302    71303.0  411137113.0   
3588          3588       4886913         67603    67604.0    4993748.0   
...            ...           ...           ...        ...          ...   
197606      197606       4889903         71134    71135.0   41065183.0   
197818      197818       4845597         65899    65900.0          0.0   
197929      197929       4826180         53579    53580.0   41609143.0   
197981      197981       4891213         72723    72724.0    4933767.0   
198174      198174       4833522         59108    59109.0          0.0   
198240      198240       4851410         66068    66069.0          0.0   
198251      198251       4826944         53601    53602.0          0.0   
198314      198314       4859904         66106    66107.0          0.0   
198641      198641       4840682         65792    65793.0          0.0   
198664      198664       4882774         61632    61633.0     453631.0   
198743      198743       4826164         53565    53566.0   41605823.0   
198751      198751       4880246         58871    58872.0   41568440.0   
198785      198785       4852643         66074    66075.0          0.0   
198883      198883       4836448         64924    64925.0          0.0   
198913      198913       4853867         66079    66080.0          0.0   
198920      198920       4890340         71507    71508.0   41110851.0   
199019      199019       4892231         73646    73647.0    4161645.0   
199081      199081       4868465         73685    73686.0          0.0   
199088      199088       4846442         65903    65904.0          0.0   
199160      199160       4831242         56058    56059.0          0.0   
199245      199245       4880241         58866    58867.0  415663104.0   
199318      199318       4896048         77006    77007.0    4472315.0   
199367      199367       4855089         66085    66086.0          0.0   
199428      199428       4851248         66067    66068.0          0.0   
199660      199660       4894637         75686    75687.0   41307545.0   
199807      199807       4875316         75914    75915.0          0.0   
199830      199830       4850835         66065    66066.0          0.0   
199850      199850       4846281         65902    65903.0          0.0   
199914      199914       4874985         74055    74056.0          0.0   
199917      199917       4853390         66077    66078.0          0.0   

          year  borough                   building_category tax_class_present  \
95      2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
132     2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
723     2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
791     2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
885     2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
972     2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
1042    2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
1113    2015.0      4.0  15  CONDOS - 2-10 UNIT RESIDENTIAL                2C   
1219    2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
1247    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
1610    2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
1734    2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
1964    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2032    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2117    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2144    2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
2156    2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
2183    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2317    2015.0      4.0      12  CONDOS - WALKUP APARTMENTS                 2   
2458    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2569    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
2613    2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
2634    2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
3005    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
3163    2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
3190    2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
3292    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
3337    2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
3446    2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
3588    2015.0      4.0                 22  STORE BUILDINGS                 4   
...        ...      ...                                 ...               ...   
197606  2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
197818  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
197929  2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
197981  2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
198174  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
198240  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
198251  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
198314  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
198641  2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
198664  2015.0      4.0              29  COMMERCIAL GARAGES                 4   
198743  2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
198751  2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
198785  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
198883  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
198913  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
198920  2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
199019  2015.0      4.0     07  RENTALS - WALKUP APARTMENTS                 2   
199081  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
199088  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
199160  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
199245  2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
199318  2015.0      4.0            02  TWO FAMILY DWELLINGS                 1   
199367  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
199428  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
199660  2015.0      4.0            01  ONE FAMILY DWELLINGS                 1   
199807  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
199830  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   
199850  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
199914  2015.0      4.0              04  TAX CLASS 1 CONDOS                1A   
199917  2015.0      4.0    13  CONDOS - ELEVATOR APARTMENTS                 2   

          block    ...           22        23        24        25        26  \
95       4735.0    ...     0.387927 -0.322232 -0.030267  0.000508  0.281295   
132     11547.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
723     15699.0    ...     0.385935 -0.313424 -0.030138 -0.119021  0.267601   
791     16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
885     11431.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
972     16226.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1042    11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1113    11426.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1219    15681.0    ...     0.426256 -0.346731 -0.032077 -0.097411  0.298292   
1247    16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1610    14248.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1734    14247.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
1964    13940.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2032    16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2117    16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2144    16226.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2156     2891.0    ...     0.560812 -0.464416 -0.042419  0.000808  0.403108   
2183    16174.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2317    16173.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2458    16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2569    16174.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2613    16112.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
2634    11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3005    11544.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3163    11431.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3190    14243.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3292    16227.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3337    15768.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
3446    11137.0    ...     0.408205 -0.337107 -0.032118 -0.014619  0.293508   
3588     9937.0    ...     0.408476 -0.336719 -0.031075 -0.013746  0.293169   
...         ...    ...          ...       ...       ...       ...       ...   
197606  10651.0    ...     0.410038 -0.340183 -0.029790  0.010097  0.295533   
197818  11425.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
197929  16091.0    ...     0.551610 -0.460620 -0.043147  0.127454  0.405904   
197981   9337.0    ...     0.494993 -0.409224 -0.037271 -0.037169  0.354415   
198174  15768.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198240  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198251  15933.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198314  11431.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198641  14247.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198664   5363.0    ...     0.000000  0.000000  0.000000  0.000000  0.000000   
198743  16058.0    ...     0.543680 -0.457424 -0.040990  0.166653  0.403033   
198751  15684.0    ...     0.408004 -0.331908 -0.030712 -0.109610  0.284022   
198785  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198883  16113.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198913  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
198920  11108.0    ...     0.427254 -0.353690 -0.031233  0.006198  0.307077   
199019  16164.0    ...     0.625626 -0.515106 -0.048587 -0.050995  0.447258   
199081  16226.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199088  11444.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199160  16234.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199245  15663.0    ...     0.440084 -0.354660 -0.034358 -0.205868  0.302985   
199318   4723.0    ...     0.210929 -0.178074 -0.015426 -0.017193  0.153570   
199367  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199428  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199660  13075.0    ...     0.380458 -0.314706 -0.030955 -0.015017  0.273836   
199807  12572.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199830  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199850  11425.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199914  13682.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   
199917  11417.0    ...     0.501195 -0.413724 -0.039621 -0.021067  0.360024   

              27        28        29        30        31  
95      0.479920 -0.349796 -0.338143  0.235588 -0.065291  
132     0.610382 -0.451644 -0.438753  0.303116 -0.085806  
723     0.458685 -0.356530 -0.351164  0.253646 -0.081334  
791     0.610382 -0.451644 -0.438753  0.303116 -0.085806  
885     0.610382 -0.451644 -0.438753  0.303116 -0.085806  
972     0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1042    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1113    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1219    0.506828 -0.392578 -0.384352  0.274474 -0.084492  
1247    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1610    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1734    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
1964    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2032    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2117    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2144    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2156    0.691306 -0.504016 -0.485738  0.339094 -0.094654  
2183    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2317    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2458    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2569    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2613    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
2634    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3005    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3163    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3190    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3292    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3337    0.610382 -0.451644 -0.438753  0.303116 -0.085806  
3446    0.495788 -0.367691 -0.357304  0.245389 -0.069261  
3588    0.495251 -0.368265 -0.358123  0.245211 -0.068730  
...          ...       ...       ...       ...       ...  
197606  0.508910 -0.368444 -0.353686  0.248279 -0.068813  
197818  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
197929  0.719680 -0.482383 -0.458197  0.321382 -0.081113  
197981  0.584907 -0.449654 -0.435975  0.295537 -0.083338  
198174  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198240  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198251  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198314  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198641  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198664  0.000000  0.000000  0.000000  0.000000  0.000000  
198743  0.726663 -0.471201 -0.443774  0.313874 -0.076569  
198751  0.485396 -0.376905 -0.369931  0.266218 -0.083527  
198785  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198883  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198913  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
198920  0.530706 -0.383317 -0.368457  0.259120 -0.072215  
199019  0.743344 -0.567611 -0.552715  0.376288 -0.106973  
199081  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199088  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199160  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199245  0.458690 -0.417126 -0.418709  0.275103 -0.088279  
199318  0.259426 -0.191587 -0.184952  0.129599 -0.037394  
199367  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199428  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199660  0.465644 -0.342264 -0.332734  0.229576 -0.064901  
199807  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199830  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199850  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199914  0.610382 -0.451644 -0.438753  0.303116 -0.085806  
199917  0.610382 -0.451644 -0.438753  0.303116 -0.085806  

[2407 rows x 62 columns]

In [4]:

# =============================================================================
# import matplotlib.pyplot as plt
# dfborough = df.groupby('borough').mean().reset_index()
# x = dfborough['borough']
# y = dfborough['price']
# plt.bar(x,y)
# plt.show()
# print('ok')
# dfborough = df.groupby('borough').mean().reset_index()
# x = dfborough['borough']
# y = dfborough['price']
# plt.bar(x,y)
# plt.show()
# print('ok')
# 
# =============================================================================



#sales_bor_dummies = significant_dummies(df['borough'],threshold=.01)
borough_dummies = pd.get_dummies(df["borough"], prefix=df["borough"].name)
df = df.drop(columns="borough")
df = pd.concat([df, borough_dummies], axis=1)


import pandas as pd
import numpy as np

#df = pd.read_csv('addresses.csv')
print(df.columns)

Index([u'Unnamed: 0', u'Unnamed: 0.1', u'Unnamed: 0_x', u'Sale_id_x',
       u'building_id', u'year', u'building_category', u'tax_class_present',
       u'block', u'lot', u'easmnt', u'building_class_present', u'apt', u'zip',
       u'residential_unit', u'community_unit', u'total_units', u'land_sqft',
       u'tot_sqft', u'year_built', u'tax_class_sale', u'building_class_sale',
       u'sale_date', u'price', u'usable', u'long', u'lat', u'Unnamed: 0_y',
       u'Sale_id_y', u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8',
       u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18',
       u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28',
       u'29', u'30', u'31', u'borough_Bronx', u'borough_Brooklyn',
       u'borough_Manhattan', u'borough_Queens', u'borough_Staten Island'],
      dtype='object')

In [13]:

df2 = df_address

print(df2.tail)
df2.dropna(inplace=True)
print(df2.tail())
X = df2.drop(columns=['price', 'building_id'])
Y = df2.price
Y = np.array(Y).reshape(-1)

<bound method DataFrame.tail of         building_id     price  address_1 AVENUE  \
2       110061303.0   87193.0               0.0   
3       110061302.0  128200.0               0.0   
8       110061303.0   82233.0               0.0   
9       110061303.0  142591.0               0.0   
10        1100937.0   95428.0               0.0   
17      110061302.0   88445.0               0.0   
23      110061303.0   75135.0               0.0   
25      110061302.0   79024.0               0.0   
27        1100937.0   83645.0               0.0   
28      110061302.0   78205.0               0.0   
29      110061302.0   85000.0               0.0   
31        1100937.0   81495.0               0.0   
34      110061303.0   78305.0               0.0   
37      110061303.0   90000.0               0.0   
41      110061302.0  138330.0               0.0   
45        1100937.0   83000.0               0.0   
49      110061303.0  124647.0               0.0   
57        1100937.0   96852.0               0.0   
59      110061302.0  202787.0               0.0   
60        1100937.0  161850.0               0.0   
74      110061302.0   82894.0               0.0   
80        1100937.0   97844.0               0.0   
83      110061303.0   83695.0               0.0   
85      110061303.0   98353.0               0.0   
86        1100937.0   83000.0               0.0   
87      110061302.0  123673.0               0.0   
89      110061302.0  132318.0               0.0   
90      110061303.0  100395.0               0.0   
94        1100937.0   85000.0               0.0   
95       44735129.0  775000.0               0.0   
...             ...       ...               ...   
199879    1100937.0   85000.0               0.0   
199883  110061303.0  140734.0               0.0   
199885    1100937.0   83000.0               0.0   
199887  110061303.0  129744.0               0.0   
199896  110061302.0   82894.0               0.0   
199903  110061303.0   87995.0               0.0   
199908  110061303.0  165345.0               0.0   
199909  110061303.0  107100.0               0.0   
199914          0.0  290000.0               0.0   
199916  110061303.0   78495.0               0.0   
199917          0.0  198000.0               0.0   
199921    1100937.0  107800.0               0.0   
199923  110061303.0   90000.0               0.0   
199926  110061303.0  148552.0               0.0   
199928  110061302.0   77179.0               0.0   
199934    1100937.0  195651.0               0.0   
199935    1100937.0   98010.0               0.0   
199936  110061302.0   99515.0               0.0   
199949    1100937.0  115657.0               0.0   
199953  110061303.0   88016.0               0.0   
199959  110061303.0   85939.0               0.0   
199961  110061303.0  139444.0               0.0   
199962    1100937.0   82882.0               0.0   
199963  110061303.0   88016.0               0.0   
199971  110061302.0   81185.0               0.0   
199975  110061302.0   88835.0               0.0   
199979  110061303.0   78738.0               0.0   
199984  110061303.0  125477.0               0.0   
199993  110061302.0   78606.0               0.0   
199994    1100937.0   87532.0               0.0   

        address_1-76 BEACH 101 STREET  address_10-11 NAMEOKE STREET  \
2                                 0.0                           0.0   
3                                 0.0                           0.0   
8                                 0.0                           0.0   
9                                 0.0                           0.0   
10                                0.0                           0.0   
17                                0.0                           0.0   
23                                0.0                           0.0   
25                                0.0                           0.0   
27                                0.0                           0.0   
28                                0.0                           0.0   
29                                0.0                           0.0   
31                                0.0                           0.0   
34                                0.0                           0.0   
37                                0.0                           0.0   
41                                0.0                           0.0   
45                                0.0                           0.0   
49                                0.0                           0.0   
57                                0.0                           0.0   
59                                0.0                           0.0   
60                                0.0                           0.0   
74                                0.0                           0.0   
80                                0.0                           0.0   
83                                0.0                           0.0   
85                                0.0                           0.0   
86                                0.0                           0.0   
87                                0.0                           0.0   
89                                0.0                           0.0   
90                                0.0                           0.0   
94                                0.0                           0.0   
95                                0.0                           0.0   
...                               ...                           ...   
199879                            0.0                           0.0   
199883                            0.0                           0.0   
199885                            0.0                           0.0   
199887                            0.0                           0.0   
199896                            0.0                           0.0   
199903                            0.0                           0.0   
199908                            0.0                           0.0   
199909                            0.0                           0.0   
199914                            0.0                           0.0   
199916                            0.0                           0.0   
199917                            0.0                           0.0   
199921                            0.0                           0.0   
199923                            0.0                           0.0   
199926                            0.0                           0.0   
199928                            0.0                           0.0   
199934                            0.0                           0.0   
199935                            0.0                           0.0   
199936                            0.0                           0.0   
199949                            0.0                           0.0   
199953                            0.0                           0.0   
199959                            0.0                           0.0   
199961                            0.0                           0.0   
199962                            0.0                           0.0   
199963                            0.0                           0.0   
199971                            0.0                           0.0   
199975                            0.0                           0.0   
199979                            0.0                           0.0   
199984                            0.0                           0.0   
199993                            0.0                           0.0   
199994                            0.0                           0.0   

        address_10-62 READS LANE  address_100-03 ROCKAWAY BEACH BLVD  \
2                            0.0                                 0.0   
3                            0.0                                 0.0   
8                            0.0                                 0.0   
9                            0.0                                 0.0   
10                           0.0                                 0.0   
17                           0.0                                 0.0   
23                           0.0                                 0.0   
25                           0.0                                 0.0   
27                           0.0                                 0.0   
28                           0.0                                 0.0   
29                           0.0                                 0.0   
31                           0.0                                 0.0   
34                           0.0                                 0.0   
37                           0.0                                 0.0   
41                           0.0                                 0.0   
45                           0.0                                 0.0   
49                           0.0                                 0.0   
57                           0.0                                 0.0   
59                           0.0                                 0.0   
60                           0.0                                 0.0   
74                           0.0                                 0.0   
80                           0.0                                 0.0   
83                           0.0                                 0.0   
85                           0.0                                 0.0   
86                           0.0                                 0.0   
87                           0.0                                 0.0   
89                           0.0                                 0.0   
90                           0.0                                 0.0   
94                           0.0                                 0.0   
95                           0.0                                 0.0   
...                          ...                                 ...   
199879                       0.0                                 0.0   
199883                       0.0                                 0.0   
199885                       0.0                                 0.0   
199887                       0.0                                 0.0   
199896                       0.0                                 0.0   
199903                       0.0                                 0.0   
199908                       0.0                                 0.0   
199909                       0.0                                 0.0   
199914                       0.0                                 0.0   
199916                       0.0                                 0.0   
199917                       0.0                                 0.0   
199921                       0.0                                 0.0   
199923                       0.0                                 0.0   
199926                       0.0                                 0.0   
199928                       0.0                                 0.0   
199934                       0.0                                 0.0   
199935                       0.0                                 0.0   
199936                       0.0                                 0.0   
199949                       0.0                                 0.0   
199953                       0.0                                 0.0   
199959                       0.0                                 0.0   
199961                       0.0                                 0.0   
199962                       0.0                                 0.0   
199963                       0.0                                 0.0   
199971                       0.0                                 0.0   
199975                       0.0                                 0.0   
199979                       0.0                                 0.0   
199984                       0.0                                 0.0   
199993                       0.0                                 0.0   
199994                       0.0                                 0.0   

        address_100-23 39 AVENUE  address_101-13 97TH STREET  \
2                            0.0                         0.0   
3                            0.0                         0.0   
8                            0.0                         0.0   
9                            0.0                         0.0   
10                           0.0                         0.0   
17                           0.0                         0.0   
23                           0.0                         0.0   
25                           0.0                         0.0   
27                           0.0                         0.0   
28                           0.0                         0.0   
29                           0.0                         0.0   
31                           0.0                         0.0   
34                           0.0                         0.0   
37                           0.0                         0.0   
41                           0.0                         0.0   
45                           0.0                         0.0   
49                           0.0                         0.0   
57                           0.0                         0.0   
59                           0.0                         0.0   
60                           0.0                         0.0   
74                           0.0                         0.0   
80                           0.0                         0.0   
83                           0.0                         0.0   
85                           0.0                         0.0   
86                           0.0                         0.0   
87                           0.0                         0.0   
89                           0.0                         0.0   
90                           0.0                         0.0   
94                           0.0                         0.0   
95                           0.0                         0.0   
...                          ...                         ...   
199879                       0.0                         0.0   
199883                       0.0                         0.0   
199885                       0.0                         0.0   
199887                       0.0                         0.0   
199896                       0.0                         0.0   
199903                       0.0                         0.0   
199908                       0.0                         0.0   
199909                       0.0                         0.0   
199914                       0.0                         0.0   
199916                       0.0                         0.0   
199917                       0.0                         0.0   
199921                       0.0                         0.0   
199923                       0.0                         0.0   
199926                       0.0                         0.0   
199928                       0.0                         0.0   
199934                       0.0                         0.0   
199935                       0.0                         0.0   
199936                       0.0                         0.0   
199949                       0.0                         0.0   
199953                       0.0                         0.0   
199959                       0.0                         0.0   
199961                       0.0                         0.0   
199962                       0.0                         0.0   
199963                       0.0                         0.0   
199971                       0.0                         0.0   
199975                       0.0                         0.0   
199979                       0.0                         0.0   
199984                       0.0                         0.0   
199993                       0.0                         0.0   
199994                       0.0                         0.0   

        address_101-14 SHORE FRONT PARKWAY          ...           \
2                                      0.0          ...            
3                                      0.0          ...            
8                                      0.0          ...            
9                                      0.0          ...            
10                                     0.0          ...            
17                                     0.0          ...            
23                                     0.0          ...            
25                                     0.0          ...            
27                                     0.0          ...            
28                                     0.0          ...            
29                                     0.0          ...            
31                                     0.0          ...            
34                                     0.0          ...            
37                                     0.0          ...            
41                                     0.0          ...            
45                                     0.0          ...            
49                                     0.0          ...            
57                                     0.0          ...            
59                                     0.0          ...            
60                                     0.0          ...            
74                                     0.0          ...            
80                                     0.0          ...            
83                                     0.0          ...            
85                                     0.0          ...            
86                                     0.0          ...            
87                                     0.0          ...            
89                                     0.0          ...            
90                                     0.0          ...            
94                                     0.0          ...            
95                                     0.0          ...            
...                                    ...          ...            
199879                                 0.0          ...            
199883                                 0.0          ...            
199885                                 0.0          ...            
199887                                 0.0          ...            
199896                                 0.0          ...            
199903                                 0.0          ...            
199908                                 0.0          ...            
199909                                 0.0          ...            
199914                                 0.0          ...            
199916                                 0.0          ...            
199917                                 0.0          ...            
199921                                 0.0          ...            
199923                                 0.0          ...            
199926                                 0.0          ...            
199928                                 0.0          ...            
199934                                 0.0          ...            
199935                                 0.0          ...            
199936                                 0.0          ...            
199949                                 0.0          ...            
199953                                 0.0          ...            
199959                                 0.0          ...            
199961                                 0.0          ...            
199962                                 0.0          ...            
199963                                 0.0          ...            
199971                                 0.0          ...            
199975                                 0.0          ...            
199979                                 0.0          ...            
199984                                 0.0          ...            
199993                                 0.0          ...            
199994                                 0.0          ...            

        address_WINDHAM LOOP  address_WINEGAR LANE  address_WINTHROP STREET  \
2                        0.0                   0.0                      0.0   
3                        0.0                   0.0                      0.0   
8                        0.0                   0.0                      0.0   
9                        0.0                   0.0                      0.0   
10                       0.0                   0.0                      0.0   
17                       0.0                   0.0                      0.0   
23                       0.0                   0.0                      0.0   
25                       0.0                   0.0                      0.0   
27                       0.0                   0.0                      0.0   
28                       0.0                   0.0                      0.0   
29                       0.0                   0.0                      0.0   
31                       0.0                   0.0                      0.0   
34                       0.0                   0.0                      0.0   
37                       0.0                   0.0                      0.0   
41                       0.0                   0.0                      0.0   
45                       0.0                   0.0                      0.0   
49                       0.0                   0.0                      0.0   
57                       0.0                   0.0                      0.0   
59                       0.0                   0.0                      0.0   
60                       0.0                   0.0                      0.0   
74                       0.0                   0.0                      0.0   
80                       0.0                   0.0                      0.0   
83                       0.0                   0.0                      0.0   
85                       0.0                   0.0                      0.0   
86                       0.0                   0.0                      0.0   
87                       0.0                   0.0                      0.0   
89                       0.0                   0.0                      0.0   
90                       0.0                   0.0                      0.0   
94                       0.0                   0.0                      0.0   
95                       0.0                   0.0                      0.0   
...                      ...                   ...                      ...   
199879                   0.0                   0.0                      0.0   
199883                   0.0                   0.0                      0.0   
199885                   0.0                   0.0                      0.0   
199887                   0.0                   0.0                      0.0   
199896                   0.0                   0.0                      0.0   
199903                   0.0                   0.0                      0.0   
199908                   0.0                   0.0                      0.0   
199909                   0.0                   0.0                      0.0   
199914                   0.0                   0.0                      0.0   
199916                   0.0                   0.0                      0.0   
199917                   0.0                   0.0                      0.0   
199921                   0.0                   0.0                      0.0   
199923                   0.0                   0.0                      0.0   
199926                   0.0                   0.0                      0.0   
199928                   0.0                   0.0                      0.0   
199934                   0.0                   0.0                      0.0   
199935                   0.0                   0.0                      0.0   
199936                   0.0                   0.0                      0.0   
199949                   0.0                   0.0                      0.0   
199953                   0.0                   0.0                      0.0   
199959                   0.0                   0.0                      0.0   
199961                   0.0                   0.0                      0.0   
199962                   0.0                   0.0                      0.0   
199963                   0.0                   0.0                      0.0   
199971                   0.0                   0.0                      0.0   
199975                   0.0                   0.0                      0.0   
199979                   0.0                   0.0                      0.0   
199984                   0.0                   0.0                      0.0   
199993                   0.0                   0.0                      0.0   
199994                   0.0                   0.0                      0.0   

        address_WIRT AVENUE  address_WOODBINE STREET  address_WOODHULL AVENUE  \
2                       0.0                      0.0                      0.0   
3                       0.0                      0.0                      0.0   
8                       0.0                      0.0                      0.0   
9                       0.0                      0.0                      0.0   
10                      0.0                      0.0                      0.0   
17                      0.0                      0.0                      0.0   
23                      0.0                      0.0                      0.0   
25                      0.0                      0.0                      0.0   
27                      0.0                      0.0                      0.0   
28                      0.0                      0.0                      0.0   
29                      0.0                      0.0                      0.0   
31                      0.0                      0.0                      0.0   
34                      0.0                      0.0                      0.0   
37                      0.0                      0.0                      0.0   
41                      0.0                      0.0                      0.0   
45                      0.0                      0.0                      0.0   
49                      0.0                      0.0                      0.0   
57                      0.0                      0.0                      0.0   
59                      0.0                      0.0                      0.0   
60                      0.0                      0.0                      0.0   
74                      0.0                      0.0                      0.0   
80                      0.0                      0.0                      0.0   
83                      0.0                      0.0                      0.0   
85                      0.0                      0.0                      0.0   
86                      0.0                      0.0                      0.0   
87                      0.0                      0.0                      0.0   
89                      0.0                      0.0                      0.0   
90                      0.0                      0.0                      0.0   
94                      0.0                      0.0                      0.0   
95                      0.0                      0.0                      0.0   
...                     ...                      ...                      ...   
199879                  0.0                      0.0                      0.0   
199883                  0.0                      0.0                      0.0   
199885                  0.0                      0.0                      0.0   
199887                  0.0                      0.0                      0.0   
199896                  0.0                      0.0                      0.0   
199903                  0.0                      0.0                      0.0   
199908                  0.0                      0.0                      0.0   
199909                  0.0                      0.0                      0.0   
199914                  0.0                      0.0                      0.0   
199916                  0.0                      0.0                      0.0   
199917                  0.0                      0.0                      0.0   
199921                  0.0                      0.0                      0.0   
199923                  0.0                      0.0                      0.0   
199926                  0.0                      0.0                      0.0   
199928                  0.0                      0.0                      0.0   
199934                  0.0                      0.0                      0.0   
199935                  0.0                      0.0                      0.0   
199936                  0.0                      0.0                      0.0   
199949                  0.0                      0.0                      0.0   
199953                  0.0                      0.0                      0.0   
199959                  0.0                      0.0                      0.0   
199961                  0.0                      0.0                      0.0   
199962                  0.0                      0.0                      0.0   
199963                  0.0                      0.0                      0.0   
199971                  0.0                      0.0                      0.0   
199975                  0.0                      0.0                      0.0   
199979                  0.0                      0.0                      0.0   
199984                  0.0                      0.0                      0.0   
199993                  0.0                      0.0                      0.0   
199994                  0.0                      0.0                      0.0   

        address_WOODROW COURT  address_WORTH STREET  address_WYONA STREET  \
2                         0.0                   0.0                   0.0   
3                         0.0                   0.0                   0.0   
8                         0.0                   0.0                   0.0   
9                         0.0                   0.0                   0.0   
10                        0.0                   0.0                   0.0   
17                        0.0                   0.0                   0.0   
23                        0.0                   0.0                   0.0   
25                        0.0                   0.0                   0.0   
27                        0.0                   0.0                   0.0   
28                        0.0                   0.0                   0.0   
29                        0.0                   0.0                   0.0   
31                        0.0                   0.0                   0.0   
34                        0.0                   0.0                   0.0   
37                        0.0                   0.0                   0.0   
41                        0.0                   0.0                   0.0   
45                        0.0                   0.0                   0.0   
49                        0.0                   0.0                   0.0   
57                        0.0                   0.0                   0.0   
59                        0.0                   0.0                   0.0   
60                        0.0                   0.0                   0.0   
74                        0.0                   0.0                   0.0   
80                        0.0                   0.0                   0.0   
83                        0.0                   0.0                   0.0   
85                        0.0                   0.0                   0.0   
86                        0.0                   0.0                   0.0   
87                        0.0                   0.0                   0.0   
89                        0.0                   0.0                   0.0   
90                        0.0                   0.0                   0.0   
94                        0.0                   0.0                   0.0   
95                        0.0                   0.0                   0.0   
...                       ...                   ...                   ...   
199879                    0.0                   0.0                   0.0   
199883                    0.0                   0.0                   0.0   
199885                    0.0                   0.0                   0.0   
199887                    0.0                   0.0                   0.0   
199896                    0.0                   0.0                   0.0   
199903                    0.0                   0.0                   0.0   
199908                    0.0                   0.0                   0.0   
199909                    0.0                   0.0                   0.0   
199914                    0.0                   0.0                   0.0   
199916                    0.0                   0.0                   0.0   
199917                    0.0                   0.0                   0.0   
199921                    0.0                   0.0                   0.0   
199923                    0.0                   0.0                   0.0   
199926                    0.0                   0.0                   0.0   
199928                    0.0                   0.0                   0.0   
199934                    0.0                   0.0                   0.0   
199935                    0.0                   0.0                   0.0   
199936                    0.0                   0.0                   0.0   
199949                    0.0                   0.0                   0.0   
199953                    0.0                   0.0                   0.0   
199959                    0.0                   0.0                   0.0   
199961                    0.0                   0.0                   0.0   
199962                    0.0                   0.0                   0.0   
199963                    0.0                   0.0                   0.0   
199971                    0.0                   0.0                   0.0   
199975                    0.0                   0.0                   0.0   
199979                    0.0                   0.0                   0.0   
199984                    0.0                   0.0                   0.0   
199993                    0.0                   0.0                   0.0   
199994                    0.0                   0.0                   0.0   

        address_YOUNG AVENUE  
2                        0.0  
3                        0.0  
8                        0.0  
9                        0.0  
10                       0.0  
17                       0.0  
23                       0.0  
25                       0.0  
27                       0.0  
28                       0.0  
29                       0.0  
31                       0.0  
34                       0.0  
37                       0.0  
41                       0.0  
45                       0.0  
49                       0.0  
57                       0.0  
59                       0.0  
60                       0.0  
74                       0.0  
80                       0.0  
83                       0.0  
85                       0.0  
86                       0.0  
87                       0.0  
89                       0.0  
90                       0.0  
94                       0.0  
95                       0.0  
...                      ...  
199879                   0.0  
199883                   0.0  
199885                   0.0  
199887                   0.0  
199896                   0.0  
199903                   0.0  
199908                   0.0  
199909                   0.0  
199914                   0.0  
199916                   0.0  
199917                   0.0  
199921                   0.0  
199923                   0.0  
199926                   0.0  
199928                   0.0  
199934                   0.0  
199935                   0.0  
199936                   0.0  
199949                   0.0  
199953                   0.0  
199959                   0.0  
199961                   0.0  
199962                   0.0  
199963                   0.0  
199971                   0.0  
199975                   0.0  
199979                   0.0  
199984                   0.0  
199993                   0.0  
199994                   0.0  

[57407 rows x 1818 columns]>
        building_id     price  address_1 AVENUE  \
199975  110061302.0   88835.0               0.0   
199979  110061303.0   78738.0               0.0   
199984  110061303.0  125477.0               0.0   
199993  110061302.0   78606.0               0.0   
199994    1100937.0   87532.0               0.0   

        address_1-76 BEACH 101 STREET  address_10-11 NAMEOKE STREET  \
199975                            0.0                           0.0   
199979                            0.0                           0.0   
199984                            0.0                           0.0   
199993                            0.0                           0.0   
199994                            0.0                           0.0   

        address_10-62 READS LANE  address_100-03 ROCKAWAY BEACH BLVD  \
199975                       0.0                                 0.0   
199979                       0.0                                 0.0   
199984                       0.0                                 0.0   
199993                       0.0                                 0.0   
199994                       0.0                                 0.0   

        address_100-23 39 AVENUE  address_101-13 97TH STREET  \
199975                       0.0                         0.0   
199979                       0.0                         0.0   
199984                       0.0                         0.0   
199993                       0.0                         0.0   
199994                       0.0                         0.0   

        address_101-14 SHORE FRONT PARKWAY          ...           \
199975                                 0.0          ...            
199979                                 0.0          ...            
199984                                 0.0          ...            
199993                                 0.0          ...            
199994                                 0.0          ...            

        address_WINDHAM LOOP  address_WINEGAR LANE  address_WINTHROP STREET  \
199975                   0.0                   0.0                      0.0   
199979                   0.0                   0.0                      0.0   
199984                   0.0                   0.0                      0.0   
199993                   0.0                   0.0                      0.0   
199994                   0.0                   0.0                      0.0   

        address_WIRT AVENUE  address_WOODBINE STREET  address_WOODHULL AVENUE  \
199975                  0.0                      0.0                      0.0   
199979                  0.0                      0.0                      0.0   
199984                  0.0                      0.0                      0.0   
199993                  0.0                      0.0                      0.0   
199994                  0.0                      0.0                      0.0   

        address_WOODROW COURT  address_WORTH STREET  address_WYONA STREET  \
199975                    0.0                   0.0                   0.0   
199979                    0.0                   0.0                   0.0   
199984                    0.0                   0.0                   0.0   
199993                    0.0                   0.0                   0.0   
199994                    0.0                   0.0                   0.0   

        address_YOUNG AVENUE  
199975                   0.0  
199979                   0.0  
199984                   0.0  
199993                   0.0  
199994                   0.0  

[5 rows x 1818 columns]

In [14]:

from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = .20, random_state = 40)
X_train.shape, X_test.shape

from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [11]:

# Fitting the model
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [12]:

# R^2 scores
print('Train:', lr.score(X_train, y_train))
print('Validation:', lr.score(X_test, y_test))

from sklearn.metrics import mean_absolute_error

print('MAE:', mean_absolute_error(y_test, lr.predict(X_test)))

print('Random prediction:', abs(y_test - y_test.mean()).mean() )

('Train:', 0.48856354945998226)
('Validation:', -1.2107561430695182e+20)
('MAE:', 930308784373814.2)
('Random prediction:', 110117.39048009258)