Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 647
Image: ubuntu2004
Kernel: Python 3 (system-wide)
#Using the attached csv we are going to practice some default dataframe methods and functions as a refresher, and then we'll take a look at the actual data set and clustering algorithm.
#Useful video: https://www.youtube.com/watch?v=F6kmIpWWEdU&list=PLeo1K3hjS3uuASpe-1LjfG5f14Bnozjwy&index=2
#https://www.geeksforgeeks.org/create-a-new-column-in-pandas-dataframe-based-on-the-existing-columns/
#Using K Means Clustering, determine any insights from the 2012 arrest data.
import pandas as pd #allows us simple data access features
import numpy as np #allows us to use numpy specific attributes
import matplotlib.pyplot as plt #allows us to create plotting and visualizations for our program
import sklearn #imports the machine learning algorithm we desire
from sklearn.cluster import KMeans #imports the KMeans clustering algorithm specifically
nypd_data = pd.read_csv("nypd_arrest_2012.csv") #searches for a local csv file and converts into a pandas dataframe. #A dataframe is almost like an excel/sheets data structure that exists in your program. #A dataframe behaves like an object.
nypd_data.head() #prints the first 5 rows of data in our dataframe.
ARREST_KEY ARREST_DATE PD_CD PD_DESC KY_CD OFNS_DESC LAW_CODE LAW_CAT_CD ARREST_BORO ARREST_PRECINCT JURISDICTION_CODE AGE_GROUP PERP_SEX PERP_RACE X_COORD_CD Y_COORD_CD Latitude Longitude New Georeferenced Column
0 211666750 04/01/2020 508.0 DRUG PARAPHERNALIA, POSSESSE 235.0 DANGEROUS DRUGS PL 2205003 M K 60 0 25-44 F WHITE 988243 150934 40.580962 -73.985627 POINT (-73.98562653999994 40.58096231800005)
1 211666559 04/01/2020 792.0 WEAPONS POSSESSION 1 & 2 118.0 DANGEROUS WEAPONS PL 265101B F K 63 0 45-64 M WHITE 1010797 163280 40.614811 -73.904383 POINT (-73.90438310299999 40.61481073200008)
2 211651547 04/01/2020 101.0 ASSAULT 3 344.0 ASSAULT 3 & RELATED OFFENSES PL 1200001 M S 121 0 25-44 M WHITE 950704 159671 40.604881 -74.120811 POINT (-74.12081117499997 40.60488103400007)
3 211665193 04/01/2020 109.0 ASSAULT 2,1,UNCLASSIFIED 106.0 FELONY ASSAULT PL 1200501 F Q 100 0 45-64 M WHITE 1035212 152992 40.586466 -73.816522 POINT (-73.81652172299994 40.586465529000066)
4 211651539 04/01/2020 105.0 STRANGULATION 1ST 106.0 FELONY ASSAULT PL 1211200 F K 73 0 18-24 M UNKNOWN 1008276 183623 40.670655 -73.913391 POINT (-73.91339091999998 40.670655072000045)
nypd_data.tail() #prints the last five data entries
ARREST_KEY ARREST_DATE PD_CD PD_DESC KY_CD OFNS_DESC LAW_CODE LAW_CAT_CD ARREST_BORO ARREST_PRECINCT JURISDICTION_CODE AGE_GROUP PERP_SEX PERP_RACE X_COORD_CD Y_COORD_CD Latitude Longitude New Georeferenced Column
8291 212524393 04/30/2020 397.0 ROBBERY,OPEN AREA UNCLASSIFIED 105.0 ROBBERY PL 1601502 F Q 108 0 25-44 M BLACK 1008502 210409 40.744176 -73.912480 POINT (-73.91247974799995 40.74417566900007)
8292 212542054 04/30/2020 847.0 NY STATE LAWS,UNCLASSIFIED FEL 125.0 NYS LAWS-UNCLASSIFIED FELONY VTL05110FE F B 52 0 65+ M BLACK 1017885 256368 40.870290 -73.878388 POINT (-73.87838773799996 40.870289599000046)
8293 212531230 04/30/2020 779.0 PUBLIC ADMINISTRATION,UNCLASSI 126.0 MISCELLANEOUS PENAL LAW PL 215510B F Q 113 0 25-44 M BLACK 1042559 190547 40.689500 -73.789746 POINT (-73.78974570499997 40.689500416000044)
8294 212539364 04/30/2020 397.0 ROBBERY,OPEN AREA UNCLASSIFIED 105.0 ROBBERY PL 1600500 F M 19 0 25-44 M BLACK 994709 218899 40.767506 -73.962243 POINT (-73.96224332699995 40.76750562600006)
8295 212516625 04/30/2020 503.0 CONTROLLED SUBSTANCE,INTENT TO 117.0 DANGEROUS DRUGS PL 2201601 F B 41 0 25-44 M BLACK 1015419 237298 40.817957 -73.887393 POINT (-73.88739265399994 40.81795712400003)
nypd_data.shape #(rows,col)
(8296, 19)
nypd_data.dtypes #Tells us the type of data that each of our entries are.
ARREST_KEY int64 ARREST_DATE object PD_CD float64 PD_DESC object KY_CD float64 OFNS_DESC object LAW_CODE object LAW_CAT_CD object ARREST_BORO object ARREST_PRECINCT int64 JURISDICTION_CODE int64 AGE_GROUP object PERP_SEX object PERP_RACE object X_COORD_CD int64 Y_COORD_CD int64 Latitude float64 Longitude float64 New Georeferenced Column object dtype: object
nypd_data.describe() #gives us the count, mean, standard deviation, min, quartiles (the 50% row is the median) and max of each numeric column
ARREST_KEY PD_CD KY_CD ARREST_PRECINCT JURISDICTION_CODE X_COORD_CD Y_COORD_CD Latitude Longitude
count 8.296000e+03 8294.000000 8287.000000 8296.000000 8296.00000 8.296000e+03 8296.000000 8296.000000 8296.000000
mean 2.121254e+08 371.991681 231.880174 64.921890 1.08703 1.006293e+06 208044.256630 40.737663 -73.920432
std 2.651079e+05 254.724013 131.551535 34.279202 9.26257 2.194875e+04 30804.020531 0.084557 0.079146
min 2.116338e+08 0.000000 101.000000 1.000000 0.00000 9.147250e+05 121131.000000 40.498905 -74.250035
25% 2.118964e+08 113.000000 109.000000 40.000000 0.00000 9.946040e+05 185056.000000 40.674600 -73.962674
50% 2.121417e+08 339.000000 233.000000 66.000000 0.00000 1.005947e+06 204819.000000 40.728860 -73.921668
75% 2.123598e+08 511.000000 344.000000 102.000000 0.00000 1.017982e+06 236544.000000 40.815930 -73.878056
max 2.125640e+08 969.000000 995.000000 123.000000 97.00000 1.064833e+06 271820.000000 40.912723 -73.709220
nypd_data.columns
Index(['ARREST_KEY', 'ARREST_DATE', 'PD_CD', 'PD_DESC', 'KY_CD', 'OFNS_DESC', 'LAW_CODE', 'LAW_CAT_CD', 'ARREST_BORO', 'ARREST_PRECINCT', 'JURISDICTION_CODE', 'AGE_GROUP', 'PERP_SEX', 'PERP_RACE', 'X_COORD_CD', 'Y_COORD_CD', 'Latitude', 'Longitude', 'New Georeferenced Column'], dtype='object')
nypd_data[0:2] #Data Slicing here to access the first two rows of my dataframe
ARREST_KEY ARREST_DATE PD_CD PD_DESC KY_CD OFNS_DESC LAW_CODE LAW_CAT_CD ARREST_BORO ARREST_PRECINCT JURISDICTION_CODE AGE_GROUP PERP_SEX PERP_RACE X_COORD_CD Y_COORD_CD Latitude Longitude New Georeferenced Column
0 211666750 04/01/2020 508.0 DRUG PARAPHERNALIA, POSSESSE 235.0 DANGEROUS DRUGS PL 2205003 M K 60 0 25-44 F WHITE 988243 150934 40.580962 -73.985627 POINT (-73.98562653999994 40.58096231800005)
1 211666559 04/01/2020 792.0 WEAPONS POSSESSION 1 & 2 118.0 DANGEROUS WEAPONS PL 265101B F K 63 0 45-64 M WHITE 1010797 163280 40.614811 -73.904383 POINT (-73.90438310299999 40.61481073200008)
nypd_data["OFNS_DESC"][0:5] #I can call data by passing in the column name and I can even slice that data as well to get a piece of it
0 DANGEROUS DRUGS 1 DANGEROUS WEAPONS 2 ASSAULT 3 & RELATED OFFENSES 3 FELONY ASSAULT 4 FELONY ASSAULT Name: OFNS_DESC, dtype: object
#Grab the first 10 entries of the agegroup column
nypd_data["AGE_GROUP"][0:10]
0 25-44 1 45-64 2 25-44 3 45-64 4 18-24 5 <18 6 25-44 7 18-24 8 25-44 9 25-44 Name: AGE_GROUP, dtype: object
nypd_data[["AGE_GROUP", "PERP_SEX"]][0:10] #I can pass in multiple columns and slice them as well.
AGE_GROUP PERP_SEX
0 25-44 F
1 45-64 M
2 25-44 M
3 45-64 M
4 18-24 M
5 <18 F
6 25-44 M
7 18-24 M
8 25-44 M
9 25-44 M
nypd_data.columns
Index(['ARREST_KEY', 'ARREST_DATE', 'PD_CD', 'PD_DESC', 'KY_CD', 'OFNS_DESC', 'LAW_CODE', 'LAW_CAT_CD', 'ARREST_BORO', 'ARREST_PRECINCT', 'JURISDICTION_CODE', 'AGE_GROUP', 'PERP_SEX', 'PERP_RACE', 'X_COORD_CD', 'Y_COORD_CD', 'Latitude', 'Longitude', 'New Georeferenced Column'], dtype='object')
#Slice the first ten rows of arrest precinct and perp race
nypd_data[["ARREST_PRECINCT", "PERP_RACE"]][0:10]
ARREST_PRECINCT PERP_RACE
0 60 WHITE
1 63 WHITE
2 121 WHITE
3 100 WHITE
4 73 UNKNOWN
5 106 WHITE HISPANIC
6 112 WHITE
7 120 WHITE HISPANIC
8 9 WHITE HISPANIC
9 83 WHITE
#Slice the first ten rows of arrest precinct and arrest boro
nypd_data[["ARREST_PRECINCT","ARREST_BORO"]][0:10]
ARREST_PRECINCT ARREST_BORO
0 60 K
1 63 K
2 121 S
3 100 Q
4 73 K
5 106 Q
6 112 Q
7 120 S
8 9 M
9 83 K
#Slice the first ten rows of arrest precinct, arrest boro and perp race
nypd_data[["ARREST_PRECINCT","ARREST_BORO", "PERP_RACE"]][0:10]
ARREST_PRECINCT ARREST_BORO PERP_RACE
0 60 K WHITE
1 63 K WHITE
2 121 S WHITE
3 100 Q WHITE
4 73 K UNKNOWN
5 106 Q WHITE HISPANIC
6 112 Q WHITE
7 120 S WHITE HISPANIC
8 9 M WHITE HISPANIC
9 83 K WHITE
#Slice the first ten rows of arrest precinct, arrest boro and perp sex
nypd_data[["ARREST_PRECINCT","ARREST_BORO", "PERP_SEX"]][0:10]
ARREST_PRECINCT ARREST_BORO PERP_SEX
0 60 K F
1 63 K M
2 121 S M
3 100 Q M
4 73 K M
5 106 Q F
6 112 Q M
7 120 S M
8 9 M M
9 83 K M
#Slice the LAST ten rows of arrest precinct and perp sex.
#tail(10) replaces the hard-coded [8285:], which returned 11 rows (8285-8295) on this 8296-row frame;
#the recorded output below still shows that 11-row result.
nypd_data[["ARREST_PRECINCT","PERP_SEX"]].tail(10)
ARREST_PRECINCT PERP_SEX
8285 46 M
8286 34 M
8287 63 M
8288 83 M
8289 113 M
8290 45 M
8291 108 M
8292 52 M
8293 113 M
8294 19 M
8295 41 M
#The values attribute returns every entry of the column as an array (use .unique() instead to list only the distinct values)
nypd_data["PERP_SEX"].values
array(['F', 'M', 'M', ..., 'M', 'M', 'M'], dtype=object)
nypd_data["PERP_SEX"].value_counts()
M 6998 F 1298 Name: PERP_SEX, dtype: int64
#Use value counts on the following individual categories: OFNS DESC, ARREST BORO & PERP RACE
nypd_data["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 1516 FELONY ASSAULT 804 PETIT LARCENY 786 MISCELLANEOUS PENAL LAW 609 CRIMINAL MISCHIEF & RELATED OF 558 BURGLARY 520 DANGEROUS DRUGS 519 ROBBERY 492 DANGEROUS WEAPONS 427 OFFENSES AGAINST PUBLIC ADMINI 314 GRAND LARCENY 311 SEX CRIMES 161 POSSESSION OF STOLEN PROPERTY 133 VEHICLE AND TRAFFIC LAWS 129 OFF. AGNST PUB ORD SENSBLTY & 123 CRIMINAL TRESPASS 123 GRAND LARCENY OF MOTOR VEHICLE 82 MURDER & NON-NEGL. MANSLAUGHTE 71 INTOXICATED & IMPAIRED DRIVING 70 FORGERY 66 BURGLAR'S TOOLS 66 OFFENSES AGAINST THE PERSON 53 UNAUTHORIZED USE OF A VEHICLE 45 FOR OTHER AUTHORITIES 43 RAPE 37 OFFENSES INVOLVING FRAUD 33 OTHER STATE LAWS (NON PENAL LA 32 OTHER OFFENSES RELATED TO THEF 27 FRAUDS 25 GAMBLING 22 NYS LAWS-UNCLASSIFIED FELONY 22 OTHER TRAFFIC INFRACTION 19 INTOXICATED/IMPAIRED DRIVING 10 OTHER STATE LAWS 7 ARSON 6 THEFT-FRAUD 3 ADMINISTRATIVE CODE 3 ALCOHOLIC BEVERAGE CONTROL LAW 3 KIDNAPPING & RELATED OFFENSES 2 OFFENSES RELATED TO CHILDREN 2 ENDAN WELFARE INCOMP 2 HARRASSMENT 2 2 OFFENSES AGAINST PUBLIC SAFETY 2 MOVING INFRACTIONS 2 DISORDERLY CONDUCT 2 JOSTLING 1 CHILD ABANDONMENT/NON SUPPORT 1 HOMICIDE-NEGLIGENT,UNCLASSIFIE 1 Name: OFNS_DESC, dtype: int64
nypd_data["ARREST_BORO"].value_counts()
K 2291 B 1964 Q 1843 M 1811 S 387 Name: ARREST_BORO, dtype: int64
nypd_data["PERP_RACE"].value_counts()
BLACK 4046 WHITE HISPANIC 2210 WHITE 911 BLACK HISPANIC 703 ASIAN / PACIFIC ISLANDER 388 UNKNOWN 21 AMERICAN INDIAN/ALASKAN NATIVE 17 Name: PERP_RACE, dtype: int64
#Conditional filtering (boolean indexing): keep only the rows where a condition is True
females = nypd_data[nypd_data["PERP_SEX"] == "F"]
females["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 363 FELONY ASSAULT 211 PETIT LARCENY 128 CRIMINAL MISCHIEF & RELATED OF 103 MISCELLANEOUS PENAL LAW 72 ROBBERY 61 DANGEROUS DRUGS 42 OFFENSES AGAINST PUBLIC ADMINI 40 BURGLARY 40 DANGEROUS WEAPONS 37 GRAND LARCENY 35 SEX CRIMES 26 CRIMINAL TRESPASS 24 OFF. AGNST PUB ORD SENSBLTY & 18 INTOXICATED & IMPAIRED DRIVING 15 GRAND LARCENY OF MOTOR VEHICLE 12 VEHICLE AND TRAFFIC LAWS 11 POSSESSION OF STOLEN PROPERTY 9 OTHER OFFENSES RELATED TO THEF 9 MURDER & NON-NEGL. MANSLAUGHTE 6 OFFENSES AGAINST THE PERSON 6 UNAUTHORIZED USE OF A VEHICLE 5 FOR OTHER AUTHORITIES 3 FORGERY 3 BURGLAR'S TOOLS 3 OFFENSES INVOLVING FRAUD 2 OFFENSES RELATED TO CHILDREN 2 FRAUDS 2 OTHER STATE LAWS (NON PENAL LA 2 ALCOHOLIC BEVERAGE CONTROL LAW 1 INTOXICATED/IMPAIRED DRIVING 1 OTHER STATE LAWS 1 GAMBLING 1 NYS LAWS-UNCLASSIFIED FELONY 1 CHILD ABANDONMENT/NON SUPPORT 1 ARSON 1 THEFT-FRAUD 1 Name: OFNS_DESC, dtype: int64
#Use conditional filtering to create groups of data based on borough.
#Each frame below keeps only the rows whose ARREST_BORO code matches one borough.
#bronx filtering
bronx = nypd_data[nypd_data["ARREST_BORO"] == "B"]
#brooklyn filtering
brooklyn = nypd_data[nypd_data["ARREST_BORO"] == "K"]
#staten island filtering
staten = nypd_data[nypd_data["ARREST_BORO"] == "S"]
#queens filtering
queens = nypd_data[nypd_data["ARREST_BORO"] == "Q"]
#manhattan filtering
manhattan = nypd_data[nypd_data["ARREST_BORO"] == "M"]
#Use value counts to find the ofns desc by borough
#bronx value counts
bronx["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 436 FELONY ASSAULT 227 DANGEROUS DRUGS 172 CRIMINAL MISCHIEF & RELATED OF 136 ROBBERY 131 PETIT LARCENY 131 OFFENSES AGAINST PUBLIC ADMINI 108 DANGEROUS WEAPONS 103 BURGLARY 76 MISCELLANEOUS PENAL LAW 64 GRAND LARCENY 49 VEHICLE AND TRAFFIC LAWS 39 OFF. AGNST PUB ORD SENSBLTY & 38 SEX CRIMES 32 POSSESSION OF STOLEN PROPERTY 24 MURDER & NON-NEGL. MANSLAUGHTE 22 CRIMINAL TRESPASS 18 FOR OTHER AUTHORITIES 16 INTOXICATED & IMPAIRED DRIVING 14 OFFENSES AGAINST THE PERSON 14 RAPE 13 FORGERY 12 OFFENSES INVOLVING FRAUD 11 BURGLAR'S TOOLS 11 OTHER OFFENSES RELATED TO THEF 10 GAMBLING 9 GRAND LARCENY OF MOTOR VEHICLE 9 UNAUTHORIZED USE OF A VEHICLE 9 FRAUDS 6 OTHER TRAFFIC INFRACTION 5 NYS LAWS-UNCLASSIFIED FELONY 4 OTHER STATE LAWS (NON PENAL LA 4 ALCOHOLIC BEVERAGE CONTROL LAW 2 OFFENSES AGAINST PUBLIC SAFETY 2 INTOXICATED/IMPAIRED DRIVING 1 HOMICIDE-NEGLIGENT,UNCLASSIFIE 1 ENDAN WELFARE INCOMP 1 JOSTLING 1 DISORDERLY CONDUCT 1 KIDNAPPING & RELATED OFFENSES 1 ARSON 1 Name: OFNS_DESC, dtype: int64
#brooklyn value counts
brooklyn["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 371 FELONY ASSAULT 231 PETIT LARCENY 205 MISCELLANEOUS PENAL LAW 203 DANGEROUS WEAPONS 163 BURGLARY 144 CRIMINAL MISCHIEF & RELATED OF 138 DANGEROUS DRUGS 135 ROBBERY 109 GRAND LARCENY 88 OFFENSES AGAINST PUBLIC ADMINI 79 SEX CRIMES 46 POSSESSION OF STOLEN PROPERTY 43 GRAND LARCENY OF MOTOR VEHICLE 38 CRIMINAL TRESPASS 36 VEHICLE AND TRAFFIC LAWS 33 FORGERY 33 OFF. AGNST PUB ORD SENSBLTY & 28 BURGLAR'S TOOLS 27 INTOXICATED & IMPAIRED DRIVING 21 OFFENSES AGAINST THE PERSON 17 RAPE 13 GAMBLING 10 OTHER OFFENSES RELATED TO THEF 9 OFFENSES INVOLVING FRAUD 9 MURDER & NON-NEGL. MANSLAUGHTE 9 OTHER STATE LAWS (NON PENAL LA 8 FOR OTHER AUTHORITIES 8 NYS LAWS-UNCLASSIFIED FELONY 6 UNAUTHORIZED USE OF A VEHICLE 6 INTOXICATED/IMPAIRED DRIVING 5 OTHER TRAFFIC INFRACTION 4 FRAUDS 4 OTHER STATE LAWS 2 ADMINISTRATIVE CODE 2 HARRASSMENT 2 1 ALCOHOLIC BEVERAGE CONTROL LAW 1 DISORDERLY CONDUCT 1 ARSON 1 MOVING INFRACTIONS 1 Name: OFNS_DESC, dtype: int64
#staten value counts
staten["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 55 MISCELLANEOUS PENAL LAW 52 PETIT LARCENY 49 CRIMINAL MISCHIEF & RELATED OF 32 FELONY ASSAULT 29 BURGLARY 24 GRAND LARCENY 23 DANGEROUS DRUGS 17 DANGEROUS WEAPONS 16 OFFENSES AGAINST PUBLIC ADMINI 13 OFF. AGNST PUB ORD SENSBLTY & 10 POSSESSION OF STOLEN PROPERTY 10 VEHICLE AND TRAFFIC LAWS 9 ROBBERY 7 SEX CRIMES 6 GRAND LARCENY OF MOTOR VEHICLE 6 CRIMINAL TRESPASS 5 MURDER & NON-NEGL. MANSLAUGHTE 4 UNAUTHORIZED USE OF A VEHICLE 3 OTHER STATE LAWS (NON PENAL LA 3 INTOXICATED & IMPAIRED DRIVING 3 NYS LAWS-UNCLASSIFIED FELONY 2 OFFENSES AGAINST THE PERSON 2 ARSON 1 OTHER TRAFFIC INFRACTION 1 OFFENSES INVOLVING FRAUD 1 FRAUDS 1 OTHER OFFENSES RELATED TO THEF 1 RAPE 1 Name: OFNS_DESC, dtype: int64
#queens value counts
queens["OFNS_DESC"].value_counts()
ASSAULT 3 & RELATED OFFENSES 386 MISCELLANEOUS PENAL LAW 213 FELONY ASSAULT 191 CRIMINAL MISCHIEF & RELATED OF 146 PETIT LARCENY 133 ROBBERY 120 BURGLARY 107 DANGEROUS WEAPONS 69 SEX CRIMES 60 GRAND LARCENY 59 DANGEROUS DRUGS 53 OFFENSES AGAINST PUBLIC ADMINI 39 POSSESSION OF STOLEN PROPERTY 28 INTOXICATED & IMPAIRED DRIVING 26 GRAND LARCENY OF MOTOR VEHICLE 21 VEHICLE AND TRAFFIC LAWS 19 UNAUTHORIZED USE OF A VEHICLE 18 OTHER STATE LAWS (NON PENAL LA 17 OFF. AGNST PUB ORD SENSBLTY & 17 CRIMINAL TRESPASS 15 OFFENSES AGAINST THE PERSON 13 MURDER & NON-NEGL. MANSLAUGHTE 11 FRAUDS 10 BURGLAR'S TOOLS 8 FOR OTHER AUTHORITIES 7 FORGERY 6 RAPE 6 OTHER TRAFFIC INFRACTION 6 OFFENSES INVOLVING FRAUD 5 NYS LAWS-UNCLASSIFIED FELONY 5 OTHER OFFENSES RELATED TO THEF 5 INTOXICATED/IMPAIRED DRIVING 4 OTHER STATE LAWS 4 THEFT-FRAUD 3 ARSON 3 OFFENSES RELATED TO CHILDREN 2 ENDAN WELFARE INCOMP 1 CHILD ABANDONMENT/NON SUPPORT 1 MOVING INFRACTIONS 1 ADMINISTRATIVE CODE 1 Name: OFNS_DESC, dtype: int64
#manhattan value counts
manhattan["OFNS_DESC"].value_counts()
PETIT LARCENY 268 ASSAULT 3 & RELATED OFFENSES 268 BURGLARY 169 DANGEROUS DRUGS 142 FELONY ASSAULT 126 ROBBERY 125 CRIMINAL MISCHIEF & RELATED OF 106 GRAND LARCENY 92 MISCELLANEOUS PENAL LAW 77 DANGEROUS WEAPONS 76 OFFENSES AGAINST PUBLIC ADMINI 75 CRIMINAL TRESPASS 49 OFF. AGNST PUB ORD SENSBLTY & 30 VEHICLE AND TRAFFIC LAWS 29 POSSESSION OF STOLEN PROPERTY 28 MURDER & NON-NEGL. MANSLAUGHTE 25 BURGLAR'S TOOLS 20 SEX CRIMES 17 FORGERY 15 FOR OTHER AUTHORITIES 12 UNAUTHORIZED USE OF A VEHICLE 9 GRAND LARCENY OF MOTOR VEHICLE 8 OFFENSES INVOLVING FRAUD 7 OFFENSES AGAINST THE PERSON 7 INTOXICATED & IMPAIRED DRIVING 6 NYS LAWS-UNCLASSIFIED FELONY 5 RAPE 4 FRAUDS 4 OTHER TRAFFIC INFRACTION 3 GAMBLING 3 OTHER OFFENSES RELATED TO THEF 2 KIDNAPPING & RELATED OFFENSES 1 HARRASSMENT 2 1 OTHER STATE LAWS 1 Name: OFNS_DESC, dtype: int64
nypd_data["PERP_SEX"]
0 F 1 M 2 M 3 M 4 M .. 8291 M 8292 M 8293 M 8294 M 8295 M Name: PERP_SEX, Length: 8296, dtype: object
#Oftentimes we have data that might not be suitable for our algorithm.
#In this case our data is not numerical and so we need to convert it to be numerical.
#Let's create a function that will assign a 1 to F and a 0 to M.
def perp_gender(row):
    """Encode a PERP_SEX code as a number: "F" -> 1, "M" -> 0.

    Despite its name, ``row`` is a single cell value rather than a whole
    row: the function is passed to ``apply`` on the PERP_SEX column, so it
    is called once per entry.

    Returns:
        1 for "F", 0 for "M", and None for any other value.
    """
    if row == "F":
        return 1
    if row == "M":
        return 0
    # Anything that is neither "F" nor "M" is left unencoded.
    return None
nypd_data["GENDER_NUM"] = nypd_data["PERP_SEX"].apply(perp_gender)
nypd_data[["PERP_SEX","GENDER_NUM"]]
PERP_SEX GENDER_NUM
0 F 1
1 M 0
2 M 0
3 M 0
4 M 0
... ... ...
8291 M 0
8292 M 0
8293 M 0
8294 M 0
8295 M 0

8296 rows × 2 columns

# For HW TONIGHT create a column for the different race groups.
# Using a function and the apply method, assign a numerical value to each different race group.