import pandas as pd
import numpy as np


# Load the crime records from the CSV file.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the DtypeWarning about mixed types
# (reported for columns 0, 2, 6 and 12 with the default settings).
df_crime = pd.read_csv(
    'Boston Crime Dataset (updated July 2020).csv',
    low_memory=False,
)

/var/folders/y4/t0fs65x56m39rqs7vzp6m5cr0000gn/T/ipykernel_79157/3857216148.py:1: DtypeWarning: Columns (0,2,6,12) have mixed types. Specify dtype option on import or set low_memory=False.
  df_crime = pd.read_csv('Boston Crime Dataset (updated July 2020).csv')


# Bare expression: shows the DataFrame when it is the last statement of a
# notebook cell; it has no effect when run as a plain script.
df_crime


# Show the names of all columns in the crime DataFrame as an array.
column_names = df_crime.columns.values
print(column_names)

['INCIDENT_NUMBER' 'OFFENSE_CODE' 'OFFENSE_CODE_GROUP'
 'OFFENSE_DESCRIPTION' 'DISTRICT' 'REPORTING_AREA' 'SHOOTING'
 'OCCURRED_ON_DATE' 'YEAR' 'MONTH' 'DAY_OF_WEEK' 'HOUR' 'UCR_PART'
 'STREET' 'Lat' 'Long' 'Location']


# Data dictionary: maps each column name of the crime dataset to a short,
# human-readable explanation of what that column holds.
crime_dict = {
    "INCIDENT_NUMBER": "the unique number assigned to a specific person during a specific arrest",
    # NOTE(review): this text mentions Texas State law, but the CSV loaded in
    # this file is a Boston dataset -- confirm the intended wording.
    "OFFENSE_CODE": "the numeric code that corresponds to criminal offenses under Texas State law",
    "OFFENSE_CODE_GROUP": "the offense code group the crime belongs to",
    "OFFENSE_DESCRIPTION": "the description of the offense code group",
    "DISTRICT": "a district where the crime has been committed",
    "REPORTING_AREA": "code for reporting area",
    "SHOOTING": "column depicting if the shooting was involved",
    # Date and time parts of the incident.
    "OCCURRED_ON_DATE": "date of the crime",
    "YEAR": "the year of crime",
    "MONTH": "the month of crime",
    "DAY_OF_WEEK": "the name of the day of week",
    "HOUR": "the hour of crime(24hr format)",
    "UCR_PART": "FBI ucr part name",
    # Location details.
    "STREET": "street name",
    "Lat": "latitude info",
    "Long": "longitude info",
    "Location": "coordinate info",
}
# Bare expression: displays the dictionary when run as the last statement of
# a notebook cell.
crime_dict

{'INCIDENT_NUMBER': 'the unique number assigned to a specific person during a specific arrest',
 'OFFENSE_CODE': 'the numeric code that corresponds to criminal offenses under Texas State law',
 'OFFENSE_CODE_GROUP': 'the offense code group the crime belongs to',
 'OFFENSE_DESCRIPTION': 'the description of the offense code group',
 'DISTRICT': 'a district where the crime has been committed',
 'REPORTING_AREA': 'code for reporting area',
 'SHOOTING': 'column depicting if the shooting was involved',
 'OCCURRED_ON_DATE': 'date of the crime',
 'YEAR': 'the year of crime',
 'MONTH': 'the month of crime',
 'DAY_OF_WEEK': 'the name of the day of week',
 'HOUR': 'the hour of crime(24hr format)',
 'UCR_PART': 'FBI ucr part name',
 'STREET': 'street name',
 'Lat': 'latitude info',
 'Long': 'longitude info',
 'Location': 'coordinate info'}

	INCIDENT_NUMBER	OFFENSE_CODE	OFFENSE_CODE_GROUP	OFFENSE_DESCRIPTION	DISTRICT	REPORTING_AREA	SHOOTING	OCCURRED_ON_DATE	YEAR	MONTH	DAY_OF_WEEK	HOUR	UCR_PART	STREET	Lat	Long	Location
0	TESTTEST2	423	NaN	ASSAULT - AGGRAVATED	External		0	2019-10-16 00:00:00	2019	10	Wednesday	0	NaN	RIVERVIEW DR	NaN	NaN	(0.00000000, 0.00000000)
1	S97333701	3301	NaN	VERBAL DISPUTE	C6	915	0	2020-07-18 14:34:00	2020	7	Saturday	14	NaN	MARY BOYLE WAY	42.330813	-71.051368	(42.33081300, -71.05136800)
2	S47513131	2647	NaN	THREATS TO DO BODILY HARM	E18	530	0	2020-06-24 10:15:00	2020	6	Wednesday	10	NaN	READVILLE ST	42.239491	-71.135954	(42.23949100, -71.13595400)
3	I92102201	3301	NaN	VERBAL DISPUTE	E13	583	0	2019-12-20 03:08:00	2019	12	Friday	3	NaN	DAY ST	42.325122	-71.107779	(42.32512200, -71.10777900)
4	I92097173	3115	NaN	INVESTIGATE PERSON	C11	355	0	2019-10-23 00:00:00	2019	10	Wednesday	0	NaN	GIBSON ST	42.297555	-71.059709	(42.29755500, -71.05970900)
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
501065	20224065	3018	NaN	SICK/INJURED/MEDICAL - POLICE	B2	282	0	2020-03-19 07:30:00	2020	3	Thursday	7	NaN	WASHINGTON ST	42.353272	-71.173738	(42.35327200, -71.17373800)
501066	20202856	2672	NaN	BIOLOGICAL THREATS	B2	282	0	2020-03-19 08:30:00	2020	3	Thursday	8	NaN	WARREN ST	42.328234	-71.083289	(42.32823400, -71.08328900)
501067	20046400	1501	NaN	WEAPON VIOLATION - CARRY/ POSSESSING/ SALE/ TR...	B2	330	0	2020-07-02 01:38:00	2020	7	Thursday	1	NaN	PASADENA RD	42.305760	-71.083771	(42.30576000, -71.08377100)
501068	20038446	1501	NaN	WEAPON VIOLATION - CARRY/ POSSESSING/ SALE/ TR...	B2	300	0	2020-06-03 01:15:00	2020	6	Wednesday	1	NaN	WASHINGTON ST	42.323807	-71.089150	(42.32380700, -71.08915000)
501069	20030892	540	NaN	BURGLARY - COMMERICAL	C11	380	0	2020-05-03 00:00:00	2020	5	Sunday	0	NaN	GALLIVAN BLVD	42.283700	-71.047761	(42.28370000, -71.04776100)

Question: On which days and in which areas are people who travel or move around less likely to be targeted by criminals?¶

Email I got:¶

The dataset¶

What each feature in the dataset means (shown in the data dictionary):¶

How the data will be used to solve the problem:¶