import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the 911 call records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='911.csv')
# Call info() so the column / dtype / non-null summary is actually printed;
# referencing the attribute without parentheses only shows the bound method.
df.info()
<bound method DataFrame.info of lat lng \ 0 40.297876 -75.581294 1 40.258061 -75.264680 2 40.121182 -75.351975 3 40.116153 -75.343513 4 40.251492 -75.603350 ... ... ... 99487 40.132869 -75.333515 99488 40.006974 -75.289080 99489 40.115429 -75.334679 99490 40.186431 -75.192555 99491 40.207055 -75.317952 desc zip \ 0 REINDEER CT & DEAD END; NEW HANOVER; Station ... 19525.0 1 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... 19446.0 2 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... 19401.0 3 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... 19401.0 4 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... NaN ... ... ... 99487 MARKLEY ST & W LOGAN ST; NORRISTOWN; 2016-08-2... 19401.0 99488 LANCASTER AVE & RITTENHOUSE PL; LOWER MERION; ... 19003.0 99489 CHESTNUT ST & WALNUT ST; NORRISTOWN; Station ... 19401.0 99490 WELSH RD & WEBSTER LN; HORSHAM; Station 352; ... 19002.0 99491 MORRIS RD & S BROAD ST; UPPER GWYNEDD; 2016-08... 19446.0 title timeStamp twp \ 0 EMS: BACK PAINS/INJURY 2015-12-10 17:40:00 NEW HANOVER 1 EMS: DIABETIC EMERGENCY 2015-12-10 17:40:00 HATFIELD TOWNSHIP 2 Fire: GAS-ODOR/LEAK 2015-12-10 17:40:00 NORRISTOWN 3 EMS: CARDIAC EMERGENCY 2015-12-10 17:40:01 NORRISTOWN 4 EMS: DIZZINESS 2015-12-10 17:40:01 LOWER POTTSGROVE ... ... ... ... 99487 Traffic: VEHICLE ACCIDENT - 2016-08-24 11:06:00 NORRISTOWN 99488 Traffic: VEHICLE ACCIDENT - 2016-08-24 11:07:02 LOWER MERION 99489 EMS: FALL VICTIM 2016-08-24 11:12:00 NORRISTOWN 99490 EMS: NAUSEA/VOMITING 2016-08-24 11:17:01 HORSHAM 99491 Traffic: VEHICLE ACCIDENT - 2016-08-24 11:17:02 UPPER GWYNEDD addr e 0 REINDEER CT & DEAD END 1 1 BRIAR PATH & WHITEMARSH LN 1 2 HAWS AVE 1 3 AIRY ST & SWEDE ST 1 4 CHERRYWOOD CT & DEAD END 1 ... ... .. 99487 MARKLEY ST & W LOGAN ST 1 99488 LANCASTER AVE & RITTENHOUSE PL 1 99489 CHESTNUT ST & WALNUT ST 1 99490 WELSH RD & WEBSTER LN 1 99491 MORRIS RD & S BROAD ST 1 [99492 rows x 9 columns]>
# Preview the first five rows of the raw data.
df.head(n=5)
lat | lng | desc | zip | title | timeStamp | twp | addr | e | |
---|---|---|---|---|---|---|---|---|---|
0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 |
1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 |
2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 |
3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 |
4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 |
Top 5 townships (twp) for 911 calls
# value_counts() sorts by frequency (descending), so the first five entries
# are the five townships with the most calls.
df['twp'].value_counts().iloc[:5]
twp LOWER MERION 8443 ABINGTON 5977 NORRISTOWN 5890 UPPER MERION 5227 CHELTENHAM 4575 Name: count, dtype: int64
Unique title codes
# Number of distinct title codes; dropna=False keeps a missing value counted
# as its own entry, exactly as len(unique()) would.
df['title'].nunique(dropna=False)
110
# Take the first title as a sample and show the text before the first colon.
x = df['title'].iat[0]
x.partition(':')[0]
'EMS'
Create a new 'Reason' column holding the category prefix of each title (the text before the colon), then find the most common Reason for a 911 call
# Show the dtype pandas assigned to each column of the loaded frame.
df.dtypes
lat float64 lng float64 desc object zip float64 title object timeStamp object twp object addr object e int64 dtype: object
# Derive the call category from the text before the first colon of each title.
# The vectorized .str accessor replaces the per-row lambda and propagates a
# missing title as NaN rather than raising on a non-string value.
df['Reason'] = df['title'].str.split(':', n=1).str[0]

# Frequency of each category, most common first.
df['Reason'].value_counts()
Reason EMS 48877 Traffic 35695 Fire 14920 Name: count, dtype: int64
Countplot of 911 calls by Reason
# Bar chart of the number of calls per Reason.
sns.countplot(data=df, x='Reason', palette='viridis')
<Axes: xlabel='Reason', ylabel='count'>
The data type of the objects in the timeStamp column
# Inspect the Python type of a single timeStamp entry.
type(df['timeStamp'].iat[0])
str
# Parse the timeStamp strings into pandas datetimes, replacing the column.
df['timeStamp'] = df['timeStamp'].pipe(pd.to_datetime)

# Confirm the element type after conversion.
type(df['timeStamp'].iat[0])
pandas._libs.tslibs.timestamps.Timestamp
# Keep the first parsed timestamp as a sample and show its hour component.
time = df['timeStamp'].iat[0]
time.hour
17
# Integer weekday of the sample timestamp (Monday=0 ... Sunday=6).
time.weekday()
3
# Extract the hour of day with the vectorized .dt accessor instead of calling
# a Python lambda once per row.
# NOTE(review): the resulting integer dtype may be int32 rather than int64
# depending on the pandas version; the values are the same.
df['Hour'] = df['timeStamp'].dt.hour
df['Hour']
0 17 1 17 2 17 3 17 4 17 .. 99487 11 99488 11 99489 11 99490 11 99491 11 Name: Hour, Length: 99492, dtype: int64
# Calendar month and integer weekday (Monday=0), taken from the parsed
# timestamps with the vectorized .dt accessor rather than a per-row apply.
df['Month'] = df['timeStamp'].dt.month
df['Day of Week'] = df['timeStamp'].dt.dayofweek
The Day of Week is an integer 0-6. Use .map() with this dictionary to map each integer to the name of its day of the week:
# Lookup table from integer weekday (0-6) to its three-letter label.
dmap = dict(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))
# Preview the frame with the newly added Reason / Hour / Month / Day of Week columns.
df.head(n=5)
lat | lng | desc | zip | title | timeStamp | twp | addr | e | Reason | Hour | Month | Day of Week | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 | EMS | 17 | 12 | 3 |
1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 | EMS | 17 | 12 | 3 |
2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 | Fire | 17 | 12 | 3 |
3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 | EMS | 17 | 12 | 3 |
4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 | EMS | 17 | 12 | 3 |
# Integer weekday (Monday=0) -> three-letter label.
dmap = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}

# Build the labels from the timestamp rather than remapping the column in
# place: mapping the already-labelled column a second time (e.g. when the cell
# is re-run) would look up strings in an int-keyed dict and yield all NaN.
df['Day of Week'] = df['timeStamp'].dt.dayofweek.map(dmap)
Countplot of the Day of Week column with the hue based on the Reason column.
# Calls per weekday, split by Reason. `order` pins the x-axis to Mon..Sun;
# without it the bars follow the order in which labels first appear in the data.
sns.countplot(
    x='Day of Week',
    data=df,
    hue='Reason',
    palette='viridis',
    order=list(dmap.values()),
)
# Anchor the legend just outside the right edge of the axes so it does not
# cover the bars.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
<matplotlib.legend.Legend at 0x259d9a815d0>
# Calls per month, split by Reason.
sns.countplot(data=df, x='Month', hue='Reason', palette='viridis')
# Legend anchored just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)
<matplotlib.legend.Legend at 0x259d9a81c90>
Create a groupby object called byMonth, where you group the DataFrame by the Month column and use the count() method for aggregation.
# Per-month count of non-null values in every other column.
byMonth = df.groupby(by='Month').count()
Simple plot of the DataFrame showing the count of calls per month.
# Preview the first five months of the aggregated counts.
byMonth.head(n=5)
lat | lng | desc | zip | title | timeStamp | twp | addr | e | Reason | Hour | Day of Week | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Month | ||||||||||||
1 | 13205 | 13205 | 13205 | 11527 | 13205 | 13205 | 13203 | 13096 | 13205 | 13205 | 13205 | 13205 |
2 | 11467 | 11467 | 11467 | 9930 | 11467 | 11467 | 11465 | 11396 | 11467 | 11467 | 11467 | 11467 |
3 | 11101 | 11101 | 11101 | 9755 | 11101 | 11101 | 11092 | 11059 | 11101 | 11101 | 11101 | 11101 |
4 | 11326 | 11326 | 11326 | 9895 | 11326 | 11326 | 11323 | 11283 | 11326 | 11326 | 11326 | 11326 |
5 | 11423 | 11423 | 11423 | 9946 | 11423 | 11423 | 11420 | 11378 | 11423 | 11423 | 11423 | 11423 |
# Linear fit of calls per month; reset_index() turns the Month index back into
# a regular column so it can be used as the x variable.
sns.lmplot(data=byMonth.reset_index(), x='Month', y='twp')
C:\Users\ACER\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
<seaborn.axisgrid.FacetGrid at 0x259da906e10>
Create a new column called 'Date' that contains the date from the timeStamp column.
# Sample timestamp kept for interactive inspection; the statements below do
# not use it.
t = df['timeStamp'].iloc[0]

# Calendar date of each call, via the vectorized .dt accessor instead of a
# per-row lambda.
df['Date'] = df['timeStamp'].dt.date

# Calls per day: select the column before counting so only 'twp' is
# aggregated rather than every column of the frame.
df.groupby('Date')['twp'].count().plot()
plt.tight_layout()
Groupby this Date column with the count() aggregate and create a plot of counts of 911 calls.
# Daily call counts restricted to Traffic calls.
(
    df.loc[df['Reason'] == 'Traffic']
    .groupby('Date')['twp']
    .count()
    .plot()
)
plt.title('Traffic')
plt.tight_layout()
Separate plots with each plot representing a Reason for the 911 call
# Daily call counts restricted to Fire calls.
(
    df.loc[df['Reason'] == 'Fire']
    .groupby('Date')['twp']
    .count()
    .plot()
)
plt.title('Fire')
plt.tight_layout()
# Daily call counts restricted to EMS calls.
(
    df.loc[df['Reason'] == 'EMS']
    .groupby('Date')['twp']
    .count()
    .plot()
)
plt.title('EMS')
plt.tight_layout()
# Matrix of call counts: one row per weekday, one column per hour.
# Selecting 'Reason' before count() avoids aggregating every column only to
# discard all but one. reindex() puts the rows in Mon..Sun order; the grouped
# result is otherwise sorted alphabetically by label.
# NOTE(review): a weekday with no calls at all would appear as an all-NaN row.
dayHour = (
    df.groupby(by=['Day of Week', 'Hour'])['Reason']
    .count()
    .unstack()
    .reindex(list(dmap.values()))
)
plt.figure(figsize=(12, 6))
sns.heatmap(dayHour, cmap='viridis')
<Axes: xlabel='Hour', ylabel='Day of Week'>
# Hierarchically clustered view of the weekday-by-hour call counts.
sns.clustermap(data=dayHour, cmap='viridis')
<seaborn.matrix.ClusterGrid at 0x259e4c2ed90>
# Matrix of call counts: one row per weekday, one column per month.
# Selecting 'Reason' before count() avoids aggregating every column only to
# discard all but one. reindex() puts the rows in Mon..Sun order; the grouped
# result is otherwise sorted alphabetically by label.
# NOTE(review): a weekday with no calls at all would appear as an all-NaN row.
dayMonth = (
    df.groupby(by=['Day of Week', 'Month'])['Reason']
    .count()
    .unstack()
    .reindex(list(dmap.values()))
)
plt.figure(figsize=(12, 6))
sns.heatmap(dayMonth, cmap='viridis')
<Axes: xlabel='Month', ylabel='Day of Week'>
# Hierarchically clustered view of the weekday-by-month call counts.
sns.clustermap(data=dayMonth, cmap='viridis')
<seaborn.matrix.ClusterGrid at 0x259e4f9d8d0>