### Read the data (this will not be graded)
import pandas as pd
import numpy as np
from datetime import datetime
from decimal import Decimal
### Read the data (this will not be graded)
# Load the raw weather records.
df = pd.read_csv("data.csv")

# The "Day" column is parsed as day/month/year and rewritten as
# month/day/year text before being handed to pandas for datetime parsing.
df['Day'] = df['Day'].map(
    lambda raw_day: datetime.strptime(raw_day, "%d/%m/%Y").strftime("%m/%d/%Y")
)
df['Date'] = pd.to_datetime(df['Day'])

# Calendar month number of each record, used by the per-month questions.
df['month_index'] = df['Date'].dt.month

df.info()
### What is the standard deviation of maximum windspeed across all the days
# Standard deviation (pandas default) of the maximum windspeed column,
# rounded to two decimals.
max_windspeed = df['Maximum windspeed (mph)']
ws_std = round(max_windspeed.std(), 2)
print('ws_std =', ws_std)
### What is the difference between 50th percentile and 75th percentile of average temperature
# Gap between the 75th percentile and the 50th percentile (median) of the
# average temperature column.
avg_temperature = df["Average temperature (°F)"]
upper_quartile = avg_temperature.quantile(0.750)
median_value = avg_temperature.quantile(0.5)
q = upper_quartile - median_value

# The raw float difference carries binary noise; rounding it as a Decimal
# prints exactly two decimal places.
print("p_range =", round(Decimal(q), 2))
### What is the pearson correlation between average dew point and average temperature
# Pearson correlation between the columns at positions 1 and 3.
# NOTE(review): these are presumably average temperature and average dew
# point -- confirm against the CSV header.
selected_columns = df.iloc[:, [1, 3]]
pearson_matrix = selected_columns.corr(method='pearson')

# The off-diagonal entry of the 2x2 matrix is the correlation of the pair.
corr = round(pearson_matrix.iloc[0, 1], 2)
print('corr =', corr)
### Out of all the available records, which month has the lowest average humidity?
- Assign your answer as the month index; for example, if it is July, the index is 7.
# Find the record with the lowest "Average humidity (%)" reading and report
# the month it belongs to.
#
# idxmin() returns the row label of that record. Using min() here would give
# the humidity *value*, which is not a row label and makes .loc pick an
# unrelated row (or raise KeyError).
# NOTE(review): this reads the question as "the month of the single lowest
# reading". If the lowest per-month mean is wanted instead, group by
# 'month_index' and take the idxmin() of the means.
lowest_humidity_row = df["Average humidity (%)"].idxmin()
dew_month = df.loc[lowest_humidity_row, "month_index"]
print('dew_month =', dew_month)
### Which month has the highest median for maximum_gust_speed out of all the available records?
Also find the respective value.
- hint: group by month
# Median of the maximum gust speed for every calendar month.
records_by_month = df.groupby(['month_index'])
max_gust_median_month = records_by_month['Maximum gust speed (mph)'].median()

# The month with the largest median, and that median itself.
max_gust_month = max_gust_median_month.idxmax()
max_gust_value = max_gust_median_month.max()

print('max_gust_month =', max_gust_month)
# Print the value actually computed from the data (not a hard-coded literal),
# rounded as a Decimal so that two decimal places are always shown.
print('max_gust_value =', round(Decimal(float(max_gust_value)), 2))
### Determine the average temperature between the months of March 2010 to May 2012 (including
both the months)
# Keep the records dated from 1 March 2010 through 31 May 2012 (inclusive).
on_or_after_start = df['Date'] >= '2010-03-01'
on_or_before_end = df['Date'] <= '2012-05-31'

# Column at position 1.
# NOTE(review): presumably the average temperature -- confirm against the CSV.
one = df[on_or_after_start & on_or_before_end].iloc[:, 1]

# Arithmetic mean over the selected records, rounded to two decimals.
avg_temp = round(sum(one) / len(one), 2)
print('avg_temp =', avg_temp)
### Find the range of average temperature in Dec 2010
# Select the December 2010 records once and reuse the selection.
# Column at position 1.
# NOTE(review): presumably the average temperature -- confirm against the CSV.
in_dec_2010 = (df['Date'] >= '2010-12-01') & (df['Date'] <= '2010-12-31')
dec_2010_temps = df[in_dec_2010].iloc[:, 1]

maxt = dec_2010_temps.max()
mint = dec_2010_temps.min()

# Range = highest minus lowest value within the month.
temp_range = maxt - mint

# Print the value actually computed from the data (not a hard-coded literal),
# rounded as a Decimal so that two decimal places are always shown.
print('temp_range =', round(Decimal(float(temp_range)), 2))
### Out of all available records which day has the highest difference between maximum_pressure
and minimum_pressure
- assign the date in string format as 'yyyy-mm-dd'. Make sure you enclose it with single quote
# Per-record spread between the maximum and minimum pressure readings.
# (The trailing space in both column names matches the CSV header.)
df['pressure_diff'] = df['Maximum pressure '] - df['Minimum pressure ']

# Row label of the record with the widest spread, and its date.
max_press_diff = df['pressure_diff'].idxmax()
max_press_date = df.loc[max_press_diff, 'Date']

# 'Date' already holds datetimes, so it can be formatted directly.
max_p_range = max_press_date.strftime('%Y-%m-%d')

# Keep the answer string in max_p_range; print() returns None, so its result
# must not be assigned back to the variable.
print("max_p_range_day = " + "'" + max_p_range + "'")
### How many days fall exactly on the median (i.e. are equal to the median value) of the barometer reading?
# Column at position 4.
# NOTE(review): presumably the barometer reading -- confirm against the CSV.
barometer = df.iloc[:, 4]
med = barometer.median()

# Count the records whose reading equals the median exactly.
equals_median = barometer == med
median_b_days = int(equals_median.sum())
print('median_b_days =', median_b_days)
### Out of all the available records, how many days are within one standard deviation of the average
temperature?
# Column at position 1.
# NOTE(review): presumably the average temperature -- confirm against the CSV.
temperatures = df.iloc[:, 1]

# Use the unrounded mean and standard deviation as thresholds: rounding them
# to two decimals first shifts the band edges and can misclassify days that
# sit right at a boundary.
temp_mean = temperatures.mean()
temp_std = temperatures.std()

# Days with mean - std <= value <= mean + std (both ends inclusive).
within_one_std = temperatures.between(temp_mean - temp_std, temp_mean + temp_std)
num_days_std = int(within_one_std.sum())
print('num_days_std =', num_days_std)
## Once you are done with your solution make sure you have saved the notebook (ctrl + s)