Using the Quandl Zillow API to analyze Chicago and Evanston home sale prices with ARIMA and EWMAs
This post includes code adapted from the Python for Finance and Trading Algorithms Udemy course and its accompanying notebooks.
Find the quandl api documentation here -
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import quandl
%matplotlib inline
# Template for a Quandl dataset code in the ZILLOW database. The three
# placeholders are filled in with str.format by download_data.
quandl_call = "ZILLOW/{category}{code}_{indicator}"
def download_data(category, code, indicator):
"""
Reads in a single dataset from the John Hopkins GitHub repo
as a DataFrame
Parameters
----------
category : "Chicago_Area" or "Evanston"
code : "Evanston" or "Chicago"
indicator : "Sales_Price" or "other"
Returns
-------
DataFrame
"""
AREA_CATEGORY_dict = {"Evanston": "C", "Chicago_Area": "C"}
AREA_CODE_dict = {"Evanston": "64604", "Chicago": "36156"}
INDICATOR_CODE_dict = {"Sales_Price": "SP"}
category = AREA_CATEGORY_dict[category]
code = AREA_CODE_dict[code]
indicator = INDICATOR_CODE_dict[indicator]
return quandl.get(quandl_call.format(category=category, code=code, indicator=indicator))
# Fetch the Evanston sale-price data and keep its 'Value' column handy.
df = download_data('Chicago_Area', 'Evanston', 'Sales_Price')
timeseries = df['Value']

# First look at the whole frame and at the 'Value' series.
df.plot()
timeseries.plot(label='Evanston House Prices')

# Rolling mean and standard deviation over a 12-observation window, drawn
# together with the raw series.
rolling_window = timeseries.rolling(12)
rolling_window.mean().plot(label='12 Month Rolling Mean')
rolling_window.std().plot(label='12 Month Rolling Std')
timeseries.plot()
plt.legend()

# Rolling mean and raw series once more, this time without the std curve.
rolling_window.mean().plot(label='12 Month Rolling Mean')
timeseries.plot()
plt.legend()
from statsmodels.tsa.seasonal import seasonal_decompose
# Decompose the series using a season length of 12 observations.
# `period` is the current name of this argument; the older `freq` keyword is
# no longer accepted by recent statsmodels releases.
decomposition = seasonal_decompose(df['Value'], period=12)
# decomposition.plot() returns the figure it draws on, so no figure is created
# beforehand (doing so only left an empty, unused figure behind).
fig = decomposition.plot()
fig.set_size_inches(15, 8)
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
# Seasonal ARIMA on the 'Value' series: order (0, 1, 0) with a seasonal
# order of (1, 1, 1) over a 12-observation season.
model = sm.tsa.statespace.SARIMAX(
    df['Value'],
    order=(0, 1, 0),
    seasonal_order=(1, 1, 1, 12),
)
results = model.fit()
print(results.summary())

# Residual diagnostics: the residuals as a line plot, then as a kernel
# density estimate.
residuals = results.resid
residuals.plot()
residuals.plot(kind='kde')

# Dynamic prediction from position 1 through position 200, stored next to
# the observed values for comparison.
# NOTE(review): end=200 is a fixed position that does not depend on len(df);
# confirm it matches the length of the downloaded series.
predicted = results.predict(start=1, end=200, dynamic=True)
df['forecast'] = predicted
df[['Value', 'forecast']].plot(figsize=(12, 8))
from pandas.tseries.offsets import DateOffset
# 24 dates spaced one calendar month apart, starting at the last observed
# date itself (element 0 is df.index[-1]).
future_dates = [df.index[-1] + DateOffset(months=x) for x in range(0, 24)]
future_dates
# Empty rows for the 23 dates after the last observation, with df's columns,
# appended below the observed data.
future_dates_df = pd.DataFrame(index=future_dates[1:], columns=df.columns)
future_df = pd.concat([df, future_dates_df])
future_df.head()
future_df.tail()
# Predict through the last row of the extended frame. The horizon is derived
# from the frame itself instead of a fixed position (previously 720), so it
# always covers exactly the rows being filled and no steps are computed only
# to be discarded when the result is aligned to future_df's index.
last_position = len(future_df) - 1
future_df['forecast'] = results.predict(start=1, end=last_position, dynamic=True)
future_df[['Value', 'forecast']].plot(figsize=(12, 8))
# Simple moving averages over 6 and 12 observations, plus an exponentially
# weighted mean with span 12, all derived from the 'Value' column.
prices = df['Value']
df['6-month-SMA'] = prices.rolling(window=6).mean()
df['12-month-SMA'] = prices.rolling(window=12).mean()
df['EWMA12'] = prices.ewm(span=12).mean()

# Observed values against the exponentially weighted mean.
df[['Value', 'EWMA12']].plot()
# hpfilter returns two series, unpacked here as the cycle and the trend.
hp_components = sm.tsa.filters.hpfilter(df.Value)
df_cycle, df_trend = hp_components
df_cycle

# Keep the trend next to the observed values and plot both, first over the
# whole index and then restricted to rows from 2010-01-31 onward.
df["trend"] = df_trend
trend_and_value = df[['trend', 'Value']]
trend_and_value.plot()
trend_and_value.loc["2010-01-31":].plot(figsize=(12, 8))
# Sale-price data for Evanston and for Chicago.
EV_SP = download_data('Chicago_Area', 'Evanston', 'Sales_Price')
CH_SP = download_data('Chicago_Area', 'Chicago', 'Sales_Price')

# Both 'Value' series on one titled figure, labelled by area.
fig = plt.figure(figsize=(12, 6))
plt.title('Value')
for frame, area_label in ((EV_SP, 'Evanston'), (CH_SP, 'Chicago')):
    frame['Value'].plot(label=area_label)
plt.legend()

# The Chicago frame on its own.
CH_SP.plot(figsize=(12, 6))