Using the Quandl Zillow API to analyze Chicago and Evanston home sale prices with ARIMA and EWMAs
This post includes code adapted from the "Python for Finance and Trading Algorithms" Udemy course and from the "Python for Finance and Trading Algorithms" Udemy course notebooks.
Find the Quandl API documentation at https://docs.quandl.com/.
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset object via scikit-learn.
california = fetch_california_housing()

# Feature matrix and target vector. The target is multiplied by 100000
# (scikit-learn documents it in units of $100,000, so this presumably
# converts it to dollars).
X, y = california.data, california.target * 100000

# Report the dimensions of both arrays.
print(f'Data shape is {X.shape}')
print(f'Target shape is {y.shape}')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import quandl
# Quandl API key used by the quandl client; it is intentionally left empty in
# this file -- supply your own key here.
quandl.ApiConfig.api_key = ''
%matplotlib inline
# Template for a Zillow dataset identifier on Quandl. The three placeholders
# (category, code, indicator) are filled in with ``str.format`` by
# ``download_data``.
quandl_call = "ZILLOW/{category}{code}_{indicator}"
def download_data(category, code, indicator):
    """
    Read a single dataset from the Zillow Quandl API.

    Each argument is a human-readable name that is translated into the short
    code used in the Quandl dataset identifier. The identifier is built from
    the module-level ``quandl_call`` template and fetched with ``quandl.get``.

    Parameters
    ----------
    category : str
        Area category name: "Evanston" or "Chicago_Area" (both map to "C").
    code : str
        Area name: "Evanston" or "Chicago".
    indicator : str
        Indicator name; only "Sales_Price" is currently supported.

    Returns
    -------
    DataFrame
        The result of ``quandl.get`` for the requested dataset.

    Raises
    ------
    KeyError
        If any argument is not one of the supported names. The message
        names the offending parameter and lists the valid choices.
    """
    area_category_codes = {"Evanston": "C", "Chicago_Area": "C"}
    area_codes = {"Evanston": "64604", "Chicago": "36156"}
    indicator_codes = {"Sales_Price": "SP"}

    def _lookup(table, key, what):
        # Translate a name to its Quandl code, failing with a helpful message.
        try:
            return table[key]
        except KeyError:
            raise KeyError(
                f"Unknown {what} {key!r}; expected one of {sorted(table)}"
            ) from None

    # Validate all three names before touching the network.
    category = _lookup(area_category_codes, category, "category")
    code = _lookup(area_codes, code, "code")
    indicator = _lookup(indicator_codes, indicator, "indicator")

    dataset = quandl_call.format(category=category, code=code, indicator=indicator)
    return quandl.get(dataset)
# data = quandl.get_table("ZILLOW/REGIONS", paginate=True)
# col = 'region'
# mask = np.column_stack([data[col].str.contains(r"Boston", na=False) for col in data])
# data.loc[mask.any(axis=1)]
# col = 'region'
# mask = np.column_stack([data[col].str.contains(r"Evanston", na=False) for col in data])
# df=data.loc[mask.any(axis=1)]
#df['region']
# Download the Sales_Price dataset for Evanston and for Chicago; both are
# requested under the 'Chicago_Area' category.
EV_SP = download_data('Chicago_Area', 'Evanston', 'Sales_Price')
CH_SP = download_data('Chicago_Area', 'Chicago', 'Sales_Price')
# Select the Chicago rows whose Value exceeds 270000. The result is not
# assigned; presumably it is only meant to be displayed as notebook output.
CH_SP.query("Value > 270000")
from pandasql import sqldf


def pysqldf(q):
    """Run the SQL query ``q`` through pandasql's ``sqldf``.

    The module globals are passed as the environment, so the query can refer
    to DataFrames defined at module level (such as ``CH_SP``) by name.
    """
    return sqldf(q, globals())


# SQL query over CH_SP: date and Value for rows with Value above 270000,
# limited to 10 rows.
q = """SELECT date
,Value
FROM CH_SP
WHERE Value > 270000
LIMIT 10;"""

values = pysqldf(q)
values
# Plot the Evanston and Chicago 'Value' series on one figure for comparison.
fig = plt.figure(figsize=(12, 6))
plt.title('Value')
# No explicit axes are passed, so both series presumably draw on the
# current axes of the figure created above.
EV_SP['Value'].plot(label='Evanston')
CH_SP['Value'].plot(label='Chicago')
# Render a legend from the labels given to the two plot calls.
plt.legend()
import seaborn as sns
from scipy.stats import norm
# Distribution plot of the Chicago 'Value' column with a normal distribution
# fitted to it (fit=norm).
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot -- confirm against the installed seaborn version.
# The trailing ';' presumably suppresses the notebook's echo of the return value.
sns.distplot(CH_SP['Value'], fit=norm);
# Same distribution plot for the Evanston 'Value' column.
sns.distplot(EV_SP['Value'], fit=norm);