# Forecasting running data
#os.getcwd()
#os.listdir()
from datetime import date
import os
import re

# Stamp every notebook in the current working directory with today's ISO
# date, e.g. ``health_data.ipynb`` -> ``<YYYY-MM-DD>-health_data.ipynb``.
today = date.today()
for file in os.listdir():
    if not file.endswith('.ipynb'):
        continue
    # A name that already starts with ``YYYY-MM-DD-`` was stamped by an
    # earlier run; renaming it again would stack a second date on the front.
    if re.match(r'\d{4}-\d{2}-\d{2}-', file):
        continue
    os.rename(file, f'{today}-{file}')
import shutil
# Copy the dated notebook into the blog repository's ``_notebooks`` folder.
# Both paths are relative to the current working directory.
shutil.copy('2021-06-19-health_data.ipynb', '../git-repos/Kearney_Data_Science/_notebooks')
import sqlalchemy as db
from sqlalchemy import create_engine
import sqlite3
import pandas as pd
# Open the local Fitbit SQLite database through SQLAlchemy.
engine = create_engine('sqlite:///../../Downloads/fitbit.db')
cnxn = connection = engine.connect()
metadata = db.MetaData()

# One row per calendar day with the summed miles read from ``distance_v``.
sql = """
select DATE(date_time) as day
, sum(distance_miles) as distance
from distance_v
group by DATE(date_time)
"""
df = pd.read_sql(sql, cnxn)
df

# Duplicate the two columns under the names ``ds`` (date) and ``y`` (value),
# which are the column names the Prophet model fitted below works with.
df['ds'] = df['day']
df['y'] = df['distance']
df.info()
import statsmodels.api as sm
import pandas as pd
from prophet import Prophet
import pandas as pd
# Switch off numexpr-backed evaluation in pandas before fitting.
pd.set_option('compute.use_numexpr', False)

# Baseline model: a default Prophet instance fitted on the daily history.
m = Prophet()
m.fit(df)

# Extend the date range by 365 periods and forecast over history + future.
future = m.make_future_dataframe(365)
future.tail()
forecast = m.predict(future)
forecast.tail()[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# Forecast plot and per-component breakdown.
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)
# Re-draw the static figures for the current model and forecast.
fig1 = m.plot(fcst=forecast)
fig2 = m.plot_components(fcst=forecast)

# Interactive Plotly versions of the same two figures.
from prophet.plot import plot_components_plotly, plot_plotly

plot_plotly(m, forecast)
plot_components_plotly(m, forecast)
# ``plt.show()`` below needs pyplot; import it here because the file's own
# ``import matplotlib.pyplot as plt`` only appears further down, which makes
# a top-to-bottom run fail with NameError.
import matplotlib.pyplot as plt

# Model fit
m = Prophet()  # Instantiate from the Prophet class.
m.fit(df)  # Fit the Prophet model.

# Predict
# Build a frame holding the history dates plus 365 further periods (daily by
# default), then predict over the whole range.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Plot results
fig1 = m.plot(forecast)  # Fit to past data and the future forecast.
fig2 = m.plot_components(forecast)  # Breakdown of the model components.
plt.show()

forecast  # Display the full forecast table.
import pandas as pd
import matplotlib.pyplot as plt
# Logistic-growth variant: reuses the existing ``df`` and adds saturation
# bounds to it.
# NOTE(review): the 10 / 7 bounds look carried over from Prophet's Peyton
# Manning page-view example -- confirm they suit the values in ``y``.
df['cap'] = 10  # upper saturation level
df['floor'] = 7  # lower saturation level

# Logistic trend plus US holiday effects.
m = Prophet(growth='logistic')
m.add_country_holidays('US')
m.fit(df)

# 365 * 5 future periods, carrying the same bounds as the history.
future = m.make_future_dataframe(365 * 5).assign(cap=10, floor=7)

# Forecast over history + future.
forecast = m.predict(future)

# Forecast plot and component breakdown.
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
def is_nfl_season(ds) -> bool:
    """Return True when *ds* falls in September through January.

    Args:
        ds: Any value ``pd.to_datetime`` can turn into a single timestamp,
            e.g. an ISO date string or a ``Timestamp``.

    Returns:
        ``True`` for months 9-12 and month 1, ``False`` for months 2-8.
    """
    # Named ``timestamp`` rather than ``date`` so the ``datetime.date``
    # imported at module level is not shadowed inside the function.
    timestamp = pd.to_datetime(ds)
    return timestamp.month > 8 or timestamp.month < 2
# Boolean flags on the history: inside the season window and its complement.
df['on_season'] = df['ds'].apply(is_nfl_season)
df['off_season'] = ~df['on_season']

# Replace the built-in weekly seasonality with two conditional weekly
# seasonalities, each active only on rows where its flag column is True.
m = Prophet(weekly_seasonality=False)
m.add_seasonality(
    name='weekly_on_season',
    period=7,
    fourier_order=3,
    condition_name='on_season',
)
m.add_seasonality(
    name='weekly_off_season',
    period=7,
    fourier_order=3,
    condition_name='off_season',
)
m.fit(df)

# 365 * 5 future periods; the future frame needs the same flag columns.
future = m.make_future_dataframe(365 * 5)
future['on_season'] = future['ds'].apply(is_nfl_season)
future['off_season'] = ~future['on_season']

# Forecast over history + future.
forecast = m.predict(future)

# Forecast plot and component breakdown.
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)
plt.show()
# Residual diagnostics for the most recent ``forecast``: compare observed
# ``y`` with fitted ``yhat`` on matching dates and inspect autocorrelation.
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Parse ``ds`` as datetimes so it can be joined against the forecast's ``ds``.
df['ds'] = pd.to_datetime(df['ds'], format='%Y-%m-%d')

# The inner join keeps only dates present in both frames.
df_res = pd.merge(df, forecast, how="inner", on="ds")
df_res['residual'] = df_res['y'].sub(df_res['yhat'])

# Autocorrelation and partial autocorrelation of the residuals.
plot_acf(df_res['residual'])
plot_pacf(df_res['residual'])
plt.show()