Portfolio Optimization
This post includes code and notes from the Udemy course "Python for Finance and Trading Algorithms" and from the course's accompanying notebooks.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Read the raw table straight from the hosted CSV endpoint.
df = pd.read_csv(
    'https://stocks-snp-500.herokuapp.com/stocks/index_stocks_table.csv?_size=max'
)
df.head()

# Parse the 'Date' column as datetimes and use it as the row index as well.
df['Date'] = pd.to_datetime(df['Date'])
df.index = pd.to_datetime(df['Date'])

# 'Date' is now carried by the index, so drop the column; then drop the
# remaining columns that are excluded from the analysis below.
df2 = df.drop(columns='Date')
df3 = df2.drop(columns=['rowid', 'Russell_2000_stock', 'SP500_stock'])
df3

# `stocks` is the name every later step works with.
stocks = df3
# start = pd.to_datetime('2018-01-01')
# end = pd.to_datetime('today')
# import pandas as pd
# import pandas_datareader.data as web
# import datetime
# MSFT_stock = web.DataReader('MSFT', 'yahoo', start, end)
# MSFT_stock.head()
# ZOOM_stock = web.DataReader('ZM', 'yahoo', start, end)
# ZOOM_stock.head()
# aapl_stock = web.DataReader('aapl', 'yahoo', start, end)
# aapl_stock.head()
# stock_df = pd.concat([df3, aapl_stock['Close'],ZOOM_stock['Close'],MSFT_stock['Close']],axis=1)
# stock_df.columns = ['snp','aapl_stock','ZOOM_stock','MSFT_stock']
# stock_df
# stocks = pd.concat([df3, aapl,cisco,amzn],axis=1)
# stocks.columns = ['snp','aapl','cisco','amzn']
# stock_df
# stocks = stock_df
# One-period arithmetic (percentage) change of every column; computed once
# and reused for the mean, the correlation matrix and the preview below.
stock_daily_ret = stocks.pct_change(1)

mean_daily_ret = stock_daily_ret.mean()
mean_daily_ret
stock_daily_ret.corr()
stocks.head()

# Divide every column by its first row so all series start at 1.0 and can
# be compared on one chart.
stock_normed = stocks / stocks.iloc[0]
stock_normed.plot()

stock_daily_ret.head()
Log Returns vs Arithmetic Returns
We will now switch over to using log returns instead of arithmetic returns. For many of our use cases they are almost the same, but most technical analyses require detrending/normalizing the time series, and using log returns is a nice way to do that. Log returns are also convenient to work with in many of the algorithms we will encounter.
For a full analysis of why we use log returns, check this great article.
# Log return per row: ln(value / previous row's value). The first row has no
# predecessor, so it comes out as NaN.
log_ret = np.log(stocks.div(stocks.shift(1)))
log_ret.head()

# One histogram per column.
log_ret.hist(bins=100, figsize=(12, 6));
plt.tight_layout()

log_ret.describe().T

# Mean log return scaled by 252 (presumably trading days per year).
log_ret.mean() * 252

# Compute pairwise covariance of columns
log_ret.cov()
log_ret.cov() * 252  # multiply by days
# Single random portfolio: draw random weights, normalise them, and report
# the resulting expected return, volatility and Sharpe ratio.

# Set seed (optional) so the random draw below is reproducible
np.random.seed(101)

# Stock Columns
print('Stocks')
print(stocks.columns)
print('\n')

# Create Random Weights
# One weight per column of `stocks` (previously a hard-coded 4, which breaks
# the element-wise products below whenever the column count differs).
print('Creating Random Weights')
weights = np.array(np.random.random(len(stocks.columns)))
print(weights)
print('\n')

# Rebalance Weights so they describe a fully invested portfolio
print('Rebalance to sum to 1.0')
weights = weights / np.sum(weights)
print(weights)
print('\n')

# Expected Return: weighted mean log return, scaled by 252
print('Expected Portfolio Return')
exp_ret = np.sum(log_ret.mean() * weights) *252
print(exp_ret)
print('\n')

# Expected Variance: sqrt(w^T * (cov * 252) * w)
print('Expected Volatility')
exp_vol = np.sqrt(np.dot(weights.T, np.dot(log_ret.cov() * 252, weights)))
print(exp_vol)
print('\n')

# Sharpe Ratio (return divided by volatility; no risk-free rate is subtracted)
SR = exp_ret/exp_vol
print('Sharpe Ratio')
print(SR)
# Monte Carlo search: evaluate `num_ports` random portfolios and record the
# weights, expected return, volatility and Sharpe ratio of each one.
num_ports = 15000
# One weight per column of `stocks`; this replaces a hard-coded 4 that had
# to match the width of `all_weights` by coincidence.
num_assets = len(stocks.columns)

all_weights = np.zeros((num_ports, num_assets))
ret_arr = np.zeros(num_ports)
vol_arr = np.zeros(num_ports)
sharpe_arr = np.zeros(num_ports)

# These depend only on `log_ret`, not on the weights, so compute them once
# instead of on every iteration.
mean_log_ret = log_ret.mean()
annual_cov = log_ret.cov() * 252

for ind in range(num_ports):
    # Create Random Weights
    weights = np.array(np.random.random(num_assets))

    # Rebalance Weights so they sum to 1.0
    weights = weights / np.sum(weights)

    # Save Weights
    all_weights[ind, :] = weights

    # Expected Return
    ret_arr[ind] = np.sum((mean_log_ret * weights) * 252)

    # Expected Variance
    vol_arr[ind] = np.sqrt(np.dot(weights.T, np.dot(annual_cov, weights)))

    # Sharpe Ratio
    sharpe_arr[ind] = ret_arr[ind] / vol_arr[ind]
# Locate the simulated portfolio with the highest Sharpe ratio.
sharpe_arr.max()

# Use the position reported by argmax rather than a literal index: a literal
# is only correct for one particular seed, data set and number of portfolios.
max_sr_idx = sharpe_arr.argmax()
max_sr_idx

# Weights, return and volatility of that best portfolio.
all_weights[max_sr_idx, :]
max_sr_ret = ret_arr[max_sr_idx]
max_sr_vol = vol_arr[max_sr_idx]
# Scatter of every simulated portfolio in volatility/return space,
# coloured by its Sharpe ratio.
plt.figure(figsize=(12, 8))
plt.scatter(vol_arr, ret_arr, c=sharpe_arr, cmap='plasma')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Return')

# Add red dot for max SR
plt.scatter(
    max_sr_vol,
    max_sr_ret,
    c='red',
    s=50,
    edgecolors='black',
)
def get_ret_vol_sr(weights):
    """
    Evaluate a portfolio described by `weights` against the module-level
    `log_ret` table.

    Returns a NumPy array ``[ret, vol, sr]`` where ``ret`` is the weighted
    mean log return times 252, ``vol`` is ``sqrt(w^T (cov * 252) w)`` and
    ``sr`` is ``ret / vol``.
    """
    weights = np.array(weights)

    annual_cov = log_ret.cov() * 252
    ret = np.sum(log_ret.mean() * weights) * 252
    variance = np.dot(weights.T, np.dot(annual_cov, weights))
    vol = np.sqrt(variance)

    return np.array([ret, vol, ret / vol])
from scipy.optimize import minimize
To fully understand all the parameters, check out: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
#help(minimize)
Optimization works as a minimization function. Since we actually want to maximize the Sharpe ratio, we need to turn it negative so that we can minimize the negative Sharpe ratio (which is the same as maximizing the positive Sharpe ratio).
def neg_sharpe(weights):
    """Return the Sharpe ratio of `weights` with its sign flipped, so that
    a minimizer effectively maximizes the Sharpe ratio."""
    sharpe = get_ret_vol_sr(weights)[2]
    return sharpe * -1
# Constraints
def check_sum(weights):
    """
    Return the difference between the sum of `weights` and 1.0
    (zero exactly when the weights sum to 1.0).
    """
    total = np.sum(weights)
    return total - 1
# Find the weights that maximize the Sharpe ratio by minimizing its negative.

# By convention of the minimize function, an equality constraint is a
# function that returns zero when the condition holds.
cons = {'type': 'eq', 'fun': check_sum}

# Size the problem from the data instead of hard-coding four assets, so the
# bounds and starting point always match the number of columns in `stocks`.
num_assets = len(stocks.columns)

# 0-1 bounds for each weight
bounds = tuple((0, 1) for _ in range(num_assets))

# Initial Guess (equal distribution)
init_guess = [1.0 / num_assets] * num_assets

# Sequential Least Squares Programming (SLSQP).
opt_results = minimize(
    neg_sharpe,
    init_guess,
    method='SLSQP',
    bounds=bounds,
    constraints=cons,
)
opt_results

# Optimal weights, then their [return, volatility, Sharpe ratio].
opt_results.x
get_ret_vol_sr(opt_results.x)
All Optimal Portfolios (Efficient Frontier)
Efficient Frontier http://www.investopedia.com/terms/e/efficientfrontier
# Our returns go from 0 to somewhere along 0.3
# Create a linspace number of points to calculate x on
frontier_y = np.linspace(0, 0.3, 100)  # Change 100 to a lower number for slower computers!


def minimize_volatility(weights):
    """Objective for the frontier search: the volatility entry (index 1)
    of get_ret_vol_sr(weights)."""
    _, volatility, _ = get_ret_vol_sr(weights)
    return volatility


# For each target return, search for the lowest-volatility weights and keep
# the objective value the optimizer reports.
frontier_volatility = []

for possible_return in frontier_y:
    # Two equality constraints: the weights sum to 1.0, and the portfolio
    # return equals the current target.
    cons = (
        {'type': 'eq', 'fun': check_sum},
        {'type': 'eq', 'fun': lambda w: get_ret_vol_sr(w)[0] - possible_return},
    )

    result = minimize(
        minimize_volatility,
        init_guess,
        method='SLSQP',
        bounds=bounds,
        constraints=cons,
    )

    frontier_volatility.append(result.fun)
# Same Sharpe-coloured scatter of the simulated portfolios, now with the
# computed frontier drawn on top.
plt.figure(figsize=(12, 8))
plt.scatter(vol_arr, ret_arr, c=sharpe_arr, cmap='plasma')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Return')

# Add frontier line
plt.plot(
    frontier_volatility,
    frontier_y,
    'g--',
    linewidth=3,
)