SharedEQUITY MEAN REVERSION 2018-08-29-093828.ipynbOpen in CoCalc
MEAN REVERSION HURST

Testing Mean Reversion Code

from datetime import datetime
from pandas_datareader import data
import pandas as pd
import numpy as np
from numpy import log, polyfit, sqrt, std, subtract
import statsmodels.tsa.stattools as ts
/ext/anaconda3/lib/python3.5/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead. from pandas.core import datetools
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import pprint
symbList = ['GOOG','FB']
start_date = '2008/01/01'
end_date = '2018/01/01'
# Fetching data and initializing the IV column
#here's the way to add a new column: Initialize it, ha!

dataAll = pd.read_csv("CompaniesSP.csv")
#create a new column and give value 0, just to do something new in the data file
dataAll['IV'] = 0

#take the first8 columns of the file, iterating name range
data=dataAll.loc[:, 'Date':'SBUX']

data.head()
Date SPX AAPL MSFT GOOG AMZN FB SBUX
0 2000-08-31 1517.680054 2.935255 25.539095 NaN 41.5000 NaN 3.735864
1 2000-09-01 1520.770020 3.055676 25.676279 NaN 41.5000 NaN 3.844242
2 2000-09-05 1507.079956 3.007508 25.653412 NaN 45.6875 NaN 3.837867
3 2000-09-06 1492.250000 2.814835 25.401903 NaN 45.8750 NaN 3.767739
4 2000-09-07 1502.510010 2.986434 25.630543 NaN 43.5000 NaN 3.837867
tickers =  ['AAPL', 'MSFT']
from pandas import read_csv
series = read_csv('CompaniesSP.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
print(type(series))
series.describe()
<class 'pandas.core.frame.DataFrame'>
SPX AAPL MSFT GOOG AMZN FB SBUX SPX_ret AAPL_ret MSFT_ret ... AMZN_ret_HV21_vol_prem FB_ret_HV21_vol_prem SBUX_ret_HV21_vol_prem SPX_ret_Vcorr AAPL_ret_Vcorr MSFT_ret_Vcorr GOOG_ret_Vcorr AMZN_ret_Vcorr FB_ret_Vcorr SBUX_ret_Vcorr
count 4442.000000 4442.000000 4442.000000 3448.000000 4442.000000 1496.000000 4442.000000 4441.000000 4441.000000 4441.000000 ... 3723.000000 1136.000000 3723.000000 4.190000e+03 4190.000000 4190.000000 3196.000000 4190.000000 1244.000000 4190.000000
mean 1458.840833 41.359003 29.691175 406.808014 239.160950 90.675635 20.903147 0.000198 0.001241 0.000466 ... 21.959204 19.149914 11.369873 1.000000e+00 0.535958 0.647527 0.573460 0.499291 0.473103 0.574429
std 466.505382 48.123965 16.563054 256.515001 309.531626 49.050331 18.269219 0.012050 0.025153 0.018710 ... 9.674753 10.134565 7.060319 1.295721e-14 0.129706 0.121225 0.129951 0.092332 0.149839 0.106975
min 676.530029 0.631968 12.002091 49.681866 5.970000 17.730000 2.856087 -0.090350 -0.518691 -0.118155 ... 4.831570 4.221414 -0.477096 1.000000e+00 0.193932 0.364624 0.086375 0.298710 0.082368 0.234126
25% 1131.440033 3.426210 20.062912 229.441452 38.905001 49.689999 6.667941 -0.004764 -0.010371 -0.008068 ... 14.349202 11.526887 6.827820 1.000000e+00 0.434108 0.577042 0.505172 0.426852 0.442305 0.500515
50% 1309.825012 17.599251 22.606724 298.446395 89.005001 81.975003 12.474978 0.000538 0.000765 0.000117 ... 21.428194 16.117665 9.167523 1.000000e+00 0.546415 0.665509 0.589889 0.487428 0.523660 0.571625
75% 1782.164978 66.533138 32.544173 555.451157 306.270004 125.405000 33.679004 0.005616 0.012922 0.008630 ... 27.531779 32.496460 14.903605 1.000000e+00 0.651333 0.748857 0.659155 0.555657 0.564986 0.648543
max 2872.870117 181.021957 96.352371 1175.839966 1598.390015 193.089996 63.265320 0.115800 0.139050 0.195652 ... 45.250383 38.788546 33.339979 1.000000e+00 0.750971 0.819369 0.825443 0.761214 0.673141 0.786925

8 rows × 35 columns

# This line is necessary for the plot to appear in a Jupyter notebook (you don't need it if you're working in Rodeo)
%matplotlib inline
# Control the default size of figures in this Jupyter notebook (n/a if working in Rodeo)
%pylab inline
pylab.rcParams['figure.figsize'] = (15, 9)   # Change the size of plots

# Plot the adjusted closing price of AAPL
series.loc[:, 'AAPL':'SBUX'].plot(grid = True)
Populating the interactive namespace from numpy and matplotlib
/ext/anaconda3/lib/python3.5/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy "\n`%matplotlib` prevents importing * from pylab and numpy"
<matplotlib.axes._subplots.AxesSubplot at 0x7fb0b21b54e0>
series.loc[:, 'AAPL'].plot(grid = True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb0b1de2320>
#create changes and add the corresponding new columns
series['GOOG_Ret'] = series['GOOG'].pct_change().dropna()
series['SPX_Ret'] = series['SPX'].pct_change().dropna()
series['AAPL_Ret'] = series['AAPL'].pct_change().dropna()

#rolling correlation of google and sp500 of returns yearly (250 days)
series['GOOG_Ret'].rolling(252).corr(series['SPX_Ret']).plot(figsize=(15,4))
<matplotlib.axes._subplots.AxesSubplot at 0x7fb0b1ff3470>
#create the historical 30day vol
series['SPX_HV21'] = series['SPX_Ret'].rolling(21).std() * np.sqrt(252) * 100
series['GOOG_HV21'] = series['GOOG_Ret'].rolling(21).std() * np.sqrt(252) * 100
series['AAPL_HV21'] = series['AAPL_Ret'].rolling(21).std() * np.sqrt(252) * 100

series[['GOOG_HV21','AAPL_HV21','SPX_HV21']].plot(figsize=(15,8), grid= True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb0b1f56908>
AAPL_TS=series['AAPL_Ret']
def hurst(AAPL_TS):
    # Create the range of lag values
    lags= range (2,100)
     # Calculate the array of variances of the lagged differences
    tau=[sqrt(std(subtract(AAPL_TS[lag:], AAPL_TS[-lag]))) for lag in lags]
     # Use a linear fit to estimate the hurst exponent
    poly = polyfit(log(lags), log(tau), 1)
     # Return the hurst exponent from the polyfit output
    return poly[0]*2.0
hurst(AAPL_TS)
-0.029860508371503839
series.loc[:, 'AAPL_Ret'].plot(grid = True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb0b1df9278>
AAPL_Price=series['AAPL']
def hurst(AAPL_Price):
    # Create the range of lag values
    lags= range (2,100)
     # Calculate the array of variances of the lagged differences
    tau=[sqrt(std(subtract(AAPL_Price[lag:], AAPL_Price[-lag]))) for lag in lags]
     # Use a linear fit to estimate the hurst exponent
    poly = polyfit(log(lags), log(tau), 1)
     # Return the hurst exponent from the polyfit output
    return poly[0]*2.0
hurst(AAPL_Price)
0.0010023252339726358
AAPL_Price2=series['2000-08-31':'2008-08-31']['AAPL']
AAPL_Price2.head()
Date 2000-08-31 2.935255 2000-09-01 3.055676 2000-09-05 3.007508 2000-09-06 2.814835 2000-09-07 2.986434 Name: AAPL, dtype: float64
AAPL_Price2.tail()
Date 2008-08-25 16.622875 2008-08-26 16.727882 2008-08-27 16.827114 2008-08-28 16.737513 2008-08-29 16.331944 Name: AAPL, dtype: float64
hurst(AAPL_Price2)
0.00350593165566435
AAPL_Price3=series['2008-08-31':'2018-04-30']['AAPL']
hurst(AAPL_Price3)
-0.0052954428489820054
AAPL_Price3=series['2008-08-31':'2018-04-30']['AAPL'].plot()
AAPL_Price16=series['2008-08-31':'2016-04-30']['AAPL']
hurst(AAPL_Price16)
-0.0067036462086532758