| Download
Think Stats by Allen B. Downey Think Stats is an introduction to Probability and Statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7120License: GPL3
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import pandas10import numpy as np11import statsmodels.formula.api as smf1213import thinkplot14import thinkstats215import regression16import timeseries171819def RunQuadraticModel(daily):20"""Runs a linear model of prices versus years.2122daily: DataFrame of daily prices2324returns: model, results25"""26daily['years2'] = daily.years**227model = smf.ols('ppg ~ years + years2', data=daily)28results = model.fit()29return model, results303132def PlotQuadraticModel(daily, name):33"""34"""35model, results = RunQuadraticModel(daily)36regression.SummarizeResults(results)37timeseries.PlotFittedValues(model, results, label=name)38thinkplot.Save(root='timeseries11',39title='fitted values',40xlabel='years',41xlim=[-0.1, 3.8],42ylabel='price per gram ($)')4344timeseries.PlotResidualPercentiles(model, results)45thinkplot.Save(root='timeseries12',46title='residuals',47xlabel='years',48ylabel='price per gram ($)')4950years = np.linspace(0, 5, 101)51thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)52timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)53thinkplot.Save(root='timeseries13',54title='predictions',55xlabel='years',56xlim=[years[0]-0.1, years[-1]+0.1],57ylabel='price per gram ($)')585960def PlotEwmaPredictions(daily, name):61"""62"""6364# use EWMA to estimate slopes65filled = timeseries.FillMissing(daily)66filled['slope'] = pandas.ewma(filled.ppg.diff(), span=180)67filled[-1:]6869# extract the last inter and slope70start = filled.index[-1]71inter = filled.ewma[-1]72slope = filled.slope[-1]7374# reindex the DataFrame, adding a year to the end75dates = pandas.date_range(filled.index.min(),76filled.index.max() + np.timedelta64(365, 'D'))77predicted = filled.reindex(dates)7879# generate predicted values and add them to the end80predicted['date'] = predicted.index81one_day = np.timedelta64(1, 'D')82predicted['days'] = (predicted.date - start) / one_day83predict = inter + slope * predicted.days84predicted.ewma.fillna(predict, inplace=True)8586# plot the actual values and predictions87thinkplot.Scatter(daily.ppg, alpha=0.1, label=name)88thinkplot.Plot(predicted.ewma)89thinkplot.Save()909192class SerialCorrelationTest(thinkstats2.HypothesisTest):93"""Tests serial correlations by permutation."""9495def TestStatistic(self, data):96"""Computes the test statistic.9798data: tuple of xs and ys99"""100series, lag = data101test_stat = abs(thinkstats2.SerialCorr(series, lag))102return test_stat103104def RunModel(self):105"""Run the model of the null hypothesis.106107returns: simulated data108"""109series, lag = self.data110permutation = series.reindex(np.random.permutation(series.index))111return permutation, lag112113114def TestSerialCorr(daily):115"""Tests serial correlations in daily prices and their residuals.116117daily: DataFrame of daily prices118"""119# test the correlation between consecutive prices120series = daily.ppg121test = SerialCorrelationTest((series, 1))122pvalue = test.PValue()123print(test.actual, pvalue)124125# test for serial correlation in residuals of the linear model126_, results = timeseries.RunLinearModel(daily)127series = results.resid128test = SerialCorrelationTest((series, 1))129pvalue = test.PValue()130print(test.actual, pvalue)131132# test for serial correlation in residuals of the quadratic model133_, results = RunQuadraticModel(daily)134series = results.resid135test = SerialCorrelationTest((series, 1))136pvalue = test.PValue()137print(test.actual, pvalue)138139140def main(name):141transactions = timeseries.ReadData()142143dailies = timeseries.GroupByQualityAndDay(transactions)144name = 'high'145daily = dailies[name]146147PlotQuadraticModel(daily, name)148TestSerialCorr(daily)149PlotEwmaPredictions(daily, name)150151152if __name__ == '__main__':153import sys154main(*sys.argv)155156157