| Download
Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7115 | License: GPL3
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function89import pandas10import numpy as np1112import thinkplot13import thinkstats214import survival151617def CleanData(resp):18"""Cleans respondent data.1920resp: DataFrame21"""22resp.cmdivorcx.replace([9998, 9999], np.nan, inplace=True)2324resp['notdivorced'] = resp.cmdivorcx.isnull().astype(int)25resp['duration'] = (resp.cmdivorcx - resp.cmmarrhx) / 12.026resp['durationsofar'] = (resp.cmintvw - resp.cmmarrhx) / 12.02728month0 = pandas.to_datetime('1899-12-15')29dates = [month0 + pandas.DateOffset(months=cm)30for cm in resp.cmbirth]31resp['decade'] = (pandas.DatetimeIndex(dates).year - 1900) // 10323334def ResampleDivorceCurve(resps):35"""Plots divorce curves based on resampled data.3637resps: list of respondent DataFrames38"""39for _ in range(41):40samples = [thinkstats2.ResampleRowsWeighted(resp)41for resp in resps]42sample = pandas.concat(samples, ignore_index=True)43PlotDivorceCurveByDecade(sample, color='#225EA8', alpha=0.1)4445thinkplot.Show(xlabel='years',46axis=[0, 28, 0, 1])474849def ResampleDivorceCurveByDecade(resps):50"""Plots divorce curves for each birth cohort.5152resps: list of respondent DataFrames53"""54for i in range(41):55samples = [thinkstats2.ResampleRowsWeighted(resp)56for resp in resps]57sample = pandas.concat(samples, ignore_index=True)58groups = sample.groupby('decade')59if i == 0:60survival.AddLabelsByDecade(groups, alpha=0.7)6162EstimateSurvivalByDecade(groups, alpha=0.1)6364thinkplot.Save(root='survival7',65xlabel='years',66axis=[0, 28, 0, 1])676869def EstimateSurvivalByDecade(groups, **options):70"""Groups respondents by decade and plots survival curves.7172groups: GroupBy object73"""74thinkplot.PrePlot(len(groups))75for name, group in groups:76print(name, len(group))77_, sf = 
EstimateSurvival(group)78thinkplot.Plot(sf, **options)798081def EstimateSurvival(resp):82"""Estimates the survival curve.8384resp: DataFrame of respondents8586returns: pair of HazardFunction, SurvivalFunction87"""88complete = resp[resp.notdivorced == 0].duration89ongoing = resp[resp.notdivorced == 1].durationsofar9091hf = survival.EstimateHazardFunction(complete, ongoing)92sf = hf.MakeSurvival()9394return hf, sf959697def main():98resp6 = survival.ReadFemResp2002()99CleanData(resp6)100married6 = resp6[resp6.evrmarry==1]101102resp7 = survival.ReadFemResp2010()103CleanData(resp7)104married7 = resp7[resp7.evrmarry==1]105106ResampleDivorceCurveByDecade([married6, married7])107108109if __name__ == '__main__':110main()111112113