| Download
Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7138 | License: GPL3
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import numpy as np10import sys1112import nsfg13import thinkstats2141516def ReadFemResp(dct_file='2002FemResp.dct',17dat_file='2002FemResp.dat.gz',18nrows=None):19"""Reads the NSFG respondent data.2021dct_file: string file name22dat_file: string file name2324returns: DataFrame25"""26dct = thinkstats2.ReadStataDct(dct_file)27df = dct.ReadFixedWidth(dat_file, compression='gzip', nrows=nrows)28CleanFemResp(df)29return df303132def CleanFemResp(df):33"""Recodes variables from the respondent frame.3435df: DataFrame36"""37pass383940def ValidatePregnum(resp):41"""Validate pregnum in the respondent file.4243resp: respondent DataFrame44"""45# read the pregnancy frame46preg = nsfg.ReadFemPreg()4748# make the map from caseid to list of pregnancy indices49preg_map = nsfg.MakePregMap(preg)5051# iterate through the respondent pregnum series52for index, pregnum in resp.pregnum.items():53caseid = resp.caseid[index]54indices = preg_map[caseid]5556# check that pregnum from the respondent file equals57# the number of records in the pregnancy file58if len(indices) != pregnum:59print(caseid, len(indices), pregnum)60return False6162return True636465def main(script):66"""Tests the functions in this module.6768script: string script name69"""70resp = ReadFemResp()7172assert(len(resp) == 7643)73assert(resp.pregnum.value_counts()[1] == 1267)74assert(ValidatePregnum(resp))7576print('%s: All tests passed.' % script)777879if __name__ == '__main__':80main(*sys.argv)818283