"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function
import sys
from operator import itemgetter
import first
import thinkstats2
def Mode(hist):
    """Finds the most frequent value in a histogram.

    If several values share the highest frequency, the largest of those
    values is returned.

    hist: Hist object

    returns: value from Hist
    """
    def freq_then_value(pair):
        # Rank pairs by frequency first and by value second, so ties on
        # frequency are settled by the value itself.
        value, freq = pair
        return freq, value

    best_value, _ = max(hist.Items(), key=freq_then_value)
    return best_value
def AllModes(hist):
    """Ranks the value-freq pairs of a histogram by frequency.

    Pairs are ordered from highest to lowest frequency; pairs with equal
    frequency keep the relative order in which hist.Items() produced them.

    hist: Hist object

    returns: list of value-freq pairs
    """
    pairs = list(hist.Items())
    # list.sort is stable, also with reverse=True, so ties keep input order.
    pairs.sort(key=itemgetter(1), reverse=True)
    return pairs
def WeightDifference(live, firsts, others):
    """Prints a comparison of birth weight for first babies and others.

    Reports the mean and variance of totalwgt_lb for both groups, the
    difference between the group means (in pounds, in ounces, and as a
    percentage of the mean over all live births), and the value returned
    by thinkstats2.CohenEffectSize for the two groups.

    live: DataFrame of all live births
    firsts: DataFrame of first babies
    others: DataFrame of others
    """
    overall_mean = live.totalwgt_lb.mean()

    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb

    first_mean = first_wgt.mean()
    other_mean = other_wgt.mean()
    first_var = first_wgt.var()
    other_var = other_wgt.var()

    # Each section prints a heading followed by one line per group.
    group_labels = ('First babies', 'Others')
    sections = (
        ('Mean', (first_mean, other_mean)),
        ('Variance', (first_var, other_var)),
    )
    for heading, stats in sections:
        print(heading)
        for label, stat in zip(group_labels, stats):
            print(label, stat)

    gap_lb = first_mean - other_mean
    print('Difference in lbs', gap_lb)
    print('Difference in oz', gap_lb * 16)
    print('Difference relative to mean (%age points)',
          gap_lb / overall_mean * 100)

    effect_size = thinkstats2.CohenEffectSize(first_wgt, other_wgt)
    print('Cohen d', effect_size)
def main(script):
    """Runs a self-check of the functions in this module.

    Loads the data frames with first.MakeFrames, prints the weight
    comparison, then asserts the expected mode of pregnancy length and
    the expected highest frequency before printing the five most
    frequent lengths.

    script: string script name
    """
    births, first_babies, later_babies = first.MakeFrames()
    length_hist = thinkstats2.Hist(births.prglngth)

    WeightDifference(births, first_babies, later_babies)

    most_common = Mode(length_hist)
    print('Mode of preg length', most_common)
    assert most_common == 39

    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693

    for length, count in ranked[:5]:
        print(length, count)

    print('%s: All tests passed.' % script)
if __name__ == '__main__':
    # Run the self-test only when executed as a script. Every element of
    # sys.argv (script name first) is passed to main as a positional argument.
    # NOTE(review): main accepts only `script`, so any extra command-line
    # argument raises TypeError here.
    main(*sys.argv)