Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96164
License: OTHER
1
"""This file contains code for use with "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2010 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
import math
9
import sys
10
import survey
11
import thinkstats
12
13
14
class Respondents(survey.Table):
    """Represents the respondent table."""

    def ReadRecords(self, data_dir='.', n=None):
        """Reads the data file and recodes the resulting records.

        data_dir: passed to ReadFile together with the file name
        n: forwarded unchanged to ReadFile (presumably a cap on the
           number of records read -- confirm against survey.Table)
        """
        filename = self.GetFilename()
        self.ReadFile(data_dir,
                      filename,
                      self.GetFields(),
                      survey.Respondent,
                      n)
        self.Recode()

    def GetFilename(self):
        """Get the name of the data file.

        This function can be overridden by child classes.

        The BRFSS data is available from thinkstats.com/CDBRFS08.ASC.gz

        """
        return 'CDBRFS08.ASC.gz'

    def GetFields(self):
        """Returns a list of tuples specifying the fields to extract.

        BRFSS codebook
        http://www.cdc.gov/brfss/technical_infodata/surveydata/2008.htm

        The elements of each tuple are field, start, end, cast.

        field is the name of the variable
        start and end are the column indices as given in the codebook
        cast is a callable that converts the result to int, float, etc.
        """
        return [
            ('age', 101, 102, int),
            ('weight2', 119, 122, int),
            ('wtyrago', 127, 130, int),
            ('wtkg2', 1254, 1258, int),
            ('htm3', 1251, 1253, int),
            ('sex', 143, 143, int),
            ]

    def Recode(self):
        """Recode variables that need cleaning.

        Sentinel codes are replaced with the string 'NA' and the weight
        fields are rescaled; every record in self.records is modified
        in place.
        """

        def CleanWeight(weight):
            """Returns the cleaned value of a reported-weight field."""
            # Already-missing values pass through untouched.  Checking
            # explicitly avoids relying on Python 2's int/str ordering
            # (and the TypeError the comparisons would raise on Python 3).
            if weight == 'NA':
                return 'NA'
            if weight in [7777, 9999]:
                return 'NA'
            elif weight < 1000:
                # treated as pounds; the summaries report kg
                return weight / 2.2
            elif 9000 < weight < 9999:
                # strip the 9000 offset (presumably marks a value that
                # was already reported in kg -- confirm against codebook)
                return weight - 9000
            else:
                return weight

        for rec in self.records:
            # recode wtkg2: 99999 is the missing code, otherwise the
            # stored integer is scaled down by 100
            if rec.wtkg2 in ['NA', 99999]:
                rec.wtkg2 = 'NA'
            else:
                rec.wtkg2 /= 100.0

            # recode weight2 and wtyrago
            rec.weight2 = CleanWeight(rec.weight2)
            rec.wtyrago = CleanWeight(rec.wtyrago)

            # recode htm3
            if rec.htm3 == 999:
                rec.htm3 = 'NA'

            # recode age
            if rec.age in [7, 9]:
                rec.age = 'NA'

    def _CollectBySex(self, attr):
        """Maps sex code (1, 2) and 'all' to lists of the given attribute.

        Records whose value for attr is 'NA' are skipped.  A record whose
        sex code is not 1 or 2 is counted under 'all' only.

        attr: string name of the record attribute to collect
        """
        d = {1: [], 2: [], 'all': []}
        for r in self.records:
            val = getattr(r, attr)
            if val == 'NA':
                continue
            if r.sex in (1, 2):
                d[r.sex].append(val)
            d['all'].append(val)
        return d

    def _PrintSummary(self, title, d):
        """Prints n, trimmed mean, variance, sigma and cv for each group.

        title: heading line printed before the table
        d: map from group key to list of values
        """
        print(title)
        print('key n mean var sigma cv')
        for key, t in d.items():
            mu, var = thinkstats.TrimmedMeanVar(t)
            sigma = math.sqrt(var)
            cv = sigma / mu
            # Joining str() of each field reproduces the output of the
            # Python 2 statement "print key, len(t), mu, var, sigma, cv".
            row = (key, len(t), mu, var, sigma, cv)
            print(' '.join(str(x) for x in row))

    def SummarizeHeight(self):
        """Print summary statistics for male and female height.

        Returns the map from sex code (and 'all') to list of heights.
        """
        d = self._CollectBySex('htm3')
        self._PrintSummary('Height (cm):', d)
        return d

    def SummarizeWeight(self):
        """Print summary statistics for male and female weight.

        Returns the map from sex code (and 'all') to list of weights,
        mirroring SummarizeHeight.
        """
        d = self._CollectBySex('weight2')
        self._PrintSummary('Weight (kg):', d)
        return d

    def SummarizeWeightChange(self):
        """Print the mean reported change in weight in kg."""
        # only records with both the current and year-ago weight count
        changes = [r.weight2 - r.wtyrago for r in self.records
                   if r.weight2 != 'NA' and r.wtyrago != 'NA']

        print('Mean change ' + str(thinkstats.Mean(changes)))
134
135
136
def main(name, data_dir='.'):
    """Reads the respondent data and prints the three summaries.

    name: first command-line element (sys.argv[0]); not used
    data_dir: passed to Respondents.ReadRecords
    """
    resp = Respondents()
    resp.ReadRecords(data_dir)
    resp.SummarizeHeight()
    resp.SummarizeWeight()
    resp.SummarizeWeightChange()
142
143
if __name__ == '__main__':
    # sys.argv[0] fills main's `name`; an optional second argument
    # fills data_dir.
    main(*sys.argv)
145
146