Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96160
License: OTHER
1
"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2010 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7
8
import sys
9
import gzip
10
import os
11
12
class Record(object):
    """Represents a record.

    Instances start out empty; Table.MakeRecord attaches one attribute per
    extracted field with setattr.
    """
14
15
class Respondent(Record):
    """Represents a respondent."""
17
18
class Pregnancy(Record):
    """Represents a pregnancy."""
20
21
class Table(object):
    """Represents a table as a list of objects.

    Attributes:
        records: list of record objects, in the order they were added.
    """

    def __init__(self):
        self.records = []

    def __len__(self):
        """Returns the number of records in the table."""
        return len(self.records)

    def ReadFile(self, data_dir, filename, fields, constructor, n=None):
        """Reads a data file and builds one object per line.

        A file whose name ends with 'gz' is opened with gzip; any other
        file is opened as a plain file.

        Args:
            data_dir: string directory name
            filename: string name of the file to read

            fields: sequence of (name, start, end, cast) tuples specifying
            the fields to extract

            constructor: what kind of object to create

            n: optional int; reading stops once n lines have been
            processed.  None (the default) reads the whole file.
        """
        filename = os.path.join(data_dir, filename)

        if filename.endswith('gz'):
            fp = gzip.open(filename)
        else:
            fp = open(filename)

        # try/finally guarantees the file is closed even when a cast or the
        # constructor raises something other than ValueError.
        try:
            for i, line in enumerate(fp):
                if i == n:
                    break
                record = self.MakeRecord(line, fields, constructor)
                self.AddRecord(record)
        finally:
            fp.close()

    def MakeRecord(self, line, fields, constructor):
        """Scans a line and returns an object with the appropriate fields.

        Args:
            line: string line from a data file

            fields: sequence of (name, start, end, cast) tuples specifying
            the fields to extract; start and end are 1-based, inclusive
            column positions

            constructor: callable that makes an object for the record.

        Returns:
            Record with appropriate fields.  A field whose cast raises
            ValueError is stored as the string 'NA'.
        """
        obj = constructor()
        for (field, start, end, cast) in fields:
            # convert the 1-based inclusive range to a Python slice
            s = line[start-1:end]
            try:
                val = cast(s)
            except ValueError:
                # unparseable (e.g. blank) field: mark it as missing
                val = 'NA'
            setattr(obj, field, val)
        return obj

    def AddRecord(self, record):
        """Adds a record to this table.

        Args:
            record: an object of one of the record types.
        """
        self.records.append(record)

    def ExtendRecords(self, records):
        """Adds records to this table.

        Args:
            records: a sequence of record objects
        """
        self.records.extend(records)

    def Recode(self):
        """Child classes can override this to recode values."""
        pass
101
102
103
class Respondents(Table):
    """Represents the respondent table."""

    def ReadRecords(self, data_dir='.', n=None):
        """Reads the respondent data file, then recodes the records.

        Args:
            data_dir: string directory name containing the data file
            n: optional int passed through to ReadFile to limit how many
            lines are read (None reads everything)
        """
        filename = self.GetFilename()
        self.ReadFile(data_dir, filename, self.GetFields(), Respondent, n)
        self.Recode()

    def GetFilename(self):
        """Returns the name of the respondent data file."""
        return '2002FemResp.dat.gz'

    def GetFields(self):
        """Returns a list of tuples specifying the fields to extract.

        The elements of each tuple are field, start, end, cast.

        field is the name of the variable
        start and end are the indices as specified in the NSFG docs
        cast is a callable that converts the result to int, float, etc.
        """
        return [
            ('caseid', 1, 12, int),
            ]
126
127
class Pregnancies(Table):
    """Contains survey data about a Pregnancy."""

    def ReadRecords(self, data_dir='.', n=None):
        """Reads the pregnancy data file, then recodes the records.

        Args:
            data_dir: string directory name containing the data file
            n: optional int passed through to ReadFile to limit how many
            lines are read (None reads everything)
        """
        filename = self.GetFilename()
        self.ReadFile(data_dir, filename, self.GetFields(), Pregnancy, n)
        self.Recode()

    def GetFilename(self):
        """Returns the name of the pregnancy data file."""
        return '2002FemPreg.dat.gz'

    def GetFields(self):
        """Gets information about the fields to extract from the survey data.

        Documentation of the fields for Cycle 6 is at
        http://nsfg.icpsr.umich.edu/cocoon/WebDocs/NSFG/public/index.htm

        Returns:
            sequence of (name, start, end, cast) tuples
        """
        return [
            ('caseid', 1, 12, int),
            ('nbrnaliv', 22, 22, int),
            ('babysex', 56, 56, int),
            ('birthwgt_lb', 57, 58, int),
            ('birthwgt_oz', 59, 60, int),
            ('prglength', 275, 276, int),
            ('outcome', 277, 277, int),
            ('birthord', 278, 279, int),
            ('agepreg', 284, 287, int),
            ('finalwgt', 423, 440, float),
            ]

    def Recode(self):
        """Recodes agepreg and adds totalwgt_oz on every record, in place.

        agepreg is divided by 100.0 unless it is 'NA'.  totalwgt_oz is set
        to birthwgt_lb * 16 + birthwgt_oz when both parts are present and
        in range, otherwise to 'NA'.  A record that lacks the attributes a
        step needs is left untouched by that step.
        """
        for rec in self.records:

            # divide mother's age by 100
            try:
                if rec.agepreg != 'NA':
                    rec.agepreg /= 100.0
            except AttributeError:
                # record has no agepreg field; nothing to recode
                pass

            # convert weight at birth from lbs/oz to total ounces
            # note: there are some very low birthweights
            # that are almost certainly errors, but for now I am not
            # filtering
            try:
                # the 'NA' checks come first so the numeric comparisons
                # never see the missing-value marker
                if (rec.birthwgt_lb != 'NA' and rec.birthwgt_lb < 20 and
                    rec.birthwgt_oz != 'NA' and rec.birthwgt_oz <= 16):
                    rec.totalwgt_oz = rec.birthwgt_lb * 16 + rec.birthwgt_oz
                else:
                    rec.totalwgt_oz = 'NA'
            except AttributeError:
                # record has no birth weight fields; leave totalwgt_oz unset
                pass
182
183
184
def main(name, data_dir='.'):
    """Reads both survey tables and prints how many records each holds.

    Args:
        name: script name (sys.argv[0]); unused
        data_dir: string directory name containing the data files
    """
    resp = Respondents()
    resp.ReadRecords(data_dir)
    # %-formatting inside a parenthesized print yields the same output under
    # both the Python 2 print statement and the Python 3 print function.
    print('Number of respondents %d' % len(resp.records))

    preg = Pregnancies()
    preg.ReadRecords(data_dir)
    print('Number of pregnancies %d' % len(preg.records))
192
193
194
# Script entry point: sys.argv[0] becomes `name`, and an optional first
# command-line argument becomes `data_dir`.
if __name__ == '__main__':
    main(*sys.argv)
196
197