Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 805
1
"""
2
readligo.py
3
4
This module provides tools for reading LIGO data
5
files. Data along with supporting documentation
6
can be downloaded from the losc web site:
7
https://losc.ligo.org
8
The method getstrain() is the main function for
9
loading LIGO data. Some possible use
10
cases are shown below.
11
12
Example #1:
13
segList = getsegs(842657792, 842658792, 'H1')
14
for (start, stop) in segList:
15
strain, meta, dq = getstrain(start, stop, 'H1')
16
# -- Analysis code here
17
...
18
19
This default configuration assumes that the needed LIGO data
20
files are available in the current working directory or a
21
subdirectory. LIGO data between the input GPS times is loaded
22
into STRAIN. META is a dictionary of gps start, gps stop, and the
23
sample time. DQ is a dictionary of data quality flags.
24
25
Example #2
26
segList = SegmentList('H1_segs.txt')
27
28
In Example 2, 'H1_segs.txt' is a segment list downloaded from the
29
LOSC web site using the Timeline application. This may be used in the same
30
manner as segList in example 1.
31
32
Example #3
33
filelist = FileList(directory='/home/ligodata')
34
segList = getsegs(842657792, 842658792, 'H1', filelist=filelist)
35
for start, stop in segList:
36
strain, meta, dq = getstrain(start, stop, 'H1', filelist=filelist)
37
# -- Analysis code here
38
39
In this example, the first command searches the indicated directory and
40
sub-directories for LIGO data files. This list of data files is then
41
used to construct a segment list and load the requested data.
42
43
-- SEGMENT LISTS --
44
45
Segment lists may be downloaded from the LOSC web site
46
using the Timeline Query Form or constructed directly
47
from the data files.
48
49
Read in a segment list downloaded from the Timeline
50
application on the LOSC web site with SegmentList:
51
>> seglist = SegmentList('H1_segs.txt')
52
OR
53
Construct a segment list directly from the LIGO
54
data files with getsegs():
55
>> seglist = getsegs(842657792, 842658792, 'H1', flag='DATA', filelist=None)
56
57
Written by Jonah Kanner
58
2014
59
"""
60
61
import numpy as np
62
import os
63
import fnmatch
64
65
def read_frame(filename, ifo, readstrain=True):
66
"""
67
Helper function to read frame files
68
"""
69
try:
70
import Fr
71
except:
72
from pylal import Fr
73
74
if ifo is None:
75
raise TypeError("""To read GWF data, ifo must be 'H1', 'H2', or 'L1'.
76
def loaddata(filename, ifo=None):""")
77
78
#-- Read strain channel
79
strain_name = ifo + ':LOSC-STRAIN'
80
if readstrain:
81
sd = Fr.frgetvect(filename, strain_name)
82
strain = sd[0]
83
gpsStart = sd[1]
84
ts = sd[3][0]
85
else:
86
ts = 1
87
strain = 0
88
89
#-- Read DQ channel
90
dq_name = ifo + ':LOSC-DQMASK'
91
qd = Fr.frgetvect(filename, dq_name)
92
gpsStart = qd[1]
93
qmask = np.array(qd[0])
94
dq_ts = qd[3][0]
95
shortnameList_wbit = qd[5].split()
96
shortnameList = [name.split(':')[1] for name in shortnameList_wbit]
97
98
#-- Read Injection channel
99
inj_name = ifo + ':LOSC-INJMASK'
100
injdata = Fr.frgetvect(filename, inj_name)
101
injmask = injdata[0]
102
injnamelist_bit = injdata[5].split()
103
injnamelist = [name.split(':')[1] for name in injnamelist_bit]
104
105
return strain, gpsStart, ts, qmask, shortnameList, injmask, injnamelist
106
107
def read_hdf5(filename, readstrain=True):
108
"""
109
Helper function to read HDF5 files
110
"""
111
import h5py
112
dataFile = h5py.File(filename, 'r')
113
114
#-- Read the strain
115
if readstrain:
116
strain = dataFile['strain']['Strain'][...]
117
else:
118
strain = 0
119
120
ts = dataFile['strain']['Strain'].attrs['Xspacing']
121
122
#-- Read the DQ information
123
dqInfo = dataFile['quality']['simple']
124
qmask = dqInfo['DQmask'][...]
125
shortnameArray = dqInfo['DQShortnames'].value
126
shortnameList = list(shortnameArray)
127
128
# -- Read the INJ information
129
injInfo = dataFile['quality/injections']
130
injmask = injInfo['Injmask'][...]
131
injnameArray = injInfo['InjShortnames'].value
132
injnameList = list(injnameArray)
133
134
#-- Read the meta data
135
meta = dataFile['meta']
136
gpsStart = meta['GPSstart'].value
137
138
dataFile.close()
139
return strain, gpsStart, ts, qmask, shortnameList, injmask, injnameList
140
141
def loaddata(filename, ifo=None, tvec=True, readstrain=True):
142
"""
143
The input filename should be a LOSC .hdf5 file or a LOSC .gwf
144
file. The file type will be determined from the extenstion.
145
The detector should be H1, H2, or L1.
146
147
The return value is:
148
STRAIN, TIME, CHANNEL_DICT
149
150
STRAIN is a vector of strain values
151
TIME is a vector of time values to match the STRAIN vector
152
unless the flag tvec=False. In that case, TIME is a
153
dictionary of meta values.
154
CHANNEL_DICT is a dictionary of data quality channels
155
"""
156
157
# -- Check for zero length file
158
if os.stat(filename).st_size == 0:
159
return None, None, None
160
161
file_ext = os.path.splitext(filename)[1]
162
if (file_ext.upper() == '.GWF'):
163
strain, gpsStart, ts, qmask, shortnameList, injmask, injnameList = read_frame(filename, ifo, readstrain)
164
else:
165
strain, gpsStart, ts, qmask, shortnameList, injmask, injnameList = read_hdf5(filename, readstrain)
166
167
#-- Create the time vector
168
gpsEnd = gpsStart + len(qmask)
169
if tvec:
170
time = np.arange(gpsStart, gpsEnd, ts)
171
else:
172
meta = {}
173
meta['start'] = gpsStart
174
meta['stop'] = gpsEnd
175
meta['dt'] = ts
176
177
#-- Create 1 Hz DQ channel for each DQ and INJ channel
178
channel_dict = {} #-- 1 Hz, mask
179
slice_dict = {} #-- sampling freq. of stain, a list of slices
180
final_one_hz = np.zeros(qmask.shape, dtype='int32')
181
for flag in shortnameList:
182
bit = shortnameList.index(flag)
183
channel_dict[flag] = (qmask >> bit) & 1
184
185
for flag in injnameList:
186
bit = injnameList.index(flag)
187
channel_dict[flag] = (injmask >> bit) & 1
188
189
#-- Calculate the DEFAULT channel
190
try:
191
channel_dict['DEFAULT'] = ( channel_dict['DATA'] )
192
except:
193
print "Warning: Failed to calculate DEFAULT data quality channel"
194
195
if tvec:
196
return strain, time, channel_dict
197
else:
198
return strain, meta, channel_dict
199
200
201
def dq2segs(channel, gps_start):
202
"""
203
This function takes a DQ CHANNEL (as returned by loaddata or getstrain) and
204
the GPS_START time of the channel and returns a segment
205
list. The DQ Channel is assumed to be a 1 Hz channel.
206
207
Returns of a list of segment GPS start and stop times.
208
"""
209
#-- Check if the user input a dictionary
210
if type(channel) == dict:
211
try:
212
channel = channel['DEFAULT']
213
except:
214
print "ERROR: Could not find DEFAULT channel in dictionary"
215
raise
216
217
#-- Create the segment list
218
segments = dq_channel_to_seglist(channel, fs=1)
219
t0 = gps_start
220
segList = [(int(seg.start+t0), int(seg.stop+t0)) for seg in segments]
221
return SegmentList(segList)
222
223
def dq_channel_to_seglist(channel, fs=4096):
224
"""
225
WARNING:
226
This function is designed to work the output of the low level function
227
LOADDATA, not the output from the main data loading function GETSTRAIN.
228
229
Takes a data quality 1 Hz channel, as returned by
230
loaddata, and returns a segment list. The segment
231
list is really a list of slices for the strain
232
associated strain vector.
233
234
If CHANNEL is a dictionary instead of a single channel,
235
an attempt is made to return a segment list for the DEFAULT
236
channel.
237
238
Returns a list of slices which can be used directly with the
239
strain and time outputs of LOADDATA.
240
"""
241
#-- Check if the user input a dictionary
242
if type(channel) == dict:
243
try:
244
channel = channel['DEFAULT']
245
except:
246
print "ERROR: Could not find DEFAULT channel in dictionary"
247
raise
248
249
# -- Create the segment list
250
condition = channel > 0
251
boundaries = np.where(np.diff(condition) == True)[0]
252
# -- Need to +1 due to how np.diff works
253
boundaries = boundaries + 1
254
# if the array "begins" True, we need to complete the first segment
255
if condition[0]:
256
boundaries = np.append(0,boundaries)
257
# if the array "ends" True, we need to complete the last segment
258
if condition[-1]:
259
boundaries = np.append(boundaries,len(condition))
260
261
# -- group the segment boundaries two by two
262
segments = boundaries.reshape((len(boundaries)/2,2))
263
# -- Account for sampling frequency and return a slice
264
segment_list = [slice(start*fs, stop*fs) for (start,stop) in segments]
265
266
return segment_list
267
268
class FileList():
269
"""
270
Class for lists of LIGO data files.
271
272
When a FileList instance is created, DIRECTORY will
273
be searched for LIGO data files. Sub-directories
274
will be searched as well. By default, the current
275
working directory is searched.
276
"""
277
def __init__(self, directory=None, cache=None):
278
279
# -- Set default directory
280
if directory is None:
281
if os.path.isdir('/archive/losc/strain-gwf'):
282
directory='/archive/losc/strain-gwf'
283
else:
284
directory='.'
285
286
print "Using data directory {0} ...".format(directory)
287
self.directory = directory
288
self.cache = cache
289
if cache is None:
290
self.list = self.searchdir(directory)
291
else:
292
self.readcache()
293
294
def searchdir(self, directory='.'):
295
frameList = []
296
hdfList = []
297
for root, dirnames, filenames in os.walk(directory):
298
for filename in fnmatch.filter(filenames, '*.gwf'):
299
frameList.append(os.path.join(root, filename))
300
for filename in fnmatch.filter(filenames, '*.hdf5'):
301
hdfList.append(os.path.join(root, filename))
302
return frameList + hdfList
303
304
def writecache(self, cacheName):
305
outfile = open(cacheName, 'w')
306
for file in self.list:
307
outfile.write(file + '\n')
308
outfile.close()
309
310
def readcache(self):
311
infile = open(self.cache, 'r')
312
self.list = infile.read().split()
313
infile.close()
314
315
def findfile(self, gps, ifo):
316
start_gps = gps - (gps % 4096)
317
filenamelist = fnmatch.filter(self.list, '*' + '-' + ifo + '*' + '-' + str(start_gps) + '-' + '*')
318
if len(filenamelist) == 0:
319
print "WARNING! No file found for GPS {0} and IFO {1}".format(gps, ifo)
320
return None
321
else:
322
return filenamelist[0]
323
324
def getstrain(start, stop, ifo, filelist=None):
325
"""
326
START should be the starting gps time of the data to be loaded.
327
STOP should be the end gps time of the data to be loaded.
328
IFO should be 'H1', 'H2', or 'L1'.
329
FILELIST is an optional argument that is a FileList() instance.
330
331
The return value is (strain, meta, dq)
332
333
STRAIN: The data as a strain time series
334
META: A dictionary of meta data, especially the start time, stop time,
335
and sample time
336
DQ: A dictionary of the data quality flags
337
"""
338
339
if filelist is None:
340
filelist = FileList()
341
342
# -- Check if this is a science segment
343
segList = getsegs(start, stop, ifo, flag='DATA', filelist=filelist)
344
sl = segList.seglist
345
if (sl[0][0] == start) and (sl[0][1] == stop):
346
pass
347
else:
348
raise TypeError("""Error in getstrain.
349
Requested times include times where the data file was not found
350
or instrument not in SCIENCE mode.
351
Use readligo.getsegs() to construct a segment list.
352
The science mode segment list for the requested time range is:
353
{0}""".format(segList))
354
355
# -- Construct list of expected file start times
356
first = start - (start % 4096)
357
gpsList = np.arange(first, stop, 4096)
358
359
m_strain = np.array([])
360
m_dq = None
361
# -- Loop over needed files
362
for time in gpsList:
363
filename = filelist.findfile(time, ifo)
364
print "Loading {0}".format(filename)
365
366
#-- Read in data
367
strain, meta, dq = loaddata(filename, ifo, tvec=False)
368
if len(m_strain) == 0:
369
m_start = meta['start']
370
dt = meta['dt']
371
m_stop = meta['stop']
372
m_strain = np.append(m_strain, strain)
373
if m_dq is None:
374
m_dq = dq
375
else:
376
for key in dq.keys():
377
m_dq[key] = np.append(m_dq[key], dq[key])
378
379
# -- Trim the data
380
lndx = np.abs(start - m_start)*(1.0/dt)
381
rndx = np.abs(stop - m_start)*(1.0/dt)
382
383
m_strain = m_strain[lndx:rndx]
384
for key in m_dq.keys():
385
m_dq[key] = m_dq[key][lndx*dt:rndx*dt]
386
387
meta['start'] = start
388
meta['stop'] = stop
389
meta['dt'] = dt
390
391
return m_strain, meta, m_dq
392
393
class SegmentList():
394
def __init__(self, filename, numcolumns=3):
395
396
if type(filename) is str:
397
if numcolumns == 4:
398
number, start, stop, duration = np.loadtxt(filename, dtype='int',unpack=True)
399
elif numcolumns == 2:
400
start, stop = np.loadtxt(filename, dtype='int',unpack=True)
401
elif numcolumns == 3:
402
start, stop, duration = np.loadtxt(filename, dtype='int',unpack=True)
403
self.seglist = zip(start, stop)
404
elif type(filename) is list:
405
self.seglist = filename
406
else:
407
raise TypeError("SegmentList() expects the name of a segmentlist file from the LOSC website Timeline")
408
409
def __repr__(self):
410
return 'SegmentList( {0} )'.format(self.seglist)
411
def __iter__(self):
412
return iter(self.seglist)
413
def __getitem__(self, key):
414
return self.seglist[key]
415
416
def getsegs(start, stop, ifo, flag='DATA', filelist=None):
417
"""
418
Method for constructing a segment list from
419
LOSC data files. By default, the method uses
420
files in the current working directory to
421
construct a segment list.
422
423
If a FileList is passed in the flag FILELIST,
424
then those files will be searched for segments
425
passing the DQ flag passed as the FLAG argument.
426
"""
427
428
if filelist is None:
429
filelist = FileList()
430
431
# -- Construct list of expected file start times
432
first = start - (start % 4096)
433
gpsList = np.arange(first, stop, 4096)
434
m_dq = None
435
436
# -- Initialize segment list
437
segList = []
438
439
# -- Loop over needed files
440
for time in gpsList:
441
filename = filelist.findfile(time, ifo)
442
443
#-- Read in data
444
if filename is None:
445
print "WARNING! No file found with GPS start time {0}".format(time)
446
print "Segment list may contain errors due to missing files."
447
continue
448
else:
449
try:
450
strain, meta, dq = loaddata(filename, ifo, tvec=False, readstrain=False)
451
except:
452
print "WARNING! Failed to load file {0}".format(filename)
453
print "Segment list may contain errors due to corrupt files."
454
continue
455
456
if dq is None:
457
print "Warning! Found zero length file {0}".format(filename)
458
print "Segment list may contain errors."
459
continue
460
461
#-- Add segments to list on a file-by-file basis
462
chan = dq[flag]
463
indxlist = dq_channel_to_seglist(chan, fs=1.0)
464
i_start = meta['start']
465
i_seglist = [(indx.start+i_start, indx.stop+i_start) for indx in indxlist]
466
i_seglist = [(int(begin), int(end)) for begin, end in i_seglist]
467
segList = segList + i_seglist
468
469
# -- Sort segments
470
segList.sort()
471
472
# -- Merge overlapping segments
473
for i in range(0, len(segList)-1):
474
seg1 = segList[i]
475
seg2 = segList[i+1]
476
477
if seg1[1] == seg2[0]:
478
segList[i] = None
479
segList[i+1] = (seg1[0], seg2[1])
480
# -- Remove placeholder segments
481
segList = [seg for seg in segList if seg is not None]
482
483
# -- Trim segment list to fit within requested start/stop times
484
for seg in segList:
485
idx = segList.index(seg)
486
if (seg[1] < start):
487
segList[idx] = None
488
elif (seg[0] > stop):
489
segList[idx] = None
490
elif (seg[0] < start) and (seg[1] > stop):
491
segList[idx] = (start, stop)
492
elif (seg[0] < start):
493
segList[idx] = (start, seg[1])
494
elif (seg[1] > stop):
495
segList[idx] = (seg[0], stop)
496
# -- Remove placeholder segments
497
segList = [seg for seg in segList if seg is not None]
498
499
return SegmentList(segList)
500
501
502
503