TdlpackIO
TdlpackIO is a pure Python implementation for performing IO with TDLPACK sequential files (i.e. Fortran unformatted files). Instead of using Fortran to perform IO, we use Python's builtins.open() in binary mode, which allows stream-based IO on TDLPACK files. When a file is opened for reading, its contents (TDLPACK records) are automatically indexed and stored in a dictionary. The dictionary stores the byte offset of each data record; the size of the data record; the date and lead time; and the MOS-2000 ID.
This indexing allows the user to access a TDLPACK sequential file in a random-access fashion. For example, if a user wants to read the 500th record in the file, the first 499 records do not need to be read in their entirety.
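For instance, a minimal sketch of this random access, assuming 'sample.sq' is an existing TDLPACK sequential file with at least 500 records:

import TdlpackIO

f = TdlpackIO.open('sample.sq')
print(f.records)      # total number of records found during indexing
rec = f.record(500)   # jump straight to record 500 (1-based); records 1-499 are skipped
f.close()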
1""" 2TdlpackIO is a pure Python implementation for performing IO with TDLPACK sequential files 3(i.e. Fortran unformatted files). Instead of using Fortran for perform IO, we are using 4Python builtins.open() in binary mode. This allows us to perform stream-based IO for TDLPACK 5files. When a file is opened for reading, its contents (TDLPACK records) are automatically 6indexed and stored in a dictionary. The dictionary stores the byte offset the data record; 7the size of the data record; date and lead time; and MOS-2000 ID. 8 9This indexing allow the user to access a TDLPACK sequential file in a random-access nature. 10For example if a users wants to read the 500th record in the file, the first 499 records in 11their entirety do not need to be read. 12""" 13import logging 14import numpy as np 15import os 16import pdb 17import pytdlpack 18import struct 19import sys 20import warnings 21 22__version__ = pytdlpack.__version__ # Share the version number 23 24_IS_PYTHON3 = sys.version_info.major >= 3 25 26if _IS_PYTHON3: 27 import builtins 28else: 29 import __builtin__ as builtins 30 31ONE_MB = 1048576 32 33class open(object): 34 def __init__(self,filename,mode='r'): 35 """ 36 Class Constructor 37 38 Parameters 39 ---------- 40 41 **`filename : str`** 42 43 File name. 44 45 **`mode : str, optional, default = 'r'`** 46 47 File handle mode. The default is open for reading ('r'). 48 """ 49 if mode == 'r' or mode == 'w': 50 mode = mode+'b' 51 elif mode == 'a': 52 mode = 'wb' 53 self._filehandle = builtins.open(filename,mode=mode,buffering=ONE_MB) 54 self._hasindex = False 55 self._index = {} 56 self.mode = mode 57 self.name = os.path.abspath(filename) 58 self.records = 0 59 self.recordnumber = 0 60 self.size = os.path.getsize(self.name) 61 # Perform indexing on read 62 if 'r' in self.mode: 63 self._get_index() 64 65 def __enter__(self): 66 """ 67 """ 68 return self 69 70 def __exit__(self,atype,value,traceback): 71 """ 72 """ 73 self.close() 74 75 def __iter__(self): 76 """ 77 """ 78 return self 79 80 def __next__(self): 81 """ 82 """ 83 if self.recordnumber < self.records: 84 return self.read(1)[0] 85 else: 86 raise StopIteration 87 88 def __repr__(self): 89 """ 90 """ 91 strings = [] 92 keys = self.__dict__.keys() 93 for k in keys: 94 if not k.startswith('_'): 95 strings.append('%s = %s\n'%(k,self.__dict__[k])) 96 return ''.join(strings) 97 98 def __getitem__(self,key): 99 """ 100 """ 101 if isinstance(key,slice): 102 beg, end, inc = key.indices(self.records) 103 self.seek(beg) 104 return [self.record(i+1) for i in range(beg,end,inc)] 105 elif isinstance(key,int): 106 if key == 0: return None 107 self.seek(key) 108 return self.record(key) 109 else: 110 raise KeyError('Key must be an integer record number or a slice') 111 112 def _get_index(self): 113 """ 114 Perform indexing of data records. 115 """ 116 #pdb.set_trace() 117 # Initialize index dictionary 118 self._index['offset'] = [] 119 self._index['size'] = [] 120 self._index['type'] = [] 121 self._index['date'] = [] 122 self._index['lead'] = [] 123 self._index['id1'] = [] 124 self._index['id2'] = [] 125 self._index['id3'] = [] 126 self._index['id4'] = [] 127 self._index['dims'] = [] 128 self._index['linked_station_id_record'] = [] 129 _last_station_id_record = 0 130 131 # Iterate 132 while True: 133 try: 134 # First read 4-byte Fortran record header, then read the next 135 # 44 bytes which provides enough information to catalog the 136 # data record. 
137 pos = self._filehandle.tell() 138 fortran_header = struct.unpack('>i',self._filehandle.read(4))[0] 139 if fortran_header >= 132: 140 bytes_to_read = 132 141 else: 142 bytes_to_read = fortran_header 143 temp = np.frombuffer(self._filehandle.read(bytes_to_read),dtype='>i4') 144 _header = struct.unpack('>4s',temp[2])[0].decode() 145 146 # Check to first 4 bytes of the data record to determine the data 147 # record type. 148 if _header == 'PLDT': 149 # TDLPACK data record 150 # Here we create a dimension dictionary per TDLPACK record and store in 151 # the index. 152 _dimdict = {} 153 _pos = 16+temp.tobytes()[16] 154 if bool(int(bin(temp.tobytes()[17])[-1])): 155 # Grid 156 _dimdict['nx'] = struct.unpack('>h',temp.tobytes()[_pos+2:_pos+4])[0] 157 _dimdict['ny'] = struct.unpack('>h',temp.tobytes()[_pos+4:_pos+6])[0] 158 else: 159 # Vector 160 _dimdict['nsta'] = struct.unpack('>i',temp.tobytes()[_pos+4:_pos+8])[0] 161 self._index['size'].append(temp[1]) 162 self._index['type'].append('data') 163 self._index['date'].append(temp[6]) 164 self._index['lead'].append(int(str(temp[9])[-3:])) 165 self._index['id1'].append(temp[7]) 166 self._index['id2'].append(temp[8]) 167 self._index['id3'].append(temp[9]) 168 self._index['id4'].append(temp[10]) 169 self._index['dims'].append(_dimdict) 170 self._index['linked_station_id_record'].append(_last_station_id_record) 171 else: 172 if temp[1] == 24 and temp[6] == 9999: 173 # Trailer record 174 self._index['size'].append(temp[1]) 175 self._index['type'].append('trailer') 176 self._index['date'].append(None) 177 self._index['lead'].append(None) 178 self._index['id1'].append(None) 179 self._index['id2'].append(None) 180 self._index['id3'].append(None) 181 self._index['id4'].append(None) 182 self._index['dims'].append(None) 183 self._index['linked_station_id_record'].append(_last_station_id_record) 184 else: 185 # Station ID record 186 self._index['size'].append(temp[1]) 187 self._index['type'].append('station') 188 self._index['date'].append(None) 189 self._index['lead'].append(None) 190 self._index['id1'].append(400001000) 191 self._index['id2'].append(0) 192 self._index['id3'].append(0) 193 self._index['id4'].append(0) 194 self._index['dims'].append(None) 195 self._index['linked_station_id_record'].append(_last_station_id_record) 196 197 # At this point we have successfully identified a TDLPACK record from 198 # the file. Increment self.records and position the file pointer to 199 # now read the Fortran trailer. 200 self.records += 1 # Includes trailer records 201 self._filehandle.seek(fortran_header-bytes_to_read,1) 202 fortran_trailer = struct.unpack('>i',self._filehandle.read(4))[0] 203 204 # Check Fortran header and trailer for the record. 205 if fortran_header != fortran_trailer: 206 raise IOError('Bad Fortran record.') 207 208 # NOTE: The 'offset' key contains the byte position in the file of where 209 # data record begins. A value of 12 is added to consider a 4-byte Fortran 210 # header, 4-byte "trash", and 4-byte ioctet value (already) stored on index. 211 self._index['offset'].append(pos+12) # 4-byte header + 4-byte trash + 4-byte ioctet 212 213 # Hold the record number of the last station ID record 214 if self._index['type'][-1] == 'station': 215 _last_station_id_record = self.records # This should be OK. 
216 217 except(struct.error): 218 self._filehandle.seek(0) 219 break 220 221 self._hasindex = True 222 self.dates = tuple(sorted(set(list(filter(None,self._index['date']))))) 223 self.leadtimes = tuple(sorted(set(list(filter(None,self._index['lead']))))) 224 225 def close(self): 226 """ 227 Close the file handle 228 """ 229 self._filehandle.close() 230 231 def read(self,num=None,unpack=True): 232 """ 233 Read num records from the current position. 234 """ 235 #pdb.set_trace() 236 recs = [] 237 if num == 0: 238 return recs 239 elif num == 1: 240 reclist = [self.recordnumber+1] 241 elif num > 1: 242 reclist = list(range(self.recordnumber+1,self.recordnumber+1+num)) 243 for n in reclist: 244 nn = n-1 # Use this for the self._index referencing 245 kwargs = {} 246 self.seek(n) 247 kwargs['ioctet'] = self._index['size'][nn] 248 kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4') 249 if self._index['type'][nn] == 'data': 250 kwargs['reference_date'] = self._index['date'][nn] 251 rec = pytdlpack.TdlpackRecord(**kwargs) 252 if unpack: rec.unpack() 253 recs.append(rec) 254 elif self._index['type'][nn] == 'station': 255 kwargs['ipack'] = kwargs['ipack'].byteswap() 256 kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR) 257 rec = pytdlpack.TdlpackStationRecord(**kwargs) 258 if unpack: rec.unpack() 259 recs.append(rec) 260 elif self._index['type'][nn] == 'trailer': 261 recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs)) 262 self.recordnumber = n 263 return recs 264 265 def record(self,rec,unpack=True): 266 """ 267 Read the N-th record. 268 """ 269 #pdb.set_trace() 270 if rec is None: 271 return None 272 if rec <= 0: 273 warnings.warn("Record numbers begin at 1.") 274 return None 275 elif rec > self.records: 276 warnings.warn("Not that many records in the file.") 277 return None 278 else: 279 self.seek(rec) # Use the actual record number here. 280 return self.read(1,unpack=unpack)[0] 281 282 def seek(self,offset): 283 """ 284 Set the position within the file in units of data records. 285 """ 286 #pdb.set_trace() 287 if self._hasindex: 288 if offset == 0: 289 self._filehandle.seek(self._index['offset'][offset]) 290 self.recordnumber = offset 291 elif offset > 0: 292 self._filehandle.seek(self._index['offset'][offset-1]) 293 self.recordnumber = offset-1 294 295 def fetch(self,date=None,id=None,lead=None,unpack=True): 296 """ 297 Fetch TDLPACK data record by means of date, lead time, id or any combination 298 thereof. 299 """ 300 #pdb.set_trace() 301 recs = [] 302 idx = None 303 match_count = 0 304 305 # Match by date. 306 if type(date) is not list: 307 if date is None: 308 date = [] 309 else: 310 date = [date] 311 if len(date) > 0: match_count += 1 312 for d in date: 313 if d is not None: 314 if idx is None: 315 idx = np.where(np.array(self._index['date'])==d)[0] 316 else: 317 idx = np.concatenate((idx,np.where(np.array(self._index['date'])==d)[0])) 318 319 # Match by ID. 320 if id is not None: 321 # Test for type 322 if type(id) is str: 323 # Need all 4 words for now.... 
324 id = [int(i) for i in list(filter(None,id.split(' ')))] 325 print(id) 326 # Match by MOS ID (all 4 words) 327 match_count += 4 328 allrecs = np.arange(self.records) 329 # ID1 330 if id[0] == -1: 331 idx1 = allrecs 332 elif id[0] >= 0: 333 idx1 = np.where(np.array(self._index['id1'])==id[0])[0] 334 # ID2 335 if id[1] == -1: 336 idx2 = allrecs 337 elif id[1] >= 0: 338 idx2 = np.where(np.array(self._index['id2'])==id[1])[0] 339 # ID3 340 if id[2] == -1: 341 idx3 = allrecs 342 elif id[2] >= 0: 343 idx3 = np.where(np.array(self._index['id3'])==id[2])[0] 344 # ID4 345 if id[3] == -1: 346 idx4 = allrecs 347 elif id[3] >= 0: 348 idx4 = np.where(np.array(self._index['id4'])==id[3])[0] 349 350 if idx is not None: 351 idx = np.concatenate((idx,idx1,idx2,idx3,idx4)) 352 else: 353 idx = np.concatenate((idx1,idx2,idx3,idx4)) 354 355 # Match by lead times(s). 356 if type(lead) is not list: 357 if lead is None: 358 lead = [] 359 else: 360 lead = [lead] 361 if len(lead) > 0: match_count += 1 362 for l in lead: 363 if l is not None: 364 if idx is None: 365 idx = np.where(np.array(self._index['lead'])==l)[0] 366 else: 367 idx = np.concatenate((idx,np.where(np.array(self._index['lead'])==l)[0])) 368 369 # Now determine the count of unique index values. The count needs to match the 370 # value of match_count. Where this occurs, the index values are extracted. 371 vals,cnts = np.unique(idx,return_counts=True) 372 idx = vals[np.where(cnts==match_count)[0]] 373 374 # Now we iterate over the matching index values and build the list of 375 # records. 376 for i in idx: 377 recs.append(self.record(i+1,unpack=unpack)) 378 return recs 379 380 def tell(self): 381 """ 382 Return the position in units of records. 383 """ 384 return self.recordnumber
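To make the indexing logic concrete, the following sketch (not part of the module; 'some.sq' is a placeholder file name) shows the Fortran record framing that _get_index() walks: each record is wrapped by identical 4-byte big-endian length words, and a mismatch is treated as a bad record.

import struct

with open('some.sq','rb') as fh:
    header = struct.unpack('>i',fh.read(4))[0]    # leading length word
    payload = fh.read(header)                     # record body, header bytes long
    trailer = struct.unpack('>i',fh.read(4))[0]   # trailing length word
    if header != trailer:
        raise IOError('Bad Fortran record.')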
class open(filename, mode='r')

    Class constructor.

    Parameters
    ----------
    filename : str
        File name.
    mode : str, optional, default = 'r'
        File handle mode. The default is open for reading ('r').
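A hedged example of the constructor, also showing the context-manager and iterator protocols the class supports ('mos.sq' is a placeholder file name):

import TdlpackIO

with TdlpackIO.open('mos.sq',mode='r') as f:
    for rec in f:                    # iteration reads one record at a time
        print(type(rec).__name__)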
read(num=None, unpack=True)

    Read num records from the current position.
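For example, a sketch assuming f is an instance opened for reading as above:

recs = f.read(3,unpack=False)   # next three records, data sections left packed
print(f.tell())                 # position has advanced by three records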
record(rec, unpack=True)

    Read the rec-th record. Record numbers begin at 1.
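For example, with the same open instance f:

first = f.record(1)          # first record in the file
last = f.record(f.records)   # last record (often a trailer record)
f.record(0)                  # warns that record numbers begin at 1; returns None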
seek(offset)

    Set the position within the file in units of data records.
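For example, repositioning so that the next read starts at record 10:

f.seek(10)       # the next f.read(1) returns record 10
print(f.tell())  # reports 9 until that record is actually read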
fetch(date=None, id=None, lead=None, unpack=True)

    Fetch TDLPACK data records by date, lead time, ID, or any combination
    thereof.
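A hedged example; the date, ID words, and lead time below are placeholders, and -1 in any ID word acts as a wildcard for that word:

recs = f.fetch(date=2019020100,id='222011000 10000 24 0',lead=24)
print(len(recs))   # number of records matching all criteria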