TdlpackIO

TdlpackIO is a pure Python implementation for performing IO with TDLPACK sequential files (i.e. Fortran unformatted files). Instead of using Fortran to perform IO, we use Python's builtins.open() in binary mode. This allows us to perform stream-based IO on TDLPACK files. When a file is opened for reading, its contents (TDLPACK records) are automatically indexed and stored in a dictionary. The dictionary stores the byte offset of the data record; the size of the data record; date and lead time; and MOS-2000 ID.

This indexing allows the user to access a TDLPACK sequential file in a random-access manner. For example, if a user wants to read the 500th record in the file, the first 499 records do not need to be read in their entirety.

View Source

  1"""
  2TdlpackIO is a pure Python implementation for performing IO with TDLPACK sequential files
  3(i.e. Fortran unformatted files).  Instead of using Fortran to perform IO, we use
  4Python's builtins.open() in binary mode.  This allows us to perform stream-based IO on TDLPACK
  5files.  When a file is opened for reading, its contents (TDLPACK records) are automatically
  6indexed and stored in a dictionary.  The dictionary stores the byte offset of the data record;
  7the size of the data record; date and lead time; and MOS-2000 ID.
  8
  9This indexing allows the user to access a TDLPACK sequential file in a random-access manner.
 10For example, if a user wants to read the 500th record in the file, the first 499 records
 11do not need to be read in their entirety.
 12"""
 13import logging
 14import numpy as np
 15import os
 16import pdb
 17import pytdlpack
 18import struct
 19import sys  
 20import warnings
 21
 22__version__ = pytdlpack.__version__ # Share the version number
 23
 24_IS_PYTHON3 = sys.version_info.major >= 3
 25
 26if _IS_PYTHON3:
 27    import builtins
 28else:
 29    import __builtin__ as builtins
 30
 31ONE_MB = 1048576
 32
 33class open(object):
 34    def __init__(self,filename,mode='r'):
 35        """
 36        Class Constructor
 37
 38        Parameters
 39        ----------
 40
 41        **`filename : str`**
 42
 43        File name.
 44
 45        **`mode : str, optional, default = 'r'`**
 46
 47        File handle mode.  The default is open for reading ('r').
 48        """
 49        if mode == 'r' or mode == 'w':
 50            mode = mode+'b'
 51        elif mode == 'a':
 52            mode = 'wb'
 53        self._filehandle = builtins.open(filename,mode=mode,buffering=ONE_MB)
 54        self._hasindex = False
 55        self._index = {}
 56        self.mode = mode
 57        self.name = os.path.abspath(filename)
 58        self.records = 0
 59        self.recordnumber = 0
 60        self.size = os.path.getsize(self.name)
 61        # Perform indexing on read
 62        if 'r' in self.mode:
 63            self._get_index()
 64
 65    def __enter__(self):
 66        """
 67        """
 68        return self
 69
 70    def __exit__(self,atype,value,traceback):
 71        """
 72        """
 73        self.close()
 74
 75    def __iter__(self):
 76        """
 77        """
 78        return self
 79
 80    def __next__(self):
 81        """
 82        """
 83        if self.recordnumber < self.records:
 84            return self.read(1)[0]
 85        else:
 86            raise StopIteration
 87
 88    def __repr__(self):
 89        """
 90        """
 91        strings = []
 92        keys = self.__dict__.keys()
 93        for k in keys:
 94            if not k.startswith('_'):
 95                strings.append('%s = %s\n'%(k,self.__dict__[k]))
 96        return ''.join(strings)
 97
 98    def __getitem__(self,key):
 99        """
100        """
101        if isinstance(key,slice):
102            beg, end, inc = key.indices(self.records)
103            self.seek(beg)
104            return [self.record(i+1) for i in range(beg,end,inc)]
105        elif isinstance(key,int):
106            if key == 0: return None
107            self.seek(key)
108            return self.record(key)
109        else:
110            raise KeyError('Key must be an integer record number or a slice')
111
112    def _get_index(self):
113        """
114        Perform indexing of data records.
115        """
116        #pdb.set_trace()
117        # Initialize index dictionary
118        self._index['offset'] = []
119        self._index['size'] = []
120        self._index['type'] = []
121        self._index['date'] = []
122        self._index['lead'] = []
123        self._index['id1'] = []
124        self._index['id2'] = []
125        self._index['id3'] = []
126        self._index['id4'] = []
127        self._index['dims'] = []
128        self._index['linked_station_id_record'] = []
129        _last_station_id_record = 0
130
131        # Iterate
132        while True:
133            try:
134                # First read 4-byte Fortran record header, then read the next
135                # 44 bytes which provides enough information to catalog the
136                # data record.
137                pos = self._filehandle.tell()
138                fortran_header = struct.unpack('>i',self._filehandle.read(4))[0]
139                if fortran_header >= 132:
140                    bytes_to_read = 132
141                else:
142                    bytes_to_read = fortran_header
143                temp = np.frombuffer(self._filehandle.read(bytes_to_read),dtype='>i4')
144                _header = struct.unpack('>4s',temp[2])[0].decode()
145
146                # Check to first 4 bytes of the data record to determine the data
147                # record type.
148                if _header == 'PLDT':
149                    # TDLPACK data record
150                    # Here we create a dimension dictionary per TDLPACK record and store in
151                    # the index.
152                    _dimdict = {}
153                    _pos = 16+temp.tobytes()[16]
154                    if bool(int(bin(temp.tobytes()[17])[-1])):
155                        # Grid
156                        _dimdict['nx'] = struct.unpack('>h',temp.tobytes()[_pos+2:_pos+4])[0]
157                        _dimdict['ny'] = struct.unpack('>h',temp.tobytes()[_pos+4:_pos+6])[0]
158                    else:
159                        # Vector
160                        _dimdict['nsta'] = struct.unpack('>i',temp.tobytes()[_pos+4:_pos+8])[0]
161                    self._index['size'].append(temp[1])
162                    self._index['type'].append('data')
163                    self._index['date'].append(temp[6])
164                    self._index['lead'].append(int(str(temp[9])[-3:]))
165                    self._index['id1'].append(temp[7])
166                    self._index['id2'].append(temp[8])
167                    self._index['id3'].append(temp[9])
168                    self._index['id4'].append(temp[10])
169                    self._index['dims'].append(_dimdict)
170                    self._index['linked_station_id_record'].append(_last_station_id_record)
171                else:
172                    if temp[1] == 24 and temp[6] == 9999:
173                        # Trailer record
174                        self._index['size'].append(temp[1])
175                        self._index['type'].append('trailer')
176                        self._index['date'].append(None)
177                        self._index['lead'].append(None)
178                        self._index['id1'].append(None)
179                        self._index['id2'].append(None)
180                        self._index['id3'].append(None)
181                        self._index['id4'].append(None)
182                        self._index['dims'].append(None)
183                        self._index['linked_station_id_record'].append(_last_station_id_record)
184                    else:
185                        # Station ID record
186                        self._index['size'].append(temp[1])
187                        self._index['type'].append('station')
188                        self._index['date'].append(None)
189                        self._index['lead'].append(None)
190                        self._index['id1'].append(400001000)
191                        self._index['id2'].append(0)  
192                        self._index['id3'].append(0)
193                        self._index['id4'].append(0)
194                        self._index['dims'].append(None)
195                        self._index['linked_station_id_record'].append(_last_station_id_record)
196
197                # At this point we have successfully identified a TDLPACK record from
198                # the file. Increment self.records and position the file pointer to
199                # now read the Fortran trailer.
200                self.records += 1 # Includes trailer records
201                self._filehandle.seek(fortran_header-bytes_to_read,1)
202                fortran_trailer = struct.unpack('>i',self._filehandle.read(4))[0]
203
204                # Check Fortran header and trailer for the record.
205                if fortran_header != fortran_trailer:
206                    raise IOError('Bad Fortran record.')
207
208                # NOTE: The 'offset' key contains the byte position in the file of where
209                # data record begins. A value of 12 is added to consider a 4-byte Fortran
210                # header, 4-byte "trash", and 4-byte ioctet value (already) stored on index.
211                self._index['offset'].append(pos+12) # 4-byte header + 4-byte trash + 4-byte ioctet
212
213                # Hold the record number of the last station ID record
214                if self._index['type'][-1] == 'station':
215                    _last_station_id_record = self.records # This should be OK.
216
217            except(struct.error):
218                self._filehandle.seek(0)
219                break
220
221        self._hasindex = True
222        self.dates = tuple(sorted(set(list(filter(None,self._index['date'])))))
223        self.leadtimes = tuple(sorted(set(list(filter(None,self._index['lead'])))))
224
225    def close(self):
226        """
227        Close the file handle
228        """
229        self._filehandle.close()
230
231    def read(self,num=None,unpack=True):
232        """
233        Read num records from the current position.
234        """
235        #pdb.set_trace()
236        recs = []
237        if num == 0:
238            return recs
239        elif num == 1:
240            reclist = [self.recordnumber+1]
241        elif num > 1:
242            reclist = list(range(self.recordnumber+1,self.recordnumber+1+num))
243        for n in reclist:
244            nn = n-1 # Use this for the self._index referencing
245            kwargs = {}
246            self.seek(n)
247            kwargs['ioctet'] = self._index['size'][nn]
248            kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4')
249            if self._index['type'][nn] == 'data':
250                kwargs['reference_date'] = self._index['date'][nn]
251                rec = pytdlpack.TdlpackRecord(**kwargs)
252                if unpack: rec.unpack()
253                recs.append(rec)
254            elif self._index['type'][nn] == 'station':
255                kwargs['ipack'] = kwargs['ipack'].byteswap()
256                kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR)
257                rec = pytdlpack.TdlpackStationRecord(**kwargs)
258                if unpack: rec.unpack()
259                recs.append(rec)
260            elif self._index['type'][nn] == 'trailer':
261                recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
262            self.recordnumber = n
263        return recs
264    
265    def record(self,rec,unpack=True):
266        """
267        Read the N-th record.
268        """
269        #pdb.set_trace()
270        if rec is None:
271            return None
272        if rec <= 0:
273            warnings.warn("Record numbers begin at 1.") 
274            return None
275        elif rec > self.records:
276            warnings.warn("Not that many records in the file.")
277            return None
278        else:
279            self.seek(rec) # Use the actual record number here.
280            return self.read(1,unpack=unpack)[0]
281
282    def seek(self,offset):
283        """
284        Set the position within the file in units of data records.
285        """
286        #pdb.set_trace()
287        if self._hasindex:
288            if offset == 0:
289                self._filehandle.seek(self._index['offset'][offset])
290                self.recordnumber = offset
291            elif offset > 0:
292                self._filehandle.seek(self._index['offset'][offset-1])
293                self.recordnumber = offset-1
294    
295    def fetch(self,date=None,id=None,lead=None,unpack=True):
296        """
297        Fetch TDLPACK data record by means of date, lead time, id or any combination
298        thereof.
299        """
300        #pdb.set_trace()
301        recs = []
302        idx = None
303        match_count = 0
304
305        # Match by date.
306        if type(date) is not list:
307           if date is None:
308               date = []
309           else:
310               date = [date]
311        if len(date) > 0: match_count += 1
312        for d in date:
313            if d is not None:
314                if idx is None:
315                    idx = np.where(np.array(self._index['date'])==d)[0]
316                else:
317                    idx = np.concatenate((idx,np.where(np.array(self._index['date'])==d)[0]))
318
319        # Match by ID.
320        if id is not None:
321            # Test for type
322            if type(id) is str:
323                # Need all 4 words for now....
324                id = [int(i) for i in list(filter(None,id.split(' ')))]
325                print(id)
326            # Match by MOS ID (all 4 words)
327            match_count += 4
328            allrecs = np.arange(self.records)
329            # ID1
330            if id[0] == -1:
331                idx1 = allrecs
332            elif id[0] >= 0:
333                idx1 = np.where(np.array(self._index['id1'])==id[0])[0]
334            # ID2
335            if id[1] == -1:
336                idx2 = allrecs
337            elif id[1] >= 0:
338                idx2 = np.where(np.array(self._index['id2'])==id[1])[0]
339            # ID3
340            if id[2] == -1:
341                idx3 = allrecs
342            elif id[2] >= 0:
343                idx3 = np.where(np.array(self._index['id3'])==id[2])[0]
344            # ID4
345            if id[3] == -1:
346                idx4 = allrecs
347            elif id[3] >= 0:
348                idx4 = np.where(np.array(self._index['id4'])==id[3])[0]
349
350            if idx is not None:
351                idx = np.concatenate((idx,idx1,idx2,idx3,idx4))
352            else:
353                idx = np.concatenate((idx1,idx2,idx3,idx4))
354
355        # Match by lead times(s).
356        if type(lead) is not list:
357            if lead is None:
358                lead = []
359            else:
360                lead = [lead]
361        if len(lead) > 0: match_count += 1
362        for l in lead:
363            if l is not None:
364                if idx is None:
365                    idx = np.where(np.array(self._index['lead'])==l)[0]
366                else:
367                    idx = np.concatenate((idx,np.where(np.array(self._index['lead'])==l)[0]))
368
369        # Now determine the count of unique index values.  The count needs to match the
370        # value of match_count.  Where this occurs, the index values are extracted.
371        vals,cnts = np.unique(idx,return_counts=True)
372        idx = vals[np.where(cnts==match_count)[0]]
373
374        # Now we iterate over the matching index values and build the list of
375        # records.
376        for i in idx:
377            recs.append(self.record(i+1,unpack=unpack))
378        return recs
379    
380    def tell(self):
381        """
382        Return the position in units of records.
383        """
384        return self.recordnumber

ONE_MB = 1048576

class open: View Source

 34class open(object):
 35    def __init__(self,filename,mode='r'):
 36        """
 37        Class Constructor
 38
 39        Parameters
 40        ----------
 41
 42        **`filename : str`**
 43
 44        File name.
 45
 46        **`mode : str, optional, default = 'r'`**
 47
 48        File handle mode.  The default is open for reading ('r').
 49        """
 50        if mode == 'r' or mode == 'w':
 51            mode = mode+'b'
 52        elif mode == 'a':
 53            mode = 'wb'
 54        self._filehandle = builtins.open(filename,mode=mode,buffering=ONE_MB)
 55        self._hasindex = False
 56        self._index = {}
 57        self.mode = mode
 58        self.name = os.path.abspath(filename)
 59        self.records = 0
 60        self.recordnumber = 0
 61        self.size = os.path.getsize(self.name)
 62        # Perform indexing on read
 63        if 'r' in self.mode:
 64            self._get_index()
 65
 66    def __enter__(self):
 67        """
 68        """
 69        return self
 70
 71    def __exit__(self,atype,value,traceback):
 72        """
 73        """
 74        self.close()
 75
 76    def __iter__(self):
 77        """
 78        """
 79        return self
 80
 81    def __next__(self):
 82        """
 83        """
 84        if self.recordnumber < self.records:
 85            return self.read(1)[0]
 86        else:
 87            raise StopIteration
 88
 89    def __repr__(self):
 90        """
 91        """
 92        strings = []
 93        keys = self.__dict__.keys()
 94        for k in keys:
 95            if not k.startswith('_'):
 96                strings.append('%s = %s\n'%(k,self.__dict__[k]))
 97        return ''.join(strings)
 98
 99    def __getitem__(self,key):
100        """
101        """
102        if isinstance(key,slice):
103            beg, end, inc = key.indices(self.records)
104            self.seek(beg)
105            return [self.record(i+1) for i in range(beg,end,inc)]
106        elif isinstance(key,int):
107            if key == 0: return None
108            self.seek(key)
109            return self.record(key)
110        else:
111            raise KeyError('Key must be an integer record number or a slice')
112
113    def _get_index(self):
114        """
115        Perform indexing of data records.
116        """
117        #pdb.set_trace()
118        # Initialize index dictionary
119        self._index['offset'] = []
120        self._index['size'] = []
121        self._index['type'] = []
122        self._index['date'] = []
123        self._index['lead'] = []
124        self._index['id1'] = []
125        self._index['id2'] = []
126        self._index['id3'] = []
127        self._index['id4'] = []
128        self._index['dims'] = []
129        self._index['linked_station_id_record'] = []
130        _last_station_id_record = 0
131
132        # Iterate
133        while True:
134            try:
135                # First read 4-byte Fortran record header, then read the next
136                # 44 bytes which provides enough information to catalog the
137                # data record.
138                pos = self._filehandle.tell()
139                fortran_header = struct.unpack('>i',self._filehandle.read(4))[0]
140                if fortran_header >= 132:
141                    bytes_to_read = 132
142                else:
143                    bytes_to_read = fortran_header
144                temp = np.frombuffer(self._filehandle.read(bytes_to_read),dtype='>i4')
145                _header = struct.unpack('>4s',temp[2])[0].decode()
146
147                # Check to first 4 bytes of the data record to determine the data
148                # record type.
149                if _header == 'PLDT':
150                    # TDLPACK data record
151                    # Here we create a dimension dictionary per TDLPACK record and store in
152                    # the index.
153                    _dimdict = {}
154                    _pos = 16+temp.tobytes()[16]
155                    if bool(int(bin(temp.tobytes()[17])[-1])):
156                        # Grid
157                        _dimdict['nx'] = struct.unpack('>h',temp.tobytes()[_pos+2:_pos+4])[0]
158                        _dimdict['ny'] = struct.unpack('>h',temp.tobytes()[_pos+4:_pos+6])[0]
159                    else:
160                        # Vector
161                        _dimdict['nsta'] = struct.unpack('>i',temp.tobytes()[_pos+4:_pos+8])[0]
162                    self._index['size'].append(temp[1])
163                    self._index['type'].append('data')
164                    self._index['date'].append(temp[6])
165                    self._index['lead'].append(int(str(temp[9])[-3:]))
166                    self._index['id1'].append(temp[7])
167                    self._index['id2'].append(temp[8])
168                    self._index['id3'].append(temp[9])
169                    self._index['id4'].append(temp[10])
170                    self._index['dims'].append(_dimdict)
171                    self._index['linked_station_id_record'].append(_last_station_id_record)
172                else:
173                    if temp[1] == 24 and temp[6] == 9999:
174                        # Trailer record
175                        self._index['size'].append(temp[1])
176                        self._index['type'].append('trailer')
177                        self._index['date'].append(None)
178                        self._index['lead'].append(None)
179                        self._index['id1'].append(None)
180                        self._index['id2'].append(None)
181                        self._index['id3'].append(None)
182                        self._index['id4'].append(None)
183                        self._index['dims'].append(None)
184                        self._index['linked_station_id_record'].append(_last_station_id_record)
185                    else:
186                        # Station ID record
187                        self._index['size'].append(temp[1])
188                        self._index['type'].append('station')
189                        self._index['date'].append(None)
190                        self._index['lead'].append(None)
191                        self._index['id1'].append(400001000)
192                        self._index['id2'].append(0)  
193                        self._index['id3'].append(0)
194                        self._index['id4'].append(0)
195                        self._index['dims'].append(None)
196                        self._index['linked_station_id_record'].append(_last_station_id_record)
197
198                # At this point we have successfully identified a TDLPACK record from
199                # the file. Increment self.records and position the file pointer to
200                # now read the Fortran trailer.
201                self.records += 1 # Includes trailer records
202                self._filehandle.seek(fortran_header-bytes_to_read,1)
203                fortran_trailer = struct.unpack('>i',self._filehandle.read(4))[0]
204
205                # Check Fortran header and trailer for the record.
206                if fortran_header != fortran_trailer:
207                    raise IOError('Bad Fortran record.')
208
209                # NOTE: The 'offset' key contains the byte position in the file of where
210                # data record begins. A value of 12 is added to consider a 4-byte Fortran
211                # header, 4-byte "trash", and 4-byte ioctet value (already) stored on index.
212                self._index['offset'].append(pos+12) # 4-byte header + 4-byte trash + 4-byte ioctet
213
214                # Hold the record number of the last station ID record
215                if self._index['type'][-1] == 'station':
216                    _last_station_id_record = self.records # This should be OK.
217
218            except(struct.error):
219                self._filehandle.seek(0)
220                break
221
222        self._hasindex = True
223        self.dates = tuple(sorted(set(list(filter(None,self._index['date'])))))
224        self.leadtimes = tuple(sorted(set(list(filter(None,self._index['lead'])))))
225
226    def close(self):
227        """
228        Close the file handle
229        """
230        self._filehandle.close()
231
232    def read(self,num=None,unpack=True):
233        """
234        Read num records from the current position.
235        """
236        #pdb.set_trace()
237        recs = []
238        if num == 0:
239            return recs
240        elif num == 1:
241            reclist = [self.recordnumber+1]
242        elif num > 1:
243            reclist = list(range(self.recordnumber+1,self.recordnumber+1+num))
244        for n in reclist:
245            nn = n-1 # Use this for the self._index referencing
246            kwargs = {}
247            self.seek(n)
248            kwargs['ioctet'] = self._index['size'][nn]
249            kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4')
250            if self._index['type'][nn] == 'data':
251                kwargs['reference_date'] = self._index['date'][nn]
252                rec = pytdlpack.TdlpackRecord(**kwargs)
253                if unpack: rec.unpack()
254                recs.append(rec)
255            elif self._index['type'][nn] == 'station':
256                kwargs['ipack'] = kwargs['ipack'].byteswap()
257                kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR)
258                rec = pytdlpack.TdlpackStationRecord(**kwargs)
259                if unpack: rec.unpack()
260                recs.append(rec)
261            elif self._index['type'][nn] == 'trailer':
262                recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
263            self.recordnumber = n
264        return recs
265    
266    def record(self,rec,unpack=True):
267        """
268        Read the N-th record.
269        """
270        #pdb.set_trace()
271        if rec is None:
272            return None
273        if rec <= 0:
274            warnings.warn("Record numbers begin at 1.") 
275            return None
276        elif rec > self.records:
277            warnings.warn("Not that many records in the file.")
278            return None
279        else:
280            self.seek(rec) # Use the actual record number here.
281            return self.read(1,unpack=unpack)[0]
282
283    def seek(self,offset):
284        """
285        Set the position within the file in units of data records.
286        """
287        #pdb.set_trace()
288        if self._hasindex:
289            if offset == 0:
290                self._filehandle.seek(self._index['offset'][offset])
291                self.recordnumber = offset
292            elif offset > 0:
293                self._filehandle.seek(self._index['offset'][offset-1])
294                self.recordnumber = offset-1
295    
296    def fetch(self,date=None,id=None,lead=None,unpack=True):
297        """
298        Fetch TDLPACK data record by means of date, lead time, id or any combination
299        thereof.
300        """
301        #pdb.set_trace()
302        recs = []
303        idx = None
304        match_count = 0
305
306        # Match by date.
307        if type(date) is not list:
308           if date is None:
309               date = []
310           else:
311               date = [date]
312        if len(date) > 0: match_count += 1
313        for d in date:
314            if d is not None:
315                if idx is None:
316                    idx = np.where(np.array(self._index['date'])==d)[0]
317                else:
318                    idx = np.concatenate((idx,np.where(np.array(self._index['date'])==d)[0]))
319
320        # Match by ID.
321        if id is not None:
322            # Test for type
323            if type(id) is str:
324                # Need all 4 words for now....
325                id = [int(i) for i in list(filter(None,id.split(' ')))]
326                print(id)
327            # Match by MOS ID (all 4 words)
328            match_count += 4
329            allrecs = np.arange(self.records)
330            # ID1
331            if id[0] == -1:
332                idx1 = allrecs
333            elif id[0] >= 0:
334                idx1 = np.where(np.array(self._index['id1'])==id[0])[0]
335            # ID2
336            if id[1] == -1:
337                idx2 = allrecs
338            elif id[1] >= 0:
339                idx2 = np.where(np.array(self._index['id2'])==id[1])[0]
340            # ID3
341            if id[2] == -1:
342                idx3 = allrecs
343            elif id[2] >= 0:
344                idx3 = np.where(np.array(self._index['id3'])==id[2])[0]
345            # ID4
346            if id[3] == -1:
347                idx4 = allrecs
348            elif id[3] >= 0:
349                idx4 = np.where(np.array(self._index['id4'])==id[3])[0]
350
351            if idx is not None:
352                idx = np.concatenate((idx,idx1,idx2,idx3,idx4))
353            else:
354                idx = np.concatenate((idx1,idx2,idx3,idx4))
355
356        # Match by lead times(s).
357        if type(lead) is not list:
358            if lead is None:
359                lead = []
360            else:
361                lead = [lead]
362        if len(lead) > 0: match_count += 1
363        for l in lead:
364            if l is not None:
365                if idx is None:
366                    idx = np.where(np.array(self._index['lead'])==l)[0]
367                else:
368                    idx = np.concatenate((idx,np.where(np.array(self._index['lead'])==l)[0]))
369
370        # Now determine the count of unique index values.  The count needs to match the
371        # value of match_count.  Where this occurs, the index values are extracted.
372        vals,cnts = np.unique(idx,return_counts=True)
373        idx = vals[np.where(cnts==match_count)[0]]
374
375        # Now we iterate over the matching index values and build the list of
376        # records.
377        for i in idx:
378            recs.append(self.record(i+1,unpack=unpack))
379        return recs
380    
381    def tell(self):
382        """
383        Return the position in units of records.
384        """
385        return self.recordnumber

open(filename, mode='r') View Source

35    def __init__(self,filename,mode='r'):
36        """
37        Class Constructor
38
39        Parameters
40        ----------
41
42        **`filename : str`**
43
44        File name.
45
46        **`mode : str, optional, default = 'r'`**
47
48        File handle mode.  The default is open for reading ('r').
49        """
50        if mode == 'r' or mode == 'w':
51            mode = mode+'b'
52        elif mode == 'a':
53            mode = 'wb'
54        self._filehandle = builtins.open(filename,mode=mode,buffering=ONE_MB)
55        self._hasindex = False
56        self._index = {}
57        self.mode = mode
58        self.name = os.path.abspath(filename)
59        self.records = 0
60        self.recordnumber = 0
61        self.size = os.path.getsize(self.name)
62        # Perform indexing on read
63        if 'r' in self.mode:
64            self._get_index()

Class Constructor

Parameters

filename : str

File name.

mode : str, optional, default = 'r'

File handle mode. The default is open for reading ('r').

mode

name

records

recordnumber

size

def close(self): View Source

226    def close(self):
227        """
228        Close the file handle
229        """
230        self._filehandle.close()

Close the file handle

def read(self, num=None, unpack=True): View Source

232    def read(self,num=None,unpack=True):
233        """
234        Read num records from the current position.
235        """
236        #pdb.set_trace()
237        recs = []
238        if num == 0:
239            return recs
240        elif num == 1:
241            reclist = [self.recordnumber+1]
242        elif num > 1:
243            reclist = list(range(self.recordnumber+1,self.recordnumber+1+num))
244        for n in reclist:
245            nn = n-1 # Use this for the self._index referencing
246            kwargs = {}
247            self.seek(n)
248            kwargs['ioctet'] = self._index['size'][nn]
249            kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4')
250            if self._index['type'][nn] == 'data':
251                kwargs['reference_date'] = self._index['date'][nn]
252                rec = pytdlpack.TdlpackRecord(**kwargs)
253                if unpack: rec.unpack()
254                recs.append(rec)
255            elif self._index['type'][nn] == 'station':
256                kwargs['ipack'] = kwargs['ipack'].byteswap()
257                kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR)
258                rec = pytdlpack.TdlpackStationRecord(**kwargs)
259                if unpack: rec.unpack()
260                recs.append(rec)
261            elif self._index['type'][nn] == 'trailer':
262                recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
263            self.recordnumber = n
264        return recs

Read num records from the current position.

def record(self, rec, unpack=True): View Source

266    def record(self,rec,unpack=True):
267        """
268        Read the N-th record.
269        """
270        #pdb.set_trace()
271        if rec is None:
272            return None
273        if rec <= 0:
274            warnings.warn("Record numbers begin at 1.") 
275            return None
276        elif rec > self.records:
277            warnings.warn("Not that many records in the file.")
278            return None
279        else:
280            self.seek(rec) # Use the actual record number here.
281            return self.read(1,unpack=unpack)[0]

Read the N-th record.

def seek(self, offset): View Source

283    def seek(self,offset):
284        """
285        Set the position within the file in units of data records.
286        """
287        #pdb.set_trace()
288        if self._hasindex:
289            if offset == 0:
290                self._filehandle.seek(self._index['offset'][offset])
291                self.recordnumber = offset
292            elif offset > 0:
293                self._filehandle.seek(self._index['offset'][offset-1])
294                self.recordnumber = offset-1

Set the position within the file in units of data records.

def fetch(self, date=None, id=None, lead=None, unpack=True): View Source

296    def fetch(self,date=None,id=None,lead=None,unpack=True):
297        """
298        Fetch TDLPACK data record by means of date, lead time, id or any combination
299        thereof.
300        """
301        #pdb.set_trace()
302        recs = []
303        idx = None
304        match_count = 0
305
306        # Match by date.
307        if type(date) is not list:
308           if date is None:
309               date = []
310           else:
311               date = [date]
312        if len(date) > 0: match_count += 1
313        for d in date:
314            if d is not None:
315                if idx is None:
316                    idx = np.where(np.array(self._index['date'])==d)[0]
317                else:
318                    idx = np.concatenate((idx,np.where(np.array(self._index['date'])==d)[0]))
319
320        # Match by ID.
321        if id is not None:
322            # Test for type
323            if type(id) is str:
324                # Need all 4 words for now....
325                id = [int(i) for i in list(filter(None,id.split(' ')))]
326                print(id)
327            # Match by MOS ID (all 4 words)
328            match_count += 4
329            allrecs = np.arange(self.records)
330            # ID1
331            if id[0] == -1:
332                idx1 = allrecs
333            elif id[0] >= 0:
334                idx1 = np.where(np.array(self._index['id1'])==id[0])[0]
335            # ID2
336            if id[1] == -1:
337                idx2 = allrecs
338            elif id[1] >= 0:
339                idx2 = np.where(np.array(self._index['id2'])==id[1])[0]
340            # ID3
341            if id[2] == -1:
342                idx3 = allrecs
343            elif id[2] >= 0:
344                idx3 = np.where(np.array(self._index['id3'])==id[2])[0]
345            # ID4
346            if id[3] == -1:
347                idx4 = allrecs
348            elif id[3] >= 0:
349                idx4 = np.where(np.array(self._index['id4'])==id[3])[0]
350
351            if idx is not None:
352                idx = np.concatenate((idx,idx1,idx2,idx3,idx4))
353            else:
354                idx = np.concatenate((idx1,idx2,idx3,idx4))
355
356        # Match by lead times(s).
357        if type(lead) is not list:
358            if lead is None:
359                lead = []
360            else:
361                lead = [lead]
362        if len(lead) > 0: match_count += 1
363        for l in lead:
364            if l is not None:
365                if idx is None:
366                    idx = np.where(np.array(self._index['lead'])==l)[0]
367                else:
368                    idx = np.concatenate((idx,np.where(np.array(self._index['lead'])==l)[0]))
369
370        # Now determine the count of unique index values.  The count needs to match the
371        # value of match_count.  Where this occurs, the index values are extracted.
372        vals,cnts = np.unique(idx,return_counts=True)
373        idx = vals[np.where(cnts==match_count)[0]]
374
375        # Now we iterate over the matching index values and build the list of
376        # records.
377        for i in idx:
378            recs.append(self.record(i+1,unpack=unpack))
379        return recs

Fetch TDLPACK data records by date, lead time, ID, or any combination thereof.

def tell(self): View Source

381    def tell(self):
382        """
383        Return the position in units of records.
384        """
385        return self.recordnumber

Return the position in units of records.