@@ -1139,13 +1139,17 @@ def _read_new_header(self, first_char):
11391139 # The first part of the header is common to 117 and 118.
11401140 self .path_or_buf .read (27 ) # stata_dta><header><release>
11411141 self .format_version = int (self .path_or_buf .read (3 ))
1142- if self .format_version not in [117 , 118 ]:
1142+ if self .format_version not in [117 , 118 , 119 ]:
11431143 raise ValueError (_version_error )
11441144 self ._set_encoding ()
11451145 self .path_or_buf .read (21 ) # </release><byteorder>
11461146 self .byteorder = self .path_or_buf .read (3 ) == b"MSF" and ">" or "<"
11471147 self .path_or_buf .read (15 ) # </byteorder><K>
1148- self .nvar = struct .unpack (self .byteorder + "H" , self .path_or_buf .read (2 ))[0 ]
1148+ nvar_type = "H" if self .format_version <= 118 else "I"
1149+ nvar_size = 2 if self .format_version <= 118 else 4
1150+ self .nvar = struct .unpack (
1151+ self .byteorder + nvar_type , self .path_or_buf .read (nvar_size )
1152+ )[0 ]
11491153 self .path_or_buf .read (7 ) # </K><N>
11501154
11511155 self .nobs = self ._get_nobs ()
@@ -1207,7 +1211,7 @@ def _read_new_header(self, first_char):
12071211 self .path_or_buf .seek (self ._seek_variable_labels )
12081212 self ._variable_labels = self ._get_variable_labels ()
12091213
1210- # Get data type information, works for versions 117-118.
1214+ # Get data type information, works for versions 117-119.
12111215 def _get_dtypes (self , seek_vartypes ):
12121216
12131217 self .path_or_buf .seek (seek_vartypes )
@@ -1241,14 +1245,14 @@ def f(typ):
12411245 def _get_varlist (self ):
12421246 if self .format_version == 117 :
12431247 b = 33
1244- elif self .format_version == 118 :
1248+ elif self .format_version >= 118 :
12451249 b = 129
12461250
12471251 return [self ._decode (self .path_or_buf .read (b )) for i in range (self .nvar )]
12481252
12491253 # Returns the format list
12501254 def _get_fmtlist (self ):
1251- if self .format_version == 118 :
1255+ if self .format_version >= 118 :
12521256 b = 57
12531257 elif self .format_version > 113 :
12541258 b = 49
@@ -1270,7 +1274,7 @@ def _get_lbllist(self):
12701274 return [self ._decode (self .path_or_buf .read (b )) for i in range (self .nvar )]
12711275
12721276 def _get_variable_labels (self ):
1273- if self .format_version == 118 :
1277+ if self .format_version >= 118 :
12741278 vlblist = [
12751279 self ._decode (self .path_or_buf .read (321 )) for i in range (self .nvar )
12761280 ]
@@ -1285,13 +1289,13 @@ def _get_variable_labels(self):
12851289 return vlblist
12861290
12871291 def _get_nobs (self ):
1288- if self .format_version == 118 :
1292+ if self .format_version >= 118 :
12891293 return struct .unpack (self .byteorder + "Q" , self .path_or_buf .read (8 ))[0 ]
12901294 else :
12911295 return struct .unpack (self .byteorder + "I" , self .path_or_buf .read (4 ))[0 ]
12921296
12931297 def _get_data_label (self ):
1294- if self .format_version == 118 :
1298+ if self .format_version >= 118 :
12951299 strlen = struct .unpack (self .byteorder + "H" , self .path_or_buf .read (2 ))[0 ]
12961300 return self ._decode (self .path_or_buf .read (strlen ))
12971301 elif self .format_version == 117 :
@@ -1303,7 +1307,7 @@ def _get_data_label(self):
13031307 return self ._decode (self .path_or_buf .read (32 ))
13041308
13051309 def _get_time_stamp (self ):
1306- if self .format_version == 118 :
1310+ if self .format_version >= 118 :
13071311 strlen = struct .unpack ("b" , self .path_or_buf .read (1 ))[0 ]
13081312 return self .path_or_buf .read (strlen ).decode ("utf-8" )
13091313 elif self .format_version == 117 :
@@ -1321,7 +1325,7 @@ def _get_seek_variable_labels(self):
13211325 # a work around that uses the previous label, 33 bytes for each
13221326 # variable, 20 for the closing tag and 17 for the opening tag
13231327 return self ._seek_value_label_names + (33 * self .nvar ) + 20 + 17
1324- elif self .format_version == 118 :
1328+ elif self .format_version >= 118 :
13251329 return struct .unpack (self .byteorder + "q" , self .path_or_buf .read (8 ))[0 ] + 17
13261330 else :
13271331 raise ValueError ()
@@ -1519,10 +1523,12 @@ def _read_strls(self):
15191523 else :
15201524 buf = self .path_or_buf .read (12 )
15211525 # Only tested on little endian file on little endian machine.
1526+ v_size = 2 if self .format_version == 118 else 3
15221527 if self .byteorder == "<" :
1523- buf = buf [0 :2 ] + buf [4 : 10 ]
1528+ buf = buf [0 :v_size ] + buf [4 : 12 - v_size ]
15241529 else :
1525- buf = buf [0 :2 ] + buf [6 :]
1530+ # This path may not be correct, impossible to test
1531+ buf = buf [0 :v_size ] + buf [4 + v_size :]
15261532 v_o = struct .unpack ("Q" , buf )[0 ]
15271533 typ = struct .unpack ("B" , self .path_or_buf .read (1 ))[0 ]
15281534 length = struct .unpack (self .byteorder + "I" , self .path_or_buf .read (4 ))[0 ]
0 commit comments