@@ -469,7 +469,7 @@ class PossiblePrecisionLoss(Warning):
469469
470470
471471precision_loss_doc = """
472- Column converted from %s to %s , and some data are outside of the lossless
472+ Column converted from {0} to {1} , and some data are outside of the lossless
473473conversion range. This may result in a loss of precision in the saved data.
474474"""
475475
@@ -543,7 +543,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
543543 object in a DataFrame.
544544 """
545545 ws = ""
546- # original, if small, if large
546+ # original, if small, if large
547547 conversion_data = (
548548 (np .bool_ , np .int8 , np .int8 ),
549549 (np .uint8 , np .int8 , np .int16 ),
@@ -563,7 +563,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
563563 dtype = c_data [1 ]
564564 else :
565565 dtype = c_data [2 ]
566- if c_data [2 ] == np .float64 : # Warn if necessary
566+ if c_data [2 ] == np .int64 : # Warn if necessary
567567 if data [col ].max () >= 2 ** 53 :
568568 ws = precision_loss_doc .format ("uint64" , "float64" )
569569
@@ -627,12 +627,12 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
627627 self .value_labels = list (zip (np .arange (len (categories )), categories ))
628628 self .value_labels .sort (key = lambda x : x [0 ])
629629 self .text_len = 0
630- self .off : List [int ] = []
631- self .val : List [int ] = []
632630 self .txt : List [bytes ] = []
633631 self .n = 0
634632
635633 # Compute lengths and setup lists of offsets and labels
634+ offsets : List [int ] = []
635+ values : List [int ] = []
636636 for vl in self .value_labels :
637637 category = vl [1 ]
638638 if not isinstance (category , str ):
@@ -642,9 +642,9 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
642642 ValueLabelTypeMismatch ,
643643 )
644644 category = category .encode (encoding )
645- self . off .append (self .text_len )
645+ offsets .append (self .text_len )
646646 self .text_len += len (category ) + 1 # +1 for the padding
647- self . val .append (vl [0 ])
647+ values .append (vl [0 ])
648648 self .txt .append (category )
649649 self .n += 1
650650
@@ -655,8 +655,8 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
655655 )
656656
657657 # Ensure int32
658- self .off = np .array (self . off , dtype = np .int32 )
659- self .val = np .array (self . val , dtype = np .int32 )
658+ self .off = np .array (offsets , dtype = np .int32 )
659+ self .val = np .array (values , dtype = np .int32 )
660660
661661 # Total length
662662 self .len = 4 + 4 + 4 * self .n + 4 * self .n + self .text_len
@@ -868,23 +868,23 @@ def __init__(self):
868868 # with a label, but the underlying variable is -127 to 100
869869 # we're going to drop the label and cast to int
870870 self .DTYPE_MAP = dict (
871- list (zip (range (1 , 245 ), ["a" + str (i ) for i in range (1 , 245 )]))
871+ list (zip (range (1 , 245 ), [np . dtype ( "a" + str (i ) ) for i in range (1 , 245 )]))
872872 + [
873- (251 , np .int8 ),
874- (252 , np .int16 ),
875- (253 , np .int32 ),
876- (254 , np .float32 ),
877- (255 , np .float64 ),
873+ (251 , np .dtype ( np . int8 ) ),
874+ (252 , np .dtype ( np . int16 ) ),
875+ (253 , np .dtype ( np . int32 ) ),
876+ (254 , np .dtype ( np . float32 ) ),
877+ (255 , np .dtype ( np . float64 ) ),
878878 ]
879879 )
880880 self .DTYPE_MAP_XML = dict (
881881 [
882- (32768 , np .uint8 ), # Keys to GSO
883- (65526 , np .float64 ),
884- (65527 , np .float32 ),
885- (65528 , np .int32 ),
886- (65529 , np .int16 ),
887- (65530 , np .int8 ),
882+ (32768 , np .dtype ( np . uint8 ) ), # Keys to GSO
883+ (65526 , np .dtype ( np . float64 ) ),
884+ (65527 , np .dtype ( np . float32 ) ),
885+ (65528 , np .dtype ( np . int32 ) ),
886+ (65529 , np .dtype ( np . int16 ) ),
887+ (65530 , np .dtype ( np . int8 ) ),
888888 ]
889889 )
890890 # error: Argument 1 to "list" has incompatible type "str";
@@ -1057,7 +1057,7 @@ def __init__(
10571057 self ._column_selector_set = False
10581058 self ._value_labels_read = False
10591059 self ._data_read = False
1060- self ._dtype = None
1060+ self ._dtype : Optional [ np . dtype ] = None
10611061 self ._lines_read = 0
10621062
10631063 self ._native_byteorder = _set_endianness (sys .byteorder )
@@ -1193,7 +1193,7 @@ def _read_new_header(self) -> None:
11931193 # Get data type information, works for versions 117-119.
11941194 def _get_dtypes (
11951195 self , seek_vartypes : int
1196- ) -> Tuple [List [Union [int , str ]], List [Union [int , np .dtype ]]]:
1196+ ) -> Tuple [List [Union [int , str ]], List [Union [str , np .dtype ]]]:
11971197
11981198 self .path_or_buf .seek (seek_vartypes )
11991199 raw_typlist = [
@@ -1518,11 +1518,8 @@ def _read_strls(self) -> None:
15181518 self .GSO [str (v_o )] = decoded_va
15191519
15201520 def __next__ (self ) -> DataFrame :
1521- if self ._chunksize is None :
1522- raise ValueError (
1523- "chunksize must be set to a positive integer to use as an iterator."
1524- )
1525- return self .read (nrows = self ._chunksize or 1 )
1521+ self ._chunksize = 1 if self ._chunksize is None else self ._chunksize
1522+ return self .read (nrows = self ._chunksize )
15261523
15271524 def get_chunk (self , size : Optional [int ] = None ) -> DataFrame :
15281525 """
@@ -1690,11 +1687,15 @@ def any_startswith(x: str) -> bool:
16901687 convert = False
16911688 for col in data :
16921689 dtype = data [col ].dtype
1693- if dtype in (np .float16 , np .float32 ):
1694- dtype = np .float64
1690+ if dtype in (np .dtype ( np . float16 ) , np .dtype ( np . float32 ) ):
1691+ dtype = np .dtype ( np . float64 )
16951692 convert = True
1696- elif dtype in (np .int8 , np .int16 , np .int32 ):
1697- dtype = np .int64
1693+ elif dtype in (
1694+ np .dtype (np .int8 ),
1695+ np .dtype (np .int16 ),
1696+ np .dtype (np .int32 ),
1697+ ):
1698+ dtype = np .dtype (np .int64 )
16981699 convert = True
16991700 retyped_data .append ((col , data [col ].astype (dtype )))
17001701 if convert :
@@ -1807,7 +1808,7 @@ def _do_convert_categoricals(
18071808 column = data [col ]
18081809 key_matches = column .isin (keys )
18091810 if self ._chunksize is not None and key_matches .all ():
1810- initial_categories = keys
1811+ initial_categories : Optional [ np . ndarray ] = keys
18111812 # If all categories are in the keys and we are iterating,
18121813 # use the same keys for all chunks. If some are missing
18131814 # value labels, then we will fall back to the categories
@@ -2024,7 +2025,7 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
20242025 "ty" ,
20252026 "%ty" ,
20262027 ]:
2027- return np .float64 # Stata expects doubles for SIFs
2028+ return np .dtype ( np . float64 ) # Stata expects doubles for SIFs
20282029 else :
20292030 raise NotImplementedError (f"Format { fmt } not implemented" )
20302031
0 commit comments