167167stata_epoch = datetime .datetime (1960 , 1 , 1 )
168168
169169
170- def _stata_elapsed_date_to_datetime_vec (dates : Series , fmt : str ) -> Series :
170+ # TODO: Add typing. As of January 2020 it is not possible to type this function since
171+ # mypy doesn't understand that a Series and an int can be combined using mathematical
172+ # operations (+, -).
173+ def _stata_elapsed_date_to_datetime_vec (dates , fmt ) -> Series :
171174 """
172175 Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime
173176
@@ -219,7 +222,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
219222 MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
220223 MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000
221224
222- def convert_year_month_safe (year : Series , month : Series ) -> Series :
225+ def convert_year_month_safe (year , month ) -> Series :
223226 """
224227 Convert year and month to datetimes, using pandas vectorized versions
225228 when the date range falls within the range supported by pandas.
@@ -234,7 +237,7 @@ def convert_year_month_safe(year: Series, month: Series) -> Series:
234237 [datetime .datetime (y , m , 1 ) for y , m in zip (year , month )], index = index
235238 )
236239
237- def convert_year_days_safe (year : Series , days : Series ) -> Series :
240+ def convert_year_days_safe (year , days ) -> Series :
238241 """
239242 Converts year (e.g. 1999) and days since the start of the year to a
240243 datetime or datetime64 Series
@@ -249,9 +252,7 @@ def convert_year_days_safe(year: Series, days: Series) -> Series:
249252 ]
250253 return Series (value , index = index )
251254
252- def convert_delta_safe (
253- base : datetime .datetime , deltas : Series , unit : str
254- ) -> Series :
255+ def convert_delta_safe (base , deltas , unit ) -> Series :
255256 """
256257 Convert base dates and deltas to datetimes, using pandas vectorized
257258 versions if the deltas satisfy restrictions required to be expressed
@@ -298,21 +299,21 @@ def convert_delta_safe(
298299 # Delta days relative to base
299300 elif fmt .startswith (("%td" , "td" , "%d" , "d" )):
300301 base = stata_epoch
301- days : Series = dates
302+ days = dates
302303 conv_dates = convert_delta_safe (base , days , "d" )
303304 # does not count leap days - 7 days is a week.
304305 # 52nd week may have more than 7 days
305306 elif fmt .startswith (("%tw" , "tw" )):
306- year : Series = stata_epoch .year + dates // 52
307+ year = stata_epoch .year + dates // 52
307308 days = (dates % 52 ) * 7
308309 conv_dates = convert_year_days_safe (year , days )
309310 elif fmt .startswith (("%tm" , "tm" )): # Delta months relative to base
310311 year = stata_epoch .year + dates // 12
311- month : Series = (dates % 12 ) + 1
312+ month = (dates % 12 ) + 1
312313 conv_dates = convert_year_month_safe (year , month )
313314 elif fmt .startswith (("%tq" , "tq" )): # Delta quarters relative to base
314315 year = stata_epoch .year + dates // 4
315- quarter_month : Series = (dates % 4 ) * 3 + 1
316+ quarter_month = (dates % 4 ) * 3 + 1
316317 conv_dates = convert_year_month_safe (year , quarter_month )
317318 elif fmt .startswith (("%th" , "th" )): # Delta half-years relative to base
318319 year = stata_epoch .year + dates // 2
@@ -347,19 +348,19 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
347348 NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000
348349 US_PER_DAY = NS_PER_DAY / 1000
349350
350- def parse_dates_safe (
351- dates : Series , delta : bool = False , year : bool = False , days : bool = False
352- ):
353- d : Dict [str , Any ] = {}
351+ def parse_dates_safe (dates , delta = False , year = False , days = False ):
352+ d = {}
354353 if is_datetime64_dtype (dates .values ):
355354 if delta :
356- time_delta : Series = dates - stata_epoch
355+ time_delta = dates - stata_epoch
357356 d ["delta" ] = time_delta .values .astype (np .int64 ) // 1000 # microseconds
358357 if days or year :
358+ # ignores needed since mypy reports DatetimeIndex has no year/month
359359 date_index = DatetimeIndex (dates )
360- d ["year" ], d ["month" ] = date_index .year , date_index .month
360+ d ["year" ] = date_index .year # type: ignore
361+ d ["month" ] = date_index .month # type: ignore
361362 if days :
362- days_in_ns : Series = dates .astype (np .int64 ) - to_datetime (
363+ days_in_ns = dates .astype (np .int64 ) - to_datetime (
363364 d ["year" ], format = "%Y"
364365 ).astype (np .int64 )
365366 d ["days" ] = days_in_ns // NS_PER_DAY
@@ -588,10 +589,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
588589 categories = catarray .cat .categories
589590 self .value_labels = list (zip (np .arange (len (categories )), categories ))
590591 self .value_labels .sort (key = lambda x : x [0 ])
591- self .text_len = np . int32 ( 0 )
592- self .off = []
593- self .val = []
594- self .txt = []
592+ self .text_len = 0
593+ self .off : List [ int ] = []
594+ self .val : List [ int ] = []
595+ self .txt : List [ bytes ] = []
595596 self .n = 0
596597
597598 # Compute lengths and setup lists of offsets and labels
@@ -2131,7 +2132,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
21312132
21322133 is_cat = [is_categorical_dtype (data [col ]) for col in data ]
21332134 self ._is_col_cat = is_cat
2134- self ._value_labels = []
2135+ self ._value_labels : List [ StataValueLabel ] = []
21352136 if not any (is_cat ):
21362137 return data
21372138
@@ -2290,8 +2291,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame:
22902291 return data
22912292
22922293 def _set_formats_and_types (self , dtypes : Series ) -> None :
2293- self .typlist = []
2294- self .fmtlist = []
2294+ self .fmtlist : List [ str ] = []
2295+ self .typlist : List [ int ] = []
22952296 for col , dtype in dtypes .items ():
22962297 self .fmtlist .append (_dtype_to_default_stata_fmt (dtype , self .data [col ]))
22972298 self .typlist .append (_dtype_to_stata_type (dtype , self .data [col ]))
0 commit comments