diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1a07b76583fca..e5071701b044d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1644,6 +1644,7 @@ cdef inline bint is_timedelta(object o): return PyDelta_Check(o) or util.is_timedelta64_object(o) +@cython.internal cdef class Validator: cdef: @@ -1662,6 +1663,7 @@ cdef class Validator: return False if self.is_array_typed(): + # i.e. this ndarray is already of the desired dtype return True elif self.dtype.type_num == NPY_OBJECT: if self.skipna: @@ -1717,11 +1719,16 @@ cdef class Validator: return True cdef bint finalize_validate_skipna(self): + """ + If we _only_ saw non-dtype-specific NA values, even if they are valid + for this dtype, we do not infer this dtype. + """ # TODO(phillipc): Remove the existing validate methods and replace them # with the skipna versions upon full deprecation of skipna=False return True +@cython.internal cdef class BoolValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_bool_object(value) @@ -1738,6 +1745,7 @@ cpdef bint is_bool_array(ndarray values, bint skipna=False): return validator.validate(values) +@cython.internal cdef class IntegerValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) @@ -1746,6 +1754,7 @@ cdef class IntegerValidator(Validator): return issubclass(self.dtype.type, np.integer) +# Note: only python-exposed for tests cpdef bint is_integer_array(ndarray values): cdef: IntegerValidator validator = IntegerValidator(len(values), @@ -1753,6 +1762,7 @@ cpdef bint is_integer_array(ndarray values): return validator.validate(values) +@cython.internal cdef class IntegerNaValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return (util.is_integer_object(value) @@ -1766,6 +1776,7 @@ cdef bint is_integer_na_array(ndarray values): return validator.validate(values) +@cython.internal cdef class IntegerFloatValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) or util.is_float_object(value) @@ -1781,6 +1792,7 @@ cdef bint is_integer_float_array(ndarray values): return validator.validate(values) +@cython.internal cdef class FloatValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_float_object(value) @@ -1789,12 +1801,14 @@ cdef class FloatValidator(Validator): return issubclass(self.dtype.type, np.floating) +# Note: only python-exposed for tests cpdef bint is_float_array(ndarray values): cdef: FloatValidator validator = FloatValidator(len(values), values.dtype) return validator.validate(values) +@cython.internal cdef class ComplexValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return ( @@ -1812,6 +1826,7 @@ cdef bint is_complex_array(ndarray values): return validator.validate(values) +@cython.internal cdef class DecimalValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return is_decimal(value) @@ -1823,6 +1838,7 @@ cdef bint is_decimal_array(ndarray values): return validator.validate(values) +@cython.internal cdef class StringValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return isinstance(value, str) @@ -1843,6 +1859,7 @@ cpdef bint is_string_array(ndarray values, bint skipna=False): return validator.validate(values) +@cython.internal cdef class BytesValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return isinstance(value, bytes) @@ -1858,6 +1875,7 @@ cdef bint is_bytes_array(ndarray values, bint skipna=False): return validator.validate(values) +@cython.internal cdef class TemporalValidator(Validator): cdef: Py_ssize_t generic_null_count @@ -1884,9 +1902,14 @@ cdef class TemporalValidator(Validator): return self.is_value_typed(value) or is_typed_null or is_generic_null cdef inline bint finalize_validate_skipna(self): + """ + If we _only_ saw non-dtype-specific NA values, even if they are valid + for this dtype, we do not infer this dtype. + """ return self.generic_null_count != self.n +@cython.internal cdef class DatetimeValidator(TemporalValidator): cdef bint is_value_typed(self, object value) except -1: return PyDateTime_Check(value) @@ -1902,11 +1925,13 @@ cpdef bint is_datetime_array(ndarray values, bint skipna=True): return validator.validate(values) +@cython.internal cdef class Datetime64Validator(DatetimeValidator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_datetime64_object(value) +# Note: only python-exposed for tests cpdef bint is_datetime64_array(ndarray values): cdef: Datetime64Validator validator = Datetime64Validator(len(values), @@ -1914,7 +1939,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -# TODO: only non-here use is in test +# Note: only python-exposed for tests def is_datetime_with_singletz_array(values: ndarray) -> bool: """ Check values have the same tzinfo attribute. @@ -1945,6 +1970,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: return True +@cython.internal cdef class TimedeltaValidator(TemporalValidator): cdef bint is_value_typed(self, object value) except -1: return PyDelta_Check(value) @@ -1953,12 +1979,13 @@ cdef class TimedeltaValidator(TemporalValidator): return is_null_timedelta64(value) +@cython.internal cdef class AnyTimedeltaValidator(TimedeltaValidator): cdef inline bint is_value_typed(self, object value) except -1: return is_timedelta(value) -# TODO: only non-here use is in test +# Note: only python-exposed for tests cpdef bint is_timedelta_or_timedelta64_array(ndarray values): """ Infer with timedeltas and/or nat/none. @@ -1969,22 +1996,26 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values): return validator.validate(values) +@cython.internal cdef class DateValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return PyDate_Check(value) +# Note: only python-exposed for tests cpdef bint is_date_array(ndarray values, bint skipna=False): cdef: DateValidator validator = DateValidator(len(values), skipna=skipna) return validator.validate(values) +@cython.internal cdef class TimeValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return PyTime_Check(value) +# Note: only python-exposed for tests cpdef bint is_time_array(ndarray values, bint skipna=False): cdef: TimeValidator validator = TimeValidator(len(values), skipna=skipna) @@ -2022,6 +2053,7 @@ cdef bint is_period_array(ndarray[object] values): return True +# Note: only python-exposed for tests cpdef bint is_interval_array(ndarray values): """ Is this an ndarray of Interval (or np.nan) with a single dtype?