Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1644,6 +1644,7 @@ cdef inline bint is_timedelta(object o):
return PyDelta_Check(o) or util.is_timedelta64_object(o)


@cython.internal
cdef class Validator:

cdef:
Expand All @@ -1662,6 +1663,7 @@ cdef class Validator:
return False

if self.is_array_typed():
# i.e. this ndarray is already of the desired dtype
return True
elif self.dtype.type_num == NPY_OBJECT:
if self.skipna:
Expand Down Expand Up @@ -1717,11 +1719,16 @@ cdef class Validator:
return True

cdef bint finalize_validate_skipna(self):
    """
    Final-verdict hook for skipna validation (presumably consulted once
    all values have been checked; the call site is not shown here).

    The intended contract: when _only_ non-dtype-specific NA values were
    seen, the dtype is not inferred, even though those values are valid
    for it.  This base implementation applies no such restriction and
    unconditionally returns True.
    """
    # TODO(phillipc): Remove the existing validate methods and replace them
    # with the skipna versions upon full deprecation of skipna=False
    return True


@cython.internal
cdef class BoolValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_bool_object(value)
Expand All @@ -1738,6 +1745,7 @@ cpdef bint is_bool_array(ndarray values, bint skipna=False):
return validator.validate(values)


@cython.internal
cdef class IntegerValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_integer_object(value)
Expand All @@ -1746,13 +1754,15 @@ cdef class IntegerValidator(Validator):
return issubclass(self.dtype.type, np.integer)


# Note: only python-exposed for tests
cpdef bint is_integer_array(ndarray values):
    """
    Run an IntegerValidator over ``values``.

    The validator is built from ``len(values)`` and ``values.dtype``; the
    result of its ``validate`` call is returned as-is.
    """
    cdef IntegerValidator checker = IntegerValidator(len(values), values.dtype)
    return checker.validate(values)


@cython.internal
cdef class IntegerNaValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return (util.is_integer_object(value)
Expand All @@ -1766,6 +1776,7 @@ cdef bint is_integer_na_array(ndarray values):
return validator.validate(values)


@cython.internal
cdef class IntegerFloatValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_integer_object(value) or util.is_float_object(value)
Expand All @@ -1781,6 +1792,7 @@ cdef bint is_integer_float_array(ndarray values):
return validator.validate(values)


@cython.internal
cdef class FloatValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_float_object(value)
Expand All @@ -1789,12 +1801,14 @@ cdef class FloatValidator(Validator):
return issubclass(self.dtype.type, np.floating)


# Note: only python-exposed for tests
cpdef bint is_float_array(ndarray values):
    """
    Run a FloatValidator over ``values``.

    The validator is built from ``len(values)`` and ``values.dtype``; the
    result of its ``validate`` call is returned as-is.
    """
    cdef FloatValidator checker = FloatValidator(len(values), values.dtype)
    return checker.validate(values)


@cython.internal
cdef class ComplexValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return (
Expand All @@ -1812,6 +1826,7 @@ cdef bint is_complex_array(ndarray values):
return validator.validate(values)


@cython.internal
cdef class DecimalValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return is_decimal(value)
Expand All @@ -1823,6 +1838,7 @@ cdef bint is_decimal_array(ndarray values):
return validator.validate(values)


@cython.internal
cdef class StringValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return isinstance(value, str)
Expand All @@ -1843,6 +1859,7 @@ cpdef bint is_string_array(ndarray values, bint skipna=False):
return validator.validate(values)


@cython.internal
cdef class BytesValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return isinstance(value, bytes)
Expand All @@ -1858,6 +1875,7 @@ cdef bint is_bytes_array(ndarray values, bint skipna=False):
return validator.validate(values)


@cython.internal
cdef class TemporalValidator(Validator):
cdef:
Py_ssize_t generic_null_count
Expand All @@ -1884,9 +1902,14 @@ cdef class TemporalValidator(Validator):
return self.is_value_typed(value) or is_typed_null or is_generic_null

cdef inline bint finalize_validate_skipna(self):
    """
    Decline to infer this dtype when _only_ non-dtype-specific NA values
    were seen, even though such values are valid for this dtype.

    Returns False exactly when ``generic_null_count`` equals ``n``
    (presumably the number of elements being validated), True otherwise.
    """
    cdef bint only_generic_nulls = self.generic_null_count == self.n
    return not only_generic_nulls


@cython.internal
cdef class DatetimeValidator(TemporalValidator):
cdef bint is_value_typed(self, object value) except -1:
return PyDateTime_Check(value)
Expand All @@ -1902,19 +1925,21 @@ cpdef bint is_datetime_array(ndarray values, bint skipna=True):
return validator.validate(values)


@cython.internal
cdef class Datetime64Validator(DatetimeValidator):
    """
    DatetimeValidator variant whose per-value check is
    ``util.is_datetime64_object`` instead of the parent's
    ``PyDateTime_Check``.
    """

    cdef inline bint is_value_typed(self, object value) except -1:
        # Delegate the decision entirely to the util helper.
        cdef bint is_dt64 = util.is_datetime64_object(value)
        return is_dt64


# Note: only python-exposed for tests
cpdef bint is_datetime64_array(ndarray values):
    """
    Run a Datetime64Validator over ``values``.

    The validator is always created with ``skipna=True``; this function
    offers no way to override that.
    """
    cdef Datetime64Validator checker = Datetime64Validator(
        len(values), skipna=True
    )
    return checker.validate(values)


# TODO: the only use outside this module is in tests
# Note: only python-exposed for tests
def is_datetime_with_singletz_array(values: ndarray) -> bool:
"""
Check values have the same tzinfo attribute.
Expand Down Expand Up @@ -1945,6 +1970,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
return True


@cython.internal
cdef class TimedeltaValidator(TemporalValidator):
cdef bint is_value_typed(self, object value) except -1:
return PyDelta_Check(value)
Expand All @@ -1953,12 +1979,13 @@ cdef class TimedeltaValidator(TemporalValidator):
return is_null_timedelta64(value)


@cython.internal
cdef class AnyTimedeltaValidator(TimedeltaValidator):
    """
    TimedeltaValidator variant whose per-value check is the module-level
    ``is_timedelta`` helper, which accepts anything passing
    ``PyDelta_Check`` or ``util.is_timedelta64_object``.
    """

    cdef inline bint is_value_typed(self, object value) except -1:
        cdef bint is_td = is_timedelta(value)
        return is_td


# TODO: the only use outside this module is in tests
# Note: only python-exposed for tests
cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
"""
Infer with timedeltas and/or nat/none.
Expand All @@ -1969,22 +1996,26 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
return validator.validate(values)


@cython.internal
cdef class DateValidator(Validator):
    """
    Validator whose per-value check is the CPython ``PyDate_Check`` macro.
    """

    cdef inline bint is_value_typed(self, object value) except -1:
        cdef bint is_date = PyDate_Check(value)
        return is_date


# Note: only python-exposed for tests
cpdef bint is_date_array(ndarray values, bint skipna=False):
    """
    Run a DateValidator over ``values``.

    ``skipna`` is forwarded unchanged to the validator's constructor; the
    result of ``validate`` is returned as-is.
    """
    cdef DateValidator checker = DateValidator(len(values), skipna=skipna)
    return checker.validate(values)


@cython.internal
cdef class TimeValidator(Validator):
    """
    Validator whose per-value check is the CPython ``PyTime_Check`` macro.
    """

    cdef inline bint is_value_typed(self, object value) except -1:
        cdef bint is_time = PyTime_Check(value)
        return is_time


# Note: only python-exposed for tests
cpdef bint is_time_array(ndarray values, bint skipna=False):
cdef:
TimeValidator validator = TimeValidator(len(values), skipna=skipna)
Expand Down Expand Up @@ -2022,6 +2053,7 @@ cdef bint is_period_array(ndarray[object] values):
return True


# Note: only python-exposed for tests
cpdef bint is_interval_array(ndarray values):
"""
Is this an ndarray of Interval (or np.nan) with a single dtype?
Expand Down