Skip to content
22 changes: 22 additions & 0 deletions asv_bench/benchmarks/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .pandas_vb_common import (
datetime_dtypes,
extension_dtypes,
lib,
numeric_dtypes,
string_dtypes,
)
Expand Down Expand Up @@ -40,4 +41,25 @@ def time_pandas_dtype_invalid(self, dtype):
pass


class InferDtypes:
    """Benchmarks for ``lib.infer_dtype`` over several kinds of input.

    Each benchmark is parametrized by a key of ``data_dict``; the value
    stored under that key is what gets passed to ``lib.infer_dtype``.
    """

    param_names = ["dtype"]

    # Sample inputs, built once when the class body is evaluated.
    data_dict = {
        # NumPy arrays versus plain Python lists holding the same integer.
        "np-object": np.array([1] * 100000, dtype=object),
        "py-object": [1] * 100000,
        # Half regular values, half missing values (NaN / None).
        "np-null": np.array([1] * 50000 + [np.nan] * 50000),
        "py-null": [1] * 50000 + [None] * 50000,
        # Arrays created with an explicit numeric dtype.
        "np-int": np.array([1] * 100000, dtype=int),
        "np-floating": np.array([1.0] * 100000, dtype=float),
        # Edge cases: no elements at all, and bytes elements.
        "empty": [],
        "bytes": [b"a"] * 100000,
    }

    # One parameter per sample, in the insertion order of ``data_dict``.
    params = list(data_dict)

    def time_infer_skipna(self, dtype):
        """Time ``lib.infer_dtype`` on the selected sample with ``skipna=True``."""
        sample = self.data_dict[dtype]
        lib.infer_dtype(sample, skipna=True)

    def time_infer(self, dtype):
        """Time ``lib.infer_dtype`` on the selected sample with ``skipna=False``."""
        sample = self.data_dict[dtype]
        lib.infer_dtype(sample, skipna=False)


from .pandas_vb_common import setup # noqa: F401 isort:skip
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ Performance improvements
- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
The improvement is not present when checking if the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)
- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`)
- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)

.. _whatsnew_1000.bug_fixes:

Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1259,16 +1259,16 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
# make contiguous
values = values.ravel()

if skipna:
values = values[~isnaobj(values)]

val = _try_infer_map(values)
if val is not None:
return val

if values.dtype != np.object_:
values = values.astype('O')

if skipna:
values = values[~isnaobj(values)]

n = len(values)
if n == 0:
return 'empty'
Expand Down