|
1 | 1 | from collections import defaultdict |
2 | 2 | import itertools |
3 | | -import operator |
4 | | -import re |
5 | 3 | from typing import ( |
6 | 4 | Any, |
7 | 5 | DefaultDict, |
8 | 6 | Dict, |
9 | 7 | List, |
10 | 8 | Optional, |
11 | | - Pattern, |
12 | 9 | Sequence, |
13 | 10 | Tuple, |
14 | 11 | TypeVar, |
|
19 | 16 | import numpy as np |
20 | 17 |
|
21 | 18 | from pandas._libs import internals as libinternals, lib |
22 | | -from pandas._typing import ArrayLike, DtypeObj, Label, Scalar |
| 19 | +from pandas._typing import ArrayLike, DtypeObj, Label |
23 | 20 | from pandas.util._validators import validate_bool_kwarg |
24 | 21 |
|
25 | 22 | from pandas.core.dtypes.cast import ( |
|
29 | 26 | ) |
30 | 27 | from pandas.core.dtypes.common import ( |
31 | 28 | DT64NS_DTYPE, |
32 | | - is_datetimelike_v_numeric, |
33 | 29 | is_dtype_equal, |
34 | 30 | is_extension_array_dtype, |
35 | 31 | is_list_like, |
36 | | - is_numeric_v_string_like, |
37 | | - is_scalar, |
38 | 32 | ) |
39 | 33 | from pandas.core.dtypes.concat import concat_compat |
40 | 34 | from pandas.core.dtypes.dtypes import ExtensionDtype |
|
44 | 38 | import pandas.core.algorithms as algos |
45 | 39 | from pandas.core.arrays.sparse import SparseDtype |
46 | 40 | from pandas.core.base import PandasObject |
47 | | -import pandas.core.common as com |
48 | 41 | from pandas.core.construction import extract_array |
49 | 42 | from pandas.core.indexers import maybe_convert_indices |
50 | 43 | from pandas.core.indexes.api import Index, ensure_index |
@@ -628,31 +621,10 @@ def replace_list( |
628 | 621 | """ do a list replace """ |
629 | 622 | inplace = validate_bool_kwarg(inplace, "inplace") |
630 | 623 |
|
631 | | - # figure out our mask apriori to avoid repeated replacements |
632 | | - values = self.as_array() |
633 | | - |
634 | | - def comp(s: Scalar, mask: np.ndarray, regex: bool = False): |
635 | | - """ |
636 | | - Generate a bool array by perform an equality check, or perform |
637 | | - an element-wise regular expression matching |
638 | | - """ |
639 | | - if isna(s): |
640 | | - return ~mask |
641 | | - |
642 | | - s = com.maybe_box_datetimelike(s) |
643 | | - return _compare_or_regex_search(values, s, regex, mask) |
644 | | - |
645 | | - # Calculate the mask once, prior to the call of comp |
646 | | - # in order to avoid repeating the same computations |
647 | | - mask = ~isna(values) |
648 | | - |
649 | | - masks = [comp(s, mask, regex) for s in src_list] |
650 | | - |
651 | 624 | bm = self.apply( |
652 | 625 | "_replace_list", |
653 | 626 | src_list=src_list, |
654 | 627 | dest_list=dest_list, |
655 | | - masks=masks, |
656 | 628 | inplace=inplace, |
657 | 629 | regex=regex, |
658 | 630 | ) |
@@ -1900,80 +1872,6 @@ def _merge_blocks( |
1900 | 1872 | return blocks |
1901 | 1873 |
|
1902 | 1874 |
|
1903 | | -def _compare_or_regex_search( |
1904 | | - a: ArrayLike, |
1905 | | - b: Union[Scalar, Pattern], |
1906 | | - regex: bool = False, |
1907 | | - mask: Optional[ArrayLike] = None, |
1908 | | -) -> Union[ArrayLike, bool]: |
1909 | | - """ |
1910 | | - Compare two array_like inputs of the same shape or two scalar values |
1911 | | -
|
1912 | | - Calls operator.eq or re.search, depending on regex argument. If regex is |
1913 | | - True, perform an element-wise regex matching. |
1914 | | -
|
1915 | | - Parameters |
1916 | | - ---------- |
1917 | | - a : array_like |
1918 | | - b : scalar or regex pattern |
1919 | | - regex : bool, default False |
1920 | | - mask : array_like or None (default) |
1921 | | -
|
1922 | | - Returns |
1923 | | - ------- |
1924 | | - mask : array_like of bool |
1925 | | - """ |
1926 | | - |
1927 | | - def _check_comparison_types( |
1928 | | - result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern] |
1929 | | - ): |
1930 | | - """ |
1931 | | - Raises an error if the two arrays (a,b) cannot be compared. |
1932 | | - Otherwise, returns the comparison result as expected. |
1933 | | - """ |
1934 | | - if is_scalar(result) and isinstance(a, np.ndarray): |
1935 | | - type_names = [type(a).__name__, type(b).__name__] |
1936 | | - |
1937 | | - if isinstance(a, np.ndarray): |
1938 | | - type_names[0] = f"ndarray(dtype={a.dtype})" |
1939 | | - |
1940 | | - raise TypeError( |
1941 | | - f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" |
1942 | | - ) |
1943 | | - |
1944 | | - if not regex: |
1945 | | - op = lambda x: operator.eq(x, b) |
1946 | | - else: |
1947 | | - op = np.vectorize( |
1948 | | - lambda x: bool(re.search(b, x)) |
1949 | | - if isinstance(x, str) and isinstance(b, (str, Pattern)) |
1950 | | - else False |
1951 | | - ) |
1952 | | - |
1953 | | - # GH#32621 use mask to avoid comparing to NAs |
1954 | | - if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): |
1955 | | - mask = np.reshape(~(isna(a)), a.shape) |
1956 | | - if isinstance(a, np.ndarray): |
1957 | | - a = a[mask] |
1958 | | - |
1959 | | - if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): |
1960 | | - # GH#29553 avoid deprecation warnings from numpy |
1961 | | - _check_comparison_types(False, a, b) |
1962 | | - return False |
1963 | | - |
1964 | | - result = op(a) |
1965 | | - |
1966 | | - if isinstance(result, np.ndarray) and mask is not None: |
1967 | | - # The shape of the mask can differ to that of the result |
1968 | | - # since we may compare only a subset of a's or b's elements |
1969 | | - tmp = np.zeros(mask.shape, dtype=np.bool_) |
1970 | | - tmp[mask] = result |
1971 | | - result = tmp |
1972 | | - |
1973 | | - _check_comparison_types(result, a, b) |
1974 | | - return result |
1975 | | - |
1976 | | - |
1977 | 1875 | def _fast_count_smallints(arr: np.ndarray) -> np.ndarray: |
1978 | 1876 | """Faster version of set(arr) for sequences of small numbers.""" |
1979 | 1877 | counts = np.bincount(arr.astype(np.int_)) |
|
0 commit comments