diff --git a/pyproject.toml b/pyproject.toml index 291fd063aeb..5f5901d522f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ [tool.mypy] ignore_missing_imports = true +ignore_errors = false +enable_error_code = "ignore-without-code" +warn_unused_ignores = true # If we don't specify this, then mypy will check excluded files if # they are imported by a checked file. follow_imports = "skip" diff --git a/python/cudf/cudf/core/accessors/string.py b/python/cudf/cudf/core/accessors/string.py index e67eb6161d2..fa0932b93d9 100644 --- a/python/cudf/cudf/core/accessors/string.py +++ b/python/cudf/cudf/core/accessors/string.py @@ -503,7 +503,7 @@ def join( else: # If self._column is not a ListColumn, we will have to # split each row by character and create a ListColumn out of it. - list_column = self._column.fillna("").character_tokenize() # type: ignore[assignment] + list_column = self._column.fillna("").character_tokenize() if len(list_column) == 0: list_column = column_empty( # type: ignore[assignment] len(self._column), dtype=list_column.dtype @@ -524,9 +524,9 @@ def join( f"of type: {type(sep_na_rep)}" ) data = list_column.join_list_elements( # type: ignore[attr-defined] - sep_column, # type: ignore[arg-type] + sep_column, sep_na_rep, - string_na_rep, # type: ignore[arg-type] + string_na_rep, ) else: raise TypeError( @@ -753,7 +753,7 @@ def contains( result_col = self._column.contains_re(pat, flags) # type: ignore[arg-type] else: if case is False: - input_column = self._column.to_lower() # type: ignore[union-attr] + input_column = self._column.to_lower() pat_normed = pat.lower() # type: ignore[union-attr] else: input_column = self._column @@ -763,7 +763,7 @@ def contains( # TODO: we silently ignore the `regex=` flag here col_pat = as_column(pat, dtype=CUDF_STRING_DTYPE) if case is False: - input_column = self._column.to_lower() # type: ignore[union-attr] + input_column = self._column.to_lower() col_pat = col_pat.to_lower() # type: ignore[attr-defined] else: input_column = self._column @@ -990,7 +990,7 @@ def replace( # Pandas forces non-regex replace when pat is a single-character if regex is True and len(pat) > 1: - result = self._column.replace_re( # type: ignore[arg-type] + result = self._column.replace_re( pat, # type: ignore[arg-type] pa_repl, n, @@ -1106,7 +1106,7 @@ def slice( dtype: object """ return self._return_or_inplace( - self._column.slice_strings(start, stop, step) # type: ignore[arg-type] + self._column.slice_strings(start, stop, step) ) def isinteger(self) -> Series | Index: @@ -2133,7 +2133,7 @@ def slice_from(self, starts: Series, stops: Series) -> Series | Index: dtype: object """ return self._return_or_inplace( - self._column.slice_strings(starts._column, stops._column) # type: ignore[arg-type] + self._column.slice_strings(starts._column, stops._column) ) def slice_replace( @@ -2320,13 +2320,13 @@ def get(self, i: int = 0) -> Series | Index: if i < 0: next_index = i - 1 step = -1 - to_mask = str_lens < abs(i) # type: ignore[operator] + to_mask = str_lens < abs(i) else: next_index = i + 1 step = 1 - to_mask = str_lens <= i # type: ignore[operator] + to_mask = str_lens <= i result = self.slice(i, next_index, step) - if to_mask.any(): # type: ignore[union-attr] + if to_mask.any(): result[to_mask] = pd.NA # type: ignore[index] return result @@ -3796,7 +3796,7 @@ def _starts_ends_with( pat: str | tuple[str, ...], ) -> Series | Index: return self._return_or_inplace( - self._column.starts_ends_with(method, pat) # type: ignore[arg-type] + self._column.starts_ends_with(method, pat) ) def endswith(self, pat: str | tuple[str, ...]) -> Series | Index: @@ -3917,10 +3917,10 @@ def removesuffix(self, suffix: str) -> Series | Index: """ if suffix is None or len(suffix) == 0: return self._return_or_inplace(self._column) - ends_column = self.endswith(suffix)._column # type: ignore[union-attr] - removed_column = self.slice(0, -len(suffix), None)._column # type: ignore[union-attr] + ends_column = self.endswith(suffix)._column + removed_column = self.slice(0, -len(suffix), None)._column - result = removed_column.copy_if_else(self._column, ends_column) # type: ignore[arg-type] + result = removed_column.copy_if_else(self._column, ends_column) return self._return_or_inplace(result) def removeprefix(self, prefix: str) -> Series | Index: @@ -3956,9 +3956,9 @@ def removeprefix(self, prefix: str) -> Series | Index: """ if prefix is None or len(prefix) == 0: return self._return_or_inplace(self._column) - starts_column = self.startswith(prefix)._column # type: ignore[union-attr] - removed_column = self.slice(len(prefix), None, None)._column # type: ignore[union-attr] - result = removed_column.copy_if_else(self._column, starts_column) # type: ignore[arg-type] + starts_column = self.startswith(prefix)._column + removed_column = self.slice(len(prefix), None, None)._column + result = removed_column.copy_if_else(self._column, starts_column) return self._return_or_inplace(result) def _find( @@ -4125,7 +4125,7 @@ def index( if end is None: end = -1 - result_col = self.find(sub, start, end)._column # type: ignore[union-attr] + result_col = self.find(sub, start, end)._column result = self._return_or_inplace(result_col) @@ -4187,7 +4187,7 @@ def rindex( if end is None: end = -1 - result_col = self.rfind(sub, start, end)._column # type: ignore[union-attr] + result_col = self.rfind(sub, start, end)._column result = self._return_or_inplace(result_col) @@ -4494,7 +4494,7 @@ def tokenize(self, delimiter: str = " ") -> Series | Index: if isinstance(delim, ColumnBase): result = self._return_or_inplace( - self._column.tokenize_column(delim), # type: ignore[arg-type] + self._column.tokenize_column(delim), retain_index=False, ) elif isinstance(delim, plc.Scalar): @@ -4508,7 +4508,7 @@ def tokenize(self, delimiter: str = " ") -> Series | Index: for delimiters, but got {type(delimiter)}" ) if isinstance(self._parent, cudf.Series): - result.index = self._parent.index.repeat( # type: ignore + result.index = self._parent.index.repeat( # type: ignore[union-attr] self.token_count(delimiter=delimiter) ) return result @@ -4635,12 +4635,12 @@ def token_count(self, delimiter: str = " ") -> Series | Index: delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) if isinstance(delim, ColumnBase): return self._return_or_inplace( - self._column.count_tokens_column(delim) # type: ignore[arg-type] + self._column.count_tokens_column(delim) ) elif isinstance(delim, plc.Scalar): return self._return_or_inplace( - self._column.count_tokens_scalar(delim) # type: ignore[arg-type] + self._column.count_tokens_scalar(delim) ) else: raise TypeError( @@ -4734,7 +4734,7 @@ def character_ngrams( if isinstance(result, cudf.Series) and not as_list: # before exploding, removes those lists which have 0 length result = result[result.list.len() > 0] - return result.explode() # type: ignore + return result.explode() # type: ignore[union-attr] return result def hash_character_ngrams( @@ -5032,9 +5032,7 @@ def is_consonant(self, position) -> Series | Index: """ if can_convert_to_column(position): position = as_column(position) - return self._return_or_inplace( - self._column.is_letter(False, position) # type: ignore[arg-type] - ) + return self._return_or_inplace(self._column.is_letter(False, position)) def is_vowel(self, position) -> Series | Index: """ @@ -5069,9 +5067,7 @@ def is_vowel(self, position) -> Series | Index: """ if can_convert_to_column(position): position = as_column(position) - return self._return_or_inplace( - self._column.is_letter(True, position) # type: ignore[arg-type] - ) + return self._return_or_inplace(self._column.is_letter(True, position)) def build_suffix_array(self, min_width: int) -> Series | Index: """ @@ -5095,7 +5091,7 @@ def build_suffix_array(self, min_width: int) -> Series | Index: New column of suffix array """ return self._return_or_inplace( - self._column.build_suffix_array(min_width), # type: ignore[arg-type] + self._column.build_suffix_array(min_width), inplace=False, expand=False, retain_index=False, @@ -5124,7 +5120,7 @@ def resolve_duplicates(self, sa, min_width: int) -> Series | Index: """ sa_column = sa._column return self._return_or_inplace( - self._column.resolve_duplicates(sa_column, min_width), # type: ignore[arg-type] + self._column.resolve_duplicates(sa_column, min_width), inplace=False, expand=False, retain_index=False, @@ -5163,7 +5159,7 @@ def resolve_duplicates_pair( return self._return_or_inplace( self._column.resolve_duplicates_pair( sa1_col, input2_col, sa2_col, min_width - ), # type: ignore[arg-type] + ), inplace=False, expand=False, retain_index=False, @@ -5505,5 +5501,5 @@ def jaccard_index(self, input: Series, width: int) -> Series | Index: dtype: float32 """ return self._return_or_inplace( - self._column.jaccard_index(input._column, width) # type: ignore[arg-type] + self._column.jaccard_index(input._column, width) ) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 512dbdda59e..a5f1bdf999f 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -81,7 +81,7 @@ class CategoricalColumn(column.ColumnBase): """ dtype: CategoricalDtype - _children: tuple[NumericalColumn] # type: ignore[assignment] + _children: tuple[NumericalColumn] _VALID_REDUCTIONS = { "max", "min", @@ -103,7 +103,7 @@ def __init__( mask: Buffer | None, offset: int, null_count: int, - children: tuple[NumericalColumn], # type: ignore[assignment] + children: tuple[NumericalColumn], ): if data is not None: raise ValueError(f"{data=} must be None") @@ -438,7 +438,7 @@ def find_and_replace( # However, it seems that this functionality has been broken for a # long time so for now we're just having mypy ignore and we'll come # back to this. - if fill_value in self.categories: # type: ignore + if fill_value in self.categories: # type: ignore[operator] replaced = self.fillna(fill_value) else: new_categories = self.categories.append( @@ -703,7 +703,7 @@ def _concat( elif newsize == 0: codes_col = column.column_empty(0, head.codes.dtype) else: - codes_col = column.concat_columns(codes) # type: ignore[arg-type] + codes_col = column.concat_columns(codes) return codes_col._with_type_metadata(CategoricalDtype(categories=cats)) # type: ignore[return-value] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 86766b48d31..db0aece973a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -237,13 +237,13 @@ def size(self) -> int: @property def base_data(self) -> None | Buffer: - return self._base_data # type: ignore[has-type] + return self._base_data @property def data(self) -> None | Buffer: if self.base_data is None: return None - if self._data is None: # type: ignore[has-type] + if self._data is None: start = self.offset * self.dtype.itemsize end = start + self.size * self.dtype.itemsize self._data = self.base_data[start:end] # type: ignore[assignment] @@ -276,7 +276,7 @@ def set_base_data(self, value: None | Buffer) -> None: f"got {type(value).__name__}" ) - self._data = None # type: ignore[assignment] + self._data = None self._base_data = value @property @@ -288,11 +288,11 @@ def has_nulls(self, include_nan: bool = False) -> bool: @property def base_mask(self) -> None | Buffer: - return self._base_mask # type: ignore[has-type] + return self._base_mask @property def mask(self) -> None | Buffer: - if self._mask is None: # type: ignore[has-type] + if self._mask is None: if self.base_mask is None or self.offset == 0: self._mask = self.base_mask # type: ignore[assignment] else: @@ -412,7 +412,7 @@ def null_count(self) -> int: else: with acquire_spill_lock(): self._null_count = plc.null_mask.null_count( - plc.gpumemoryview(self.base_mask), # type: ignore[union-attr] + plc.gpumemoryview(self.base_mask), self.offset, self.offset + self.size, ) @@ -424,7 +424,7 @@ def offset(self) -> int: @property def base_children(self) -> tuple[ColumnBase, ...]: - return self._base_children # type: ignore[has-type] + return self._base_children @property def children(self) -> tuple[ColumnBase, ...]: @@ -686,7 +686,7 @@ def from_cuda_array_interface(cls, arbitrary: Any) -> ColumnBase: f"expected {required_num_bytes} bytes." ) column = column.set_mask(mask_buff) - return column # type: ignore[return-value] + return column def __len__(self) -> int: return self.size @@ -784,7 +784,7 @@ def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self: pa.scalar(hi, type=cudf_dtype_to_pa_type(self.dtype)) ), ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: if not isinstance(other, ColumnBase) or len(self) != len(other): @@ -987,7 +987,7 @@ def _fill( ) if self.dtype == CUDF_STRING_DTYPE: return self._mimic_inplace(result, inplace=True) - return result # type: ignore[return-value] + return result if not fill_value.is_valid() and not self.nullable: mask = as_buffer( @@ -1014,7 +1014,7 @@ def shift(self, offset: int, fill_value: ScalarLike) -> Self: offset, plc_fill_value, ) - return type(self).from_pylibcudf(plc_col) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_col) def copy(self, deep: bool = True) -> Self: """ @@ -1204,7 +1204,7 @@ def _scatter_by_slice( return self._fill(value, start, stop, inplace=True) else: with acquire_spill_lock(): - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.copying.copy_range( value.to_pylibcudf(mode="read"), self.to_pylibcudf(mode="read"), @@ -1315,7 +1315,7 @@ def _validate_fillna_value( def replace( self, values_to_replace: Self, replacement_values: Self ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.replace.find_and_replace_all( self.to_pylibcudf(mode="read"), values_to_replace.to_pylibcudf(mode="read"), @@ -1325,7 +1325,7 @@ def replace( @acquire_spill_lock() def repeat(self, repeats: int) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.filling.repeat( plc.Table([self.to_pylibcudf(mode="read")]), repeats ).columns()[0] @@ -1453,7 +1453,7 @@ def interpolate(self, index: Index) -> ColumnBase: # Each point is evenly spaced, index values don't matter known_x = cp.flatnonzero(valid_locs.values) else: - known_x = index._column.apply_boolean_mask(valid_locs).values # type: ignore[attr-defined] + known_x = index._column.apply_boolean_mask(valid_locs).values known_y = self.apply_boolean_mask(valid_locs).values result = cp.interp(index.to_cupy(), known_x, known_y) @@ -2230,7 +2230,7 @@ def one_hot_encode(self, categories: ColumnBase) -> Generator[ColumnBase]: @acquire_spill_lock() def scan(self, scan_op: str, inclusive: bool, **kwargs) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.reduce.scan( self.to_pylibcudf(mode="read"), aggregation.make_aggregation(scan_op, kwargs).plc_obj, @@ -2602,7 +2602,7 @@ def build_column( elif isinstance(dtype, ListDtype): return cudf.core.column.ListColumn( data=None, - size=size, # type: ignore[arg-type] + size=size, dtype=dtype, mask=mask, offset=offset, @@ -2612,7 +2612,7 @@ def build_column( elif isinstance(dtype, IntervalDtype): return cudf.core.column.IntervalColumn( data=None, - size=size, # type: ignore[arg-type] + size=size, dtype=dtype, mask=mask, offset=offset, @@ -2622,17 +2622,17 @@ def build_column( elif isinstance(dtype, StructDtype): return cudf.core.column.StructColumn( data=None, - size=size, # type: ignore[arg-type] + size=size, dtype=dtype, mask=mask, offset=offset, null_count=null_count, - children=children, # type: ignore[arg-type] + children=children, ) elif isinstance(dtype, cudf.Decimal64Dtype): return cudf.core.column.Decimal64Column( data=data, # type: ignore[arg-type] - size=size, # type: ignore[arg-type] + size=size, offset=offset, dtype=dtype, mask=mask, @@ -2642,7 +2642,7 @@ def build_column( elif isinstance(dtype, cudf.Decimal32Dtype): return cudf.core.column.Decimal32Column( data=data, # type: ignore[arg-type] - size=size, # type: ignore[arg-type] + size=size, offset=offset, dtype=dtype, mask=mask, @@ -2652,7 +2652,7 @@ def build_column( elif isinstance(dtype, cudf.Decimal128Dtype): return cudf.core.column.Decimal128Column( data=data, # type: ignore[arg-type] - size=size, # type: ignore[arg-type] + size=size, offset=offset, dtype=dtype, mask=mask, @@ -3399,4 +3399,4 @@ def concat_columns(objs: Sequence[ColumnBase]) -> ColumnBase: plc.concatenate.concatenate( [col.to_pylibcudf(mode="read") for col in objs_with_len] ) - )._with_type_metadata(objs_with_len[0].dtype) # type: ignore[return-value] + )._with_type_metadata(objs_with_len[0].dtype) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 742ccfc78da..e512b961de2 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -245,7 +245,7 @@ def is_month_end(self) -> ColumnBase: last_day_col = type(self).from_pylibcudf( plc.datetime.last_day_of_month(self.to_pylibcudf(mode="read")) ) - return (self.day == last_day_col.day).fillna(False) # type: ignore[attr-defined] + return (self.day == last_day_col.day).fillna(False) @functools.cached_property def is_quarter_end(self) -> ColumnBase: @@ -564,7 +564,7 @@ def as_string_column(self, dtype: DtypeObj) -> StringColumn: def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: reflect, op = self._check_reflected_op(op) if isinstance(other, cudf.DateOffset): - return other._datetime_binop(self, op, reflect=reflect) # type: ignore[attr-defined] + return other._datetime_binop(self, op, reflect=reflect) other = self._normalize_binop_operand(other) if other is NotImplemented: return NotImplemented @@ -659,10 +659,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: lhs_binop: plc.Scalar | ColumnBase = ( pa_scalar_to_plc_scalar(lhs) if isinstance(lhs, pa.Scalar) else lhs - ) # type: ignore[assignment] + ) rhs_binop: plc.Scalar | ColumnBase = ( pa_scalar_to_plc_scalar(rhs) if isinstance(rhs, pa.Scalar) else rhs - ) # type: ignore[assignment] + ) result_col = binaryop.binaryop(lhs_binop, rhs_binop, op, out_dtype) if out_dtype.kind != "b" and op == "__add__": @@ -679,11 +679,11 @@ def _with_type_metadata(self, dtype: DtypeObj) -> DatetimeColumn: return DatetimeTZColumn( data=self.base_data, # type: ignore[arg-type] dtype=dtype, - mask=self.base_mask, # type: ignore[arg-type] + mask=self.base_mask, size=self.size, offset=self.offset, null_count=self.null_count, - children=self.base_children, # type: ignore[arg-type] + children=self.base_children, ) if cudf.get_option("mode.pandas_compatible"): self._dtype = get_dtype_of_same_type(dtype, self.dtype) @@ -847,11 +847,11 @@ def _utc_time(self) -> DatetimeColumn: return DatetimeColumn( data=self.base_data, # type: ignore[arg-type] dtype=_get_base_dtype(self.dtype), - mask=self.base_mask, # type: ignore[arg-type] + mask=self.base_mask, size=self.size, offset=self.offset, null_count=self.null_count, - children=self.base_children, # type: ignore[arg-type] + children=self.base_children, ) @functools.cached_property diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index e22cdfc3766..f546ea5c204 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -254,7 +254,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: other = other.astype(self.dtype) other_cudf_dtype = other.dtype elif isinstance(other, (int, Decimal)): - other_cudf_dtype = self.dtype._from_decimal(Decimal(other)) # type: ignore[arg-type, union-attr] + other_cudf_dtype = self.dtype._from_decimal(Decimal(other)) # type: ignore[union-attr] elif isinstance(other, float): return self._binaryop(as_column(other, length=len(self)), op) elif is_na_like(other): diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 0906119eca9..a67cf6a3904 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -29,7 +29,7 @@ def __init__( mask: Buffer | None, offset: int, null_count: int, - children: tuple[ColumnBase, ColumnBase], # type: ignore[assignment] + children: tuple[ColumnBase, ColumnBase], ): if len(children) != 2: raise ValueError( @@ -129,7 +129,7 @@ def set_closed( IntervalDtype(self.dtype.subtype, closed) # type: ignore[union-attr] ) - def as_interval_column(self, dtype: IntervalDtype) -> Self: # type: ignore[override] + def as_interval_column(self, dtype: IntervalDtype) -> Self: if isinstance(dtype, IntervalDtype): return self._with_type_metadata(dtype) # type: ignore[return-value] else: @@ -159,7 +159,7 @@ def to_pandas( def element_indexing( self, index: int - ) -> pd.Interval | dict[Any, Any] | None: # type: ignore[override] + ) -> pd.Interval | dict[Any, Any] | None: result = super().element_indexing(index) if isinstance(result, dict) and cudf.get_option( "mode.pandas_compatible" diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 40b2ee92496..c01122a0bbf 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -49,7 +49,7 @@ def __init__( mask: Buffer | None, offset: int, null_count: int, - children: tuple[NumericalColumn, ColumnBase], # type: ignore[assignment] + children: tuple[NumericalColumn, ColumnBase], ): if data is not None: raise ValueError("data must be None") @@ -156,7 +156,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return NotImplemented if isinstance(other.dtype, ListDtype): if op == "__add__": - return self.concatenate_rows([other]) # type: ignore[list-item] + return self.concatenate_rows([other]) else: raise NotImplementedError( "Lists concatenation for this operation is not yet" @@ -287,7 +287,7 @@ def from_sequences(cls, arbitrary: Sequence[ColumnLike]) -> Self: 0, [offset_col, data_plc_col], ) - return cls.from_pylibcudf(plc_column) # type: ignore[return-value] + return cls.from_pylibcudf(plc_column) @cached_property def _string_separators(self) -> plc.Column: @@ -537,7 +537,7 @@ def minhash_ngrams( f"seed must be in range [0, {np.iinfo(np.uint32).max}]" ) seed = np.uint32(seed) - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.minhash.minhash_ngrams( self.to_pylibcudf(mode="read"), width, @@ -562,7 +562,7 @@ def minhash64_ngrams( f"seed must be in range [0, {np.iinfo(np.uint64).max}]" ) seed = np.uint64(seed) - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.minhash.minhash64_ngrams( self.to_pylibcudf(mode="read"), width, diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 6c793d6416b..176e695cc74 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -352,7 +352,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: ) rhs_binaryop: plc.Scalar | ColumnBase = ( pa_scalar_to_plc_scalar(rhs) if isinstance(rhs, pa.Scalar) else rhs - ) # type: ignore[assignment] + ) res = binaryop.binaryop(lhs_binaryop, rhs_binaryop, op, out_dtype) if ( @@ -556,9 +556,9 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: if ( not is_pandas_nullable_extension_dtype(self.dtype) and is_pandas_nullable_extension_dtype(dtype) - and dtype.kind == "f" # type: ignore[union-attr] + and dtype.kind == "f" ): - res = self.nans_to_nulls().cast(dtype=dtype) # type: ignore[return-value] + res = self.nans_to_nulls().cast(dtype=dtype) res._dtype = dtype return res # type: ignore[return-value] if dtype_to_pylibcudf_type(dtype) == dtype_to_pylibcudf_type( @@ -588,7 +588,7 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: "Cannot convert non-finite values (NA or inf) to integer" ) # If casting from float to int, we need to convert nans to nulls - res = self.nans_to_nulls().cast(dtype=dtype) # type: ignore[return-value] + res = self.nans_to_nulls().cast(dtype=dtype) res._dtype = dtype return res # type: ignore[return-value] @@ -767,7 +767,7 @@ def _validate_fillna_value( return super()._validate_fillna_value(fill_value) else: cudf_obj = as_column(fill_value, nan_as_null=False) - if not cudf_obj.can_cast_safely(self.dtype): # type: ignore[attr-defined] + if not cudf_obj.can_cast_safely(self.dtype): raise TypeError( f"Cannot safely cast non-equivalent " f"{cudf_obj.dtype.type.__name__} to " @@ -963,7 +963,7 @@ def digitize(self, bins: np.ndarray, right: bool = False) -> Self: if bin_col.nullable: raise ValueError("`bins` cannot contain null entries.") - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( getattr(plc.search, "lower_bound" if right else "upper_bound")( plc.Table([bin_col.to_pylibcudf(mode="read")]), plc.Table([self.to_pylibcudf(mode="read")]), diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 7d9c4ce0831..e2263bd5850 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -164,7 +164,7 @@ def quantile( indices.to_pylibcudf(mode="read"), exact, ) - result = type(self).from_pylibcudf(plc_column) # type: ignore[assignment] + result = type(self).from_pylibcudf(plc_column) if return_scalar: scalar_result = result.element_indexing(0) if interpolation in {"lower", "higher", "nearest"}: @@ -268,7 +268,7 @@ def round( raise ValueError(f"{how=} must be either 'half_even' or 'half_up'") plc_how = plc.round.RoundingMethod[how.upper()] with acquire_spill_lock(): - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.round.round( self.to_pylibcudf(mode="read"), decimals, plc_how ) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 952a815273e..47222d03bbb 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -123,7 +123,7 @@ def __init__( mask: Buffer | None, offset: int, null_count: int, - children: tuple[ColumnBase], # type: ignore[assignment] + children: tuple[ColumnBase], ): if not isinstance(data, Buffer): raise ValueError("data must be a Buffer") @@ -332,7 +332,7 @@ def as_numerical_column(self, dtype: np.dtype) -> NumericalColumn: result = self.count_characters() > np.int8(0) if not is_pandas_nullable_extension_dtype(dtype): result = result.fillna(False) - return result._with_type_metadata(dtype) # type: ignore[return-value] + return result._with_type_metadata(dtype) cast_func: Callable[[plc.Column, plc.DataType], plc.Column] if dtype.kind in {"i", "u"}: @@ -678,7 +678,7 @@ def generate_ngrams(self, ngrams: int, separator: plc.Scalar) -> Self: ngrams, separator, ) - return type(self).from_pylibcudf(result) # type: ignore[return-value] + return type(self).from_pylibcudf(result) @acquire_spill_lock() def generate_character_ngrams(self, ngrams: int) -> ListColumn: @@ -708,7 +708,7 @@ def build_suffix_array(self, min_width: int) -> Self: result = plc.nvtext.deduplicate.build_suffix_array( self.to_pylibcudf(mode="read"), min_width ) - return type(self).from_pylibcudf(result) # type: ignore[return-value] + return type(self).from_pylibcudf(result) @acquire_spill_lock() def resolve_duplicates(self, sa: Self, min_width: int) -> Self: @@ -717,7 +717,7 @@ def resolve_duplicates(self, sa: Self, min_width: int) -> Self: sa.to_pylibcudf(mode="read"), min_width, ) - return type(self).from_pylibcudf(result) # type: ignore[return-value] + return type(self).from_pylibcudf(result) @acquire_spill_lock() def resolve_duplicates_pair( @@ -730,7 +730,7 @@ def resolve_duplicates_pair( sa2.to_pylibcudf(mode="read"), min_width, ) - return type(self).from_pylibcudf(result) # type: ignore[return-value] + return type(self).from_pylibcudf(result) @acquire_spill_lock() def edit_distance(self, targets: Self) -> NumericalColumn: @@ -752,7 +752,7 @@ def byte_pair_encoding( merge_pairs: plc.nvtext.byte_pair_encode.BPEMergePairs, separator: str, ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.byte_pair_encode.byte_pair_encoding( self.to_pylibcudf(mode="read"), merge_pairs, @@ -767,7 +767,7 @@ def ngrams_tokenize( delimiter: plc.Scalar, separator: plc.Scalar, ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.ngrams_tokenize.ngrams_tokenize( self.to_pylibcudf(mode="read"), ngrams, @@ -778,7 +778,7 @@ def ngrams_tokenize( @acquire_spill_lock() def normalize_spaces(self) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.normalize.normalize_spaces( self.to_pylibcudf(mode="read") ) @@ -799,7 +799,7 @@ def normalize_characters( def replace_tokens( self, targets: Self, replacements: Self, delimiter: plc.Scalar ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.replace.replace_tokens( self.to_pylibcudf(mode="read"), targets.to_pylibcudf(mode="read"), @@ -815,7 +815,7 @@ def filter_tokens( replacement: plc.Scalar, delimiter: plc.Scalar, ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.replace.filter_tokens( self.to_pylibcudf(mode="read"), min_token_length, @@ -834,7 +834,7 @@ def porter_stemmer_measure(self) -> NumericalColumn: @acquire_spill_lock() def is_letter(self, is_vowel: bool, index: int | NumericalColumn) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.stemmer.is_letter( self.to_pylibcudf(mode="read"), is_vowel, @@ -846,7 +846,7 @@ def is_letter(self, is_vowel: bool, index: int | NumericalColumn) -> Self: @acquire_spill_lock() def tokenize_scalar(self, delimiter: plc.Scalar) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.tokenize.tokenize_scalar( self.to_pylibcudf(mode="read"), delimiter ) @@ -854,7 +854,7 @@ def tokenize_scalar(self, delimiter: plc.Scalar) -> Self: @acquire_spill_lock() def tokenize_column(self, delimiters: Self) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.tokenize.tokenize_column( self.to_pylibcudf(mode="read"), delimiters.to_pylibcudf(mode="read"), @@ -880,7 +880,7 @@ def count_tokens_column(self, delimiters: Self) -> NumericalColumn: @acquire_spill_lock() def character_tokenize(self) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.tokenize.character_tokenize( self.to_pylibcudf(mode="read") ) @@ -893,7 +893,7 @@ def tokenize_with_vocabulary( delimiter: str, default_id: int, ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.tokenize.tokenize_with_vocabulary( self.to_pylibcudf(mode="read"), vocabulary, @@ -908,7 +908,7 @@ def wordpiece_tokenize( vocabulary: plc.nvtext.wordpiece_tokenize.WordPieceVocabulary, max_words_per_row: int, ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.wordpiece_tokenize.wordpiece_tokenize( self.to_pylibcudf(mode="read"), vocabulary, @@ -918,7 +918,7 @@ def wordpiece_tokenize( @acquire_spill_lock() def detokenize(self, indices: ColumnBase, separator: plc.Scalar) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] + return type(self).from_pylibcudf( plc.nvtext.tokenize.detokenize( self.to_pylibcudf(mode="read"), indices.to_pylibcudf(mode="read"), @@ -1080,7 +1080,7 @@ def _split_record( delimiter, maxsplit, ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) def split_record(self, delimiter: plc.Scalar, maxsplit: int) -> Self: return self._split_record( @@ -1149,14 +1149,14 @@ def url_decode(self) -> Self: plc_column = plc.strings.convert.convert_urls.url_decode( self.to_pylibcudf(mode="read") ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def url_encode(self) -> Self: plc_column = plc.strings.convert.convert_urls.url_encode( self.to_pylibcudf(mode="read") ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def is_integer(self) -> NumericalColumn: @@ -1193,7 +1193,7 @@ def join_strings(self, separator: str, na_rep: str | None) -> Self: pa_scalar_to_plc_scalar(pa.scalar(separator)), pa_scalar_to_plc_scalar(pa.scalar(na_rep, type=pa.string())), ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def concatenate( @@ -1209,7 +1209,7 @@ def concatenate( pa_scalar_to_plc_scalar(pa.scalar(sep)), pa_scalar_to_plc_scalar(pa.scalar(na_rep, type=pa.string())), ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def extract(self, pattern: str, flags: int) -> dict[int, Self]: @@ -1235,7 +1235,7 @@ def contains_re(self, pattern: str, flags: int) -> Self: plc_flags_from_re_flags(flags), ), ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def str_contains(self, pattern: str | Self) -> Self: @@ -1248,7 +1248,7 @@ def str_contains(self, pattern: str | Self) -> Self: self.to_pylibcudf(mode="read"), plc_pattern, ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def like(self, pattern: str, escape: str) -> Self: @@ -1257,7 +1257,7 @@ def like(self, pattern: str, escape: str) -> Self: pa_scalar_to_plc_scalar(pa.scalar(pattern)), pa_scalar_to_plc_scalar(pa.scalar(escape)), ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def repeat_strings(self, repeats: int | ColumnBase) -> Self: @@ -1270,7 +1270,7 @@ def repeat_strings(self, repeats: int | ColumnBase) -> Self: self.to_pylibcudf(mode="read"), plc_repeats, ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def replace_re( @@ -1298,7 +1298,7 @@ def replace_re( ) else: raise ValueError("Invalid pattern and replacement types") - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def replace_str( @@ -1310,7 +1310,7 @@ def replace_str( pa_scalar_to_plc_scalar(replacement), max_replace_count, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def replace_with_backrefs(self, pattern: str, replacement: str) -> Self: @@ -1321,7 +1321,7 @@ def replace_with_backrefs(self, pattern: str, replacement: str) -> Self: ), replacement, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def slice_strings( @@ -1352,7 +1352,7 @@ def slice_strings( plc_result = plc.strings.slice.slice_strings( self.to_pylibcudf(mode="read"), plc_start, plc_stop, plc_step ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def all_characters_of_type( @@ -1378,7 +1378,7 @@ def filter_characters_of_type( pa_scalar_to_plc_scalar(pa.scalar(replacement, type=pa.string())), types_to_keep, ) - return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_column) @acquire_spill_lock() def replace_slice(self, start: int, stop: int, repl: str) -> Self: @@ -1388,7 +1388,7 @@ def replace_slice(self, start: int, stop: int, repl: str) -> Self: start, stop, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def get_json_object( @@ -1410,7 +1410,7 @@ def get_json_object( pa_scalar_to_plc_scalar(pa.scalar(json_path)), options, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def pad( @@ -1422,7 +1422,7 @@ def pad( side, fillchar, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def zfill(self, width: int) -> Self: @@ -1430,7 +1430,7 @@ def zfill(self, width: int) -> Self: self.to_pylibcudf(mode="read"), width, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def strip( @@ -1443,7 +1443,7 @@ def strip( pa.scalar(to_strip or "", type=pa.string()) ), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def wrap(self, width: int) -> Self: @@ -1451,7 +1451,7 @@ def wrap(self, width: int) -> Self: self.to_pylibcudf(mode="read"), width, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def count_re(self, pattern: str, flags: int) -> NumericalColumn: @@ -1478,7 +1478,7 @@ def findall( pat, plc_flags_from_re_flags(flags) ), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def find_multiple(self, patterns: Self) -> Self: @@ -1486,7 +1486,7 @@ def find_multiple(self, patterns: Self) -> Self: self.to_pylibcudf(mode="read"), patterns.to_pylibcudf(mode="read"), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def starts_ends_with( @@ -1518,7 +1518,7 @@ def starts_ends_with( raise TypeError( f"expected a str or tuple[str, ...], not {type(pat).__name__}" ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def find( @@ -1534,7 +1534,7 @@ def find( start, end, ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def matches_re(self, pattern: str, flags: int) -> Self: @@ -1544,14 +1544,14 @@ def matches_re(self, pattern: str, flags: int) -> Self: pattern, plc_flags_from_re_flags(flags) ), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def code_points(self) -> Self: plc_result = plc.strings.attributes.code_points( self.to_pylibcudf(mode="read"), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def translate(self, table: dict) -> Self: @@ -1559,7 +1559,7 @@ def translate(self, table: dict) -> Self: self.to_pylibcudf(mode="read"), str.maketrans(table), # type: ignore[arg-type] ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) @acquire_spill_lock() def filter_characters( @@ -1576,4 +1576,4 @@ def filter_characters( else plc.strings.translate.FilterType.REMOVE, pa_scalar_to_plc_scalar(pa.scalar(repl, type=pa.string())), ) - return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] + return type(self).from_pylibcudf(plc_result) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index ae27c97985e..49e685cd5fe 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -210,7 +210,7 @@ def _with_type_metadata( null_count=self.null_count, children=tuple( # type: ignore[arg-type] child.astype(dtype.subtype) for child in self.base_children - ), # type: ignore[arg-type] + ), ) elif isinstance(dtype, StructDtype): return StructColumn( diff --git a/python/cudf/cudf/core/column/temporal_base.py b/python/cudf/cudf/core/column/temporal_base.py index 8ae4f8b7e6e..dc3a912b3ac 100644 --- a/python/cudf/cudf/core/column/temporal_base.py +++ b/python/cudf/cudf/core/column/temporal_base.py @@ -309,7 +309,7 @@ def find_and_replace( return self.copy(deep=True) def can_cast_safely(self, to_dtype: DtypeObj) -> bool: - if to_dtype.kind == self.dtype.kind: # type: ignore[union-attr] + if to_dtype.kind == self.dtype.kind: to_res, _ = np.datetime_data(to_dtype) # call-overload must be ignored because numpy stubs only accept literal strings # for time units (e.g., "ns", "us") to allow compile-time validation, diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index dacd4e866cf..7662e755fa1 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -275,7 +275,7 @@ def sum( # Since sum isn't overridden in Numerical[Base]Column, mypy only # sees the signature from Reducible (which doesn't have the extra # parameters from ColumnBase._reduce) so we have to ignore this. - self.astype(self._UNDERLYING_DTYPE).sum( # type: ignore + self.astype(self._UNDERLYING_DTYPE).sum( # type: ignore[call-arg] skipna=skipna, min_count=min_count ), unit=self.time_unit, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ec5758ffdd5..bb668afa554 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1633,7 +1633,7 @@ def __delitem__(self, name): self._drop_column(name) @_performance_tracking - def memory_usage(self, index: bool = True, deep: bool = False) -> Series: # type: ignore[override] + def memory_usage(self, index: bool = True, deep: bool = False) -> Series: """ Return the memory usage of the DataFrame. @@ -2600,7 +2600,7 @@ def to_dict( # Special case needed to avoid converting # Series objects into pd.Series if not inspect.isclass(into): - cons = type(into) # type: ignore[assignment] + cons = type(into) if isinstance(into, defaultdict): cons = functools.partial(cons, into.default_factory) elif issubclass(into, Mapping): @@ -2846,7 +2846,7 @@ def at(self): """ return _DataFrameAtIndexer(self) - @property # type: ignore + @property @_external_only_api( "Use _column_names instead, or _data.to_pandas_index if a pandas " "index is absolutely necessary. For checking if the columns are a " @@ -2857,7 +2857,7 @@ def columns(self): """Returns a tuple of columns""" return self._data.to_pandas_index - @columns.setter # type: ignore + @columns.setter @_performance_tracking def columns(self, columns): multiindex = False @@ -3550,7 +3550,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): value = as_column(value, nan_as_null=nan_as_null) self._data.insert(name, value, loc=loc) - @property # type:ignore + @property @_performance_tracking def axes(self): """ diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 3215d95f427..b74e5a78979 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -1048,7 +1048,7 @@ def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> Self: def to_pandas(self) -> pd.IntervalDtype: if cudf.get_option("mode.pandas_compatible"): return pd.IntervalDtype( - subtype=self.subtype.numpy_dtype # type: ignore + subtype=self.subtype.numpy_dtype if is_pandas_nullable_extension_dtype(self.subtype) else self.subtype, closed=self.closed, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 60dca874a3a..7f39133ae1c 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -636,8 +636,7 @@ def to_array( col.has_nulls() for col in self._columns ): if to_dtype.kind == "b" or any( - dtype.kind == "b" # type: ignore[union-attr] - for _, dtype in self._dtypes + dtype.kind == "b" for _, dtype in self._dtypes ): if module == cupy: raise ValueError( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ce878857b35..bde1f0bf814 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1365,7 +1365,7 @@ def nth(self, n, dropna: Literal["any", "all", None] = None): """ if dropna is not None: raise NotImplementedError("dropna is not currently supported.") - self.obj["__groupbynth_order__"] = range(0, len(self.obj)) # type: ignore[index] + self.obj["__groupbynth_order__"] = range(0, len(self.obj)) # We perform another groupby here to have the grouping columns # be a part of dataframe columns. result = self.obj.groupby(self.grouping.keys).agg(lambda x: x.nth(n)) @@ -1734,7 +1734,7 @@ def _raise_invalid_type(x): # seems because unlike the builtin narrowings it only performs # narrowing in the positive case. normalized_aggs = [ - list(agg) if is_list_like(agg) else [agg] # type: ignore + list(agg) if is_list_like(agg) else [agg] # type: ignore[arg-type] for agg in aggs_per_column ] return column_names, columns, normalized_aggs diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c0ea6d7b105..7b9638c0496 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -175,7 +175,7 @@ def validate_range_arg(arg, arg_name: Literal["start", "stop", "step"]) -> int: return int(arg) -class Index(SingleColumnFrame): # type: ignore[misc] +class Index(SingleColumnFrame): """ Immutable sequence used for indexing and alignment. @@ -1909,7 +1909,7 @@ def inferred_type(self) -> str: def memory_usage(self, deep: bool = False) -> int: return self._column.memory_usage - @cached_property # type: ignore + @cached_property # type: ignore[explicit-override] @_performance_tracking def is_unique(self) -> bool: return self._column.is_unique @@ -3183,7 +3183,7 @@ def _try_reconstruct_range_index(self, index: Index) -> Self | Index: return index # Evenly spaced values can return a # RangeIndex instead of a materialized Index. - if not index._column.has_nulls(): # type: ignore[attr-defined] + if not index._column.has_nulls(): uniques = cupy.unique(cupy.diff(index.values)) if len(uniques) == 1 and (diff := uniques[0].get()) != 0: new_range = range(index[0], index[-1] + diff, diff) @@ -5237,7 +5237,7 @@ def __init__( ) if dtype: - interval_col = interval_col.astype(dtype) # type: ignore[assignment] + interval_col = interval_col.astype(dtype) SingleColumnFrame.__init__( self, ColumnAccessor({name: interval_col}, verify=False) @@ -5371,7 +5371,7 @@ def from_tuples( pidx = pd.IntervalIndex.from_tuples( data, closed=closed, name=name, copy=copy, dtype=dtype ) - return cls(pidx, name=name) # type: ignore[return-value] + return cls(pidx, name=name) def __getitem__(self, index): raise NotImplementedError( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index f6657e773ea..0714e41915a 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -266,8 +266,8 @@ class IndexedFrame(Frame): """ # mypy can't handle bound type variables as class members - _loc_indexer_type: type[_LocIndexerClass] # type: ignore - _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore + _loc_indexer_type: type[_LocIndexerClass] # type: ignore[valid-type] + _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore[valid-type] _groupby = GroupBy _resampler = _Resampler @@ -353,7 +353,7 @@ def attrs(self, value: Mapping[Hashable, Any]) -> None: self._attrs = dict(value) @classmethod - def _from_data( # type: ignore[override] + def _from_data( cls, data: MutableMapping, index: Index | None = None, @@ -2440,7 +2440,7 @@ def iloc(self): """ return self._iloc_indexer_type(self) - @property # type:ignore + @property @_performance_tracking def axes(self): """ @@ -6459,7 +6459,7 @@ def rank( if numeric_only: if isinstance(source, cudf.Series) and not is_dtype_obj_numeric( source.dtype, include_decimal=False - ): # type: ignore[attr-defined] + ): raise TypeError( "Series.rank does not allow numeric_only=True with " "non-numeric dtype." @@ -6833,7 +6833,7 @@ def _drop_rows_by_labels( level = 0 levels_index = obj.index.get_level_values(level) - if errors == "raise" and not labels.isin(levels_index).all(): # type: ignore[union-attr] + if errors == "raise" and not labels.isin(levels_index).all(): raise KeyError("One or more values not found in axis") if isinstance(level, int): @@ -6888,7 +6888,7 @@ def _drop_rows_by_labels( else: orig_index_type = obj.index.dtype - if errors == "raise" and not labels.isin(obj.index).all(): # type: ignore[union-attr] + if errors == "raise" and not labels.isin(obj.index).all(): raise KeyError("One or more values not found in axis") if isinstance(labels, ColumnBase): diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 6d0e4a3c683..49f85a96571 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -239,12 +239,12 @@ def __init__( self._name = None self.names = names - @property # type: ignore + @property # type: ignore[explicit-override] @_performance_tracking def names(self): return self._names - @names.setter # type: ignore + @names.setter @_performance_tracking def names(self, value): if value is None: @@ -370,7 +370,7 @@ def set_names( existing_names[lev] = names[i] names = existing_names - return self._set_names(names=names, inplace=inplace) # type: ignore[return-value] + return self._set_names(names=names, inplace=inplace) def _maybe_materialize_codes_and_levels(self: Self) -> Self: """ @@ -443,12 +443,12 @@ def _simple_new( mi._name = name return mi - @property # type: ignore + @property # type: ignore[explicit-override] @_performance_tracking def name(self): return self._name - @name.setter # type: ignore + @name.setter @_performance_tracking def name(self, value): self._name = value @@ -577,7 +577,7 @@ def __repr__(self) -> str: data_output = "\n".join(lines) return output_prefix + data_output - @property # type: ignore + @property @_external_only_api("Use ._codes instead") @_performance_tracking def codes(self) -> pd.core.indexes.frozen.FrozenList: @@ -608,7 +608,7 @@ def get_slice_bound(self, label, side): "get_slice_bound is not currently implemented." ) - @property # type: ignore + @property @_performance_tracking def levels(self) -> list[cudf.Index]: """ @@ -642,7 +642,7 @@ def levels(self) -> list[cudf.Index]: for idx, name in zip(self._levels, self.names, strict=True) # type: ignore[arg-type] ] - @property # type: ignore + @property # type: ignore[explicit-override] @_performance_tracking def ndim(self) -> int: """Dimension of the data. For MultiIndex ndim is always 2.""" @@ -974,7 +974,7 @@ def __eq__(self, other): ) return NotImplemented - @property # type: ignore + @property # type: ignore[explicit-override] @_performance_tracking def size(self) -> int: # The size of a MultiIndex is only dependent on the number of rows. @@ -1760,7 +1760,7 @@ def from_pandas( nan_as_null=nan_as_null, ) - @cached_property # type: ignore + @cached_property # type: ignore[explicit-override] @_performance_tracking def is_unique(self) -> bool: return len(self) == self.nunique(dropna=False) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d9f3f065b3f..d693d4dab1c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -684,7 +684,7 @@ def from_pandas(cls, s: pd.Series, nan_as_null=no_default): result = cls(s, nan_as_null=nan_as_null) return result - @property # type: ignore + @property @_performance_tracking def is_unique(self): """Return boolean if values in the object are unique. @@ -695,7 +695,7 @@ def is_unique(self): """ return self._column.is_unique - @property # type: ignore + @property @_performance_tracking def dt(self): """ @@ -740,7 +740,7 @@ def dt(self): "Can only use .dt accessor with datetimelike values" ) - @property # type: ignore + @property @_performance_tracking def hasnans(self): """ @@ -1054,7 +1054,7 @@ def to_frame(self, name: Hashable = no_default) -> DataFrame: return res @_performance_tracking - def memory_usage(self, index: bool = True, deep: bool = False) -> int: # type: ignore[override] + def memory_usage(self, index: bool = True, deep: bool = False) -> int: """ Return the memory usage of the Series. @@ -1452,31 +1452,31 @@ def _make_operands_and_index_for_binop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return operands, lhs.index, ca_attributes - @copy_docstring(CategoricalAccessor) # type: ignore + @copy_docstring(CategoricalAccessor) # type: ignore[prop-decorator] @property @_performance_tracking def cat(self): return CategoricalAccessor(parent=self) - @copy_docstring(StringMethods) # type: ignore + @copy_docstring(StringMethods) # type: ignore[prop-decorator] @property @_performance_tracking def str(self): return StringMethods(parent=self) - @copy_docstring(ListMethods) # type: ignore + @copy_docstring(ListMethods) # type: ignore[prop-decorator] @property @_performance_tracking def list(self): return ListMethods(parent=self) - @copy_docstring(StructMethods) # type: ignore + @copy_docstring(StructMethods) # type: ignore[prop-decorator] @property @_performance_tracking def struct(self): return StructMethods(parent=self) - @property # type: ignore + @property @_performance_tracking def dtypes(self): """The dtype of the Series. @@ -1545,19 +1545,19 @@ def _concat(cls, objs, axis=0, index: bool = True): return cls._from_column(col, name=name, index=result_index) - @property # type: ignore + @property @_performance_tracking def valid_count(self): """Number of non-null values""" return len(self) - self._column.null_count - @property # type: ignore + @property @_performance_tracking def null_count(self): """Number of null values""" return self._column.null_count - @property # type: ignore + @property @_performance_tracking def has_nulls(self): """ @@ -1967,7 +1967,7 @@ def to_pandas( res.attrs = self.attrs return res - @property # type: ignore + @property @_performance_tracking def data(self): """The gpu buffer for the data @@ -3948,7 +3948,7 @@ class DatetimeProperties(BaseDatelikeProperties): dtype: int16 """ - @property # type: ignore + @property @_performance_tracking def year(self) -> Series: """ @@ -3973,7 +3973,7 @@ def year(self) -> Series: """ return self._return_result_like_self(self.series._column.year) - @property # type: ignore + @property @_performance_tracking def month(self) -> Series: """ @@ -3998,7 +3998,7 @@ def month(self) -> Series: """ return self._return_result_like_self(self.series._column.month) - @property # type: ignore + @property @_performance_tracking def day(self) -> Series: """ @@ -4023,7 +4023,7 @@ def day(self) -> Series: """ return self._return_result_like_self(self.series._column.day) - @property # type: ignore + @property @_performance_tracking def hour(self) -> Series: """ @@ -4048,7 +4048,7 @@ def hour(self) -> Series: """ return self._return_result_like_self(self.series._column.hour) - @property # type: ignore + @property @_performance_tracking def minute(self) -> Series: """ @@ -4073,7 +4073,7 @@ def minute(self) -> Series: """ return self._return_result_like_self(self.series._column.minute) - @property # type: ignore + @property @_performance_tracking def second(self) -> Series: """ @@ -4098,7 +4098,7 @@ def second(self) -> Series: """ return self._return_result_like_self(self.series._column.second) - @property # type: ignore + @property @_performance_tracking def microsecond(self) -> Series: """ @@ -4130,7 +4130,7 @@ def microsecond(self) -> Series: ) * np.int32(1000) return self._return_result_like_self(micro + extra) - @property # type: ignore + @property @_performance_tracking def nanosecond(self) -> Series: """ @@ -4155,7 +4155,7 @@ def nanosecond(self) -> Series: """ return self._return_result_like_self(self.series._column.nanosecond) - @property # type: ignore + @property @_performance_tracking def weekday(self) -> Series: """ @@ -4192,7 +4192,7 @@ def weekday(self) -> Series: """ return self._return_result_like_self(self.series._column.weekday) - @property # type: ignore + @property @_performance_tracking def dayofweek(self) -> Series: """ @@ -4239,7 +4239,7 @@ def dayofweek(self) -> Series: day_of_week = dayofweek - @property # type: ignore + @property @_performance_tracking def dayofyear(self) -> Series: """ @@ -4277,7 +4277,7 @@ def dayofyear(self) -> Series: """ return self._return_result_like_self(self.series._column.day_of_year) - @property # type: ignore + @property @_performance_tracking def day_of_year(self) -> Series: """ @@ -4315,7 +4315,7 @@ def day_of_year(self) -> Series: """ return self._return_result_like_self(self.series._column.day_of_year) - @property # type: ignore + @property @_performance_tracking def is_leap_year(self) -> Series: """ @@ -4371,7 +4371,7 @@ def is_leap_year(self) -> Series: self.series._column.is_leap_year.fillna(False) ) - @property # type: ignore + @property @_performance_tracking def quarter(self) -> Series: """ @@ -4513,7 +4513,7 @@ def isocalendar(self) -> DataFrame: ca, index=self.series.index, attrs=self.series.attrs ) - @property # type: ignore + @property @_performance_tracking def is_month_start(self) -> Series: """ @@ -4523,7 +4523,7 @@ def is_month_start(self) -> Series: self.series._column.is_month_start ) - @property # type: ignore + @property @_performance_tracking def days_in_month(self) -> Series: """ @@ -4604,7 +4604,7 @@ def timetz(self): def unit(self) -> str: return self.series._column.time_unit - @property # type: ignore + @property @_performance_tracking def is_month_end(self) -> Series: """ @@ -4645,7 +4645,7 @@ def is_month_end(self) -> Series: """ return self._return_result_like_self(self.series._column.is_month_end) - @property # type: ignore + @property @_performance_tracking def is_quarter_start(self) -> Series: """ @@ -4686,7 +4686,7 @@ def is_quarter_start(self) -> Series: self.series._column.is_quarter_start ) - @property # type: ignore + @property @_performance_tracking def is_quarter_end(self) -> Series: """ @@ -4727,7 +4727,7 @@ def is_quarter_end(self) -> Series: self.series._column.is_quarter_end ) - @property # type: ignore + @property @_performance_tracking def is_year_start(self) -> Series: """ @@ -4755,7 +4755,7 @@ def is_year_start(self) -> Series: """ return self._return_result_like_self(self.series._column.is_year_start) - @property # type: ignore + @property @_performance_tracking def is_year_end(self) -> Series: """ @@ -5062,7 +5062,7 @@ class TimedeltaProperties(BaseDatelikeProperties): dtype: int64 """ - @property # type: ignore + @property @_performance_tracking def days(self) -> Series: """ @@ -5094,7 +5094,7 @@ def days(self) -> Series: """ return self._return_result_like_self(self.series._column.days) - @property # type: ignore + @property @_performance_tracking def seconds(self) -> Series: """ @@ -5133,7 +5133,7 @@ def seconds(self) -> Series: """ return self._return_result_like_self(self.series._column.seconds) - @property # type: ignore + @property @_performance_tracking def microseconds(self) -> Series: """ @@ -5165,7 +5165,7 @@ def microseconds(self) -> Series: """ return self._return_result_like_self(self.series._column.microseconds) - @property # type: ignore + @property @_performance_tracking def nanoseconds(self) -> Series: """ diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index ad6b0ca1f01..7d79548aa8d 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -75,40 +75,40 @@ def _scan( return super()._scan(op, axis=axis, skipna=skipna, *args, **kwargs) - @property # type: ignore + @property @_performance_tracking def name(self) -> Hashable: """Get the name of this object.""" return next(iter(self._column_names)) - @name.setter # type: ignore + @name.setter @_performance_tracking def name(self, value: Hashable) -> None: self._data[value] = self._data.pop(self.name) - @property # type: ignore + @property @_performance_tracking def ndim(self) -> int: """Number of dimensions of the underlying data, by definition 1.""" return 1 - @property # type: ignore + @property @_performance_tracking def shape(self) -> tuple[int]: """Get a tuple representing the dimensionality of the Index.""" return (len(self),) - @property # type: ignore + @property @_performance_tracking def _num_columns(self) -> int: return 1 - @property # type: ignore + @property @_performance_tracking def _column(self) -> ColumnBase: return next(iter(self._columns)) - @property # type: ignore + @property @_performance_tracking def values(self) -> cp.ndarray: col = self._column @@ -158,7 +158,7 @@ def to_cupy( .reshape(len(self), order="F") ) - @property # type: ignore + @property # type: ignore[explicit-override] @_performance_tracking def values_host(self) -> np.ndarray: return self._column.values_host @@ -241,7 +241,7 @@ def _to_frame(self, name: Hashable, index: Index | None) -> DataFrame: # TODO: Avoid accessing DataFrame from the top level namespace return cudf.DataFrame._from_data(ca, index=index) - @property # type: ignore + @property @_performance_tracking def is_unique(self) -> bool: """Return boolean if values in the object are unique. @@ -252,7 +252,7 @@ def is_unique(self) -> bool: """ return self._column.is_unique - @property # type: ignore + @property @_performance_tracking def is_monotonic_increasing(self) -> bool: """Return boolean if values in the object are monotonically increasing. @@ -263,7 +263,7 @@ def is_monotonic_increasing(self) -> bool: """ return self._column.is_monotonic_increasing - @property # type: ignore + @property @_performance_tracking def is_monotonic_decreasing(self) -> bool: """Return boolean if values in the object are monotonically decreasing. @@ -274,7 +274,7 @@ def is_monotonic_decreasing(self) -> bool: """ return self._column.is_monotonic_decreasing - @property # type: ignore + @property @_performance_tracking def __cuda_array_interface__(self) -> Mapping[str, Any]: # While the parent column class has a `__cuda_array_interface__` method diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 8d260a6a114..c9413f68d3d 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -254,7 +254,7 @@ def __init__( if get_option("mode.pandas_compatible"): obj = obj.nans_to_nulls() - self.obj = obj # type: ignore[assignment] + self.obj = obj self.window, self.min_periods = self._normalize_window_and_min_periods( window, min_periods diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 6f9a5ca3528..ee37264c39e 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -221,7 +221,7 @@ def read_json( } df = DataFrame._from_data(data) # TODO: _add_df_col_struct_names expects dict but receives Mapping - ioutils._add_df_col_struct_names(df, res_child_names) # type: ignore[arg-type] + ioutils._add_df_col_struct_names(df, res_child_names) return df else: table_w_meta = plc.io.json.read_json( diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 6e366d13f33..fe13e5b8413 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -222,7 +222,7 @@ def _plc_write_parquet( ) if metadata_file_path is not None: if is_list_like(metadata_file_path): - options.set_column_chunks_file_paths(metadata_file_path) # type: ignore[arg-type] + options.set_column_chunks_file_paths(metadata_file_path) else: options.set_column_chunks_file_paths([metadata_file_path]) if row_group_size_bytes is not None: diff --git a/python/cudf/cudf/pandas/_wrappers/common.py b/python/cudf/cudf/pandas/_wrappers/common.py index 163897d7c0c..7788df0e0f2 100644 --- a/python/cudf/cudf/pandas/_wrappers/common.py +++ b/python/cudf/cudf/pandas/_wrappers/common.py @@ -40,12 +40,12 @@ def arrow_array_method(self: _FastSlowProxy, *args, **kwargs): return pa.array(self._fsproxy_slow, *args, **kwargs) -@property # type: ignore +@property # type: ignore[misc] def cuda_array_interface(self: _FastSlowProxy): return self._fsproxy_fast.__cuda_array_interface__ -@property # type: ignore +@property # type: ignore[misc] def array_interface(self: _FastSlowProxy): return self._fsproxy_slow.__array_interface__ @@ -59,5 +59,5 @@ def custom_iter(self: _FastSlowProxy): # falling back to GPU for iter method. return _maybe_wrap_result( iter(self._fsproxy_slow), - None, # type: ignore + None, # type: ignore[arg-type] ) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 3e2c98a10da..d745910efbe 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -1338,8 +1338,8 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs): ) -DataFrame.eval = _df_eval_method # type: ignore -DataFrame.query = _df_query_method # type: ignore +DataFrame.eval = _df_eval_method +DataFrame.query = _df_query_method _JsonReader = make_intermediate_proxy_type( "_JsonReader", diff --git a/python/cudf/cudf/pandas/annotation.py b/python/cudf/cudf/pandas/annotation.py index a35fd96997b..9997c6a236b 100644 --- a/python/cudf/cudf/pandas/annotation.py +++ b/python/cudf/cudf/pandas/annotation.py @@ -7,7 +7,7 @@ import nvtx except ImportError: - class nvtx: # type: ignore + class nvtx: # type: ignore[no-redef] """Noop-stub with the same API as nvtx.""" push_range = lambda *args, **kwargs: None # noqa: E731 diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 45cf672f228..c188c13e1f6 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -209,7 +209,7 @@ def as_gpu_object(self): def as_cpu_object(self): return self._fsproxy_fast_to_slow() - @property # type: ignore + @property # type: ignore[misc] def _fsproxy_state(self) -> _State: return ( _State.FAST @@ -253,7 +253,7 @@ def _fsproxy_state(self) -> _State: metaclass = _FastSlowProxyMeta if metaclasses: - metaclass = types.new_class( # type: ignore + metaclass = types.new_class( # type: ignore[assignment] f"{name}_Meta", (*metaclasses, _FastSlowProxyMeta), {}, @@ -312,7 +312,7 @@ def __init__(self, *args, **kwargs): f"Cannot directly instantiate object of type {type(self)}" ) - @property # type: ignore + @property # type: ignore[misc] def _fsproxy_state(self): return ( _State.FAST @@ -715,7 +715,7 @@ class _CallableProxyMixin: """ # For wrapped callables isinstance(self, FunctionType) should return True - __class__ = types.FunctionType # type: ignore + __class__ = types.FunctionType # type: ignore[assignment] def __call__(self, *args, **kwargs) -> Any: result, _ = _fast_slow_function_call( @@ -840,7 +840,7 @@ def __get__(self, instance, owner) -> Any: if instance is not None: return _maybe_wrap_result( getattr(instance._fsproxy_slow, self._name), - None, # type: ignore + None, # type: ignore[arg-type] ) else: raise e @@ -874,7 +874,7 @@ def __get__(self, instance, owner) -> Any: if self._private: return _maybe_wrap_result( getattr(instance._fsproxy_slow, self._name), - None, # type: ignore + None, # type: ignore[arg-type] ) return _fast_slow_function_call( getattr, @@ -1387,7 +1387,7 @@ def is_proxy_instance(obj, type): } -NUMPY_TYPES: set[type[np.generic]] = set(np.sctypeDict.values()) # type: ignore[arg-type] +NUMPY_TYPES: set[type[np.generic]] = set(np.sctypeDict.values()) _SPECIAL_METHODS: set[str] = { diff --git a/python/cudf/cudf/pandas/module_accelerator.py b/python/cudf/cudf/pandas/module_accelerator.py index 3087086f154..7213e5be961 100644 --- a/python/cudf/cudf/pandas/module_accelerator.py +++ b/python/cudf/cudf/pandas/module_accelerator.py @@ -547,7 +547,7 @@ def getattr_real_or_wrapped( # disabled when the child was launched. That is a fairly rare pattern though # and we can document the limitations. # The main thread is always started, so the ident is always an int - or loader._disable_count[threading.main_thread().ident] > 0 # type: ignore + or loader._disable_count[threading.main_thread().ident] > 0 # type: ignore[index] ) if not use_real: # Only need to check the denylist if we're not turned off. diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 8fdf321c2a2..56c29a144a4 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -32,14 +32,14 @@ def null_assert_warnings(*args, **kwargs): pass -@pytest.fixture(scope="session", autouse=True) # type: ignore +@pytest.fixture(scope="session", autouse=True) def patch_testing_functions(): tm.assert_produces_warning = null_assert_warnings # noqa: F821 pytest.raises = replace_kwargs({"match": None})(pytest.raises) # Dictionary to store function call counts -function_call_counts = defaultdict(lambda: defaultdict(int)) # type: ignore +function_call_counts = defaultdict(lambda: defaultdict(int)) # type: ignore[var-annotated] # The specific functions to track FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"} diff --git a/python/cudf_polars/cudf_polars/containers/datatype.py b/python/cudf_polars/cudf_polars/containers/datatype.py index 8290efccbf9..e2a1cfcd911 100644 --- a/python/cudf_polars/cudf_polars/containers/datatype.py +++ b/python/cudf_polars/cudf_polars/containers/datatype.py @@ -119,9 +119,9 @@ def children(self) -> list[DataType]: # these type ignores are needed because the type checker doesn't # see that these equality checks passing imply a specific type for each child field. if self.plc_type.id() == plc.TypeId.STRUCT: - return [DataType(field.dtype) for field in self.polars_type.fields] # type: ignore[attr-defined] + return [DataType(field.dtype) for field in self.polars_type.fields] elif self.plc_type.id() == plc.TypeId.LIST: - return [DataType(self.polars_type.inner)] # type: ignore[attr-defined] + return [DataType(self.polars_type.inner)] return [] def scale(self) -> int: diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index b23fb132217..a40afc644ee 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -42,9 +42,9 @@ def _dtypes_for_json_decode(dtype: DataType) -> JsonDecodeType: return [ (field.name, child.plc_type, _dtypes_for_json_decode(child)) for field, child in zip( - dtype.polars_type.fields, # type: ignore[attr-defined] + dtype.polars_type.fields, dtype.children, - strict=True, # type: ignore[attr-defined] + strict=True, ) ] else: @@ -853,13 +853,13 @@ def do_evaluate( # TODO: Maybe accept a string scalar in # cudf::strings::pad to avoid DtoH transfer # See https://github.com/rapidsai/cudf/issues/20202 for we type ignore - width: int = width_col.obj.to_scalar().to_py() # type: ignore[assignment, no-redef] + width: int = width_col.obj.to_scalar().to_py() # type: ignore[no-redef] return Column( plc.strings.padding.pad( column.obj, width, # type: ignore[arg-type] plc.strings.SideType.LEFT, - char, # type: ignore[arg-type] + char, ), dtype=self.dtype, ) @@ -872,13 +872,13 @@ def do_evaluate( (char,) = self.options # TODO: Maybe accept a string scalar in # cudf::strings::pad to avoid DtoH transfer - width: int = width_col.obj.to_scalar().to_py() # type: ignore[assignment, no-redef] + width: int = width_col.obj.to_scalar().to_py() # type: ignore[no-redef] return Column( plc.strings.padding.pad( column.obj, width, # type: ignore[arg-type] plc.strings.SideType.RIGHT, - char, # type: ignore[arg-type] + char, ), dtype=self.dtype, ) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/struct.py b/python/cudf_polars/cudf_polars/dsl/expressions/struct.py index 5d3ff47da97..b9749a1bbba 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/struct.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/struct.py @@ -93,7 +93,7 @@ def do_evaluate( field_index = next( ( i - for i, field in enumerate(self.children[0].dtype.polars_type.fields) # type: ignore[attr-defined] + for i, field in enumerate(self.children[0].dtype.polars_type.fields) if field.name == self.options[0] ), None, @@ -113,7 +113,7 @@ def do_evaluate( table, [ (field.name, []) - for field in self.children[0].dtype.polars_type.fields # type: ignore[attr-defined] + for field in self.children[0].dtype.polars_type.fields ], ) options = ( diff --git a/python/cudf_polars/cudf_polars/dsl/to_ast.py b/python/cudf_polars/cudf_polars/dsl/to_ast.py index 2e053a34f8b..320868a6c25 100644 --- a/python/cudf_polars/cudf_polars/dsl/to_ast.py +++ b/python/cudf_polars/cudf_polars/dsl/to_ast.py @@ -224,7 +224,7 @@ def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression: # # the type-ignore is safe because the for plc.TypeID.LIST, we know # we have a polars.List type, which has an inner attribute. - plc_dtype = DataType(haystack.dtype.polars_type.inner).plc_type # type: ignore[attr-defined] + plc_dtype = DataType(haystack.dtype.polars_type.inner).plc_type else: plc_dtype = haystack.dtype.plc_type # pragma: no cover values = ( diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py index 6041d9a9d79..88dbca113e7 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py @@ -58,7 +58,7 @@ def valid_query(name: str) -> bool: class PDSDSQueriesMeta(type): """Metaclass used for query lookup.""" - def __getattr__(cls, name: str): # type: ignore + def __getattr__(cls, name: str): # type: ignore[no-untyped-def] """Query lookup.""" if valid_query(name): q_num = int(name[1:]) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index 68f06e105b8..c7b304c01c9 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -468,7 +468,7 @@ def print_query_plan( ) -def initialize_dask_cluster(run_config: RunConfig, args: argparse.Namespace): # type: ignore +def initialize_dask_cluster(run_config: RunConfig, args: argparse.Namespace): # type: ignore[no-untyped-def] """Initialize a Dask distributed cluster.""" if run_config.cluster != "distributed": return None @@ -849,7 +849,7 @@ def run_polars( validation_failures: list[int] = [] query_failures: list[tuple[int, int]] = [] - client = initialize_dask_cluster(run_config, args) # type: ignore + client = initialize_dask_cluster(run_config, args) records: defaultdict[int, list[Record]] = defaultdict(list) engine: pl.GPUEngine | None = None @@ -892,8 +892,8 @@ def run_polars( gather_shuffle_statistics, ) - shuffle_stats = gather_shuffle_statistics(client) # type: ignore[arg-type] - clear_shuffle_statistics(client) # type: ignore[arg-type] + shuffle_stats = gather_shuffle_statistics(client) + clear_shuffle_statistics(client) else: shuffle_stats = None diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 7b1497e6880..0db810cd0d6 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -111,8 +111,8 @@ def assert_gpu_result_equal( # These keywords are correct, but mypy doesn't see that. # the 'misc' is for 'error: Keywords must be strings' - expect = lazydf.collect(**final_polars_collect_kwargs) # type: ignore[call-overload,misc] - got = lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[call-overload,misc] + expect = lazydf.collect(**final_polars_collect_kwargs) + got = lazydf.collect(**final_cudf_collect_kwargs, engine=engine) assert_kwargs_bool: dict[str, bool] = { "check_row_order": check_row_order, @@ -135,7 +135,7 @@ def assert_gpu_result_equal( expect, got, **assert_kwargs_bool, - **tol_kwargs, # type: ignore[arg-type] + **tol_kwargs, ) @@ -292,7 +292,7 @@ def assert_collect_raises( ) try: - lazydf.collect(**final_polars_collect_kwargs) # type: ignore[call-overload,misc] + lazydf.collect(**final_polars_collect_kwargs) except polars_except: pass except Exception as e: @@ -305,7 +305,7 @@ def assert_collect_raises( engine = GPUEngine(raise_on_fail=True) try: - lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[call-overload,misc] + lazydf.collect(**final_cudf_collect_kwargs, engine=engine) except cudf_except: pass except Exception as e: diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 619901821d8..d8094d0e129 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -57,11 +57,11 @@ def pytest_configure(config: pytest.Config) -> None: collect = polars.LazyFrame.collect engine = polars.GPUEngine(raise_on_fail=no_fallback) # https://github.com/python/mypy/issues/2427 - polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign,assignment] + polars.LazyFrame.collect = partialmethod(collect, engine=engine) elif executor == "in-memory": collect = polars.LazyFrame.collect engine = polars.GPUEngine(executor=executor) - polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign,assignment] + polars.LazyFrame.collect = partialmethod(collect, engine=engine) elif executor == "streaming" and blocksize_mode == "small": executor_options: dict[str, Any] = {} executor_options["max_rows_per_partition"] = 4 @@ -70,7 +70,7 @@ def pytest_configure(config: pytest.Config) -> None: executor_options["fallback_mode"] = StreamingFallbackMode.SILENT collect = polars.LazyFrame.collect engine = polars.GPUEngine(executor=executor, executor_options=executor_options) - polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign,assignment] + polars.LazyFrame.collect = partialmethod(collect, engine=engine) else: # run with streaming executor and default blocksize polars.Config.set_engine_affinity("gpu") diff --git a/python/cudf_polars/tests/dsl/test_traversal.py b/python/cudf_polars/tests/dsl/test_traversal.py index a3cae36c845..05f2a07c2fc 100644 --- a/python/cudf_polars/tests/dsl/test_traversal.py +++ b/python/cudf_polars/tests/dsl/test_traversal.py @@ -237,7 +237,7 @@ def _transform(e: expr.Expr, fn: ExprTransformer) -> expr.Expr: @_transform.register def _(e: expr.Col, fn: ExprTransformer): # We've added an extra key to the state, so ignore this type error. - mapping = fn.state["mapping"] # type: ignore + mapping = fn.state["mapping"] # type: ignore[typeddict-item] if e.name in mapping: return type(e)(e.dtype, mapping[e.name]) return e diff --git a/python/cudf_polars/tests/experimental/test_shuffle.py b/python/cudf_polars/tests/experimental/test_shuffle.py index 014b6c7582b..90565b0be3d 100644 --- a/python/cudf_polars/tests/experimental/test_shuffle.py +++ b/python/cudf_polars/tests/experimental/test_shuffle.py @@ -72,5 +72,5 @@ def test_hash_shuffle(df: pl.LazyFrame, engine: pl.GPUEngine) -> None: result = evaluate_streaming(qir3, options, context=IRExecutionContext()).to_polars() # ignore is for polars' EngineType, which isn't publicly exported. # https://github.com/pola-rs/polars/issues/17420 - expect = df.collect(engine="cpu") # type: ignore[call-overload] + expect = df.collect(engine="cpu") assert_frame_equal(result, expect, check_row_order=False) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 6d8befef718..16d0097450e 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -490,7 +490,7 @@ def test_validate_parquet_options(option: str) -> None: def test_validate_raise_on_fail() -> None: with pytest.raises(TypeError, match="'raise_on_fail' must be"): ConfigOptions.from_polars_engine( - pl.GPUEngine(executor="streaming", raise_on_fail=object()) # type: ignore[arg-type] + pl.GPUEngine(executor="streaming", raise_on_fail=object()) ) diff --git a/python/cudf_polars/tests/test_window_functions.py b/python/cudf_polars/tests/test_window_functions.py index 284dec13ffb..66cedb13488 100644 --- a/python/cudf_polars/tests/test_window_functions.py +++ b/python/cudf_polars/tests/test_window_functions.py @@ -106,7 +106,7 @@ def test_over_mapping_strategy(df: pl.LazyFrame, mapping_strategy: str): # ignore is for polars' WindowMappingStrategy, which isn't publicly exported. # https://github.com/pola-rs/polars/issues/17420 q = df.with_columns( - [pl.col("b").rank().over(pl.col("a"), mapping_strategy=mapping_strategy)] # type: ignore[arg-type] + [pl.col("b").rank().over(pl.col("a"), mapping_strategy=mapping_strategy)] ) if not POLARS_VERSION_LT_132 and mapping_strategy == "group_to_rows": assert_gpu_result_equal(q) @@ -143,6 +143,6 @@ def test_rolling_closed(df: pl.LazyFrame, closed: str): # ignore is for polars' ClosedInterval, which isn't publicly exported. # https://github.com/pola-rs/polars/issues/17420 query = df.with_columns( - [pl.col("b").sum().rolling(period="2d", index_column="date", closed=closed)] # type: ignore[arg-type] + [pl.col("b").sum().rolling(period="2d", index_column="date", closed=closed)] ) assert_gpu_result_equal(query)