diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c443dbad89..f976326ed4 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,7 +32,7 @@ jobs: - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=80 --constructors=pandas,pyarrow,polars[eager],polars[lazy] + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 --constructors=pandas,pyarrow,polars[eager],polars[lazy] pytest-windows: strategy: @@ -53,12 +53,12 @@ jobs: cache-dependency-glob: "pyproject.toml" - name: install-reqs # we are not testing pyspark on Windows here because it is very slow - run: uv pip install -e ".[dask, modin]" --group core-tests --group extra --system + run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra --system - name: show-deps run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95 --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb,sqlframe,ibis --durations=30 pytest-full-coverage: strategy: @@ -83,18 +83,13 @@ jobs: cache-suffix: ${{ matrix.python-version }} cache-dependency-glob: "pyproject.toml" - name: install-reqs - run: uv pip install -e ".[modin, dask]" --group core-tests --group extra --system + run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra --system - name: install duckdb nightly run: uv pip install -U --pre duckdb --system - - name: install ibis - run: uv pip install -e ".[ibis]" --system - # Ibis puts upper bounds on dependencies, and requires Python3.10+, - # which messes with other dependencies on lower Python versions - if: matrix.python-version 
== '3.11' - name: show-deps run: uv pip freeze - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb,sqlframe --durations=30 + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,modin[pyarrow],polars[eager],polars[lazy],dask,duckdb,sqlframe,ibis --durations=30 - name: Run doctests # reprs differ between versions, so we only run doctests on the latest Python if: matrix.python-version == '3.13' diff --git a/.github/workflows/random_ci_pytest.yml b/.github/workflows/random_ci_pytest.yml index 5c195fb8b7..9dcbf895e1 100644 --- a/.github/workflows/random_ci_pytest.yml +++ b/.github/workflows/random_ci_pytest.yml @@ -35,5 +35,5 @@ jobs: run: uv pip freeze - name: Run pytest run: | - pytest tests --cov=narwhals --cov=tests --cov-fail-under=80 \ + pytest tests --cov=narwhals --cov=tests --cov-fail-under=75 \ --constructors=pandas,pyarrow,polars[eager],polars[lazy] diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index cd089a25ae..922a69c12a 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -876,7 +876,7 @@ def struct(self) -> EagerExprStructNamespace[Self]: return EagerExprStructNamespace(self) -class LazyExpr( # type: ignore[misc] +class LazyExpr( CompliantExpr[CompliantLazyFrameT, NativeExprT], Protocol38[CompliantLazyFrameT, NativeExprT], ): @@ -909,6 +909,9 @@ def fn(names: Sequence[str]) -> Sequence[str]: return self._with_alias_output_names(fn) + @classmethod + def _alias_native(cls, expr: NativeExprT, name: str, /) -> NativeExprT: ... 
+ @property def name(self) -> LazyExprNameNamespace[Self]: return LazyExprNameNamespace(self) diff --git a/narwhals/_compliant/group_by.py b/narwhals/_compliant/group_by.py index b96211a7ab..0216fb12a3 100644 --- a/narwhals/_compliant/group_by.py +++ b/narwhals/_compliant/group_by.py @@ -227,10 +227,10 @@ def _evaluate_expr(self, expr: LazyExprT_contra, /) -> Iterator[NativeExprT_co]: exclude = {*self._keys, *self._output_key_names} for native_expr, name, alias in zip(native_exprs, output_names, aliases): if name not in exclude: - yield native_expr.alias(alias) + yield expr._alias_native(native_expr, alias) else: for native_expr, alias in zip(native_exprs, aliases): - yield native_expr.alias(alias) + yield expr._alias_native(native_expr, alias) def _evaluate_exprs( self, exprs: Iterable[LazyExprT_contra], / diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 5f28e8e63b..4dd0830897 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -129,7 +129,11 @@ def matches(self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: p = re.compile(pattern) def series(df: FrameT) -> Sequence[SeriesOrExprT]: - if is_compliant_dataframe(df) and not self._implementation.is_duckdb(): + if ( + is_compliant_dataframe(df) + and not self._implementation.is_duckdb() + and not self._implementation.is_ibis() + ): return [df.get_column(col) for col in df.columns if p.search(col)] return [ser for ser, name in self._iter_columns_names(df) if p.search(name)] diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 61769f70ae..5b5cc1e2fb 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -674,3 +674,4 @@ def dt(self) -> DaskExprDateTimeNamespace: list = not_implemented() # pyright: ignore[reportAssignmentType] struct = not_implemented() # pyright: ignore[reportAssignmentType] rank = not_implemented() # pyright: ignore[reportAssignmentType] + _alias_native = not_implemented() diff --git 
a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index e6cf02c76d..eb9f906794 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -261,6 +261,10 @@ def _with_window_function(self, window_function: WindowFunction) -> Self: result._window_function = window_function return result + @classmethod + def _alias_native(cls, expr: duckdb.Expression, name: str) -> duckdb.Expression: + return expr.alias(name) + def __and__(self, other: DuckDBExpr) -> Self: return self._with_callable(lambda _input, other: _input & other, other=other) diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py index f3df793fc8..379e32e7f2 100644 --- a/narwhals/_ibis/dataframe.py +++ b/narwhals/_ibis/dataframe.py @@ -1,14 +1,28 @@ from __future__ import annotations -from functools import lru_cache +import operator from typing import TYPE_CHECKING from typing import Any +from typing import Iterable +from typing import Iterator +from typing import Literal +from typing import Mapping +from typing import Sequence +from typing import cast -import ibis.selectors as s +import ibis +import ibis.expr.types as ir -from narwhals.dependencies import get_ibis +from narwhals._ibis.utils import evaluate_exprs +from narwhals._ibis.utils import native_to_narwhals_dtype +from narwhals.exceptions import ColumnNotFoundError +from narwhals.exceptions import InvalidOperationError +from narwhals.typing import CompliantLazyFrame from narwhals.utils import Implementation from narwhals.utils import Version +from narwhals.utils import not_implemented +from narwhals.utils import parse_columns_to_drop +from narwhals.utils import parse_version from narwhals.utils import validate_backend_version if TYPE_CHECKING: @@ -16,146 +30,399 @@ import pandas as pd import pyarrow as pa + from ibis.expr.operations import Binary from typing_extensions import Self + from typing_extensions import TypeAlias + from typing_extensions import TypeIs + from narwhals._compliant.typing import 
CompliantDataFrameAny + from narwhals._ibis.expr import IbisExpr + from narwhals._ibis.group_by import IbisGroupBy + from narwhals._ibis.namespace import IbisNamespace from narwhals._ibis.series import IbisInterchangeSeries + from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType + from narwhals.stable.v1 import DataFrame as DataFrameV1 + from narwhals.typing import AsofJoinStrategy + from narwhals.typing import JoinStrategy + from narwhals.typing import LazyUniqueKeepStrategy + from narwhals.utils import _FullContext - -@lru_cache(maxsize=16) -def native_to_narwhals_dtype(ibis_dtype: Any, version: Version) -> DType: # noqa: C901, PLR0911, PLR0912 - dtypes = version.dtypes - if ibis_dtype.is_int64(): - return dtypes.Int64() - if ibis_dtype.is_int32(): - return dtypes.Int32() - if ibis_dtype.is_int16(): - return dtypes.Int16() - if ibis_dtype.is_int8(): - return dtypes.Int8() - if ibis_dtype.is_uint64(): - return dtypes.UInt64() - if ibis_dtype.is_uint32(): - return dtypes.UInt32() - if ibis_dtype.is_uint16(): - return dtypes.UInt16() - if ibis_dtype.is_uint8(): - return dtypes.UInt8() - if ibis_dtype.is_boolean(): - return dtypes.Boolean() - if ibis_dtype.is_float64(): - return dtypes.Float64() - if ibis_dtype.is_float32(): - return dtypes.Float32() - if ibis_dtype.is_string(): - return dtypes.String() - if ibis_dtype.is_date(): - return dtypes.Date() - if ibis_dtype.is_timestamp(): - return dtypes.Datetime() - if ibis_dtype.is_array(): - return dtypes.List(native_to_narwhals_dtype(ibis_dtype.value_type, version)) - if ibis_dtype.is_struct(): - return dtypes.Struct( - [ - dtypes.Field( - ibis_dtype_name, - native_to_narwhals_dtype(ibis_dtype_field, version), - ) - for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items() - ] - ) - if ibis_dtype.is_decimal(): # pragma: no cover - # TODO(unassigned): cover this - return dtypes.Decimal() - if ibis_dtype.is_time(): - return dtypes.Time() - if ibis_dtype.is_binary(): - return dtypes.Binary() - 
return dtypes.Unknown() # pragma: no cover + JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]" -class IbisLazyFrame: +class IbisLazyFrame( + CompliantLazyFrame[ + "IbisExpr", "ir.Table", "LazyFrame[ir.Table] | DataFrameV1[ir.Table]" + ] +): _implementation = Implementation.IBIS def __init__( - self, df: Any, *, backend_version: tuple[int, ...], version: Version + self, df: ir.Table, *, backend_version: tuple[int, ...], version: Version ) -> None: - self._native_frame = df + self._native_frame: ir.Table = df self._version = version self._backend_version = backend_version + self._cached_schema: dict[str, DType] | None = None validate_backend_version(self._implementation, self._backend_version) - def __narwhals_dataframe__(self) -> Any: # pragma: no cover + @staticmethod + def _is_native(obj: ir.Table | Any) -> TypeIs[ir.Table]: + return isinstance(obj, ir.Table) + + @classmethod + def from_native(cls, data: ir.Table, /, *, context: _FullContext) -> Self: + return cls( + data, backend_version=context._backend_version, version=context._version + ) + + def to_narwhals(self) -> LazyFrame[ir.Table] | DataFrameV1[ir.Table]: + if self._version is Version.MAIN: + return self._version.lazyframe(self, level="lazy") + + from narwhals.stable.v1 import DataFrame as DataFrameV1 + + return DataFrameV1(self, level="interchange") + + def __narwhals_dataframe__(self) -> Self: # pragma: no cover # Keep around for backcompat. 
if self._version is not Version.V1: msg = "__narwhals_dataframe__ is not implemented for IbisLazyFrame" raise AttributeError(msg) return self - def __narwhals_lazyframe__(self) -> Any: + def __narwhals_lazyframe__(self) -> Self: return self def __native_namespace__(self) -> ModuleType: - return get_ibis() + return ibis + + def __narwhals_namespace__(self) -> IbisNamespace: + from narwhals._ibis.namespace import IbisNamespace + + return IbisNamespace(backend_version=self._backend_version, version=self._version) def get_column(self, name: str) -> IbisInterchangeSeries: from narwhals._ibis.series import IbisInterchangeSeries - return IbisInterchangeSeries(self._native_frame[name], version=self._version) + return IbisInterchangeSeries(self.native.select(name), version=self._version) - def to_pandas(self) -> pd.DataFrame: - return self._native_frame.to_pandas() + def _iter_columns(self) -> Iterator[ir.Expr]: + for name in self.columns: + yield self.native[name] - def to_arrow(self) -> pa.Table: - return self._native_frame.to_pyarrow() + def collect( + self, backend: ModuleType | Implementation | str | None, **kwargs: Any + ) -> CompliantDataFrameAny: + if backend is None or backend is Implementation.PYARROW: + import pyarrow as pa # ignore-banned-import + + from narwhals._arrow.dataframe import ArrowDataFrame + + return ArrowDataFrame( + self.native.to_pyarrow(), + backend_version=parse_version(pa), + version=self._version, + validate_column_names=True, + ) + + if backend is Implementation.PANDAS: + import pandas as pd # ignore-banned-import + + from narwhals._pandas_like.dataframe import PandasLikeDataFrame + + return PandasLikeDataFrame( + self.native.to_pandas(), + implementation=Implementation.PANDAS, + backend_version=parse_version(pd), + version=self._version, + validate_column_names=True, + ) + + if backend is Implementation.POLARS: + import polars as pl # ignore-banned-import + + from narwhals._polars.dataframe import PolarsDataFrame + + return PolarsDataFrame( 
+ self.native.to_polars(), + backend_version=parse_version(pl), + version=self._version, + ) + + msg = f"Unsupported `backend` value: {backend}" # pragma: no cover + raise ValueError(msg) # pragma: no cover + + def head(self, n: int) -> Self: + return self._with_native(self.native.head(n)) def simple_select(self, *column_names: str) -> Self: - return self._with_native(self._native_frame.select(s.cols(*column_names))) + return self._with_native(self.native.select(*column_names)) - def aggregate(self, *exprs: Any) -> Self: - raise NotImplementedError + def aggregate(self, *exprs: IbisExpr) -> Self: + selection = [ + cast("ir.Scalar", val.name(name)) + for name, val in evaluate_exprs(self, *exprs) + ] + return self._with_native(self.native.aggregate(selection)) - def select( - self, - *exprs: Any, - ) -> Self: - msg = ( - "`select`-ing not by name is not supported for Ibis backend.\n\n" - "If you would like to see this kind of object better supported in " - "Narwhals, please open a feature request " - "at https://github.com/narwhals-dev/narwhals/issues." - ) - raise NotImplementedError(msg) + def select(self, *exprs: IbisExpr) -> Self: + selection = [val.name(name) for name, val in evaluate_exprs(self, *exprs)] + if not selection: + msg = "At least one expression must be provided to `select` with the Ibis backend." + raise ValueError(msg) + + t = self.native.select(*selection) + return self._with_native(t) + + def drop(self, columns: Sequence[str], *, strict: bool) -> Self: + columns_to_drop = parse_columns_to_drop(self, columns=columns, strict=strict) + selection = (col for col in self.columns if col not in columns_to_drop) + return self._with_native(self.native.select(*selection)) + + def lazy(self, *, backend: Implementation | None = None) -> Self: + # The `backend` argument has no effect but we keep it here for + # backwards compatibility because in `narwhals.stable.v1` + # function `.from_native()` will return a DataFrame for Ibis. 
- def __getattr__(self, attr: str) -> Any: - if attr == "schema": - return { - column_name: native_to_narwhals_dtype(ibis_dtype, self._version) - for column_name, ibis_dtype in self._native_frame.schema().items() + if backend is not None: # pragma: no cover + msg = "`backend` argument is not supported for Ibis" + raise ValueError(msg) + return self + + def with_columns(self, *exprs: IbisExpr) -> Self: + new_columns_map = dict(evaluate_exprs(self, *exprs)) + return self._with_native(self.native.mutate(**new_columns_map)) + + def filter(self, predicate: IbisExpr) -> Self: + # `[0]` is safe as the predicate's expression only returns a single column + mask = cast("ir.BooleanValue", predicate(self)[0]) + return self._with_native(self.native.filter(mask)) + + @property + def schema(self) -> dict[str, DType]: + if self._cached_schema is None: + # Note: prefer `self._cached_schema` over `functools.cached_property` + # due to Python3.13 failures. + self._cached_schema = { + name: native_to_narwhals_dtype(dtype, self._version) + for name, dtype in self.native.schema().fields.items() } - elif attr == "columns": - return list(self._native_frame.columns) - msg = ( - f"Attribute {attr} is not supported for metadata-only dataframes.\n\n" - "If you would like to see this kind of object better supported in " - "Narwhals, please open a feature request " - "at https://github.com/narwhals-dev/narwhals/issues." 
- ) - raise NotImplementedError(msg) + return self._cached_schema + + @property + def columns(self) -> list[str]: + return list(self.native.columns) + + def to_pandas(self) -> pd.DataFrame: + # only if version is v1, keep around for backcompat + import pandas as pd # ignore-banned-import() + + if parse_version(pd) >= (1, 0, 0): + return self.native.to_pandas() + else: # pragma: no cover + msg = f"Conversion to pandas requires pandas>=1.0.0, found {pd.__version__}" + raise NotImplementedError(msg) + + def to_arrow(self) -> pa.Table: + # only if version is v1, keep around for backcompat + return self.native.to_pyarrow() def _with_version(self, version: Version) -> Self: return self.__class__( - self._native_frame, version=version, backend_version=self._backend_version + self.native, version=version, backend_version=self._backend_version ) - def _with_native(self, df: Any) -> Self: + def _with_native(self, df: ir.Table) -> Self: return self.__class__( - df, version=self._version, backend_version=self._backend_version + df, backend_version=self._backend_version, version=self._version ) + def group_by( + self, keys: Sequence[str] | Sequence[IbisExpr], *, drop_null_keys: bool + ) -> IbisGroupBy: + from narwhals._ibis.group_by import IbisGroupBy + + return IbisGroupBy(self, keys, drop_null_keys=drop_null_keys) + + def rename(self, mapping: Mapping[str, str]) -> Self: + def _rename(col: str) -> str: + return mapping.get(col, col) + + return self._with_native(self.native.rename(_rename)) + + @staticmethod + def _join_drop_duplicate_columns(df: ir.Table, columns: Iterable[str], /) -> ir.Table: + """Ibis adds a suffix to the right table col, even when it matches the left during a join.""" + duplicates = set(df.columns).intersection(columns) + return df.drop(*duplicates) if duplicates else df + + def join( + self, + other: Self, + *, + how: JoinStrategy, + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, + suffix: str, + ) -> Self: + how_native = "outer" if how 
== "full" else how + rname = "{name}" + suffix + if other == self: + # Ibis does not support self-references unless created as a view + other = self._with_native(other.native.view()) + if how_native == "cross": + joined = self.native.join(other.native, how=how_native, rname=rname) + return self._with_native(joined) + # help mypy + assert left_on is not None # noqa: S101 + assert right_on is not None # noqa: S101 + predicates = self._convert_predicates(other, left_on, right_on) + joined = self.native.join(other.native, predicates, how=how_native, rname=rname) + if how_native == "left": + right_names = (n + suffix for n in right_on) + joined = self._join_drop_duplicate_columns(joined, right_names) + it = (cast("Binary", p.op()) for p in predicates if not isinstance(p, str)) + to_drop = [] + for pred in it: + right = pred.right.name + # Mirrors how polars works. + if right not in self.columns and pred.left.name != right: + to_drop.append(right) + if to_drop: + joined = joined.drop(*to_drop) + return self._with_native(joined) + + def join_asof( + self, + other: Self, + *, + left_on: str | None, + right_on: str | None, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, + strategy: AsofJoinStrategy, + suffix: str, + ) -> Self: + rname = "{name}" + suffix + strategy_op = {"backward": operator.ge, "forward": operator.le} + predicates: JoinPredicates = [] + # help mypy + assert left_on is not None # noqa: S101 + assert right_on is not None # noqa: S101 + if op := strategy_op.get(strategy): + on: ir.BooleanColumn = op(self.native[left_on], other.native[right_on]) + else: + msg = "Only `backward` and `forward` strategies are currently supported for Ibis" + raise NotImplementedError(msg) + if by_left is not None and by_right is not None: + predicates = self._convert_predicates(other, by_left, by_right) + joined = self.native.asof_join(other.native, on, predicates, rname=rname) + joined = self._join_drop_duplicate_columns(joined, [right_on + suffix]) + if 
by_right is not None: + right_names = (n + suffix for n in by_right) + joined = self._join_drop_duplicate_columns(joined, right_names) + return self._with_native(joined) + + def _convert_predicates( + self, other: Self, left_on: Sequence[str], right_on: Sequence[str] + ) -> JoinPredicates: + if left_on == right_on: + return left_on + return [ + cast("ir.BooleanColumn", (self.native[left] == other.native[right])) + for left, right in zip(left_on, right_on) + ] + def collect_schema(self) -> dict[str, DType]: return { - column_name: native_to_narwhals_dtype(ibis_dtype, self._version) - for column_name, ibis_dtype in self._native_frame.schema().items() + name: native_to_narwhals_dtype(dtype, self._version) + for name, dtype in self.native.schema().fields.items() } + + def unique( + self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy + ) -> Self: + if subset_ := subset if keep == "any" else (subset or self.columns): + # Sanitise input + if any(x not in self.columns for x in subset_): + msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}." 
+ raise ColumnNotFoundError(msg) + + mapped_keep: dict[str, Literal["first"] | None] = { + "any": "first", + "none": None, + } + to_keep = mapped_keep[keep] + return self._with_native(self.native.distinct(on=subset_, keep=to_keep)) + return self._with_native(self.native.distinct(on=subset)) + + def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self: + if isinstance(descending, bool): + descending = [descending for _ in range(len(by))] + + sort_cols = [] + + for i in range(len(by)): + direction_fn = ibis.desc if descending[i] else ibis.asc + col = direction_fn(by[i], nulls_first=not nulls_last) + sort_cols.append(cast("ir.Column", col)) + + return self._with_native(self.native.order_by(*sort_cols)) + + def drop_nulls(self, subset: Sequence[str] | None) -> Self: + subset_ = subset if subset is not None else self.columns + return self._with_native(self.native.drop_null(subset_)) + + def explode(self, columns: Sequence[str]) -> Self: + dtypes = self._version.dtypes + schema = self.collect_schema() + for col in columns: + dtype = schema[col] + + if dtype != dtypes.List: + msg = ( + f"`explode` operation not supported for dtype `{dtype}`, " + "expected List type" + ) + raise InvalidOperationError(msg) + + if len(columns) != 1: + msg = ( + "Exploding on multiple columns is not supported with Ibis backend since " + "we cannot guarantee that the exploded columns have matching element counts." 
+ ) + raise NotImplementedError(msg) + + return self._with_native(self.native.unnest(columns[0], keep_empty=True)) + + def unpivot( + self, + on: Sequence[str] | None, + index: Sequence[str] | None, + variable_name: str, + value_name: str, + ) -> Self: + import ibis.selectors as s + + index_: Sequence[str] = [] if index is None else index + on_: Sequence[str] = ( + [c for c in self.columns if c not in index_] if on is None else on + ) + + # Discard columns not in the index + final_columns = list(dict.fromkeys([*index_, variable_name, value_name])) + + unpivoted = self.native.pivot_longer( + s.cols(*on_), names_to=variable_name, values_to=value_name + ) + return self._with_native(unpivoted.select(*final_columns)) + + gather_every = not_implemented.deprecated( + "`LazyFrame.gather_every` is deprecated and will be removed in a future version." + ) + tail = not_implemented.deprecated( + "`LazyFrame.tail` is deprecated and will be removed in a future version." + ) + with_row_index = not_implemented() diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py new file mode 100644 index 0000000000..ce4cd73115 --- /dev/null +++ b/narwhals/_ibis/expr.py @@ -0,0 +1,671 @@ +from __future__ import annotations + +import operator +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable +from typing import Literal +from typing import Sequence +from typing import cast + +import ibis +from ibis import _ as col + +from narwhals._compliant import LazyExpr +from narwhals._expression_parsing import ExprKind +from narwhals._ibis.expr_dt import IbisExprDateTimeNamespace +from narwhals._ibis.expr_list import IbisExprListNamespace +from narwhals._ibis.expr_str import IbisExprStringNamespace +from narwhals._ibis.expr_struct import IbisExprStructNamespace +from narwhals._ibis.utils import WindowInputs +from narwhals._ibis.utils import is_floating +from narwhals._ibis.utils import lit +from narwhals._ibis.utils import narwhals_to_native_dtype +from 
narwhals.utils import Implementation +from narwhals.utils import not_implemented + +if TYPE_CHECKING: + import ibis.expr.types as ir + from typing_extensions import Self + + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries + from narwhals._expression_parsing import ExprMetadata + from narwhals._ibis.dataframe import IbisLazyFrame + from narwhals._ibis.namespace import IbisNamespace + from narwhals._ibis.typing import ExprT + from narwhals._ibis.typing import WindowFunction + from narwhals.dtypes import DType + from narwhals.typing import RankMethod + from narwhals.typing import RollingInterpolationMethod + from narwhals.utils import Version + from narwhals.utils import _FullContext + + +class IbisExpr(LazyExpr["IbisLazyFrame", "ir.Column"]): + _implementation = Implementation.IBIS + + def __init__( + self, + call: EvalSeries[IbisLazyFrame, ir.Value], + *, + evaluate_output_names: EvalNames[IbisLazyFrame], + alias_output_names: AliasNames | None, + backend_version: tuple[int, ...], + version: Version, + ) -> None: + self._call = call + self._evaluate_output_names = evaluate_output_names + self._alias_output_names = alias_output_names + self._backend_version = backend_version + self._version = version + self._window_function: WindowFunction | None = None + self._metadata: ExprMetadata | None = None + + def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]: + return self._call(df) + + def __narwhals_expr__(self) -> None: ... 
+ + def __narwhals_namespace__(self) -> IbisNamespace: # pragma: no cover + # Unused, just for compatibility with PandasLikeExpr + from narwhals._ibis.namespace import IbisNamespace + + return IbisNamespace(backend_version=self._backend_version, version=self._version) + + def _cum_window_func( + self, *, reverse: bool, func_name: Literal["sum", "max", "min", "count"] + ) -> WindowFunction: + def func(window_inputs: WindowInputs) -> ir.Value: + if reverse: + order_by_cols = [ + ibis.desc(getattr(col, x), nulls_first=False) + for x in window_inputs.order_by + ] + else: + order_by_cols = [ + ibis.asc(getattr(col, x), nulls_first=True) + for x in window_inputs.order_by + ] + + window = ibis.window( + group_by=list(window_inputs.partition_by), + order_by=order_by_cols, + preceding=None, # unbounded + following=0, + ) + + return getattr(window_inputs.expr, func_name)().over(window) + + return func + + def _rolling_window_func( + self, + *, + func_name: Literal["sum", "mean", "std", "var"], + center: bool, + window_size: int, + min_samples: int, + ddof: int | None = None, + ) -> WindowFunction: + supported_funcs = ["sum", "mean", "std", "var"] + + if center: + preceding = window_size // 2 + following = window_size - preceding - 1 + else: + preceding = window_size - 1 + following = 0 + + def func(window_inputs: WindowInputs) -> ir.Value: + order_by_cols = [ + ibis.asc(getattr(col, x), nulls_first=True) + for x in window_inputs.order_by + ] + window = ibis.window( + group_by=list(window_inputs.partition_by), + order_by=order_by_cols, + preceding=preceding, + following=following, + ) + + expr: ir.NumericColumn = cast("ir.NumericColumn", window_inputs.expr) + + func_: ir.NumericScalar + + if func_name in {"sum", "mean"}: + func_ = getattr(expr, func_name)() + elif func_name == "var" and ddof == 0: + func_ = expr.var(how="pop") + elif func_name == "var" and ddof == 1: + func_ = expr.var(how="sample") + elif func_name == "std" and ddof == 0: + func_ = expr.std(how="pop") + elif 
func_name == "std" and ddof == 1: + func_ = expr.std(how="sample") + elif func_name in {"var", "std"}: # pragma: no cover + msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}." + raise ValueError(msg) + else: # pragma: no cover + msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}." + raise ValueError(msg) + + rolling_calc = func_.over(window) + valid_count = expr.count().over(window) + return ibis.cases( + (valid_count >= ibis.literal(min_samples), rolling_calc), + else_=ibis.null(), + ) + + return func + + def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: + if kind is ExprKind.LITERAL: + return self + + def func(df: IbisLazyFrame) -> Sequence[ir.Value]: + return [expr.over() for expr in self(df)] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + @classmethod + def from_column_names( + cls: type[Self], + evaluate_column_names: EvalNames[IbisLazyFrame], + /, + *, + context: _FullContext, + ) -> Self: + def func(df: IbisLazyFrame) -> list[ir.Column]: + return [df.native[name] for name in evaluate_column_names(df)] + + return cls( + func, + evaluate_output_names=evaluate_column_names, + alias_output_names=None, + backend_version=context._backend_version, + version=context._version, + ) + + @classmethod + def from_column_indices( + cls: type[Self], *column_indices: int, context: _FullContext + ) -> Self: + def func(df: IbisLazyFrame) -> list[ir.Column]: + return [df.native[i] for i in column_indices] + + return cls( + func, + evaluate_output_names=lambda df: [df.columns[i] for i in column_indices], + alias_output_names=None, + backend_version=context._backend_version, + version=context._version, + ) + + def _with_callable( + self, call: Callable[..., ir.Value], /, **expressifiable_args: Self | Any + ) -> Self: + 
"""Create expression from callable. + + Arguments: + call: Callable from compliant DataFrame to native Expression + expr_name: Expression name + expressifiable_args: arguments pass to expression which should be parsed + as expressions (e.g. in `nw.col('a').is_between('b', 'c')`) + """ + + def func(df: IbisLazyFrame) -> list[ir.Value]: + native_series_list = self(df) + other_native_series = { + key: df._evaluate_expr(value) if self._is_expr(value) else value + for key, value in expressifiable_args.items() + } + return [ + call(native_series, **other_native_series) + for native_series in native_series_list + ] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: + return type(self)( + call=self._call, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=func, + backend_version=self._backend_version, + version=self._version, + ) + + def _with_window_function(self, window_function: WindowFunction) -> Self: + result = self.__class__( + self._call, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + result._window_function = window_function + return result + + @classmethod + def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT: + return cast("ExprT", expr.name(name)) + + def __and__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input & other, other=other) + + def __or__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input | other, other=other) + + def __add__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input + other, other=other) + + def __truediv__(self, other: IbisExpr) -> Self: + return 
self._with_callable(lambda _input, other: _input / other, other=other) + + def __rtruediv__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__rtruediv__(other), other=other + ).alias("literal") + + def __floordiv__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__floordiv__(other), other=other + ) + + def __rfloordiv__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__rfloordiv__(other), other=other + ).alias("literal") + + def __mod__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__mod__(other), other=other + ) + + def __rmod__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__rmod__(other), other=other + ).alias("literal") + + def __sub__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input - other, other=other) + + def __rsub__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__rsub__(other), other=other + ).alias("literal") + + def __mul__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input * other, other=other) + + def __pow__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input**other, other=other) + + def __rpow__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda _input, other: _input.__rpow__(other), other=other + ).alias("literal") + + def __lt__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input < other, other=other) + + def __gt__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input > other, other=other) + + def __le__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda _input, other: _input <= other, other=other) + + def __ge__(self, other: IbisExpr) -> Self: + 
return self._with_callable(lambda _input, other: _input >= other, other=other) + + def __eq__(self, other: IbisExpr) -> Self: # type: ignore[override] + return self._with_callable(lambda _input, other: _input == other, other=other) + + def __ne__(self, other: IbisExpr) -> Self: # type: ignore[override] + return self._with_callable(lambda _input, other: _input != other, other=other) + + def __invert__(self) -> Self: + invert = cast("Callable[..., ir.Value]", operator.invert) + return self._with_callable(invert) + + def abs(self) -> Self: + return self._with_callable(lambda _input: _input.abs()) + + def mean(self) -> Self: + return self._with_callable(lambda _input: _input.mean()) + + def median(self) -> Self: + return self._with_callable(lambda _input: _input.median()) + + def all(self) -> Self: + return self._with_callable(lambda _input: _input.all()) + + def any(self) -> Self: + return self._with_callable(lambda _input: _input.any()) + + def quantile( + self, quantile: float, interpolation: RollingInterpolationMethod + ) -> Self: + if interpolation != "linear": + msg = "Only linear interpolation methods are supported for Ibis quantile." 
+ raise NotImplementedError(msg) + return self._with_callable(lambda _input: _input.quantile(quantile)) + + def clip(self, lower_bound: Any, upper_bound: Any) -> Self: + def _clip(_input: ir.NumericValue, lower: Any, upper: Any) -> ir.NumericValue: + return _input.clip(lower=lower, upper=upper) + + return self._with_callable(_clip, lower=lower_bound, upper=upper_bound) + + def sum(self) -> Self: + return self._with_callable(lambda _input: _input.sum()) + + def n_unique(self) -> Self: + return self._with_callable( + lambda _input: _input.nunique() + _input.isnull().any().cast("int8") + ) + + def count(self) -> Self: + return self._with_callable(lambda _input: _input.count()) + + def len(self) -> Self: + def func(df: IbisLazyFrame) -> list[ir.IntegerScalar]: + return [df.native.count()] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + def std(self, ddof: int) -> Self: + def _std(_input: ir.NumericColumn, ddof: int) -> ir.Value: + if ddof == 0: + return _input.std(how="pop") + elif ddof == 1: + return _input.std(how="sample") + else: + n_samples = _input.count() + std_pop = _input.std(how="pop") + ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof)) + return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt() + + return self._with_callable(lambda _input: _std(_input, ddof)) + + def var(self, ddof: int) -> Self: + def _var(_input: ir.NumericColumn, ddof: int) -> ir.Value: + if ddof == 0: + return _input.var(how="pop") + elif ddof == 1: + return _input.var(how="sample") + else: + n_samples = _input.count() + var_pop = _input.var(how="pop") + ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof)) + return var_pop * n_samples / (n_samples - ddof_lit) + + return self._with_callable(lambda _input: _var(_input, ddof)) + + def max(self) -> Self: + return self._with_callable(lambda _input: _input.max()) + + 
def min(self) -> Self: + return self._with_callable(lambda _input: _input.min()) + + def null_count(self) -> Self: + return self._with_callable(lambda _input: _input.isnull().sum()) + + def over(self, partition_by: Sequence[str], order_by: Sequence[str] | None) -> Self: + if (fn := self._window_function) is not None: + assert order_by is not None # noqa: S101 + + def func(df: IbisLazyFrame) -> list[ir.Value]: + return [ + fn(WindowInputs(expr, partition_by, order_by)) for expr in self(df) + ] + else: + + def func(df: IbisLazyFrame) -> list[ir.Value]: + return [expr.over(group_by=partition_by) for expr in self(df)] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + def is_null(self) -> Self: + return self._with_callable(lambda _input: _input.isnull()) + + def is_nan(self) -> Self: + def func(_input: ir.FloatingValue | Any) -> ir.Value: + otherwise = _input.isnan() if is_floating(_input.type()) else False + return ibis.ifelse(_input.isnull(), None, otherwise) + + return self._with_callable(func) + + def is_finite(self) -> Self: + return self._with_callable(lambda _input: ~(_input.isinf() | _input.isnan())) + + def is_in(self, other: Sequence[Any]) -> Self: + return self._with_callable(lambda _input: _input.isin(other)) + + def round(self, decimals: int) -> Self: + return self._with_callable(lambda _input: _input.round(decimals)) + + def shift(self, n: int) -> Self: + def _func(window_inputs: WindowInputs) -> ir.Column: + expr = cast("ir.Column", window_inputs.expr) + return expr.lag(n) + + return self._with_window_function(_func) + + def is_first_distinct(self) -> Self: + def func(window_inputs: WindowInputs) -> ir.BooleanValue: + order_by_cols = [ + ibis.asc(getattr(col, x), nulls_first=True) + for x in window_inputs.order_by + ] + window = ibis.window( + group_by=[*window_inputs.partition_by, 
window_inputs.expr], + order_by=order_by_cols, + ) + # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1` + return ibis.row_number().over(window) == lit(0) + + return self._with_window_function(func) + + def is_last_distinct(self) -> Self: + def func(window_inputs: WindowInputs) -> ir.Value: + order_by_cols = [ibis.desc(getattr(col, x)) for x in window_inputs.order_by] + window = ibis.window( + group_by=[*window_inputs.partition_by, window_inputs.expr], + order_by=order_by_cols, + ) + # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1` + return ibis.row_number().over(window) == lit(0) + + return self._with_window_function(func) + + def diff(self) -> Self: + def _func(window_inputs: WindowInputs) -> ir.NumericValue: + expr = cast("ir.NumericColumn", window_inputs.expr) + return expr - cast( + "ir.NumericColumn", expr.lag().over(ibis.window(following=0)) + ) + + return self._with_window_function(_func) + + def cum_sum(self, *, reverse: bool) -> Self: + return self._with_window_function( + self._cum_window_func(reverse=reverse, func_name="sum") + ) + + def cum_max(self, *, reverse: bool) -> Self: + return self._with_window_function( + self._cum_window_func(reverse=reverse, func_name="max") + ) + + def cum_min(self, *, reverse: bool) -> Self: + return self._with_window_function( + self._cum_window_func(reverse=reverse, func_name="min") + ) + + def cum_count(self, *, reverse: bool) -> Self: + return self._with_window_function( + self._cum_window_func(reverse=reverse, func_name="count") + ) + + def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self: + return self._with_window_function( + self._rolling_window_func( + func_name="sum", + center=center, + window_size=window_size, + min_samples=min_samples, + ) + ) + + def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self: + return self._with_window_function( + self._rolling_window_func( + func_name="mean", + 
center=center, + window_size=window_size, + min_samples=min_samples, + ) + ) + + def rolling_var( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: + return self._with_window_function( + self._rolling_window_func( + func_name="var", + center=center, + window_size=window_size, + min_samples=min_samples, + ddof=ddof, + ) + ) + + def rolling_std( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: + return self._with_window_function( + self._rolling_window_func( + func_name="std", + center=center, + window_size=window_size, + min_samples=min_samples, + ddof=ddof, + ) + ) + + def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self: + # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward + # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539 + if strategy is not None: + msg = "`strategy` is not supported for the Ibis backend" + raise NotImplementedError(msg) + if limit is not None: + msg = "`limit` is not supported for the Ibis backend" # pragma: no cover + raise NotImplementedError(msg) + + def _fill_null(_input: ir.Value, value: ir.Scalar) -> ir.Value: + return _input.fill_null(value) + + return self._with_callable(_fill_null, value=value) + + def cast(self, dtype: DType | type[DType]) -> Self: + def _func(_input: ir.Column) -> ir.Value: + native_dtype = narwhals_to_native_dtype(dtype, self._version) + # ibis `cast` overloads do not include DataType, only literals + return _input.cast(native_dtype) # type: ignore[unused-ignore] + + return self._with_callable(_func) + + def is_unique(self) -> Self: + return self._with_callable( + lambda _input: _input.isnull().count().over(ibis.window(group_by=(_input))) + == 1 + ) + + def rank(self, method: RankMethod, *, descending: bool) -> Self: + def _rank(_input: ir.Column) -> ir.Column: + order_by: ir.Column = ( + cast("ir.Column", 
_input.desc()) + if descending + else cast("ir.Column", _input.asc()) + ) + window = ibis.window(order_by=order_by) + + if method == "dense": + rank_ = order_by.dense_rank() + elif method == "ordinal": + rank_ = cast("ir.IntegerColumn", ibis.row_number().over(window)) + else: + rank_ = order_by.rank() + + # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank. + rank_ = rank_ + cast("ir.IntegerValue", lit(1)) + + # For "max" and "average", adjust using the count of rows in the partition. + if method == "max": + # Define a window partitioned by _input (i.e. each distinct value) + partition = ibis.window(group_by=[_input]) + cnt = cast("ir.IntegerValue", _input.count().over(partition)) + rank_ = rank_ + cnt - cast("ir.IntegerValue", lit(1)) + elif method == "average": + partition = ibis.window(group_by=[_input]) + cnt = cast("ir.IntegerValue", _input.count().over(partition)) + avg = cast( + "ir.NumericValue", (cnt - cast("ir.IntegerScalar", lit(1))) / lit(2.0) + ) + rank_ = rank_ + avg + + return cast("ir.Column", ibis.cases((_input.notnull(), rank_))) + + return self._with_callable(_rank) + + @property + def str(self) -> IbisExprStringNamespace: + return IbisExprStringNamespace(self) + + @property + def dt(self) -> IbisExprDateTimeNamespace: + return IbisExprDateTimeNamespace(self) + + @property + def list(self) -> IbisExprListNamespace: + return IbisExprListNamespace(self) + + @property + def struct(self) -> IbisExprStructNamespace: + return IbisExprStructNamespace(self) + + # NOTE: https://github.com/ibis-project/ibis/issues/10542 + cum_prod = not_implemented() + drop_nulls = not_implemented() + + # NOTE: https://github.com/ibis-project/ibis/issues/11176 + skew = not_implemented() + unique = not_implemented() diff --git a/narwhals/_ibis/expr_dt.py b/narwhals/_ibis/expr_dt.py new file mode 100644 index 0000000000..3742a6b647 --- /dev/null +++ b/narwhals/_ibis/expr_dt.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from typing import 
TYPE_CHECKING +from typing import Any +from typing import Callable + +from narwhals._duration import parse_interval_string +from narwhals._ibis.utils import UNITS_DICT_BUCKET +from narwhals._ibis.utils import UNITS_DICT_TRUNCATE +from narwhals.utils import not_implemented + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + from narwhals._ibis.utils import BucketUnit + from narwhals._ibis.utils import TruncateUnit + + +class IbisExprDateTimeNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def year(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.year()) + + def month(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.month()) + + def day(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.day()) + + def hour(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.hour()) + + def minute(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.minute()) + + def second(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.second()) + + def millisecond(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.millisecond()) + + def microsecond(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.microsecond()) + + def to_string(self, format: str) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.strftime(format)) + + def weekday(self) -> IbisExpr: + # Ibis uses 0-6 for Monday-Sunday. Add 1 to match polars. 
+ return self._compliant_expr._with_callable( + lambda _input: _input.day_of_week.index() + 1 + ) + + def ordinal_day(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.day_of_year()) + + def date(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.date()) + + def _bucket(self, kwds: dict[BucketUnit, Any], /) -> Callable[..., ir.TimestampValue]: + def fn(_input: ir.TimestampValue) -> ir.TimestampValue: + return _input.bucket(**kwds) + + return fn + + def _truncate(self, unit: TruncateUnit, /) -> Callable[..., ir.TimestampValue]: + def fn(_input: ir.TimestampValue) -> ir.TimestampValue: + return _input.truncate(unit) + + return fn + + def truncate(self, every: str) -> IbisExpr: + multiple, unit = parse_interval_string(every) + if multiple != 1: + if self._compliant_expr._backend_version < (7, 1): # pragma: no cover + msg = "Truncating datetimes with multiples of the unit is only supported in Ibis >= 7.1." + raise NotImplementedError(msg) + fn = self._bucket({UNITS_DICT_BUCKET[unit]: multiple}) + else: + fn = self._truncate(UNITS_DICT_TRUNCATE[unit]) + return self._compliant_expr._with_callable(fn) + + nanosecond = not_implemented() + total_minutes = not_implemented() + total_seconds = not_implemented() + total_milliseconds = not_implemented() + total_microseconds = not_implemented() + total_nanoseconds = not_implemented() diff --git a/narwhals/_ibis/expr_list.py b/narwhals/_ibis/expr_list.py new file mode 100644 index 0000000000..2da677932f --- /dev/null +++ b/narwhals/_ibis/expr_list.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from narwhals._ibis.expr import IbisExpr + + +class IbisExprListNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def len(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.length()) diff --git a/narwhals/_ibis/expr_str.py 
b/narwhals/_ibis/expr_str.py new file mode 100644 index 0000000000..56a9d71a30 --- /dev/null +++ b/narwhals/_ibis/expr_str.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable + +from ibis.expr.datatypes import Timestamp + +from narwhals.utils import not_implemented + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + + +class IbisExprStringNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def starts_with(self, prefix: str) -> IbisExpr: + def fn(_input: ir.StringColumn) -> ir.BooleanValue: + return _input.startswith(prefix) + + return self._compliant_expr._with_callable(fn) + + def ends_with(self, suffix: str) -> IbisExpr: + def fn(_input: ir.StringColumn) -> ir.BooleanValue: + return _input.endswith(suffix) + + return self._compliant_expr._with_callable(fn) + + def contains(self, pattern: str, *, literal: bool) -> IbisExpr: + def fn(_input: ir.StringColumn) -> ir.BooleanValue: + return _input.contains(pattern) if literal else _input.re_search(pattern) + + return self._compliant_expr._with_callable(fn) + + def slice(self, offset: int, length: int) -> IbisExpr: + def fn(_input: ir.StringColumn) -> ir.StringValue: + return _input.substr(start=offset, length=length) + + return self._compliant_expr._with_callable(fn) + + def split(self, by: str) -> IbisExpr: + def fn(_input: ir.StringColumn) -> ir.ArrayValue: + return _input.split(by) + + return self._compliant_expr._with_callable(fn) + + def len_chars(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.length()) + + def to_lowercase(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.lower()) + + def to_uppercase(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda _input: _input.upper()) + + def strip_chars(self, characters: str | None) -> IbisExpr: + if 
characters is not None: + msg = "Ibis does not support `characters` argument in `str.strip_chars`" + raise NotImplementedError(msg) + + return self._compliant_expr._with_callable(lambda _input: _input.strip()) + + def _replace_all(self, pattern: str, value: str) -> Callable[..., ir.StringValue]: + def fn(_input: ir.StringColumn) -> ir.StringValue: + return _input.re_replace(pattern, value) + + return fn + + def _replace_all_literal( + self, pattern: str, value: str + ) -> Callable[..., ir.StringValue]: + def fn(_input: ir.StringColumn) -> ir.StringValue: + return _input.replace(pattern, value) # pyright: ignore[reportArgumentType] + + return fn + + def replace_all(self, pattern: str, value: str, *, literal: bool) -> IbisExpr: + fn = self._replace_all_literal if literal else self._replace_all + return self._compliant_expr._with_callable(fn(pattern, value)) + + def _to_datetime(self, format: str) -> Callable[..., ir.TimestampValue]: + def fn(_input: ir.StringColumn) -> ir.TimestampValue: + return _input.as_timestamp(format) + + return fn + + def _to_datetime_naive(self, format: str) -> Callable[..., ir.TimestampValue]: + def fn(_input: ir.StringColumn) -> ir.TimestampValue: + dtype: Any = Timestamp(timezone=None) + return _input.as_timestamp(format).cast(dtype) + + return fn + + def to_datetime(self, format: str | None) -> IbisExpr: + if format is None: + msg = "Cannot infer format with Ibis backend" + raise NotImplementedError(msg) + fn = self._to_datetime_naive if _is_naive_format(format) else self._to_datetime + return self._compliant_expr._with_callable(fn(format)) + + replace = not_implemented() + + +def _is_naive_format(format_: str) -> bool: + return not any(x in format_ for x in ("%s", "%z", "Z")) diff --git a/narwhals/_ibis/expr_struct.py b/narwhals/_ibis/expr_struct.py new file mode 100644 index 0000000000..f061c230bf --- /dev/null +++ b/narwhals/_ibis/expr_struct.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + 
+if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + + +class IbisExprStructNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def field(self, name: str) -> IbisExpr: + def func(_input: ir.StructColumn) -> ir.Column: + return _input[name] + + return self._compliant_expr._with_callable(func).alias(name) diff --git a/narwhals/_ibis/group_by.py b/narwhals/_ibis/group_by.py new file mode 100644 index 0000000000..e81b23b1bf --- /dev/null +++ b/narwhals/_ibis/group_by.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Sequence + +from narwhals._compliant import LazyGroupBy + +if TYPE_CHECKING: + import ibis.expr.types as ir # noqa: F401 + + from narwhals._ibis.dataframe import IbisLazyFrame + from narwhals._ibis.expr import IbisExpr + + +class IbisGroupBy(LazyGroupBy["IbisLazyFrame", "IbisExpr", "ir.Value"]): + def __init__( + self, + df: IbisLazyFrame, + keys: Sequence[str] | Sequence[IbisExpr], + /, + *, + drop_null_keys: bool, + ) -> None: + frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys) + self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame + + def agg(self, *exprs: IbisExpr) -> IbisLazyFrame: + native = self.compliant.native + return self.compliant._with_native( + native.group_by(self._keys).aggregate(*self._evaluate_exprs(exprs)) + ).rename(dict(zip(self._keys, self._output_key_names))) diff --git a/narwhals/_ibis/namespace.py b/narwhals/_ibis/namespace.py new file mode 100644 index 0000000000..bf9a2f369a --- /dev/null +++ b/narwhals/_ibis/namespace.py @@ -0,0 +1,234 @@ +from __future__ import annotations + +import operator +from functools import reduce +from itertools import chain +from typing import TYPE_CHECKING +from typing import Any +from typing import Iterable +from typing import Sequence +from typing import cast + +import ibis +import ibis.expr.types as ir + 
+from narwhals._compliant import CompliantThen +from narwhals._compliant import LazyNamespace +from narwhals._compliant import LazyWhen +from narwhals._expression_parsing import combine_alias_output_names +from narwhals._expression_parsing import combine_evaluate_output_names +from narwhals._ibis.dataframe import IbisLazyFrame +from narwhals._ibis.expr import IbisExpr +from narwhals._ibis.selectors import IbisSelectorNamespace +from narwhals._ibis.utils import lit +from narwhals._ibis.utils import narwhals_to_native_dtype +from narwhals.utils import Implementation +from narwhals.utils import requires + +if TYPE_CHECKING: + from narwhals.dtypes import DType + from narwhals.typing import ConcatMethod + from narwhals.utils import Version + + +class IbisNamespace(LazyNamespace[IbisLazyFrame, IbisExpr, "ir.Table"]): + _implementation: Implementation = Implementation.IBIS + + def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None: + self._backend_version = backend_version + self._version = version + + @property + def selectors(self) -> IbisSelectorNamespace: + return IbisSelectorNamespace.from_namespace(self) + + @property + def _expr(self) -> type[IbisExpr]: + return IbisExpr + + @property + def _lazyframe(self) -> type[IbisLazyFrame]: + return IbisLazyFrame + + def concat( + self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod + ) -> IbisLazyFrame: + if how == "diagonal": + msg = "diagonal concat not supported for Ibis. Please join instead." 
+ raise NotImplementedError(msg) + + items = list(items) + native_items = [item.native for item in items] + schema = items[0].schema + if not all(x.schema == schema for x in items[1:]): + msg = "inputs should all have the same schema" + raise TypeError(msg) + return self._lazyframe.from_native(ibis.union(*native_items), context=self) + + def concat_str( + self, *exprs: IbisExpr, separator: str, ignore_nulls: bool + ) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + cols = list(chain.from_iterable(expr(df) for expr in exprs)) + cols_casted = [s.cast("string") for s in cols] + + if not ignore_nulls: + result = cols_casted[0] + for col in cols_casted[1:]: + result = result + separator + col + else: + sep = cast("ir.StringValue", lit(separator)) + result = sep.join(cols_casted) + + return [result] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def all_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + cols = chain.from_iterable(expr(df) for expr in exprs) + return [reduce(operator.and_, cols)] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def any_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + cols = chain.from_iterable(expr(df) for expr in exprs) + return [reduce(operator.or_, cols)] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def max_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> 
list[ir.Value]: + cols = chain.from_iterable(expr(df) for expr in exprs) + return [ibis.greatest(*cols)] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def min_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + cols = chain.from_iterable(expr(df) for expr in exprs) + return [ibis.least(*cols)] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def sum_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + cols = [e.fill_null(lit(0)) for _expr in exprs for e in _expr(df)] + return [reduce(operator.add, cols)] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr: + def func(df: IbisLazyFrame) -> list[ir.Value]: + expr = ( + cast("ir.NumericColumn", e.fill_null(lit(0))) + for _expr in exprs + for e in _expr(df) + ) + non_null = ( + cast("ir.NumericColumn", e.isnull().ifelse(lit(0), lit(1))) + for _expr in exprs + for e in _expr(df) + ) + + return [ + (reduce(lambda x, y: x + y, expr) / reduce(lambda x, y: x + y, non_null)) + ] + + return self._expr( + call=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + backend_version=self._backend_version, + version=self._version, + ) + + @requires.backend_version((10, 0)) + def when(self, predicate: IbisExpr) -> IbisWhen: + return IbisWhen.from_expr(predicate, context=self) + + 
def lit(self, value: Any, dtype: DType | type[DType] | None) -> IbisExpr:
    """Create an expression that yields `value` as a literal.

    The resulting expression reports a single output name, ``"literal"``,
    and has no alias function attached.

    NOTE(review): this def and `len` below take `self`, so they are presumably
    methods of the namespace class whose header lies above this chunk. Under
    that assumption the bare `lit` called inside the closure resolves to the
    module-level `lit` (the same name `IbisWhen` re-exports), not to this
    method -- confirm against the full file.

    Arguments:
        value: The Python value forwarded to `lit`.
        dtype: Optional Narwhals dtype. When truthy it is translated with
            `narwhals_to_native_dtype`; otherwise `None` is forwarded.

    Returns:
        An expression constructed through `self._expr`.
    """

    def _literal(_df: IbisLazyFrame) -> list[ir.Value]:
        # The dtype translation is deferred until the expression is evaluated.
        if dtype:
            native_dtype = narwhals_to_native_dtype(dtype, self._version)
        else:
            native_dtype = None
        return [lit(value, native_dtype)]

    def _output_names(_df: IbisLazyFrame) -> list[str]:
        return ["literal"]

    return self._expr(
        _literal,
        evaluate_output_names=_output_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
    )


def len(self) -> IbisExpr:
    """Create an expression wrapping `count()` of the frame's native object.

    The resulting expression reports a single output name, ``"len"``, and has
    no alias function attached.

    Returns:
        An expression constructed through `self._expr`.
    """

    def _row_count(_df: IbisLazyFrame) -> list[ir.Value]:
        native = _df.native
        return [native.count()]

    def _output_names(_df: IbisLazyFrame) -> list[str]:
        return ["len"]

    return self._expr(
        call=_row_count,
        evaluate_output_names=_output_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
    )


class IbisWhen(LazyWhen["IbisLazyFrame", "ir.Value", IbisExpr]):
    """`when/then/otherwise` construct evaluated through `ibis.cases`."""

    # Expose the module-level `lit` as a class attribute.
    lit = lit

    @property
    def _then(self) -> type[IbisThen]:
        """Class used for the `then` stage of this construct."""
        return IbisThen

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        """Evaluate the conditional against `df`.

        The condition is evaluated first, then the `then` value, and finally
        (only when it is not `None`) the `otherwise` value. Values that
        `_is_expr` recognises are evaluated through `df._evaluate_expr`;
        anything else is wrapped with `lit`.

        Returns:
            A one-element list holding the `ibis.cases(...)` result.
        """
        is_expr = self._condition._is_expr

        def _resolve(value: Any) -> ir.Value:
            if is_expr(value):
                return df._evaluate_expr(value)
            return lit(value)

        # Tuple elements are evaluated left to right: condition, then branch.
        branch = (df._evaluate_expr(self._condition), _resolve(self._then_value))

        fallback = self._otherwise_value
        if fallback is None:
            # No `otherwise` supplied: call `ibis.cases` without `else_`.
            return [ibis.cases(branch)]
        return [ibis.cases(branch, else_=_resolve(fallback))]


class IbisThen(CompliantThen["IbisLazyFrame", "ir.Value", IbisExpr], IbisExpr):
    """`then` stage of `IbisWhen`; adds nothing beyond its base classes."""
diff --git a/narwhals/_ibis/selectors.py b/narwhals/_ibis/selectors.py new file mode 100644 index 0000000000..13e1413611 --- /dev/null +++ b/narwhals/_ibis/selectors.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from narwhals._compliant import CompliantSelector +from narwhals._compliant import LazySelectorNamespace +from narwhals._ibis.expr import IbisExpr + +if TYPE_CHECKING: + import ibis.expr.types as ir # noqa: F401 + + from narwhals._ibis.dataframe import IbisLazyFrame # noqa: F401 + + +class IbisSelectorNamespace(LazySelectorNamespace["IbisLazyFrame", "ir.Value"]): + @property + def _selector(self) -> type[IbisSelector]: + return IbisSelector + + +class IbisSelector( # type: ignore[misc] + CompliantSelector["IbisLazyFrame", "ir.Value"], IbisExpr +): + def _to_expr(self) -> IbisExpr: + return IbisExpr( + self._call, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) diff --git a/narwhals/_ibis/series.py b/narwhals/_ibis/series.py index 0f35fb8c07..67506733f8 100644 --- a/narwhals/_ibis/series.py +++ b/narwhals/_ibis/series.py @@ -4,7 +4,7 @@ from typing import Any from typing import NoReturn -from narwhals._ibis.dataframe import native_to_narwhals_dtype +from narwhals._ibis.utils import native_to_narwhals_dtype from narwhals.dependencies import get_ibis if TYPE_CHECKING: @@ -29,7 +29,9 @@ def __native_namespace__(self) -> ModuleType: @property def dtype(self) -> DType: - return native_to_narwhals_dtype(self._native_series.type(), self._version) + return native_to_narwhals_dtype( + self._native_series.schema().types[0], self._version + ) def __getattr__(self, attr: str) -> NoReturn: msg = ( diff --git a/narwhals/_ibis/typing.py b/narwhals/_ibis/typing.py new file mode 100644 index 0000000000..14b9a56f1c --- /dev/null +++ b/narwhals/_ibis/typing.py @@ -0,0 +1,15 @@ +from __future__ import 
annotations + +from typing import TYPE_CHECKING +from typing import Protocol +from typing import TypeVar + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.utils import WindowInputs + + ExprT = TypeVar("ExprT", bound=ir.Value) + + class WindowFunction(Protocol): + def __call__(self, window_inputs: WindowInputs) -> ir.Value: ... diff --git a/narwhals/_ibis/utils.py b/narwhals/_ibis/utils.py new file mode 100644 index 0000000000..cabb79cf97 --- /dev/null +++ b/narwhals/_ibis/utils.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +from functools import lru_cache +from typing import TYPE_CHECKING +from typing import Any +from typing import Literal +from typing import Mapping +from typing import Sequence + +import ibis +import ibis.expr.datatypes as ibis_dtypes + +from narwhals.utils import isinstance_or_issubclass + +if TYPE_CHECKING: + import ibis.expr.types as ir + from ibis.expr.datatypes import DataType as IbisDataType + from typing_extensions import TypeAlias + from typing_extensions import TypeIs + + from narwhals._duration import IntervalUnit + from narwhals._ibis.dataframe import IbisLazyFrame + from narwhals._ibis.expr import IbisExpr + from narwhals.dtypes import DType + from narwhals.utils import Version + +lit = ibis.literal +"""Alias for `ibis.literal`.""" + +BucketUnit: TypeAlias = Literal[ + "years", + "quarters", + "months", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", +] +TruncateUnit: TypeAlias = Literal[ + "Y", "Q", "M", "W", "D", "h", "m", "s", "ms", "us", "ns" +] + +UNITS_DICT_BUCKET: Mapping[IntervalUnit, BucketUnit] = { + "y": "years", + "q": "quarters", + "mo": "months", + "d": "days", + "h": "hours", + "m": "minutes", + "s": "seconds", + "ms": "milliseconds", + "us": "microseconds", + "ns": "nanoseconds", +} + +UNITS_DICT_TRUNCATE: Mapping[IntervalUnit, TruncateUnit] = { + "y": "Y", + "q": "Q", + "mo": "M", + "d": "D", + "h": "h", + "m": "m", + "s": "s", + 
"ms": "ms", + "us": "us", + "ns": "ns", +} + + +class WindowInputs: + __slots__ = ("expr", "order_by", "partition_by") + + def __init__( + self, + expr: ir.Expr | ir.Value | ir.Column, + partition_by: Sequence[str], + order_by: Sequence[str], + ) -> None: + self.expr = expr + self.partition_by = partition_by + self.order_by = order_by + + +def evaluate_exprs(df: IbisLazyFrame, /, *exprs: IbisExpr) -> list[tuple[str, ir.Value]]: + native_results: list[tuple[str, ir.Value]] = [] + for expr in exprs: + native_series_list = expr(df) + output_names = expr._evaluate_output_names(df) + if expr._alias_output_names is not None: + output_names = expr._alias_output_names(output_names) + if len(output_names) != len(native_series_list): # pragma: no cover + msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results" + raise AssertionError(msg) + native_results.extend(zip(output_names, native_series_list)) + return native_results + + +@lru_cache(maxsize=16) +def native_to_narwhals_dtype(ibis_dtype: IbisDataType, version: Version) -> DType: # noqa: C901, PLR0912 + dtypes = version.dtypes + if ibis_dtype.is_int64(): + return dtypes.Int64() + if ibis_dtype.is_int32(): + return dtypes.Int32() + if ibis_dtype.is_int16(): + return dtypes.Int16() + if ibis_dtype.is_int8(): + return dtypes.Int8() + if ibis_dtype.is_uint64(): + return dtypes.UInt64() + if ibis_dtype.is_uint32(): + return dtypes.UInt32() + if ibis_dtype.is_uint16(): + return dtypes.UInt16() + if ibis_dtype.is_uint8(): + return dtypes.UInt8() + if ibis_dtype.is_boolean(): + return dtypes.Boolean() + if ibis_dtype.is_float64(): + return dtypes.Float64() + if ibis_dtype.is_float32(): + return dtypes.Float32() + if ibis_dtype.is_string(): + return dtypes.String() + if ibis_dtype.is_date(): + return dtypes.Date() + if ibis_dtype.is_timestamp(): + return dtypes.Datetime() + if is_interval(ibis_dtype): + _time_unit = ibis_dtype.unit.value + if _time_unit not in {"ns", "us", "ms", 
"s"}: # pragma: no cover + msg = f"Unsupported interval unit: {_time_unit}" + raise NotImplementedError(msg) + return dtypes.Duration(_time_unit) + if is_array(ibis_dtype): + if ibis_dtype.length: + return dtypes.Array( + native_to_narwhals_dtype(ibis_dtype.value_type, version), + ibis_dtype.length, + ) + else: + return dtypes.List(native_to_narwhals_dtype(ibis_dtype.value_type, version)) + if is_struct(ibis_dtype): + return dtypes.Struct( + [ + dtypes.Field(name, native_to_narwhals_dtype(dtype, version)) + for name, dtype in ibis_dtype.items() + ] + ) + if ibis_dtype.is_decimal(): # pragma: no cover + return dtypes.Decimal() + if ibis_dtype.is_time(): + return dtypes.Time() + if ibis_dtype.is_binary(): + return dtypes.Binary() + return dtypes.Unknown() # pragma: no cover + + +def is_interval(obj: IbisDataType) -> TypeIs[ibis_dtypes.Interval]: + return obj.is_interval() + + +def is_array(obj: IbisDataType) -> TypeIs[ibis_dtypes.Array[Any]]: + return obj.is_array() + + +def is_struct(obj: IbisDataType) -> TypeIs[ibis_dtypes.Struct]: + return obj.is_struct() + + +def is_floating(obj: IbisDataType) -> TypeIs[ibis_dtypes.Floating]: + return obj.is_floating() + + +def narwhals_to_native_dtype( # noqa: C901, PLR0912 + dtype: DType | type[DType], version: Version +) -> IbisDataType: + dtypes = version.dtypes + + if isinstance_or_issubclass(dtype, dtypes.Decimal): # pragma: no cover + return ibis_dtypes.Decimal() + if isinstance_or_issubclass(dtype, dtypes.Float64): + return ibis_dtypes.Float64() + if isinstance_or_issubclass(dtype, dtypes.Float32): + return ibis_dtypes.Float32() + if isinstance_or_issubclass(dtype, dtypes.Int128): # pragma: no cover + msg = "Int128 not supported by Ibis" + raise NotImplementedError(msg) + if isinstance_or_issubclass(dtype, dtypes.Int64): + return ibis_dtypes.Int64() + if isinstance_or_issubclass(dtype, dtypes.Int32): + return ibis_dtypes.Int32() + if isinstance_or_issubclass(dtype, dtypes.Int16): + return ibis_dtypes.Int16() + if 
isinstance_or_issubclass(dtype, dtypes.Int8): + return ibis_dtypes.Int8() + if isinstance_or_issubclass(dtype, dtypes.UInt128): # pragma: no cover + msg = "UInt128 not supported by Ibis" + raise NotImplementedError(msg) + if isinstance_or_issubclass(dtype, dtypes.UInt64): + return ibis_dtypes.UInt64() + if isinstance_or_issubclass(dtype, dtypes.UInt32): + return ibis_dtypes.UInt32() + if isinstance_or_issubclass(dtype, dtypes.UInt16): + return ibis_dtypes.UInt16() + if isinstance_or_issubclass(dtype, dtypes.UInt8): + return ibis_dtypes.UInt8() + if isinstance_or_issubclass(dtype, dtypes.String): + return ibis_dtypes.String() + if isinstance_or_issubclass(dtype, dtypes.Boolean): + return ibis_dtypes.Boolean() + if isinstance_or_issubclass(dtype, dtypes.Categorical): + msg = "Categorical not supported by Ibis" + raise NotImplementedError(msg) + if isinstance_or_issubclass(dtype, dtypes.Datetime): + return ibis_dtypes.Timestamp() + if isinstance_or_issubclass(dtype, dtypes.Duration): + return ibis_dtypes.Interval(unit=dtype.time_unit) # pyright: ignore[reportArgumentType] + if isinstance_or_issubclass(dtype, dtypes.Date): + return ibis_dtypes.Date() + if isinstance_or_issubclass(dtype, dtypes.Time): + return ibis_dtypes.Time() + if isinstance_or_issubclass(dtype, dtypes.List): + inner = narwhals_to_native_dtype(dtype.inner, version) + return ibis_dtypes.Array(value_type=inner) + if isinstance_or_issubclass(dtype, dtypes.Struct): + fields = [ + (field.name, narwhals_to_native_dtype(field.dtype, version)) + for field in dtype.fields + ] + return ibis_dtypes.Struct.from_tuples(fields) + if isinstance_or_issubclass(dtype, dtypes.Array): + inner = narwhals_to_native_dtype(dtype.inner, version) + return ibis_dtypes.Array(value_type=inner, length=dtype.size) + if isinstance_or_issubclass(dtype, dtypes.Binary): + return ibis_dtypes.Binary() + if isinstance_or_issubclass(dtype, dtypes.Enum): + # Ibis does not support: https://github.com/ibis-project/ibis/issues/10991 + msg = 
"Enum not supported by Ibis" + raise NotImplementedError(msg) + msg = f"Unknown dtype: {dtype}" # pragma: no cover + raise AssertionError(msg) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index b9ffc122d6..e04629af08 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -20,6 +20,7 @@ from narwhals.dependencies import get_pyarrow from narwhals.dependencies import is_dask_dataframe from narwhals.dependencies import is_duckdb_relation +from narwhals.dependencies import is_ibis_table from narwhals.dependencies import is_pyspark_connect_dataframe from narwhals.dependencies import is_pyspark_dataframe from narwhals.dependencies import is_sqlframe_dataframe @@ -42,6 +43,7 @@ from narwhals._arrow.namespace import ArrowNamespace from narwhals._dask.namespace import DaskNamespace from narwhals._duckdb.namespace import DuckDBNamespace + from narwhals._ibis.namespace import IbisNamespace from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._polars.namespace import PolarsNamespace from narwhals._spark_like.dataframe import SQLFrameDataFrame @@ -60,16 +62,18 @@ _Dask: TypeAlias = Literal["dask"] _DuckDB: TypeAlias = Literal["duckdb"] _PandasLike: TypeAlias = Literal["pandas", "cudf", "modin"] + _Ibis: TypeAlias = Literal["ibis"] _SparkLike: TypeAlias = Literal["pyspark", "sqlframe", "pyspark[connect]"] _EagerOnly: TypeAlias = "_PandasLike | _Arrow" _EagerAllowed: TypeAlias = "_Polars | _EagerOnly" - _LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB" + _LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB | _Ibis" _LazyAllowed: TypeAlias = "_Polars | _LazyOnly" Polars: TypeAlias = Literal[_Polars, Implementation.POLARS] Arrow: TypeAlias = Literal[_Arrow, Implementation.PYARROW] Dask: TypeAlias = Literal[_Dask, Implementation.DASK] DuckDB: TypeAlias = Literal[_DuckDB, Implementation.DUCKDB] + Ibis: TypeAlias = Literal[_Ibis, Implementation.IBIS] PandasLike: TypeAlias = Literal[ _PandasLike, Implementation.PANDAS, 
Implementation.CUDF, Implementation.MODIN ] @@ -81,7 +85,7 @@ ] EagerOnly: TypeAlias = "PandasLike | Arrow" EagerAllowed: TypeAlias = "EagerOnly | Polars" - LazyOnly: TypeAlias = "SparkLike | Dask | DuckDB" + LazyOnly: TypeAlias = "SparkLike | Dask | DuckDB | Ibis" LazyAllowed: TypeAlias = "LazyOnly | Polars" BackendName: TypeAlias = "_EagerAllowed | _LazyAllowed" @@ -102,6 +106,12 @@ class _NativeDask(Protocol): class _NativeCuDF(Protocol): def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... + class _NativeIbis(Protocol): + def sql(self, *args: Any, **kwds: Any) -> Any: ... + def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ... + class _ModinDataFrame(Protocol): _pandas_class: type[pd.DataFrame] @@ -121,7 +131,7 @@ class _ModinSeries(Protocol): "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect" ) - NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask" + NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis" NativeUnknown: TypeAlias = ( "NativeFrame | NativeSeries | NativeLazyFrame | DataFrameLike" ) @@ -185,6 +195,10 @@ def from_backend(cls, backend: DuckDB, /) -> Namespace[DuckDBNamespace]: ... @classmethod def from_backend(cls, backend: Dask, /) -> Namespace[DaskNamespace]: ... + @overload + @classmethod + def from_backend(cls, backend: Ibis, /) -> Namespace[IbisNamespace]: ... + @overload @classmethod def from_backend(cls, backend: EagerAllowed, /) -> EagerAllowedNamespace: ... 
@@ -243,6 +257,10 @@ def from_backend( from narwhals._dask.namespace import DaskNamespace ns = DaskNamespace(backend_version=backend_version, version=version) + elif impl.is_ibis(): + from narwhals._ibis.namespace import IbisNamespace + + ns = IbisNamespace(backend_version=backend_version, version=version) else: msg = "Not supported Implementation" # pragma: no cover raise AssertionError(msg) @@ -280,6 +298,10 @@ def from_native_object( @classmethod def from_native_object(cls, native: _NativeDask, /) -> Namespace[DaskNamespace]: ... + @overload + @classmethod + def from_native_object(cls, native: _NativeIbis, /) -> Namespace[IbisNamespace]: ... + @overload @classmethod def from_native_object( @@ -312,6 +334,8 @@ def from_native_object( # noqa: PLR0911 return cls.from_backend(Implementation.CUDF) elif is_native_modin(native): # pragma: no cover return cls.from_backend(Implementation.MODIN) + elif is_native_ibis(native): + return cls.from_backend(Implementation.IBIS) else: msg = f"Unsupported type: {type(native).__qualname__!r}" raise TypeError(msg) @@ -367,3 +391,7 @@ def is_native_spark_like(obj: Any) -> TypeIs[_NativeSparkLike]: or is_native_pyspark(obj) or is_native_pyspark_connect(obj) ) + + +def is_native_ibis(obj: Any) -> TypeIs[_NativeIbis]: + return is_ibis_table(obj) diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 56ed0ef6a8..bbf4e99237 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -208,6 +208,10 @@ def _with_window_function(self, window_function: WindowFunction) -> Self: result._window_function = window_function return result + @classmethod + def _alias_native(cls, expr: Column, name: str) -> Column: + return expr.alias(name) + def _cum_window_func( self, *, diff --git a/narwhals/functions.py b/narwhals/functions.py index 9cb91646fa..132661c8f0 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -773,6 +773,7 @@ def _scan_csv_impl( Implementation.CUDF, Implementation.DASK, 
Implementation.DUCKDB, + Implementation.IBIS, }: native_frame = native_namespace.read_csv(source, **kwargs) elif implementation is Implementation.PYARROW: @@ -869,6 +870,7 @@ def _read_parquet_impl( Implementation.MODIN, Implementation.CUDF, Implementation.DUCKDB, + Implementation.IBIS, }: native_frame = native_namespace.read_parquet(source, **kwargs) elif implementation is Implementation.PYARROW: @@ -981,6 +983,7 @@ def _scan_parquet_impl( Implementation.CUDF, Implementation.DASK, Implementation.DUCKDB, + Implementation.IBIS, }: native_frame = native_namespace.read_parquet(source, **kwargs) elif implementation is Implementation.PYARROW: diff --git a/narwhals/translate.py b/narwhals/translate.py index 1664a9a8d0..898366db86 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -537,32 +537,16 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 ) # Ibis - elif is_ibis_table(native_object): # pragma: no cover - from narwhals._ibis.dataframe import IbisLazyFrame - - if eager_only or series_only: + elif is_ibis_table(native_object): + if eager_only or series_only: # pragma: no cover if not pass_through: - msg = ( - "Cannot only use `series_only=True` or `eager_only=False` " - "with Ibis table" - ) + msg = "Cannot only use `series_only=True` or `eager_only=False` with ibis.Table" raise TypeError(msg) return native_object - import ibis # ignore-banned-import - - backend_version = parse_version(ibis) - if version is Version.V1: - return DataFrame( - IbisLazyFrame( - native_object, backend_version=backend_version, version=version - ), - level="interchange", - ) - return LazyFrame( - IbisLazyFrame( - native_object, backend_version=backend_version, version=version - ), - level="lazy", + return ( + version.namespace.from_native_object(native_object) + .compliant.from_native(native_object) + .to_narwhals() ) # PySpark diff --git a/narwhals/utils.py b/narwhals/utils.py index 395eb38697..4f3963342e 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py 
@@ -387,6 +387,11 @@ def to_native_namespace(self) -> ModuleType: # noqa: C901, PLR0911 return sqlframe + if self is Implementation.IBIS: + import ibis # ignore-banned-import + + return ibis + if self is Implementation.PYSPARK_CONNECT: # pragma: no cover import pyspark.sql.connect # ignore-banned-import diff --git a/pyproject.toml b/pyproject.toml index 138c4bf829..1a644577a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ typing = [ "sqlframe", "polars==1.25.2", "uv", + "narwhals[ibis]", ] docs = [ "black", # required by mkdocstrings_handlers @@ -258,10 +259,7 @@ omit = [ 'narwhals/_arrow/typing.py', 'narwhals/_duckdb/typing.py', 'narwhals/_spark_like/typing.py', - # we can't run this in every environment that we measure coverage on due to upper-bound constraits - 'narwhals/_ibis/*', - # we don't (yet) run these in every environment - 'tests/ibis_test.py', + 'narwhals/_ibis/typing.py', # Remove after finishing eager sub-protocols 'narwhals/_compliant/namespace.py', ] @@ -295,6 +293,7 @@ module = [ "dask.*", "dask_expr.*", "duckdb.*", + # https://github.com/ibis-project/ibis/issues/6844 "ibis.*", "joblib.*", "modin.*", diff --git a/tests/conftest.py b/tests/conftest.py index a7d1dc850f..f912786b63 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,9 @@ import os import sys +import uuid from copy import deepcopy +from functools import lru_cache from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -16,9 +18,11 @@ if TYPE_CHECKING: import duckdb + import ibis import pandas as pd import polars as pl import pyarrow as pa + from ibis.backends.duckdb import Backend as IbisDuckDBBackend from pyspark.sql import DataFrame as PySparkDataFrame from typing_extensions import TypeAlias @@ -41,7 +45,9 @@ ): # pragma: no cover DEFAULT_CONSTRUCTORS = default_constructors else: - DEFAULT_CONSTRUCTORS = "pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe" + DEFAULT_CONSTRUCTORS = ( + 
"pandas,pandas[pyarrow],polars[eager],pyarrow,duckdb,sqlframe,ibis" + ) def pytest_addoption(parser: Any) -> None: @@ -217,6 +223,22 @@ def sqlframe_pyspark_lazy_constructor(obj: Data) -> SQLFrameDataFrame: # pragma return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()]) +@lru_cache(maxsize=1) +def _ibis_backend() -> IbisDuckDBBackend: # pragma: no cover + """Cached (singleton) in-memory backend to ensure all tables exist within the same in-memory database.""" + import ibis + + return ibis.duckdb.connect() + + +def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover + import polars as pl + + ldf = pl.from_dict(obj).lazy() + table_name = str(uuid.uuid4()) + return _ibis_backend().create_table(table_name, ldf) + + EAGER_CONSTRUCTORS: dict[str, ConstructorEager] = { "pandas": pandas_constructor, "pandas[nullable]": pandas_nullable_constructor, @@ -233,6 +255,7 @@ def sqlframe_pyspark_lazy_constructor(obj: Data) -> SQLFrameDataFrame: # pragma "duckdb": duckdb_lazy_constructor, "pyspark": pyspark_lazy_constructor, # type: ignore[dict-item] "sqlframe": sqlframe_pyspark_lazy_constructor, + "ibis": ibis_lazy_constructor, } GPU_CONSTRUCTORS: dict[str, ConstructorEager] = {"cudf": cudf_constructor} diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 2208f63f28..019d03e243 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -60,6 +60,7 @@ SPARK_LIKE_INCOMPATIBLE_COLUMNS = {"e", "f", "g", "h", "o", "p"} DUCKDB_INCOMPATIBLE_COLUMNS = {"l", "o", "p"} +IBIS_INCOMPATIBLE_COLUMNS = {"o"} @pytest.mark.filterwarnings("ignore:casting period[M] values to int64:FutureWarning") @@ -79,6 +80,8 @@ def test_cast( incompatible_columns = SPARK_LIKE_INCOMPATIBLE_COLUMNS # pragma: no cover elif "duckdb" in str(constructor): incompatible_columns = DUCKDB_INCOMPATIBLE_COLUMNS # pragma: no cover + elif "ibis" in str(constructor): + incompatible_columns = IBIS_INCOMPATIBLE_COLUMNS # 
pragma: no cover else: incompatible_columns = set() @@ -190,6 +193,8 @@ def test_cast_raises_for_unknown_dtype( if "pyspark" in str(constructor): incompatible_columns = SPARK_LIKE_INCOMPATIBLE_COLUMNS # pragma: no cover + elif "ibis" in str(constructor): + incompatible_columns = IBIS_INCOMPATIBLE_COLUMNS # pragma: no cover else: incompatible_columns = set() @@ -216,6 +221,7 @@ def test_cast_datetime_tz_aware( or "cudf" in str(constructor) # https://github.com/rapidsai/cudf/issues/16973 or ("pyarrow_table" in str(constructor) and is_windows()) or "pyspark" in str(constructor) + or "ibis" in str(constructor) ): request.applymarker(pytest.mark.xfail) diff --git a/tests/expr_and_series/cum_prod_test.py b/tests/expr_and_series/cum_prod_test.py index fafa34fefe..4cdabe04ed 100644 --- a/tests/expr_and_series/cum_prod_test.py +++ b/tests/expr_and_series/cum_prod_test.py @@ -88,6 +88,9 @@ def test_lazy_cum_prod_grouped( if "cudf" in str(constructor): # https://github.com/rapidsai/cudf/issues/18159 request.applymarker(pytest.mark.xfail) + if "ibis" in str(constructor): + # https://github.com/ibis-project/ibis/issues/10542 + request.applymarker(pytest.mark.xfail) df = nw.from_native( constructor( diff --git a/tests/expr_and_series/drop_nulls_test.py b/tests/expr_and_series/drop_nulls_test.py index 766bbdcbf1..3f8a54a895 100644 --- a/tests/expr_and_series/drop_nulls_test.py +++ b/tests/expr_and_series/drop_nulls_test.py @@ -34,7 +34,7 @@ def test_drop_nulls(constructor_eager: ConstructorEager) -> None: def test_drop_nulls_agg(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) data = { "A": [1, 2, None, 4], diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py index 7a851f365c..16cb565efd 100644 --- 
a/tests/expr_and_series/dt/convert_time_zone_test.py +++ b/tests/expr_and_series/dt/convert_time_zone_test.py @@ -29,7 +29,7 @@ def test_convert_time_zone( or ("modin_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1)) ): pytest.skip() - if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark")): + if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) data = { "a": [ @@ -89,7 +89,7 @@ def test_convert_time_zone_from_none( or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) ): pytest.skip() - if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark")): + if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 7): # polars used to disallow this diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index 3625ccb466..dd892f0e85 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -49,6 +49,8 @@ def test_datetime_attributes( request.applymarker(pytest.mark.xfail) if attribute == "date" and "cudf" in str(constructor): request.applymarker(pytest.mark.xfail) + if attribute == "nanosecond" and "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a").dt, attribute)()) diff --git a/tests/expr_and_series/dt/datetime_duration_test.py b/tests/expr_and_series/dt/datetime_duration_test.py index 972f32640b..39d2634a72 100644 --- a/tests/expr_and_series/dt/datetime_duration_test.py +++ b/tests/expr_and_series/dt/datetime_duration_test.py @@ -47,7 +47,7 @@ def test_duration_attributes( ) -> None: if PANDAS_VERSION < (2, 2) and "pandas_pyarrow" in str(constructor): pytest.skip() - if "pyspark" in str(constructor): + if "pyspark" in 
str(constructor) or "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) if "duckdb" in str(constructor) and attribute == "total_nanoseconds": request.applymarker(pytest.mark.xfail) diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py index 7927930226..a2c7b64025 100644 --- a/tests/expr_and_series/dt/replace_time_zone_test.py +++ b/tests/expr_and_series/dt/replace_time_zone_test.py @@ -27,7 +27,7 @@ def test_replace_time_zone( or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) ): pytest.skip() - if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark")): + if any(x in str(constructor) for x in ("cudf", "duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) data = { "a": [ @@ -56,7 +56,7 @@ def test_replace_time_zone_none( or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) ): pytest.skip() - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) data = { "a": [ diff --git a/tests/expr_and_series/dt/timestamp_test.py b/tests/expr_and_series/dt/timestamp_test.py index 1ce7e2d9bb..07ef6b0be7 100644 --- a/tests/expr_and_series/dt/timestamp_test.py +++ b/tests/expr_and_series/dt/timestamp_test.py @@ -54,7 +54,7 @@ def test_timestamp_datetimes( time_unit: Literal["ns", "us", "ms"], expected: list[int | None], ) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker( pytest.mark.xfail(reason="Backend timestamp conversion not yet implemented") ) @@ -98,7 +98,7 @@ def test_timestamp_datetimes_tz_aware( time_unit: Literal["ns", "us", "ms"], expected: list[int | None], ) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): 
request.applymarker( pytest.mark.xfail(reason="Backend timestamp conversion not yet implemented") ) @@ -157,7 +157,7 @@ def test_timestamp_dates( time_unit: Literal["ns", "us", "ms"], expected: list[int | None], ) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker( pytest.mark.xfail(reason="Backend timestamp conversion not yet implemented") ) @@ -184,7 +184,7 @@ def test_timestamp_dates( def test_timestamp_invalid_date( request: pytest.FixtureRequest, constructor: Constructor ) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker( pytest.mark.xfail(reason="Backend timestamp conversion not yet implemented") ) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index 8413604216..c742bf7414 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -141,7 +141,11 @@ def test_dt_to_string_iso_local_datetime_expr( expected: str, request: pytest.FixtureRequest, ) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = constructor({"a": [data]}) diff --git a/tests/expr_and_series/dt/truncate_test.py b/tests/expr_and_series/dt/truncate_test.py index c3c6eefcf9..5a026efafb 100644 --- a/tests/expr_and_series/dt/truncate_test.py +++ b/tests/expr_and_series/dt/truncate_test.py @@ -105,7 +105,7 @@ def test_truncate( pytest.mark.xfail(reason="https://github.com/eakmanrq/sqlframe/issues/383") ) if every.endswith("ns") and any( - x in str(constructor) for x in ("polars", "duckdb", "pyspark") + x in str(constructor) for x in ("polars", "duckdb", "pyspark", "ibis") ): 
request.applymarker(pytest.mark.xfail()) if any(every.endswith(x) for x in ("mo", "q", "y")) and any( @@ -195,7 +195,9 @@ def test_truncate_multiples( request.applymarker( pytest.mark.xfail(reason="https://github.com/eakmanrq/sqlframe/issues/383") ) - if every.endswith("ns") and any(x in str(constructor) for x in ("polars", "duckdb")): + if every.endswith("ns") and any( + x in str(constructor) for x in ("polars", "duckdb", "ibis") + ): request.applymarker(pytest.mark.xfail()) if "cudf" in str(constructor): # https://github.com/rapidsai/cudf/issues/18654 @@ -204,6 +206,10 @@ def test_truncate_multiples( x in str(constructor) for x in ("dask",) ): request.applymarker(pytest.mark.xfail(reason="Not implemented")) + if any(every.endswith(x) for x in ("q",)) and any( + x in str(constructor) for x in ("ibis",) + ): + request.applymarker(pytest.mark.xfail(reason="Not implemented")) request.applymarker( pytest.mark.xfail( "pyspark" in str(constructor), diff --git a/tests/expr_and_series/fill_null_test.py b/tests/expr_and_series/fill_null_test.py index 173c35cee8..f2d3044c19 100644 --- a/tests/expr_and_series/fill_null_test.py +++ b/tests/expr_and_series/fill_null_test.py @@ -66,12 +66,17 @@ def test_fill_null_exceptions(constructor: Constructor) -> None: df.with_columns(nw.col("a").fill_null(strategy="invalid")) # type: ignore # noqa: PGH003 -def test_fill_null_strategies_with_limit_as_none(constructor: Constructor) -> None: +def test_fill_null_strategies_with_limit_as_none( + constructor: Constructor, request: pytest.FixtureRequest +) -> None: if ("duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)) or ( "polars" in str(constructor) and POLARS_VERSION < (1, 10) ): pytest.skip() + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) + data_limits = { "a": [1, None, None, None, 5, 6, None, None, None, 10], "b": ["a", None, None, None, "b", "c", None, None, None, "d"], @@ -148,12 +153,17 @@ def test_fill_null_strategies_with_limit_as_none(constructor: 
Constructor) -> No assert_equal_data(result_backward, expected_backward) -def test_fill_null_limits(constructor: Constructor) -> None: +def test_fill_null_limits( + constructor: Constructor, request: pytest.FixtureRequest +) -> None: if ("duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)) or ( "polars" in str(constructor) and POLARS_VERSION < (1, 10) ): pytest.skip() + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) + context: Any = ( pytest.raises(NotImplementedError, match="The limit keyword is not supported") if "cudf" in str(constructor) @@ -371,7 +381,7 @@ def test_fill_null_series_exceptions(constructor_eager: ConstructorEager) -> Non def test_fill_null_strategies_with_partition_by( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if any(x in str(constructor) for x in ("pyarrow_table", "dask")): + if any(x in str(constructor) for x in ("pyarrow_table", "dask", "ibis")): request.applymarker(pytest.mark.xfail) if ("duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)) or ( diff --git a/tests/expr_and_series/is_finite_test.py b/tests/expr_and_series/is_finite_test.py index 80fe8f9d34..1e371d5398 100644 --- a/tests/expr_and_series/is_finite_test.py +++ b/tests/expr_and_series/is_finite_test.py @@ -13,7 +13,8 @@ @pytest.mark.filterwarnings("ignore:invalid value encountered in cast") def test_is_finite_expr(constructor: Constructor) -> None: if any( - x in str(constructor) for x in ("polars", "pyarrow_table", "duckdb", "pyspark") + x in str(constructor) + for x in ("polars", "pyarrow_table", "duckdb", "pyspark", "ibis") ): expected = {"a": [False, False, True, None]} elif any( diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py index e3eb7dded9..cacf95496a 100644 --- a/tests/expr_and_series/is_nan_test.py +++ b/tests/expr_and_series/is_nan_test.py @@ -94,7 +94,11 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None: def test_nan_non_float(constructor: 
Constructor, request: pytest.FixtureRequest) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) from pyarrow.lib import ArrowNotImplementedError diff --git a/tests/expr_and_series/len_test.py b/tests/expr_and_series/len_test.py index 71a7b55719..1283d7a02e 100644 --- a/tests/expr_and_series/len_test.py +++ b/tests/expr_and_series/len_test.py @@ -7,11 +7,12 @@ def test_len_no_filter(constructor: Constructor) -> None: - data = {"a": list("xyz"), "b": [1, 2, 1]} - expected = {"l": [3], "l2": [6]} + data = {"a": list("xyz"), "b": [1, 2, None]} + expected = {"l": [3], "l2": [6], "l3": [3]} df = nw.from_native(constructor(data)).select( nw.col("a").len().alias("l"), (nw.col("a").len() * 2).alias("l2"), + nw.col("b").len().alias("l3"), ) assert_equal_data(df, expected) diff --git a/tests/expr_and_series/median_test.py b/tests/expr_and_series/median_test.py index 4b99ffe469..dadc5a262e 100644 --- a/tests/expr_and_series/median_test.py +++ b/tests/expr_and_series/median_test.py @@ -43,7 +43,11 @@ def test_median_series( def test_median_expr_raises_on_str( constructor: Constructor, expr: nw.Expr, request: pytest.FixtureRequest ) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) diff --git a/tests/expr_and_series/quantile_test.py b/tests/expr_and_series/quantile_test.py index fc2474d854..af29d32499 100644 --- a/tests/expr_and_series/quantile_test.py +++ b/tests/expr_and_series/quantile_test.py @@ -29,7 +29,7 @@ def test_quantile_expr( request: pytest.FixtureRequest, ) -> None: if ( - any(x in str(constructor) for x in ("dask", "duckdb")) + any(x in str(constructor) for x in ("dask", 
"duckdb", "ibis")) and interpolation != "linear" ) or "pyspark" in str(constructor): request.applymarker(pytest.mark.xfail) diff --git a/tests/expr_and_series/reduction_test.py b/tests/expr_and_series/reduction_test.py index 8908c536ca..9b4e7ec6da 100644 --- a/tests/expr_and_series/reduction_test.py +++ b/tests/expr_and_series/reduction_test.py @@ -73,7 +73,11 @@ def test_empty_scalar_reduction_select( constructor: Constructor, request: pytest.FixtureRequest ) -> None: # pyspark doesn't necessarely fails, but returns all None's - if "pyspark" in str(constructor) or "duckdb" in str(constructor): + if ( + "pyspark" in str(constructor) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) data = { "str": [*"abcde"], diff --git a/tests/expr_and_series/replace_strict_test.py b/tests/expr_and_series/replace_strict_test.py index 72f5478632..71e81ff7c1 100644 --- a/tests/expr_and_series/replace_strict_test.py +++ b/tests/expr_and_series/replace_strict_test.py @@ -24,7 +24,11 @@ def test_replace_strict( ) -> None: if "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [1, 2, 3]})) result = df.select( @@ -59,7 +63,11 @@ def test_replace_non_full( ) -> None: if "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [1, 2, 3]})) if isinstance(df, nw.LazyFrame): @@ -80,7 +88,11 @@ def test_replace_strict_mapping( ) -> None: if "dask" in str(constructor): 
request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [1, 2, 3]})) diff --git a/tests/expr_and_series/str/replace_test.py b/tests/expr_and_series/str/replace_test.py index d7774beb9a..a600bff97c 100644 --- a/tests/expr_and_series/str/replace_test.py +++ b/tests/expr_and_series/str/replace_test.py @@ -101,7 +101,11 @@ def test_str_replace_expr( literal: bool, # noqa: FBT001 expected: dict[str, list[str]], ) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result_df = df.select( diff --git a/tests/expr_and_series/str/strip_chars_test.py b/tests/expr_and_series/str/strip_chars_test.py index 4ca3fada57..117280772e 100644 --- a/tests/expr_and_series/str/strip_chars_test.py +++ b/tests/expr_and_series/str/strip_chars_test.py @@ -21,9 +21,13 @@ ) def test_str_strip_chars( constructor: Constructor, + request: pytest.FixtureRequest, characters: str | None, expected: Any, ) -> None: + if "ibis" in str(constructor) and characters is not None: + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) result_frame = df.select(nw.col("a").str.strip_chars(characters)) assert_equal_data(result_frame, expected) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index f25a9c9e4a..60452277b9 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -90,6 +90,7 @@ def test_to_datetime_infer_fmt( ("polars" in str(constructor) and str(data["a"][0]).isdigit()) or "duckdb" in str(constructor) or 
("pyspark" in str(constructor) and data["a"][0] == "20240101123456") + or "ibis" in str(constructor) ): request.applymarker(pytest.mark.xfail) @@ -149,7 +150,7 @@ def test_to_datetime_series_infer_fmt( def test_to_datetime_infer_fmt_from_date( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if "duckdb" in str(constructor): + if "duckdb" in str(constructor) or "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) data = {"z": ["2020-01-01", "2020-01-02", None]} if "pyspark" in str(constructor): @@ -221,7 +222,7 @@ def test_to_datetime_tz_aware( request.applymarker(pytest.mark.xfail) context = ( pytest.raises(NotImplementedError) - if any(x in str(constructor) for x in ("duckdb",)) and format is None + if any(x in str(constructor) for x in ("duckdb", "ibis")) and format is None else does_not_raise() ) df = nw.from_native(constructor({"a": ["2020-01-01T01:02:03+0100"]})) diff --git a/tests/expr_and_series/unary_test.py b/tests/expr_and_series/unary_test.py index de8a67e884..52e52a272d 100644 --- a/tests/expr_and_series/unary_test.py +++ b/tests/expr_and_series/unary_test.py @@ -10,7 +10,10 @@ from tests.utils import assert_equal_data -def test_unary(constructor: Constructor) -> None: +def test_unary(constructor: Constructor, request: pytest.FixtureRequest) -> None: + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) + data = { "a": [1, 3, 2], "b": [4, 4, 6], @@ -77,7 +80,11 @@ def test_unary_series(constructor_eager: ConstructorEager) -> None: assert_equal_data(result, expected) -def test_unary_two_elements(constructor: Constructor) -> None: +def test_unary_two_elements( + constructor: Constructor, request: pytest.FixtureRequest +) -> None: + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) data = {"a": [1, 2], "b": [2, 10], "c": [2.0, None]} result = nw.from_native(constructor(data)).select( a_nunique=nw.col("a").n_unique(), @@ -125,6 +132,8 @@ def test_unary_one_element( ) -> None: if 
"pyspark" in str(constructor) and "sqlframe" not in str(constructor): request.applymarker(pytest.mark.xfail) + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) data = {"a": [1], "b": [2], "c": [None]} # Dask runs into a divide by zero RuntimeWarning for 1 element skew. context = ( @@ -132,6 +141,7 @@ def test_unary_one_element( if "dask" in str(constructor) else does_not_raise() ) + result = ( nw.from_native(constructor(data)) .with_columns(nw.col("c").cast(nw.Float64)) diff --git a/tests/expr_and_series/unique_test.py b/tests/expr_and_series/unique_test.py index 30b8b816b9..66378b517b 100644 --- a/tests/expr_and_series/unique_test.py +++ b/tests/expr_and_series/unique_test.py @@ -32,7 +32,7 @@ def test_unique_expr(constructor: Constructor) -> None: def test_unique_expr_agg( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor) for x in ("duckdb", "pyspark", "ibis")): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(nw.col("a").unique().sum()) diff --git a/tests/frame/concat_test.py b/tests/frame/concat_test.py index d734fc0495..de96d432e1 100644 --- a/tests/frame/concat_test.py +++ b/tests/frame/concat_test.py @@ -66,7 +66,7 @@ def test_concat_vertical(constructor: Constructor) -> None: def test_concat_diagonal( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if "duckdb" in str(constructor): + if "duckdb" in str(constructor) or "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) data_1 = {"a": [1, 3], "b": [4, 6]} data_2 = {"a": [100, 200], "z": ["x", "y"]} diff --git a/tests/frame/explode_test.py b/tests/frame/explode_test.py index e4f548ff9f..1be3d11962 100644 --- a/tests/frame/explode_test.py +++ b/tests/frame/explode_test.py @@ -88,7 +88,15 @@ def test_explode_multiple_cols( ) -> None: if any( backend in str(constructor) - for backend in ("dask", 
"modin", "cudf", "pyarrow_table", "duckdb", "pyspark") + for backend in ( + "dask", + "modin", + "cudf", + "pyarrow_table", + "duckdb", + "pyspark", + "ibis", + ) ): request.applymarker(pytest.mark.xfail) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 761876db94..43b5e1c4ba 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -101,7 +101,12 @@ def test_full_join( assert_equal_data(result, expected) -def test_full_join_duplicate(constructor: Constructor) -> None: +def test_full_join_duplicate( + request: pytest.FixtureRequest, constructor: Constructor +) -> None: + if "ibis" in str(constructor): + request.applymarker(pytest.mark.xfail) + df1 = {"foo": [1, 2, 3], "val1": [1, 2, 3]} df2 = {"foo": [1, 2, 3], "foo_right": [1, 2, 3]} df_left = nw.from_native(constructor(df1)).lazy() @@ -506,7 +511,9 @@ def test_joinasof_numeric( ) -> None: if any(x in str(constructor) for x in ("pyarrow_table", "cudf", "pyspark")): request.applymarker(pytest.mark.xfail) - if "duckdb" in str(constructor) and strategy == "nearest": + if ( + "duckdb" in str(constructor) or "ibis" in str(constructor) + ) and strategy == "nearest": request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) @@ -578,7 +585,9 @@ def test_joinasof_time( ) -> None: if any(x in str(constructor) for x in ("pyarrow_table", "cudf", "pyspark")): request.applymarker(pytest.mark.xfail) - if "duckdb" in str(constructor) and strategy == "nearest": + if ( + "duckdb" in str(constructor) or "ibis" in str(constructor) + ) and strategy == "nearest": request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) @@ -811,6 +820,9 @@ def test_join_duplicate_column_names( exception = AnalysisException elif "modin" in str(constructor): exception = NotImplementedError + elif "ibis" in str(constructor): + # ibis 
doesn't raise here + request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) diff --git a/tests/frame/select_test.py b/tests/frame/select_test.py index 010fd7bfe3..fd1e8fd8d5 100644 --- a/tests/frame/select_test.py +++ b/tests/frame/select_test.py @@ -86,7 +86,11 @@ def test_comparison_with_list_error_message() -> None: def test_missing_columns( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} df = nw.from_native(constructor(data)) @@ -165,7 +169,10 @@ def test_select_duplicates(constructor: Constructor) -> None: # cudf already raises its own error pytest.skip() df = nw.from_native(constructor({"a": [1, 2]})).lazy() - with pytest.raises(ValueError, match="Expected unique|[Dd]uplicate|more than one"): + with pytest.raises( + ValueError, + match="Expected unique|[Dd]uplicate|more than one|Duplicate column name", + ): df.select("a", nw.col("a") + 1).collect() diff --git a/tests/frame/with_columns_test.py b/tests/frame/with_columns_test.py index 4fcbfb4015..0519781802 100644 --- a/tests/frame/with_columns_test.py +++ b/tests/frame/with_columns_test.py @@ -63,7 +63,11 @@ def test_with_columns_dtypes_single_row( ) -> None: if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (15,): pytest.skip() - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) data = {"a": ["foo"]} df = nw.from_native(constructor(data)).with_columns(nw.col("a").cast(nw.Categorical)) diff --git a/tests/frame/with_row_index_test.py 
b/tests/frame/with_row_index_test.py index ecc195a5c4..10b0812493 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -13,7 +13,11 @@ def test_with_row_index(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) result = nw.from_native(constructor(data)).with_row_index() expected = {"index": [0, 1], "a": ["foo", "bars"], "ab": ["foo", "bars"]} diff --git a/tests/group_by_test.py b/tests/group_by_test.py index d64b3e386d..9af0692d00 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -335,8 +335,12 @@ def test_no_agg(constructor: Constructor) -> None: def test_group_by_categorical( constructor: Constructor, ) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - pytest.skip(reason="DuckDB and PySpark do not support categorical types") + if ( + ("pyspark" in str(constructor)) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): + pytest.skip(reason="DuckDB, PySpark, and Ibis do not support categorical types") if "pyarrow_table" in str(constructor) and PYARROW_VERSION < ( 15, ): # pragma: no cover diff --git a/tests/selectors_test.py b/tests/selectors_test.py index afb05feaa9..9588859cf5 100644 --- a/tests/selectors_test.py +++ b/tests/selectors_test.py @@ -77,7 +77,11 @@ def test_categorical( 15, ): # pragma: no cover request.applymarker(pytest.mark.xfail) - if "pyspark" in str(constructor) or "duckdb" in str(constructor): + if ( + "pyspark" in str(constructor) + or "duckdb" in str(constructor) + or "ibis" in str(constructor) + ): request.applymarker(pytest.mark.xfail) expected = {"b": ["a", "b", "c"]} @@ -94,6 +98,7 @@ def test_datetime(constructor: Constructor, request: pytest.FixtureRequest) -> N or ("pyarrow_table" in str(constructor) 
and PYARROW_VERSION < (12,)) or ("pyarrow" in str(constructor) and is_windows()) or ("pandas" in str(constructor) and PANDAS_VERSION < (2,)) + or "ibis" in str(constructor) ): request.applymarker(pytest.mark.xfail) if "modin" in str(constructor): @@ -223,7 +228,10 @@ def test_set_ops( expected: list[str], request: pytest.FixtureRequest, ) -> None: - if ("duckdb" in str(constructor) or "sqlframe" in str(constructor)) and not expected: + if ( + any(x in str(constructor) for x in ("duckdb", "sqlframe", "ibis")) + and not expected + ): # https://github.com/narwhals-dev/narwhals/issues/2469 request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -267,7 +275,11 @@ def test_tz_aware(constructor: Constructor, request: pytest.FixtureRequest) -> N if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,): # bug in old pyarrow pytest.skip() - if "duckdb" in str(constructor) or "pyspark" in str(constructor): + if ( + "duckdb" in str(constructor) + or "pyspark" in str(constructor) + or "ibis" in str(constructor) + ): # replace_time_zone not implemented request.applymarker(pytest.mark.xfail) diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index aba17a8b35..3e77c51a05 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -120,7 +120,7 @@ def test_cast_to_enum_vmain( # Backends that do not (yet) support Enum dtype if any( backend in str(constructor) - for backend in ["pyarrow_table", "sqlframe", "pyspark", "modin"] + for backend in ["pyarrow_table", "sqlframe", "pyspark", "modin", "ibis"] ): request.applymarker(pytest.mark.xfail) diff --git a/tests/utils.py b/tests/utils.py index 4469dcf0fe..28adf58f9b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -77,8 +77,14 @@ def assert_equal_data(result: Any, expected: Mapping[str, Any]) -> None: hasattr(result, "_compliant_frame") and result._compliant_frame._implementation is Implementation.DUCKDB ) + is_ibis = ( + hasattr(result, 
"_compliant_frame") + and result._compliant_frame._implementation is Implementation.IBIS + ) if is_duckdb: result = from_native(result.to_native().arrow()) + if is_ibis: + result = from_native(result.to_native().to_pyarrow()) if hasattr(result, "collect"): kwargs: dict[Implementation, dict[str, Any]] = {Implementation.POLARS: {}} diff --git a/tests/v1_test.py b/tests/v1_test.py index 3dc9fb7710..f20d0288d8 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -209,7 +209,7 @@ def test_cast_to_enum_v1( if ( any( backend in str(constructor) - for backend in ["pyarrow_table", "sqlframe", "pyspark"] + for backend in ["pyarrow_table", "sqlframe", "pyspark", "ibis"] ) or str(constructor) == "modin" ): diff --git a/utils/import_check.py b/utils/import_check.py index bac54aff79..9895f5003a 100644 --- a/utils/import_check.py +++ b/utils/import_check.py @@ -24,6 +24,7 @@ "_dask": {"dask.dataframe", "pandas", "dask_expr"}, "_polars": {"polars"}, "_duckdb": {"duckdb"}, + "_ibis": {"ibis", "ibis._", "ibis.expr.types"}, }