Skip to content
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
54760af
feat: add `nw.Schema.to_*` methods
dangotbanned Feb 3, 2025
e9ccaad
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 3, 2025
db5bac2
Merge branch 'main' into schema-convert-api
dangotbanned Feb 4, 2025
e72cb75
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 4, 2025
e09426d
feat: replace `native_namespace` -> `backend`
dangotbanned Feb 4, 2025
3826e1f
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 4, 2025
d1e0576
feat: adds `Schema._version`
dangotbanned Feb 4, 2025
62b6e24
revert: remove `Schema.to_native`
dangotbanned Feb 4, 2025
fe96f8c
refactor: drop `backend`, use hard imports
dangotbanned Feb 4, 2025
a575ea8
refactor: use `Schema.to_(arrow|polars)` in `from_dict`
dangotbanned Feb 4, 2025
72fc3de
refactor: use `Schema..to_pandas` in `from_dict`
dangotbanned Feb 4, 2025
82b1623
refactor: remove `version` parameter from `_from_dict_impl`
dangotbanned Feb 4, 2025
7d90555
chore: ignore banned imports
dangotbanned Feb 5, 2025
3957699
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 5, 2025
8925445
test: adds `test_schema_to_pandas`
dangotbanned Feb 5, 2025
38adce2
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 5, 2025
83949f8
test: use `[ns]` instead of `[us]`
dangotbanned Feb 5, 2025
e62221e
fix: return `dict` when `pl.Schema` unavailable
dangotbanned Feb 5, 2025
347f5ac
fix: handle unequal length case
dangotbanned Feb 5, 2025
d423a0b
docs: add docs for new methods
dangotbanned Feb 5, 2025
fd28337
docs: add "Returns" to all
dangotbanned Feb 5, 2025
bce4b20
Merge branch 'main' into schema-convert-api
dangotbanned Feb 6, 2025
4581d95
refactor: `to_pandas` -> positional or keyword
dangotbanned Feb 7, 2025
95a123a
refactor: use `Implementation.PANDAS`
dangotbanned Feb 7, 2025
47ad060
test: try removing doctest skip
dangotbanned Feb 7, 2025
99b3925
test: fix `to_polars` doctest repr
dangotbanned Feb 7, 2025
881aae0
style: rename `it` to `schema`
dangotbanned Feb 7, 2025
c86512e
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 7, 2025
9f72d7c
Merge branch 'main' into schema-convert-api
dangotbanned Feb 7, 2025
5e9beb2
match pandas dtype_backend
MarcoGorelli Feb 8, 2025
bae64af
py39 compat
MarcoGorelli Feb 8, 2025
b901957
remove Any in return from Schema.to_polars
MarcoGorelli Feb 8, 2025
bde48a8
fixup
MarcoGorelli Feb 8, 2025
0133050
missing else
MarcoGorelli Feb 8, 2025
5c26e9a
coverage
MarcoGorelli Feb 8, 2025
a2181c6
Merge remote-tracking branch 'upstream/main' into schema-convert-api
dangotbanned Feb 8, 2025
3bcebc1
refactor(typing): lie more explicitly
dangotbanned Feb 8, 2025
7b89936
chore: move pragma
dangotbanned Feb 8, 2025
10b04d3
Merge branch 'main' into schema-convert-api
dangotbanned Feb 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,14 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self:
dtype_backend = get_dtype_backend(
dtype=ser.dtype, implementation=self._implementation
)
dtype = narwhals_to_native_dtype(
pd_dtype = narwhals_to_native_dtype(
dtype,
dtype_backend=dtype_backend,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
)
return self._from_native_series(ser.astype(dtype))
return self._from_native_series(ser.astype(pd_dtype))

def item(self: Self, index: int | None) -> Any:
# cuDF doesn't have Series.item().
Expand Down
112 changes: 54 additions & 58 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
T = TypeVar("T")

if TYPE_CHECKING:
from pandas._typing import Dtype as PandasDtype

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dtypes import DType
from narwhals.typing import DTypeBackend

ExprT = TypeVar("ExprT", bound=PandasLikeExpr)

Expand Down Expand Up @@ -499,113 +502,106 @@ def native_to_narwhals_dtype(
raise AssertionError(msg)


def get_dtype_backend(dtype: Any, implementation: Implementation) -> str:
def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBackend:
"""Get dtype backend for pandas type.

Matches pandas' `dtype_backend` argument in `convert_dtypes`.
"""
if implementation in {Implementation.PANDAS, Implementation.MODIN}:
import pandas as pd

if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype):
return "pyarrow-nullable"
return "pyarrow"

with suppress(AttributeError):
if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype):
return "pandas-nullable"
return "numpy"
else: # pragma: no cover
return "numpy"
return "numpy_nullable"
return None


def narwhals_to_native_dtype( # noqa: PLR0915
dtype: DType | type[DType],
dtype_backend: str | None,
dtype_backend: DTypeBackend,
implementation: Implementation,
backend_version: tuple[int, ...],
version: Version,
) -> Any:
) -> str | PandasDtype:
if dtype_backend is not None and dtype_backend not in {"pyarrow", "numpy_nullable"}:
msg = f"Expected one of {{None, 'pyarrow', 'numpy_nullable'}}, got: '{dtype_backend}'"
raise ValueError(msg)
dtypes = import_dtypes_module(version)
if isinstance_or_issubclass(dtype, dtypes.Float64):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Float64[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Float64"
else:
return "float64"
return "float64"
if isinstance_or_issubclass(dtype, dtypes.Float32):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Float32[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Float32"
else:
return "float32"
return "float32"
if isinstance_or_issubclass(dtype, dtypes.Int64):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Int64[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Int64"
else:
return "int64"
return "int64"
if isinstance_or_issubclass(dtype, dtypes.Int32):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Int32[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Int32"
else:
return "int32"
return "int32"
if isinstance_or_issubclass(dtype, dtypes.Int16):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Int16[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Int16"
else:
return "int16"
return "int16"
if isinstance_or_issubclass(dtype, dtypes.Int8):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "Int8[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "Int8"
else:
return "int8"
return "int8"
if isinstance_or_issubclass(dtype, dtypes.UInt64):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "UInt64[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "UInt64"
else:
return "uint64"
return "uint64"
if isinstance_or_issubclass(dtype, dtypes.UInt32):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "UInt32[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "UInt32"
else:
return "uint32"
return "uint32"
if isinstance_or_issubclass(dtype, dtypes.UInt16):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "UInt16[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "UInt16"
else:
return "uint16"
return "uint16"
if isinstance_or_issubclass(dtype, dtypes.UInt8):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "UInt8[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "UInt8"
else:
return "uint8"
return "uint8"
if isinstance_or_issubclass(dtype, dtypes.String):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "string[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "string"
else:
return str
return str
if isinstance_or_issubclass(dtype, dtypes.Boolean):
if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
return "boolean[pyarrow]"
if dtype_backend == "pandas-nullable":
elif dtype_backend == "numpy_nullable":
return "boolean"
else:
return "bool"
return "bool"
if isinstance_or_issubclass(dtype, dtypes.Categorical):
# TODO(Unassigned): is there no pyarrow-backed categorical?
# or at least, convert_dtypes(dtype_backend='pyarrow') doesn't
Expand All @@ -622,7 +618,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915
): # pragma: no cover
dt_time_unit = "ns"

if dtype_backend == "pyarrow-nullable":
if dtype_backend == "pyarrow":
tz_part = f", tz={dt_time_zone}" if dt_time_zone else ""
return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]"
else:
Expand All @@ -636,7 +632,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915
dt_time_unit = "ns"
return (
f"duration[{du_time_unit}][pyarrow]"
if dtype_backend == "pyarrow-nullable"
if dtype_backend == "pyarrow"
else f"timedelta64[{du_time_unit}]"
)
if isinstance_or_issubclass(dtype, dtypes.Date):
Expand Down
68 changes: 16 additions & 52 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from narwhals.dependencies import is_numpy_array
from narwhals.exceptions import ShapeError
from narwhals.expr import Expr
from narwhals.schema import Schema
from narwhals.translate import from_native
from narwhals.translate import to_native
from narwhals.utils import Implementation
Expand All @@ -43,7 +44,6 @@
from typing_extensions import Self

from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.series import Series
from narwhals.typing import IntoDataFrameT
from narwhals.typing import IntoExpr
Expand Down Expand Up @@ -343,10 +343,11 @@ def _new_series_impl(
)

backend_version = parse_version(native_namespace.__version__)
dtype = pandas_like_narwhals_to_native_dtype(
pd_dtype = pandas_like_narwhals_to_native_dtype(
dtype, None, implementation, backend_version, version
)
native_series = native_namespace.Series(values, name=name, dtype=dtype)
native_series = native_namespace.Series(values, name=name, dtype=pd_dtype)
native_series = native_namespace.Series(values, name=name)

elif implementation is Implementation.PYARROW:
if dtype:
Expand Down Expand Up @@ -449,20 +450,14 @@ def from_dict(
backend = validate_native_namespace_and_backend(
backend, native_namespace, emit_deprecation_warning=True
)
return _from_dict_impl(
data,
schema,
backend=backend,
version=Version.MAIN,
)
return _from_dict_impl(data, schema, backend=backend)


def _from_dict_impl( # noqa: PLR0915
def _from_dict_impl(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
version: Version,
) -> DataFrame[Any]:
from narwhals.series import Series

Expand Down Expand Up @@ -494,18 +489,7 @@ def _from_dict_impl( # noqa: PLR0915
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
raise ValueError(msg)
if eager_backend is Implementation.POLARS:
if schema:
from narwhals._polars.utils import (
narwhals_to_native_dtype as polars_narwhals_to_native_dtype,
)

schema_pl = {
name: polars_narwhals_to_native_dtype(dtype, version=version)
for name, dtype in schema.items()
}
else:
schema_pl = None

schema_pl = Schema(schema).to_polars() if schema else None
native_frame = native_namespace.from_dict(data, schema=schema_pl)
elif eager_backend in {
Implementation.PANDAS,
Expand Down Expand Up @@ -535,36 +519,16 @@ def _from_dict_impl( # noqa: PLR0915

if schema:
from narwhals._pandas_like.utils import get_dtype_backend
from narwhals._pandas_like.utils import (
narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype,
)

backend_version = parse_version(native_namespace.__version__)
schema = {
name: pandas_like_narwhals_to_native_dtype(
dtype=schema[name],
dtype_backend=get_dtype_backend(native_type, eager_backend),
implementation=eager_backend,
backend_version=backend_version,
version=version,
)
for name, native_type in native_frame.dtypes.items()
}
native_frame = native_frame.astype(schema)

elif eager_backend is Implementation.PYARROW:
if schema:
from narwhals._arrow.utils import (
narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
pd_schema = Schema(schema).to_pandas(
get_dtype_backend(native_type, eager_backend)
for native_type in native_frame.dtypes
)
native_frame = native_frame.astype(pd_schema)

schema = native_namespace.schema(
[
(name, arrow_narwhals_to_native_dtype(dtype, version))
for name, dtype in schema.items()
]
)
native_frame = native_namespace.table(data, schema=schema)
elif eager_backend is Implementation.PYARROW:
pa_schema = Schema(schema).to_arrow() if schema is not None else schema
native_frame = native_namespace.table(data, schema=pa_schema)
else: # pragma: no cover
try:
# implementation is UNKNOWN, Narwhals extension using this feature should
Expand Down Expand Up @@ -772,7 +736,7 @@ def _from_numpy_impl(
)

backend_version = parse_version(native_namespace.__version__)
schema = {
pd_schema = {
name: pandas_like_narwhals_to_native_dtype(
dtype=schema[name],
dtype_backend=get_dtype_backend(native_type, implementation),
Expand All @@ -783,7 +747,7 @@ def _from_numpy_impl(
for name, native_type in schema.items()
}
native_frame = native_namespace.DataFrame(data, columns=schema.keys()).astype(
schema
pd_schema
)
elif isinstance(schema, list):
native_frame = native_namespace.DataFrame(data, columns=schema)
Expand Down
Loading
Loading