diff --git a/docs/api-reference/dtypes.md b/docs/api-reference/dtypes.md
index 52c064f982..0bb289ea98 100644
--- a/docs/api-reference/dtypes.md
+++ b/docs/api-reference/dtypes.md
@@ -5,6 +5,7 @@
     options:
       members:
         - Array
+        - Decimal
         - List
         - Int128
         - Int64
diff --git a/narwhals/__init__.py b/narwhals/__init__.py
index bbde49c036..a744fc62a7 100644
--- a/narwhals/__init__.py
+++ b/narwhals/__init__.py
@@ -12,6 +12,7 @@
 from narwhals.dtypes import Categorical
 from narwhals.dtypes import Date
 from narwhals.dtypes import Datetime
+from narwhals.dtypes import Decimal
 from narwhals.dtypes import Duration
 from narwhals.dtypes import Enum
 from narwhals.dtypes import Field
@@ -85,6 +86,7 @@
     "DataFrame",
     "Date",
     "Datetime",
+    "Decimal",
     "Duration",
     "Enum",
     "Expr",
diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 2017867234..1742fd1993 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -78,6 +78,8 @@ def native_to_narwhals_dtype(dtype: pa.DataType, version: Version) -> DType:
         return dtypes.Array(
             native_to_narwhals_dtype(dtype.value_type, version), dtype.list_size
         )
+    if pa.types.is_decimal(dtype):
+        return dtypes.Decimal()
     return dtypes.Unknown()  # pragma: no cover
 
 
diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
index 3df2b9cc08..fe4a5856ef 100644
--- a/narwhals/_duckdb/dataframe.py
+++ b/narwhals/_duckdb/dataframe.py
@@ -76,6 +76,8 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType:
             native_to_narwhals_dtype(match_.group(1), version),
             int(match_.group(2)),
         )
+    if duckdb_dtype.startswith("DECIMAL("):
+        return dtypes.Decimal()
     return dtypes.Unknown()  # pragma: no cover
 
 
diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py
index 62c5f7a187..fe9bb0349c 100644
--- a/narwhals/_ibis/dataframe.py
+++ b/narwhals/_ibis/dataframe.py
@@ -62,6 +62,9 @@ def native_to_narwhals_dtype(ibis_dtype: Any, version: Version) -> DType:
                 for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items()
             ]
         )
+    if ibis_dtype.is_decimal():  # pragma: no cover
+        # TODO(unassigned): cover this
+        return dtypes.Decimal()
     return dtypes.Unknown()  # pragma: no cover
 
 
diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py
index 517ba3d37e..4076a6b886 100644
--- a/narwhals/_pandas_like/utils.py
+++ b/narwhals/_pandas_like/utils.py
@@ -399,6 +399,8 @@ def non_object_native_to_narwhals_dtype(
         return dtypes.Duration(du_time_unit)
     if dtype == "date32[day][pyarrow]":
         return dtypes.Date()
+    if dtype.startswith("decimal") and dtype.endswith("[pyarrow]"):
+        return dtypes.Decimal()
     return dtypes.Unknown()  # pragma: no cover
 
 
diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py
index 020a063a39..0e445a4f15 100644
--- a/narwhals/_polars/utils.py
+++ b/narwhals/_polars/utils.py
@@ -144,6 +144,8 @@ def native_to_narwhals_dtype(
             native_to_narwhals_dtype(dtype.inner, version, backend_version),  # type: ignore[attr-defined]
             dtype.size,  # type: ignore[attr-defined]
         )
+    if dtype == pl.Decimal:
+        return dtypes.Decimal()
     return dtypes.Unknown()
 
 
diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py
index d3c646a9c9..0325224756 100644
--- a/narwhals/_spark_like/utils.py
+++ b/narwhals/_spark_like/utils.py
@@ -52,6 +52,9 @@ def native_to_narwhals_dtype(
     ]
     if any(isinstance(dtype, t) for t in datetime_types):
         return dtypes.Datetime()
+    if isinstance(dtype, pyspark_types.DecimalType):  # pragma: no cover
+        # TODO(unassigned): cover this in dtypes_test.py
+        return dtypes.Decimal()
     return dtypes.Unknown()
 
 
diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py
index b3aaebc0ea..7461aa5db2 100644
--- a/narwhals/dtypes.py
+++ b/narwhals/dtypes.py
@@ -47,6 +47,18 @@ class NumericType(DType): ...
 class TemporalType(DType): ...
 
 
+class Decimal(NumericType):
+    """Decimal type (the native precision and scale are not retained).
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> s = pl.Series(["1.5"], dtype=pl.Decimal)
+        >>> nw.from_native(s, series_only=True).dtype
+        Decimal
+    """
+
+
 class Int128(NumericType):
     """128-bit signed integer type."""
 
diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py
index cfba9ca552..3282ccbeab 100644
--- a/narwhals/stable/v1/__init__.py
+++ b/narwhals/stable/v1/__init__.py
@@ -37,6 +37,7 @@
 from narwhals.stable.v1.dtypes import Categorical
 from narwhals.stable.v1.dtypes import Date
 from narwhals.stable.v1.dtypes import Datetime
+from narwhals.stable.v1.dtypes import Decimal
 from narwhals.stable.v1.dtypes import Duration
 from narwhals.stable.v1.dtypes import Enum
 from narwhals.stable.v1.dtypes import Field
@@ -3510,6 +3511,7 @@ def scan_csv(
     "DataFrame",
     "Date",
     "Datetime",
+    "Decimal",
     "Duration",
     "Enum",
     "Expr",
diff --git a/narwhals/stable/v1/_dtypes.py b/narwhals/stable/v1/_dtypes.py
index d38a81c523..00fe061418 100644
--- a/narwhals/stable/v1/_dtypes.py
+++ b/narwhals/stable/v1/_dtypes.py
@@ -5,6 +5,7 @@
 from narwhals.dtypes import Categorical
 from narwhals.dtypes import Date
 from narwhals.dtypes import Datetime as NwDatetime
+from narwhals.dtypes import Decimal
 from narwhals.dtypes import DType
 from narwhals.dtypes import Duration as NwDuration
 from narwhals.dtypes import Enum
@@ -111,6 +112,7 @@ def __hash__(self) -> int:
     "DType",
     "Date",
     "Datetime",
+    "Decimal",
     "Duration",
     "Enum",
     "Field",
diff --git a/narwhals/stable/v1/dtypes.py b/narwhals/stable/v1/dtypes.py
index 91f76192c6..ac35fd3cdd 100644
--- a/narwhals/stable/v1/dtypes.py
+++ b/narwhals/stable/v1/dtypes.py
@@ -5,6 +5,7 @@
 from narwhals.stable.v1._dtypes import Categorical
 from narwhals.stable.v1._dtypes import Date
 from narwhals.stable.v1._dtypes import Datetime
+from narwhals.stable.v1._dtypes import Decimal
 from narwhals.stable.v1._dtypes import DType
 from narwhals.stable.v1._dtypes import Duration
 from narwhals.stable.v1._dtypes import Enum
@@ -35,6 +36,7 @@
     "DType",
     "Date",
     "Datetime",
+    "Decimal",
     "Duration",
     "Enum",
     "Field",
diff --git a/narwhals/typing.py b/narwhals/typing.py
index fb87a20bc6..5c59296c46 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -189,6 +189,7 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ...
 
 
 class DTypes:
+    Decimal: type[dtypes.Decimal]
     Int128: type[dtypes.Int128]
     Int64: type[dtypes.Int64]
     Int32: type[dtypes.Int32]
diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py
index 4f3d4ac538..0624352c1e 100644
--- a/tests/dtypes_test.py
+++ b/tests/dtypes_test.py
@@ -201,7 +201,14 @@ def test_pandas_fixed_offset_1302() -> None:
 
 
 def test_huge_int() -> None:
-    df = pl.DataFrame({"a": [1, 2, 3]})  # noqa: F841
+    df = pl.DataFrame({"a": [1, 2, 3]})
+    if POLARS_VERSION >= (1, 18):  # pragma: no cover
+        # `df` is built from plain Python ints, so cast to Int128 explicitly.
+        result = nw.from_native(df.select(pl.col("a").cast(pl.Int128))).schema
+        assert result["a"] == nw.Int128
+    else:  # pragma: no cover
+        # Int128 was not available yet
+        pass
     rel = duckdb.sql("""
         select cast(a as int128) as a
         from df
@@ -215,5 +222,31 @@ def test_huge_int() -> None:
     result = nw.from_native(rel).schema
     assert result["a"] == nw.UInt128
 
+    if hasattr(pl, "UInt128"):  # pragma: no cover
+        # Feature-detect UInt128, then cast: `df` itself is not a UInt128 frame.
+        result = nw.from_native(df.select(pl.col("a").cast(pl.UInt128))).schema
+        assert result["a"] == nw.UInt128
+    else:  # pragma: no cover
+        # UInt128 was not available yet
+        pass
+
     # TODO(unassigned): once other libraries support Int128/UInt128,
     # add tests for them too
+
+
+@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow")
+def test_decimal() -> None:
+    """Every backend exercised here reports a decimal column as `nw.Decimal`."""
+    df = pl.DataFrame({"a": [1]}, schema={"a": pl.Decimal})
+    result = nw.from_native(df).schema
+    assert result["a"] == nw.Decimal
+    rel = duckdb.sql("""
+        select *
+        from df
+    """)
+    result = nw.from_native(rel).schema
+    assert result["a"] == nw.Decimal
+    result = nw.from_native(df.to_pandas(use_pyarrow_extension_array=True)).schema
+    assert result["a"] == nw.Decimal
+    result = nw.from_native(df.to_arrow()).schema
+    assert result["a"] == nw.Decimal