Skip to content

Commit

Permalink
fix(python): Don't trigger row limit in array construction (#19215)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Oct 13, 2024
1 parent 1997293 commit cca31b3
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 22 deletions.
4 changes: 2 additions & 2 deletions crates/polars-error/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub static FALSE: &str = "false";

#[cfg(not(feature = "python"))]
pub static LENGTH_LIMIT_MSG: &str =
"polars' maximum length reached. Consider compiling with 'bigidx' feature.";
"Polars' maximum length reached. Consider compiling with 'bigidx' feature.";
#[cfg(feature = "python")]
pub static LENGTH_LIMIT_MSG: &str =
"polars' maximum length reached. Consider installing 'polars-u64-idx'.";
"Polars' maximum length reached. Consider installing 'polars-u64-idx'.";
63 changes: 43 additions & 20 deletions py-polars/polars/_utils/construction/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Object,
Struct,
Time,
UInt32,
Unknown,
dtype_to_py_type,
is_polars_dtype,
Expand All @@ -57,9 +58,10 @@
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
from polars.dependencies import pyarrow as pa
from polars.functions.eager import concat

with contextlib.suppress(ImportError): # Module not available when building docs
from polars.polars import PySeries
from polars.polars import PySeries, get_index_type

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
Expand Down Expand Up @@ -454,27 +456,48 @@ def numpy_to_pyseries(
return constructor(
name, values, nan_to_null if dtype in (np.float32, np.float64) else strict
)
elif sum(values.shape) == 0:
# Optimize by ingesting 1D and reshaping in Rust
original_shape = values.shape
values = values.reshape(-1)
py_s = numpy_to_pyseries(
name,
values,
strict=strict,
nan_to_null=nan_to_null,
)
return wrap_s(py_s).reshape(original_shape)._s
else:
original_shape = values.shape
values = values.reshape(-1)
py_s = numpy_to_pyseries(
name,
values,
strict=strict,
nan_to_null=nan_to_null,
)
return wrap_s(py_s).reshape(original_shape)._s
values_1d = values.reshape(-1)

if get_index_type() == UInt32:
limit = 2**32 - 1
else:
limit = 2**64 - 1

if values.size <= limit:
py_s = numpy_to_pyseries(
name,
values_1d,
strict=strict,
nan_to_null=nan_to_null,
)
return wrap_s(py_s).reshape(original_shape)._s
else:
# Process in chunks, so we don't trigger the row limit (LENGTH_LIMIT_MSG)
offset = 0
chunks = []

# Tuples are immutable, so convert the shape to a list first
original_shape_chunk = list(original_shape)
# The number of rows per chunk differs from the original, so let reshape infer it
original_shape_chunk[0] = -1
original_shape_chunk_t = tuple(original_shape_chunk)
while True:
chunk = values_1d[offset : offset + limit]
offset += limit
if chunk.shape[0] == 0:
break

py_s = numpy_to_pyseries(
name,
chunk,
strict=strict,
nan_to_null=nan_to_null,
)
chunks.append(wrap_s(py_s).reshape(original_shape_chunk_t))

return concat(chunks)._s


def series_to_pyseries(
Expand Down

0 comments on commit cca31b3

Please sign in to comment.