Skip to content

Commit

Permalink
[BUG] Support delta-rs version >0.17.4 in deltalake writes (#2488)
Browse files Browse the repository at this point in the history
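Daft's compatibility wrapper around `deltalake.writer.get_file_stats_from_metadata` was broken for newer versions of delta-rs (>= 0.17.4): it discarded the function's return value and did not pass the now-required `columns_to_collect_stats` argument. This change replaces the wrapper with a `functools.partial` that supplies both `num_indexed_cols=-1` and `columns_to_collect_stats=None`.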

---------

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
  • Loading branch information
jaychia and Jay Chia authored Jul 8, 2024
1 parent 9223213 commit 83532e2
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
9 changes: 5 additions & 4 deletions daft/table/table_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import random
import time
from collections.abc import Callable, Generator
from functools import partial
from typing import IO, TYPE_CHECKING, Any, Union
from uuid import uuid4

Expand Down Expand Up @@ -682,14 +683,14 @@ def write_deltalake(

data_files: list[AddAction] = []

# added to get_file_stats_from_metadata in deltalake v0.17.4: non-optional "num_indexed_cols" argument
# added to get_file_stats_from_metadata in deltalake v0.17.4: non-optional "num_indexed_cols" and "columns_to_collect_stats" arguments
# https://github.com/delta-io/delta-rs/blob/353e08be0202c45334dcdceee65a8679f35de710/python/deltalake/writer.py#L725
if parse(deltalake.__version__) < parse("0.17.4"):
get_file_stats_from_metadata = deltalake.writer.get_file_stats_from_metadata
else:

def get_file_stats_from_metadata(metadata):
deltalake.writer.get_file_stats_from_metadata(metadata, -1)
get_file_stats_from_metadata = partial(
deltalake.writer.get_file_stats_from_metadata, num_indexed_cols=-1, columns_to_collect_stats=None
)

def file_visitor(written_file: Any) -> None:
path, partition_values = get_partitions_from_path(written_file.path)
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ tenacity==8.2.3; python_version >= '3.8'

# Delta Lake
deltalake==0.5.8; platform_system == "Windows"
deltalake==0.15.3; platform_system != "Windows" and python_version >= '3.8'
deltalake==0.18.2; platform_system != "Windows" and python_version >= '3.8'

# Databricks
databricks-sdk==0.12.0
Expand Down

0 comments on commit 83532e2

Please sign in to comment.