Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4550,7 +4550,10 @@ def apply_defaults(self, default_opts: Dict[str, Any]) -> ScannerBuilder:
setter = getattr(self, key, None)
if setter is None:
raise ValueError(f"Unknown option {key}")
setter(value)
if isinstance(value, dict):
setter(**value)
else:
setter(value)
return self

def batch_size(self, batch_size: int) -> ScannerBuilder:
Expand Down
35 changes: 35 additions & 0 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4989,3 +4989,38 @@ def test_branches(tmp_path: Path):
branch1.checkout_latest()
assert branch1.version == 2
assert branch1.to_table().combine_chunks() == expected_branch1.combine_chunks()


def test_default_scan_options_nearest(tmp_path: Path) -> None:
    """Check a ``nearest`` dict passed through ``default_scan_options``.

    Writes a 10-row dataset whose row ``i`` holds the 4-dim vector
    ``[i, i, i, i]``, reopens it with ``nearest`` settings supplied as a
    dict in ``default_scan_options``, and verifies that a plain
    ``to_table()`` returns 5 rows with a ``_distance`` column in
    non-decreasing order alongside the ``id`` column.
    """
    dim = 4
    num_rows = 10

    # Flattened storage for the fixed-size-list column: each row index is
    # repeated ``dim`` times.
    flat_values = [float(row) for row in range(num_rows) for _ in range(dim)]
    vectors = pa.FixedSizeListArray.from_arrays(
        pa.array(flat_values, type=pa.float32()), dim
    )
    table = pa.Table.from_pydict({"vector": vectors, "id": list(range(num_rows))})

    dataset_path = tmp_path / "nearest_default_scan_options"
    lance.write_dataset(table, dataset_path)

    # The search parameters are given as a dict keyed by argument name.
    nearest_opts = {
        "column": "vector",
        "q": [0.0] * dim,
        "k": 5,
    }
    ds = lance.dataset(dataset_path, default_scan_options={"nearest": nearest_opts})
    result = ds.to_table()

    assert result.num_rows == 5

    assert "_distance" in result.column_names
    observed = result["_distance"].to_pylist()
    assert observed == sorted(observed)

    assert "id" in result.column_names