Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rust/lance-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ tokio-util.workspace = true
tracing.workspace = true
url.workspace = true
log.workspace = true
strsim = "0.11"

# This is used to detect CPU features at runtime.
# See src/utils/cpu.rs
Expand Down
57 changes: 52 additions & 5 deletions rust/lance-core/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

Check warning on line 3 in rust/lance-core/src/datatypes/schema.rs

View workflow job for this annotation

GitHub Actions / format

Diff in /home/runner/work/lance/lance/rust/lance-core/src/datatypes/schema.rs
//! Schema

use std::{
Expand All @@ -8,11 +8,10 @@
fmt::{self, Debug, Formatter},
sync::Arc,
};

use arrow_array::RecordBatch;
use arrow_schema::{Field as ArrowField, Schema as ArrowSchema};
use deepsize::DeepSizeOf;
use lance_arrow::*;

Check warning on line 14 in rust/lance-core/src/datatypes/schema.rs

View workflow job for this annotation

GitHub Actions / format

Diff in /home/runner/work/lance/lance/rust/lance-core/src/datatypes/schema.rs
use snafu::location;

use super::field::{Field, OnTypeMismatch, SchemaCompareOptions};
Expand Down Expand Up @@ -54,9 +53,16 @@
Ok(id)
}
FieldRef::ByPath(path) => {
let field = schema.field(path).ok_or_else(|| Error::InvalidInput {
source: format!("Field '{}' not found in schema", path).into(),
location: location!(),
let field = schema.field(path).ok_or_else(|| {

Check warning on line 56 in rust/lance-core/src/datatypes/schema.rs

View workflow job for this annotation

GitHub Actions / format

Diff in /home/runner/work/lance/lance/rust/lance-core/src/datatypes/schema.rs
let suggestion = suggest_closest_column(path, &schema.fields);
let message = match suggestion {
Some(s) => format!("Field '{}' not found in schema. Did you mean '{}'?", path, s),
None => format!("Field '{}' not found in schema", path),
};
Error::InvalidInput {
source: message.into(),
location: location!(),
}
})?;
Ok(field.id)
}
Expand Down Expand Up @@ -267,8 +273,15 @@
}
}
} else if err_on_missing {
let typo = col.as_ref();
let suggestion = suggest_closest_column(typo, &self.fields);
let message = match suggestion {
Some(s) => format!("Column '{}' does not exist. Did you mean '{}'?", typo, s),
None => format!("Column '{}' does not exist", typo),
};

return Err(Error::Schema {
message: format!("Column {} does not exist", col.as_ref()),
message,
location: location!(),
});
}
Expand Down Expand Up @@ -2772,4 +2785,38 @@
assert_eq!(pk_fields[1].name, "e");
assert_eq!(pk_fields[2].name, "g");
}

#[test]
fn test_schema_typo_suggestion() {
    // Two top-level columns; each misspelling used below is a single edit
    // away from exactly one of them.
    let columns = vec![
        ArrowField::new("vector", DataType::Int32, false),
        ArrowField::new("timestamp", DataType::Int64, false),
    ];
    let schema = Schema::try_from(&ArrowSchema::new(columns)).unwrap();

    // Projecting a misspelled column must name the closest existing column.
    let project_err = schema.project(&["vctor"]).unwrap_err();
    let project_has_hint = project_err.to_string().contains("Did you mean 'vector'?");
    assert!(
        project_has_hint,
        "Error did not contain suggestion. Got: {}",
        project_err
    );

    // Resolving a misspelled path through FieldRef must do the same.
    let lookup_err = FieldRef::ByPath("timstamp").into_id(&schema).unwrap_err();
    let lookup_has_hint = lookup_err
        .to_string()
        .contains("Did you mean 'timestamp'?");
    assert!(
        lookup_has_hint,
        "Error did not contain suggestion. Got: {}",
        lookup_err
    );
}
}

Check warning on line 2814 in rust/lance-core/src/datatypes/schema.rs

View workflow job for this annotation

GitHub Actions / format

Diff in /home/runner/work/lance/lance/rust/lance-core/src/datatypes/schema.rs
// Helper to find the closest matching column name for typos
fn suggest_closest_column(typo: &str, fields: &[crate::datatypes::Field]) -> Option<String> {
fields.iter()
.map(|f| f.name.as_str())
.filter(|&name| strsim::levenshtein(typo, name) <= 2) // Max edit distance of 2
.min_by_key(|&name| strsim::levenshtein(typo, name))
.map(|name| name.to_string())

Check warning on line 2821 in rust/lance-core/src/datatypes/schema.rs

View workflow job for this annotation

GitHub Actions / format

Diff in /home/runner/work/lance/lance/rust/lance-core/src/datatypes/schema.rs
}
Loading