File tree Expand file tree Collapse file tree 5 files changed +51
-6
lines changed Expand file tree Collapse file tree 5 files changed +51
-6
lines changed Original file line number Diff line number Diff line change @@ -1069,7 +1069,7 @@ mod tests {
10691069 Column names are case sensitive. \
10701070 You can use double quotes to refer to the \" \" t1.c0\" \" column \
10711071 or set the datafusion.sql_parser.enable_ident_normalization configuration. \
1072- Valid fields are t1.c0, t1.c1 .";
1072+ Did you mean ' t1.c0'? .";
10731073 assert_eq ! ( err. strip_backtrace( ) , expected) ;
10741074 Ok ( ( ) )
10751075 }
Original file line number Diff line number Diff line change @@ -26,6 +26,7 @@ use std::io;
2626use std:: result;
2727use std:: sync:: Arc ;
2828
29+ use crate :: utils:: datafusion_strsim:: normalized_levenshtein;
2930use crate :: utils:: quote_identifier;
3031use crate :: { Column , DFSchema , Diagnostic , TableReference } ;
3132#[ cfg( feature = "avro" ) ]
@@ -176,6 +177,11 @@ impl Display for SchemaError {
176177 . iter ( )
177178 . map ( |column| column. flat_name ( ) . to_lowercase ( ) )
178179 . collect :: < Vec < String > > ( ) ;
180+
181+ let valid_fields_names = valid_fields
182+ . iter ( )
183+ . map ( |column| column. flat_name ( ) )
184+ . collect :: < Vec < String > > ( ) ;
179185 if lower_valid_fields. contains ( & field. flat_name ( ) . to_lowercase ( ) ) {
180186 write ! (
181187 f,
@@ -184,7 +190,15 @@ impl Display for SchemaError {
184190 field. quoted_flat_name( )
185191 ) ?;
186192 }
187- if !valid_fields. is_empty ( ) {
193+ let field_name = field. name ( ) ;
194+ if let Some ( matched) = valid_fields_names
195+ . iter ( )
196+ . filter ( |str| normalized_levenshtein ( str, field_name) > 0.5 )
197+ . collect :: < Vec < & String > > ( )
198+ . first ( )
199+ {
200+ write ! ( f, ". Did you mean '{matched}'?" ) ?;
201+ } else if !valid_fields. is_empty ( ) {
188202 write ! (
189203 f,
190204 ". Valid fields are {}" ,
Original file line number Diff line number Diff line change @@ -736,6 +736,27 @@ pub mod datafusion_strsim {
736736 pub fn levenshtein ( a : & str , b : & str ) -> usize {
737737 generic_levenshtein ( & StringWrapper ( a) , & StringWrapper ( b) )
738738 }
739+
740+ /// Calculates the normalized Levenshtein distance between two strings.
741+ /// The normalized distance is a value between 0.0 and 1.0, where 1.0 indicates
742+ /// that the strings are identical and 0.0 indicates no similarity.
743+ ///
744+ /// ```
745+ /// use datafusion_common::utils::datafusion_strsim::normalized_levenshtein;
746+ ///
747+ /// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
748+ ///
749+ /// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
750+ ///
751+ /// assert!((normalized_levenshtein("kitten", "sitten") - 0.833).abs() < 0.001);
752+ /// ```
753+ pub fn normalized_levenshtein ( a : & str , b : & str ) -> f64 {
754+ if a. is_empty ( ) && b. is_empty ( ) {
755+ return 1.0 ;
756+ }
757+ 1.0 - ( levenshtein ( a, b) as f64 )
758+ / ( a. chars ( ) . count ( ) . max ( b. chars ( ) . count ( ) ) as f64 )
759+ }
739760}
740761
741762/// Merges collections `first` and `second`, removes duplicates and sorts the
Original file line number Diff line number Diff line change @@ -161,3 +161,13 @@ create table records (timestamp timestamp, value float) as values (
161161 '2021-01-01 00:00:00', 1.0,
162162 '2021-01-01 00:00:00', 2.0
163163);
164+
165+
166+ statement ok
167+ create table a(timestamp int, birthday int);
168+
169+ query error DataFusion error: Schema error: No field named timetamp\. Did you mean 'a\.timestamp'\?\.
170+ select timetamp from a;
171+
172+ query error DataFusion error: Schema error: No field named dadsada\. Valid fields are a\.timestamp, a\.birthday\.
173+ select dadsada from a;
Original file line number Diff line number Diff line change @@ -90,16 +90,16 @@ drop table case_insensitive_test
9090statement ok
9191CREATE TABLE test("Column1" string) AS VALUES ('content1');
9292
93- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
93+ statement error DataFusion error: Schema error: No field named column1\ . Valid fields are test\."Column1"\.
9494SELECT COLumn1 from test
9595
96- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
96+ statement error DataFusion error: Schema error: No field named column1\ . Valid fields are test\."Column1"\.
9797SELECT Column1 from test
9898
99- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
99+ statement error DataFusion error: Schema error: No field named column1\ . Valid fields are test\."Column1"\.
100100SELECT column1 from test
101101
102- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
102+ statement error DataFusion error: Schema error: No field named column1\ . Valid fields are test\."Column1"\.
103103SELECT "column1" from test
104104
105105statement ok
You can’t perform that action at this time.
0 commit comments