Skip to content

Commit c708534

Browse files
authored
chore: polish error message NDJSON. (#15502)
refactor: polish error message when fail to decode a row of NDJSON to JSON.
1 parent 8bd6549 commit c708534

File tree

2 files changed

+64
-5
lines changed

2 files changed

+64
-5
lines changed

src/query/storages/stage/src/read/row_based/formats/ndjson/block_builder.rs

+62-3
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,7 @@ impl NdJsonDecoder {
5151
null_if: &[&str],
5252
) -> std::result::Result<(), FileParseError> {
5353
let mut json: serde_json::Value =
54-
serde_json::from_reader(buf).map_err(|e| FileParseError::InvalidNDJsonRow {
55-
message: e.to_string(),
56-
})?;
54+
serde_json::from_reader(buf).map_err(|e| map_json_error(e, buf))?;
5755
// todo: this is temporary
5856
if self.field_decoder.is_select {
5957
self.field_decoder
@@ -197,3 +195,64 @@ impl RowDecoder for NdJsonDecoder {
197195
Ok(vec![])
198196
}
199197
}
198+
199+
// The original serde_json error format "{} at line {} column {}" is misleading for NDJSON.
200+
// - rm `line {}`
201+
// - rename `column {}` to `pos {}`, converting it from 1-based to 0-based
202+
// - add info for size and next byte
203+
//
204+
// Covered by a unit test so that changes in serde_json's error format are detected.
205+
fn map_json_error(err: serde_json::Error, data: &[u8]) -> FileParseError {
206+
let pos = if err.column() > 0 {
207+
err.column() - 1
208+
} else {
209+
err.column()
210+
};
211+
let len = data.len();
212+
213+
let mut message = err.to_string();
214+
if let Some(p) = message.rfind(" at line") {
215+
message = message[..p].to_string()
216+
}
217+
message = format!("{message} at pos {pos} of size {len}");
218+
if err.column() < len {
219+
message = format!("{message}, next byte is '{}'", data[pos] as char)
220+
}
221+
FileParseError::InvalidNDJsonRow { message }
222+
}
223+
224+
#[cfg(test)]
mod test {
    use super::map_json_error;
    use super::FileParseError;

    /// Parses `data` as JSON (which is expected to fail) and returns the message
    /// produced by `map_json_error` for the resulting error.
    fn decode_err(data: &str) -> String {
        let raw = data.as_bytes();
        // Panics if `data` unexpectedly parses successfully.
        let parse_error = serde_json::from_slice::<serde_json::Value>(raw)
            .err()
            .unwrap();
        match map_json_error(parse_error, raw) {
            FileParseError::InvalidNDJsonRow { message } => message,
            _ => unreachable!(),
        }
    }

    /// Pins the rewritten message format so that a change in serde_json's own
    /// error text is caught here.
    #[test]
    fn test_json_decode_error() {
        // (input, expected message)
        let cases = [
            ("{", "EOF while parsing an object at pos 0 of size 1"),
            ("", "EOF while parsing a value at pos 0 of size 0"),
            (
                "{\"k\"-}",
                "expected `:` at pos 4 of size 6, next byte is '-'",
            ),
        ];
        for &(input, expected) in &cases {
            assert_eq!(decode_err(input).as_str(), expected);
        }
    }
}

tests/sqllogictests/suites/stage/formats/ndjson/ndjson_on_error.test

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ CREATE TABLE wrong_ndjson (a Boolean, b Int, c Float, d String, e Date, f Timest
77
query
88
copy /*+ set_var(max_threads=1) */ into wrong_ndjson from @data/ndjson/ pattern = 'wrong_sample.*[.]ndjson' file_format = (type = NDJSON) ON_ERROR=continue
99
----
10-
ndjson/wrong_sample.ndjson 3 1 Invalid JSON row: key must be a string at line 1 column 89 2
11-
ndjson/wrong_sample2.ndjson 3 1 Invalid JSON row: key must be a string at line 1 column 89 2
10+
ndjson/wrong_sample.ndjson 3 1 Invalid JSON row: key must be a string at pos 88 of size 114, next byte is 'h' 2
11+
ndjson/wrong_sample2.ndjson 3 1 Invalid JSON row: key must be a string at pos 88 of size 114, next byte is 'h' 2
1212

1313
query
1414
select * from wrong_ndjson order by a

0 commit comments

Comments
 (0)