Skip to content

Commit 21e5800

Browse files
committed
bugfix: csv jsonl worker with incomplete byte reads
1 parent 0a92ecc commit 21e5800

File tree

3 files changed

+19
-9
lines changed

3 files changed

+19
-9
lines changed

server/Cargo.lock

+3 -2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ minijinja = { version = "2.2.0", features = ["loader", "json"] }
186186
hallucination-detection = { version = "0.1.5", default-features = false, optional = true }
187187
broccoli_queue = "0.1.1"
188188
youtube-transcript = { git = "https://github.com/densumesh/summarizer.git" }
189+
bytes = "1.9.0"
189190

190191

191192
[build-dependencies]

server/src/bin/csv-jsonl-worker.rs

+15 -7
Original file line numberDiff line numberDiff line change
@@ -315,19 +315,27 @@ async fn process_csv_jsonl_file(
315315

316316
let mut columns = vec![];
317317
let mut line = String::new();
318+
let mut bytes: bytes::BytesMut = bytes::BytesMut::new();
318319
let mut byte_count = 0;
319320
let mut chunk_req_payloads: Vec<ChunkReqPayload> = vec![];
320321
while let Some(chunk) = response_data_stream.bytes().next().await {
321-
let chunk = chunk.map_err(|err| {
322+
let chunk_bytes = chunk.map_err(|err| {
322323
log::error!("Failed to get chunk from stream: {:?}", err);
323324
ServiceError::InternalServerError("Failed to get chunk from stream".to_string())
324325
})?;
325-
let chunk = String::from_utf8(chunk.to_vec()).map_err(|err| {
326-
log::error!("Failed to convert chunk from stream to string: {:?}", err);
327-
ServiceError::InternalServerError(
328-
"Failed to convert chunk from stream to string".to_string(),
329-
)
330-
})?;
326+
bytes.extend_from_slice(&chunk_bytes);
327+
let chunk = match String::from_utf8(bytes.to_vec()) {
328+
Ok(chunk) => {
329+
bytes.clear();
330+
chunk
331+
}
332+
Err(_) => {
333+
log::info!(
334+
"Failed to convert bytes chunk to utf8, continuing with bytes append..."
335+
);
336+
continue;
337+
}
338+
};
331339

332340
byte_count += chunk.len();
333341

0 commit comments

Comments
 (0)