Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bug if row buffer is too small and trailing data contains embedded quotes and commas in particular locations #172

Merged
merged 2 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,8 @@ static int zsv_prop_foreach_copy(struct zsv_foreach_dirent_handle *h, size_t dep
if(h->verbose)
fprintf(stderr, "Renaming: %s => %s\n", dest_prop_filepath_tmp, dest_prop_filepath);
if(zsv_replace_file(dest_prop_filepath_tmp, dest_prop_filepath)) {
const char *msg = strerror(errno);
fprintf(stderr, "Unable to rename %s -> %s: ", dest_prop_filepath_tmp, dest_prop_filepath);
fprintf(stderr, "Unable to rename %s -> %s: ",
dest_prop_filepath_tmp, dest_prop_filepath);
zsv_perror(NULL);
ctx->err = errno;
}
Expand Down
7 changes: 6 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,12 @@ test: ${TESTS}
test-prop:
EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test

test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2
test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2 test-echo-buffsize

test-echo-buffsize: ${BUILD_DIR}/bin/zsv_echo${EXE} ${TEST_DATA_DIR}/bigger-than-buff.csv
@${TEST_INIT}
@${PREFIX} $< ${TEST_DATA_DIR}/bigger-than-buff.csv --buff-size 131072 ${REDIRECT} ${TMP_DIR}/$@.out 2>&1
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
Expand Down
5 changes: 5 additions & 0 deletions app/test/expected/test-echo-buffsize.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Warning: row 1 truncated
Warning: row 1 truncated
XX,XXXXXXXXXXX
XXX
XXX,XXX
2 changes: 1 addition & 1 deletion app/utils/writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ enum zsv_writer_status zsv_writer_cell_Lf(zsv_csv_writer w, char new_row, const
long double ldbl) {
char s[128];
char fmt[64];
int n = snprintf(fmt, sizeof(fmt), "%%%sLf", fmt_spec);
int n = snprintf(fmt, sizeof(fmt), "%%%sLf", fmt_spec ? fmt_spec : "");
if(!(n > 0 && n < (int)sizeof(fmt)))
fprintf(stderr, "Invalid format specifier, should be X for format %%XLf e.g. '.2'\n");
else {
Expand Down
3 changes: 3 additions & 0 deletions data/bigger-than-buff.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) {

size_t capacity = scanner->buff.size - scanner->partial_row_length;
if(VERY_UNLIKELY(capacity == 0)) { // our row size was too small to fit a single row of data
fprintf(stderr, "Warning: row truncated\n");
fprintf(stderr, "Warning: row %zu truncated\n", scanner->data_row_count);
if(scanner->mode == ZSV_MODE_FIXED) {
if(VERY_UNLIKELY(row_fx(scanner, scanner->buff.buff, 0, scanner->buff.size)))
return zsv_status_cancelled;
} else if(VERY_UNLIKELY(row_dl(scanner)))
return zsv_status_cancelled;

// throw away the next row end
scanner->buffer_exceeded = 1;
scanner->opts.row_handler = zsv_throwaway_row;
scanner->opts.ctx = scanner;

Expand Down
20 changes: 15 additions & 5 deletions src/zsv_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ struct zsv_scanner {
} buff;

size_t cell_start;
unsigned char quoted; // bitfield of ZSV_PARSER_QUOTE_XXX flags
unsigned char quoted:7; // bitfield of ZSV_PARSER_QUOTE_XXX flags
unsigned char buffer_exceeded:1;

unsigned char waiting_for_end:1;
unsigned char checked_bom:1;
Expand Down Expand Up @@ -630,6 +631,7 @@ static void zsv_throwaway_row(void *ctx) {
if(zsv_cell_count(scanner) > 1 || zsv_get_cell_1(scanner, 0).len > 0)
scanner->opts.overflow_row_handler(ctx);
}
scanner->buffer_exceeded = 0;
set_callbacks(ctx);
}

Expand Down Expand Up @@ -752,15 +754,23 @@ static int zsv_scanner_init(struct zsv_scanner *scanner,
if(opts->insert_header_row)
scanner->insert_string = opts->insert_header_row;

if(need_buff_size < ZSV_MIN_SCANNER_BUFFSIZE)
need_buff_size = ZSV_MIN_SCANNER_BUFFSIZE;
if(opts->buffsize < need_buff_size) {
if(opts->buffsize > 0) {
if(need_buff_size == ZSV_MIN_SCANNER_BUFFSIZE)
fprintf(stderr, "Increasing --buff-size to minimum %zu\n", need_buff_size);
else
fprintf(stderr, "Increasing --buff-size to %zu to accommmodate max-row-size of %u\n",
need_buff_size, opts->max_row_size);
}
opts->buffsize = need_buff_size;
}
if(opts->buffsize == 0)
opts->buffsize = ZSV_DEFAULT_SCANNER_BUFFSIZE;
else if(opts->buffsize < ZSV_MIN_SCANNER_BUFFSIZE)
opts->buffsize = ZSV_MIN_SCANNER_BUFFSIZE;

if(opts->buffsize < need_buff_size) {
opts->max_row_size = opts->buffsize / 2;
fprintf(stderr, "Warning: max row size set to %u due to buffer size %zu\n", opts->max_row_size, opts->buffsize);
}
scanner->in = opts->stream;
if(!opts->read) {
scanner->read = (zsv_generic_read)fread;
Expand Down
2 changes: 1 addition & 1 deletion src/zsv_scan_delim.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
// we are inside an open quote, which is needed to escape this char
scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
} else if(LIKELY(c == quote)) {
if(i == scanner->cell_start) {
if(i == scanner->cell_start && !scanner->buffer_exceeded) {
scanner->quoted = ZSV_PARSER_QUOTE_UNCLOSED;
scanner->quote_close_position = 0;
c = 0;
Expand Down
Loading