Skip to content

Commit

Permalink
fix bug if row buffer is too small and trailing data contains embedded quotes and commas in particular locations (#172)
Browse files Browse the repository at this point in the history

* fix bug if row buffer is too small and trailing data contains embedded quotes and commas in certain locations
* add test-echo-buffsize
  • Loading branch information
liquidaty authored Jun 14, 2024
1 parent 7fa5bd1 commit 1789992
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 11 deletions.
4 changes: 2 additions & 2 deletions app/prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,8 @@ static int zsv_prop_foreach_copy(struct zsv_foreach_dirent_handle *h, size_t dep
if(h->verbose)
fprintf(stderr, "Renaming: %s => %s\n", dest_prop_filepath_tmp, dest_prop_filepath);
if(zsv_replace_file(dest_prop_filepath_tmp, dest_prop_filepath)) {
const char *msg = strerror(errno);
fprintf(stderr, "Unable to rename %s -> %s: ", dest_prop_filepath_tmp, dest_prop_filepath);
fprintf(stderr, "Unable to rename %s -> %s: ",
dest_prop_filepath_tmp, dest_prop_filepath);
zsv_perror(NULL);
ctx->err = errno;
}
Expand Down
7 changes: 6 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,12 @@ test: ${TESTS}
test-prop:
EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test

test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2
test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2 test-echo-buffsize

# test-echo-buffsize: regression test for input that does not fit in the row buffer.
# Runs the zsv_echo binary (first prerequisite, $<) on bigger-than-buff.csv with
# --buff-size 131072, captures its output together with stderr (2>&1) in
# ${TMP_DIR}/test-echo-buffsize.out, then compares that file against
# expected/test-echo-buffsize.out and reports pass or fail.
# NOTE(review): ${REDIRECT} presumably expands to a shell output redirection, and
# ${TEST_INIT}/${PREFIX}/${CMP}/${TEST_PASS}/${TEST_FAIL} are defined elsewhere in
# this Makefile -- confirm against their definitions.
test-echo-buffsize: ${BUILD_DIR}/bin/zsv_echo${EXE} ${TEST_DATA_DIR}/bigger-than-buff.csv
@${TEST_INIT}
@${PREFIX} $< ${TEST_DATA_DIR}/bigger-than-buff.csv --buff-size 131072 ${REDIRECT} ${TMP_DIR}/$@.out 2>&1
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
Expand Down
5 changes: 5 additions & 0 deletions app/test/expected/test-echo-buffsize.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Warning: row 1 truncated
Warning: row 1 truncated
XX,XXXXXXXXXXX
XXX
XXX,XXX
2 changes: 1 addition & 1 deletion app/utils/writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ enum zsv_writer_status zsv_writer_cell_Lf(zsv_csv_writer w, char new_row, const
long double ldbl) {
char s[128];
char fmt[64];
int n = snprintf(fmt, sizeof(fmt), "%%%sLf", fmt_spec);
int n = snprintf(fmt, sizeof(fmt), "%%%sLf", fmt_spec ? fmt_spec : "");
if(!(n > 0 && n < (int)sizeof(fmt)))
fprintf(stderr, "Invalid format specifier, should be X for format %%XLf e.g. '.2'\n");
else {
Expand Down
3 changes: 3 additions & 0 deletions data/bigger-than-buff.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) {

size_t capacity = scanner->buff.size - scanner->partial_row_length;
if(VERY_UNLIKELY(capacity == 0)) { // our row size was too small to fit a single row of data
fprintf(stderr, "Warning: row truncated\n");
fprintf(stderr, "Warning: row %zu truncated\n", scanner->data_row_count);
if(scanner->mode == ZSV_MODE_FIXED) {
if(VERY_UNLIKELY(row_fx(scanner, scanner->buff.buff, 0, scanner->buff.size)))
return zsv_status_cancelled;
} else if(VERY_UNLIKELY(row_dl(scanner)))
return zsv_status_cancelled;

// throw away the next row end
scanner->buffer_exceeded = 1;
scanner->opts.row_handler = zsv_throwaway_row;
scanner->opts.ctx = scanner;

Expand Down
20 changes: 15 additions & 5 deletions src/zsv_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ struct zsv_scanner {
} buff;

size_t cell_start;
unsigned char quoted; // bitfield of ZSV_PARSER_QUOTE_XXX flags
unsigned char quoted:7; // bitfield of ZSV_PARSER_QUOTE_XXX flags
unsigned char buffer_exceeded:1;

unsigned char waiting_for_end:1;
unsigned char checked_bom:1;
Expand Down Expand Up @@ -630,6 +631,7 @@ static void zsv_throwaway_row(void *ctx) {
if(zsv_cell_count(scanner) > 1 || zsv_get_cell_1(scanner, 0).len > 0)
scanner->opts.overflow_row_handler(ctx);
}
scanner->buffer_exceeded = 0;
set_callbacks(ctx);
}

Expand Down Expand Up @@ -752,15 +754,23 @@ static int zsv_scanner_init(struct zsv_scanner *scanner,
if(opts->insert_header_row)
scanner->insert_string = opts->insert_header_row;

if(need_buff_size < ZSV_MIN_SCANNER_BUFFSIZE)
need_buff_size = ZSV_MIN_SCANNER_BUFFSIZE;
if(opts->buffsize < need_buff_size) {
if(opts->buffsize > 0) {
if(need_buff_size == ZSV_MIN_SCANNER_BUFFSIZE)
fprintf(stderr, "Increasing --buff-size to minimum %zu\n", need_buff_size);
else
fprintf(stderr, "Increasing --buff-size to %zu to accommmodate max-row-size of %u\n",
need_buff_size, opts->max_row_size);
}
opts->buffsize = need_buff_size;
}
if(opts->buffsize == 0)
opts->buffsize = ZSV_DEFAULT_SCANNER_BUFFSIZE;
else if(opts->buffsize < ZSV_MIN_SCANNER_BUFFSIZE)
opts->buffsize = ZSV_MIN_SCANNER_BUFFSIZE;

if(opts->buffsize < need_buff_size) {
opts->max_row_size = opts->buffsize / 2;
fprintf(stderr, "Warning: max row size set to %u due to buffer size %zu\n", opts->max_row_size, opts->buffsize);
}
scanner->in = opts->stream;
if(!opts->read) {
scanner->read = (zsv_generic_read)fread;
Expand Down
2 changes: 1 addition & 1 deletion src/zsv_scan_delim.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
// we are inside an open quote, which is needed to escape this char
scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
} else if(LIKELY(c == quote)) {
if(i == scanner->cell_start) {
if(i == scanner->cell_start && !scanner->buffer_exceeded) {
scanner->quoted = ZSV_PARSER_QUOTE_UNCLOSED;
scanner->quote_close_position = 0;
c = 0;
Expand Down

0 comments on commit 1789992

Please sign in to comment.