Skip to content

Commit

Permalink
fix cum_scanned_length tracking for files larger than initial read buff (#186)
Browse files Browse the repository at this point in the history

* fix cum_scanned_length tracking for files larger than the initial read buffer
* add zsv_peek()
* updated config for up to gcc-14
* update help message format
  • Loading branch information
liquidaty authored Aug 13, 2024
1 parent 6950d93 commit 2b3b4db
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 11 deletions.
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ MORE_SOURCE+= ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -I${JQ_INCLUDE_DIR}
MORE_LIBS+=${JQ_LIB} ${LDFLAGS_JQ}

help:
@echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] <install|all|test>"
@echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] <install|all|install-util-lib|test>"
@echo
@echo "If JQ_PREFIX is not defined, libjq will be built in the build dir"
@echo
Expand Down
2 changes: 1 addition & 1 deletion app/builtin/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static int main_help(int argc, const char *argv[]) {
"",
"Options common to all commands except `prop`, `rm` and `jq`:",
#ifdef ZSV_EXTRAS
" -L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))",
" -L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))",
#endif
" -c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024",
" -r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k",
Expand Down
2 changes: 1 addition & 1 deletion app/ext_example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ endif

DEBUG=0
ifeq ($(DEBUG),0)
CFLAGS+= -O3 -DNDEBUG -std=gnu11 -Wno-gnu-statement-expression -Wshadow -Wall -Wextra -Wno-missing-braces -pedantic -DSTDC_HEADERS -D_GNU_SOURCE -lm -mavx2 -ftree-vectorize -flto
CFLAGS+= -O3 -DNDEBUG -std=gnu11 -Wno-gnu-statement-expression -Wshadow -Wall -Wextra -Wno-missing-braces -pedantic -DSTDC_HEADERS -D_GNU_SOURCE -lm -ftree-vectorize -flto
else
CFLAGS += -g
endif
Expand Down
4 changes: 2 additions & 2 deletions app/ext_example/test/expected/zsvext-test-3.out
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Usage:
zsv license [<extension_id>]

Options common to all commands except `prop`, `rm` and `jq`:
-L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))
-L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))
-c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024
-r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k
-B,--buff-size <n> : set internal buffer size. defaults to 256k
Expand Down Expand Up @@ -76,7 +76,7 @@ Usage:
zsv license [<extension_id>]

Options common to all commands except `prop`, `rm` and `jq`:
-L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))
-L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))
-c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024
-r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k
-B,--buff-size <n> : set internal buffer size. defaults to 256k
Expand Down
3 changes: 2 additions & 1 deletion app/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
while(status == zsv_status_ok
&& !zsv_signal_interrupted && !data.cancelled)
status = zsv_parse_more(data.parser);
zsv_finish(data.parser);
if(status == zsv_status_no_more_input)
status = zsv_finish(data.parser);
zsv_delete(data.parser);
}
}
Expand Down
4 changes: 2 additions & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,8 @@ test -n "$MAKE" || fail "$0: cannot find a MAKE tool"
# Find a C compiler to use
#
printf "checking for C compiler... "
for c in cc gcc gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc1 "$c"; done
for c in cc gcc gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc2 "$c"; done
for c in cc gcc gcc-14 gcc-13 gcc-12 gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc1 "$c"; done
for c in cc gcc gcc-14 gcc-13 gcc-12 gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc2 "$c"; done
printf "%s\n" "$CC"
test -n "$CC" || fail "$0: cannot find a C compiler"

Expand Down
3 changes: 3 additions & 0 deletions include/zsv/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
* - zsv_delete(): dispose the parser
******************************************************************************/

/**
 * Peek at one byte of the parser's internal buffer without modifying the
 * parser.
 *
 * The implementation reads the buffer at index `scanned_length + 1`.
 *
 * @param  parser handle whose buffer is inspected
 * @return the value stored at that index, or -1 if the index is not below
 *         the buffer size
 *
 * NOTE(review): the bound that is checked appears to be the buffer's total
 * size rather than the number of bytes actually read into it -- confirm that
 * the peeked position always holds valid input data.
 */
ZSV_EXPORT
int zsv_peek(zsv_parser);

/**
* Create a zsv parser. Typically, passed options will at least include a
* a `row_handler()` callback. Many, but not all, options can be subsequently
Expand Down
2 changes: 1 addition & 1 deletion include/zsv/utils/dirs.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct zsv_foreach_dirent_handle {

unsigned char verbose:1;
unsigned char is_dir:1; /* non-zero if this entry is a directory */
unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */
unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */
unsigned char _:5;
};

Expand Down
1 change: 1 addition & 0 deletions scripts/ci-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ if [ "$RUN_TESTS" = true ]; then
echo "[INF] Tests completed successfully!"

echo "[INF] Configuring example extension and running example extension tests"
echo "[INF] (cd app/ext_example && $MAKE CONFIGFILE=../../config.mk test)"
(cd app/ext_example && "$MAKE" CONFIGFILE=../../config.mk test)
echo "[INF] Tests completed successfully!"
fi
Expand Down
12 changes: 10 additions & 2 deletions src/zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) {
scanner->old_bytes_read = 0;
}

scanner->cum_scanned_length += scanner->scanned_length;
scanner->cum_scanned_length += scanner->scanned_length - scanner->partial_row_length;

size_t capacity = scanner->buff.size - scanner->partial_row_length;
if(VERY_UNLIKELY(capacity == 0)) { // our row size was too small to fit a single row of data
if(VERY_UNLIKELY(capacity == 0)) {
// our row size was too small to fit a single row of data
fprintf(stderr, "Warning: row %zu truncated\n", scanner->data_row_count);
if(scanner->mode == ZSV_MODE_FIXED) {
if(VERY_UNLIKELY(row_fx(scanner, scanner->buff.buff, 0, scanner->buff.size)))
Expand Down Expand Up @@ -310,6 +311,13 @@ ZSV_EXPORT enum zsv_status zsv_set_fixed_offsets(zsv_parser parser, size_t count
return zsv_status_ok;
}

/**
 * Look at a single byte of the parser's internal buffer without changing
 * any parser state.
 *
 * @param z parser handle
 * @return the value stored at index `scanned_length + 1` of the buffer, or
 *         -1 when that index is not below `buff.size`
 */
ZSV_EXPORT
int zsv_peek(zsv_parser z) {
  /* guard first: refuse to read at or past the end of the buffer */
  if(z->scanned_length + 1 >= z->buff.size)
    return -1;
  return z->buff.buff[z->scanned_length + 1];
}

/**
* Create a zsv parser
* @param opts
Expand Down

0 comments on commit 2b3b4db

Please sign in to comment.