Skip to content

Commit

Permalink
Create index inside writer and transformation (#319)
Browse files Browse the repository at this point in the history
* Create index inside writer and transformation. This creates the index while writing the file saving an extra pass
* Display dlerror when dynamic linking fails. Helps with missing symbol errors
* sheet: Allow search on generated buffers
* sheet: Allow index to be used before it is complete
* tests: Retry matching expected output to improve test robustness. This also reduces test time dramatically on faster hardware.
* sheet: Use buffer status on first display to prevent race in testing
* sheet: Convert sheet-subcommand tests to use expect
* sheet: Fix bad memory access by duplicating filename string.  UI buffer frees this on closing and the indexing thread will use it so it can't point to a stack variable or memory that gets overwritten after the file is opened.
* sheet: Convert sheet extension example tests to use expect
* sheet: Add benchmark test
* sheet: Only notify the UI of progress after 32MB
* sheet: Fix bug in \r\n line handling. Added related test
  • Loading branch information
richiejp authored Dec 11, 2024
1 parent abf0407 commit 417e27d
Show file tree
Hide file tree
Showing 44 changed files with 3,473 additions and 242 deletions.
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ ${ZSV_UTIL_A}: ${BUILD_DIR}/objs/utils/util.a
@mkdir -p `dirname $@`
cp -p $< $@

UTIL_A_OBJ:=writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=index writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTIL_A_OBJ}))

${BUILD_DIR}/objs/utils/util.a: ${UTIL_A_OBJ}
Expand Down
4 changes: 4 additions & 0 deletions app/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,11 @@ static struct zsv_ext *zsv_ext_new(const char *dl_name, const char *id, char ver
}
}
if (!h)
#if defined(WIN32) || defined(_WIN32)
fprintf(stderr, "Library %s not found\n", dl_name);
#else
fprintf(stderr, "Library %s failed to load: %s\n", dl_name, dlerror());
#endif

// run zsv_ext_init to add to our extension list, even if it's invalid
tmp.ok = !zsv_ext_init(h, dl_name, &tmp);
Expand Down
53 changes: 26 additions & 27 deletions app/ext_example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ else
CFLAGS += -g
endif

THIS_LIB_BASE=$(shell cd ../.. && pwd)
export THIS_LIB_BASE=$(shell cd ../.. && pwd)
CCBN=$(shell basename ${CC})
BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN}
TARGET=${BUILD_DIR}/bin/zsvextmy.${SO}
Expand All @@ -77,7 +77,9 @@ TEST_PASS=printf "${COLOR_BLUE}$@: ${COLOR_GREEN}Passed${COLOR_NONE}\n"
TEST_FAIL=(printf "${COLOR_BLUE}$@: ${COLOR_RED}Failed!${COLOR_NONE}\n" && exit 1)
TEST_INIT=printf "${COLOR_PINK}$@: ${COLOR_NONE}\n"

UTILS1=
EXPECT=../../scripts/test-expect.sh
export EXPECTED_PATH=test/expected

CFLAGS_SHARED=-shared
ifneq ($(findstring emcc,$(CC)),) # emcc
CFLAGS_SHARED=-s SIDE_MODULE=1 -s LINKABLE=1
Expand All @@ -86,9 +88,6 @@ else
INSTALLED_EXTENSION=
endif

UTILS1+=writer
UTILS=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTILS1}))

CFLAGS+= -I${THIS_LIB_BASE}/include -I${PREFIX}/include

all: ${TARGET} ${TARGET_SHEET}
Expand Down Expand Up @@ -145,7 +144,10 @@ test-3: test-%: ${CLI} ${TARGET}
../test/worldcitiespop_mil.csv:
make -C ../test worldcitiespop_mil.csv

TMP_DIR=/tmp
export TMP_DIR=/tmp
DATE_TIME:=$(shell date +%F-%H-%M-%S)
export TIMINGS_CSV:=${TMP_DIR}/timings-${DATE_TIME}.csv
export CMP=cmp
TMUX_TERM=xterm-256color
test-sheet-extension-1: ${CLI} ${TARGET} ../test/worldcitiespop_mil.csv
@${TEST_INIT}
Expand All @@ -154,15 +156,10 @@ test-sheet-extension-1: ${CLI} ${TARGET} ../test/worldcitiespop_mil.csv
@${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ]
@${RUN_CLI} register my 2>&1 | grep -v [.]so || [ 1==1 ]
@echo 'set-option default-terminal "${TMUX_TERM}"' > ~/.tmux.conf
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 80 -y 5 -d -s $@ && \
sleep 0.5 && \
tmux send-keys -t $@ "${CLI} sheet ../test/worldcitiespop_mil.csv" ENTER && \
sleep 0.5 && \
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 80 -y 5 -d -s $@ $< sheet ../test/worldcitiespop_mil.csv && \
${EXPECT} $@ indexed && \
tmux send-keys -t $@ "t" "hello" Enter && \
tmux capture-pane -t $@ -p > ${TMP_DIR}/$@.out && \
tmux kill-session -t $@ && \
cmp ${TMP_DIR}/$@.out test/expected/$@.out && ${TEST_PASS} || (echo 'Incorrect output:' && \
if [ -f ${TMP_DIR}/$@.out ]; then cat ${TMP_DIR}/$@.out; fi && ${TEST_FAIL}))
${EXPECT} $@ && ${TEST_PASS} || ${TEST_FAIL})

test-sheet-extension-2: ${CLI} ${TARGET}
@${TEST_INIT}
Expand All @@ -171,18 +168,12 @@ test-sheet-extension-2: ${CLI} ${TARGET}
@${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ]
@${RUN_CLI} register my 2>&1 | grep -v [.]so || [ 1==1 ]
@echo 'set-option default-terminal "${TMUX_TERM}"' > ~/.tmux.conf
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 120 -y 5 -d -s $@ && \
sleep 0.5 && \
tmux send-keys -t $@ "${CLI} sheet -d 3 ../../data/test/mixed-line-endings.csv" ENTER && \
sleep 0.5 && \
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 120 -y 5 -d -s $@ $< sheet -d 3 ../../data/test/mixed-line-endings.csv && \
${EXPECT} $@ indexed && \
tmux send-keys -t $@ T Enter && \
sleep 0.5 && \
${EXPECT} $@ transformed && \
tmux send-keys -t $@ G && \
sleep 0.5 && \
tmux capture-pane -t $@ -p > ${TMP_DIR}/$@.out && \
tmux kill-session -t $@ && \
cmp ${TMP_DIR}/$@.out test/expected/$@.out && ${TEST_PASS} || (echo 'Incorrect output:' && \
if [ -f ${TMP_DIR}/$@.out ]; then cat ${TMP_DIR}/$@.out; fi && ${TEST_FAIL}))
${EXPECT} $@ && ${TEST_PASS} || ${TEST_FAIL})

test-4: test-%: ${CLI} ${TARGET}
@${TEST_INIT}
Expand Down Expand Up @@ -213,13 +204,21 @@ clean:
${BUILD_DIR}/objs/%.o : ${THIS_LIB_BASE}/src/%.c ${PARSER_DEPS}
${MAKE} -C ${THIS_LIB_BASE}/src CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} $@

LIBZSV=${PREFIX}/lib/libzsv.a
${LIBZSV}:
${MAKE} -C ${THIS_LIB_BASE}/src CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} install

LIBZSVUTIL=${PREFIX}/lib/libzsvutil.a
${LIBZSVUTIL}:
${MAKE} -C ${THIS_LIB_BASE}/app CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} install-util-lib

YAJL_SRC_DIR=${THIS_MAKEFILE_DIR}/../external/yajl
YAJL_INCLUDE=-I${YAJL_SRC_DIR}/build/yajl-2.1.1/include
YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/../external/yajl_helper
${TARGET_SHEET}: LIBS="../external/sqlite3/sqlite3.c" -lzsv -lzsvutil -L${PREFIX}/lib -ljq -lutf8proc
${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${UTILS}
${TARGET_SHEET}: LIBS="../external/sqlite3/sqlite3.c" -lzsvutil -lzsv -L${PREFIX}/lib -ljq -lutf8proc
${TARGET}: LIBS=-lzsvutil -lzsv -L${PREFIX}/lib
${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${LIBZSV} ${LIBZSVUTIL}
@mkdir -p `dirname "$@"`
${CC} ${CFLAGS} ${CFLAGS_SHARED} $< ${UTILS} -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE}
${CC} ${CFLAGS} ${CFLAGS_SHARED} $< -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE}

.PHONY: all test test-% clean install
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # Country City AccentCit Region Populatio Latitude Longitude
1 ir sarmaj-e Sarmaj-e 13 34.3578 47.5207
2 ad aixirival Aixirival 06 42.466666 1.5
3 mm mokho-atw Mokho-atw 09 18.033333 96.75
? for help 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3
1 A1 B1 C1
2 A2 B2 C2
3 A3 B3 C3
? for help 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3 Column count
1 A1 B1 C1 6
2 A2 B2 C2 9
3 A3 B3 C3 12
? for help 1
36 changes: 20 additions & 16 deletions app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ static zsvsheet_status zsvsheet_find(struct zsvsheet_sheet_context *state, bool
struct zsvsheet_opts zsvsheet_opts = {0};
int prompt_footer_row = (int)(di->dimensions->rows - di->dimensions->footer_span);

if (!current_ui_buffer->filename)
if (!zsvsheet_buffer_data_filename(current_ui_buffer))
goto out;

if (!next) {
Expand Down Expand Up @@ -381,14 +381,14 @@ static zsvsheet_status zsvsheet_open_file_handler(struct zsvsheet_proc_context *
UNUSED(ctx);

if (ctx->num_params > 0) {
filename = ctx->params[0].u.string;
filename = strdup(ctx->params[0].u.string);
} else {
if (!ctx->invocation.interactive)
return zsvsheet_status_error;
get_subcommand("File to open", prompt_buffer, sizeof(prompt_buffer), prompt_footer_row);
if (*prompt_buffer == '\0')
goto no_input;
filename = prompt_buffer;
filename = strdup(prompt_buffer);
}

if ((err = zsvsheet_ui_buffer_open_file(filename, NULL, state->custom_prop_handler, di->ui_buffers.base,
Expand Down Expand Up @@ -637,6 +637,19 @@ void zsvsheet_register_builtin_procedures(void) {
}
}

static void zsvsheet_check_buffer_worker_updates(struct zsvsheet_ui_buffer *ub,
struct zsvsheet_display_dimensions *display_dims,
struct zsvsheet_sheet_context *handler_state) {
pthread_mutex_lock(&ub->mutex);
if (ub->status)
zsvsheet_priv_set_status(display_dims, 1, ub->status);
if (ub->index_ready && ub->dimensions.row_count != ub->index->row_count + 1) {
ub->dimensions.row_count = ub->index->row_count + 1;
handler_state->display_info.update_buffer = true;
}
pthread_mutex_unlock(&ub->mutex);
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
struct zsv_prop_handler *custom_prop_handler) {
if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
Expand Down Expand Up @@ -695,7 +708,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
cbreak();
set_escdelay(30);
struct zsvsheet_display_dimensions display_dims = get_display_dimensions(1, 1);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);

zsvsheet_register_builtin_procedures();

Expand All @@ -717,6 +729,9 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

zsvsheet_status status;

zsvsheet_check_buffer_worker_updates(current_ui_buffer, &display_dims, &handler_state);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);

halfdelay(2); // now ncurses getch() will fire every 2-tenths of a second so we can check for status update
//
while (true) {
Expand All @@ -734,18 +749,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
}

struct zsvsheet_ui_buffer *ub = current_ui_buffer;
pthread_mutex_lock(&ub->mutex);
if (ub->status)
zsvsheet_priv_set_status(&display_dims, 1, ub->status);
if (ub->write_progressed) {
handler_state.display_info.update_buffer = true;
ub->write_progressed = 0;
}
if (ub->index_ready && ub->dimensions.row_count != ub->index->row_count + 1) {
ub->dimensions.row_count = ub->index->row_count + 1;
handler_state.display_info.update_buffer = true;
}
pthread_mutex_unlock(&ub->mutex);
zsvsheet_check_buffer_worker_updates(ub, &display_dims, &handler_state);

if (handler_state.display_info.update_buffer && zsvsheet_buffer_data_filename(ub)) {
struct zsvsheet_opts zsvsheet_opts = {0};
Expand Down
21 changes: 16 additions & 5 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;
uint64_t line_end = zsv_cum_scanned_length(parser);

if (zsv_index_add_row(ix, parser) != zsv_index_status_ok)
if (zsv_index_add_row(ix, line_end) != zsv_index_status_ok)
zsv_abort(parser);
}

Expand Down Expand Up @@ -42,23 +43,33 @@ enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
if (!ixr.ix)
goto out;

optsp->uib->index = ixr.ix;

char cancelled = 0;
size_t committed_bytes = 0;
while (!cancelled && (zst = zsv_parse_more(ixr.parser)) == zsv_status_ok) {
if (zsv_cum_scanned_length(ixr.parser) - committed_bytes < 32 * 1024 * 1024)
continue;
committed_bytes = zsv_cum_scanned_length(ixr.parser);

pthread_mutex_lock(&optsp->uib->mutex);
if (optsp->uib->worker_cancelled) {
cancelled = 1;
zst = zsv_status_cancelled;
}
zsv_index_commit_rows(ixr.ix);
optsp->uib->index_ready = 1;
pthread_mutex_unlock(&optsp->uib->mutex);
}

zsv_finish(ixr.parser);
pthread_mutex_lock(&optsp->uib->mutex);
zsv_index_commit_rows(ixr.ix);
optsp->uib->index_ready = 1;
pthread_mutex_unlock(&optsp->uib->mutex);

if (zst == zsv_status_no_more_input || zst == zsv_status_cancelled) {
if (zst == zsv_status_no_more_input || zst == zsv_status_cancelled)
ret = zsv_index_status_ok;
optsp->uib->index = ixr.ix;
} else
zsv_index_delete(ixr.ix);

out:
if (ixr.parser)
Expand Down
1 change: 1 addition & 0 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ struct zsvsheet_index_opts {
struct zsvsheet_ui_buffer *uib;
int *errp;
struct zsv_prop_handler *custom_prop_handler;
char *old_ui_status;
};

enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp);
Expand Down
33 changes: 11 additions & 22 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ static char *zsvsheet_found_in_row(zsv_parser parser, size_t col_count, const ch
static void *get_data_index(void *d);

static void get_data_index_async(struct zsvsheet_ui_buffer *uibuffp, const char *filename, struct zsv_opts *optsp,
struct zsv_prop_handler *custom_prop_handler, pthread_mutex_t *mutexp) {
struct zsv_prop_handler *custom_prop_handler, char *old_ui_status) {
struct zsvsheet_index_opts *ixopts = calloc(1, sizeof(*ixopts));
ixopts->mutexp = mutexp;
ixopts->mutexp = &uibuffp->mutex;
ixopts->filename = filename;
ixopts->zsv_opts = *optsp;
ixopts->custom_prop_handler = custom_prop_handler;
ixopts->uib = uibuffp;
ixopts->uib->ixopts = ixopts;
ixopts->old_ui_status = old_ui_status;

if (uibuffp->worker_active)
zsvsheet_ui_buffer_join_worker(uibuffp);
Expand Down Expand Up @@ -204,16 +205,20 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_
return 0;

pthread_mutex_lock(&uibuff->mutex);
char need_index = !uibuff->index_started && (!uibuff->write_in_progress || uibuff->write_done);
char need_index = !uibuff->index_started && !uibuff->write_in_progress;
char *old_ui_status = uibuff->status;
pthread_mutex_unlock(&uibuff->mutex);

if (need_index) {
if (asprintf(&uibuff->status, "%s(building index) ", old_ui_status ? old_ui_status : "") == -1)
return -1;

uibuff->buff_used_rows = rows_read;
uibuff->dimensions.row_count = rows_read;
uibuff->index_started = 1;
if (original_row_num > 1 && rows_read > 0) {
opts.stream = NULL;
get_data_index_async(uibuff, filename, &opts, custom_prop_handler, &uibuff->mutex);
get_data_index_async(uibuff, filename, &opts, custom_prop_handler, old_ui_status);
}
} else if (rows_read > uibuff->buff_used_rows) {
uibuff->buff_used_rows = rows_read;
Expand All @@ -229,22 +234,7 @@ static void *get_data_index(void *gdi) {
pthread_mutex_t *mutexp = d->mutexp;
int *errp = d->errp;
struct zsvsheet_ui_buffer *uib = d->uib;

pthread_mutex_lock(&uib->mutex);
char *old_ui_status = uib->status;
pthread_mutex_unlock(&uib->mutex);

char *ui_status;
/* I think there was a race between this print and a "? for help" which causes
* ci to fail. Once read-file is called the main thread displays its contents
* and this thread indexes the file. There is no synchronisation between the
* two so the status we end up with is random.
*/
// asprintf(&ui_status, "%s(building index) ", old_ui_status ? old_ui_status : "");

pthread_mutex_lock(&uib->mutex);
uib->status = ui_status;
pthread_mutex_unlock(&uib->mutex);
char *ui_status = uib->status;

enum zsv_index_status ix_status = build_memory_index(d);

Expand All @@ -258,8 +248,7 @@ static void *get_data_index(void *gdi) {
}

pthread_mutex_lock(mutexp);
uib->index_ready = 1;
uib->status = old_ui_status;
uib->status = d->old_ui_status;
uib->ixopts = NULL;
pthread_mutex_unlock(mutexp);

Expand Down
Loading

0 comments on commit 417e27d

Please sign in to comment.