Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create index inside writer and transformation #319

Merged
merged 13 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ ${ZSV_UTIL_A}: ${BUILD_DIR}/objs/utils/util.a
@mkdir -p `dirname $@`
cp -p $< $@

UTIL_A_OBJ:=writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=index writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTIL_A_OBJ}))

${BUILD_DIR}/objs/utils/util.a: ${UTIL_A_OBJ}
Expand Down
4 changes: 4 additions & 0 deletions app/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,11 @@ static struct zsv_ext *zsv_ext_new(const char *dl_name, const char *id, char ver
}
}
if (!h)
#if defined(WIN32) || defined(_WIN32)
fprintf(stderr, "Library %s not found\n", dl_name);
#else
fprintf(stderr, "Library %s failed to load: %s\n", dl_name, dlerror());
#endif

// run zsv_ext_init to add to our extension list, even if it's invalid
tmp.ok = !zsv_ext_init(h, dl_name, &tmp);
Expand Down
53 changes: 26 additions & 27 deletions app/ext_example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ else
CFLAGS += -g
endif

THIS_LIB_BASE=$(shell cd ../.. && pwd)
export THIS_LIB_BASE=$(shell cd ../.. && pwd)
CCBN=$(shell basename ${CC})
BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN}
TARGET=${BUILD_DIR}/bin/zsvextmy.${SO}
Expand All @@ -77,7 +77,9 @@ TEST_PASS=printf "${COLOR_BLUE}$@: ${COLOR_GREEN}Passed${COLOR_NONE}\n"
TEST_FAIL=(printf "${COLOR_BLUE}$@: ${COLOR_RED}Failed!${COLOR_NONE}\n" && exit 1)
TEST_INIT=printf "${COLOR_PINK}$@: ${COLOR_NONE}\n"

UTILS1=
EXPECT=../../scripts/test-expect.sh
export EXPECTED_PATH=test/expected

CFLAGS_SHARED=-shared
ifneq ($(findstring emcc,$(CC)),) # emcc
CFLAGS_SHARED=-s SIDE_MODULE=1 -s LINKABLE=1
Expand All @@ -86,9 +88,6 @@ else
INSTALLED_EXTENSION=
endif

UTILS1+=writer
UTILS=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTILS1}))

CFLAGS+= -I${THIS_LIB_BASE}/include -I${PREFIX}/include

all: ${TARGET} ${TARGET_SHEET}
Expand Down Expand Up @@ -145,7 +144,10 @@ test-3: test-%: ${CLI} ${TARGET}
../test/worldcitiespop_mil.csv:
make -C ../test worldcitiespop_mil.csv

TMP_DIR=/tmp
export TMP_DIR=/tmp
DATE_TIME:=$(shell date +%F-%H-%M-%S)
export TIMINGS_CSV:=${TMP_DIR}/timings-${DATE_TIME}.csv
export CMP=cmp
TMUX_TERM=xterm-256color
test-sheet-extension-1: ${CLI} ${TARGET} ../test/worldcitiespop_mil.csv
@${TEST_INIT}
Expand All @@ -154,15 +156,10 @@ test-sheet-extension-1: ${CLI} ${TARGET} ../test/worldcitiespop_mil.csv
@${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ]
@${RUN_CLI} register my 2>&1 | grep -v [.]so || [ 1==1 ]
@echo 'set-option default-terminal "${TMUX_TERM}"' > ~/.tmux.conf
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 80 -y 5 -d -s $@ && \
sleep 0.5 && \
tmux send-keys -t $@ "${CLI} sheet ../test/worldcitiespop_mil.csv" ENTER && \
sleep 0.5 && \
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 80 -y 5 -d -s $@ $< sheet ../test/worldcitiespop_mil.csv && \
${EXPECT} $@ indexed && \
tmux send-keys -t $@ "t" "hello" Enter && \
tmux capture-pane -t $@ -p > ${TMP_DIR}/[email protected] && \
tmux kill-session -t $@ && \
cmp ${TMP_DIR}/[email protected] test/expected/[email protected] && ${TEST_PASS} || (echo 'Incorrect output:' && \
if [ -f ${TMP_DIR}/[email protected] ]; then cat ${TMP_DIR}/[email protected]; fi && ${TEST_FAIL}))
${EXPECT} $@ && ${TEST_PASS} || ${TEST_FAIL})

test-sheet-extension-2: ${CLI} ${TARGET}
@${TEST_INIT}
Expand All @@ -171,18 +168,12 @@ test-sheet-extension-2: ${CLI} ${TARGET}
@${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ]
@${RUN_CLI} register my 2>&1 | grep -v [.]so || [ 1==1 ]
@echo 'set-option default-terminal "${TMUX_TERM}"' > ~/.tmux.conf
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 120 -y 5 -d -s $@ && \
sleep 0.5 && \
tmux send-keys -t $@ "${CLI} sheet -d 3 ../../data/test/mixed-line-endings.csv" ENTER && \
sleep 0.5 && \
@(ZSV_CONFIG_DIR=/tmp tmux -v new-session -x 120 -y 5 -d -s $@ $< sheet -d 3 ../../data/test/mixed-line-endings.csv && \
${EXPECT} $@ indexed && \
tmux send-keys -t $@ T Enter && \
sleep 0.5 && \
${EXPECT} $@ transformed && \
tmux send-keys -t $@ G && \
sleep 0.5 && \
tmux capture-pane -t $@ -p > ${TMP_DIR}/[email protected] && \
tmux kill-session -t $@ && \
cmp ${TMP_DIR}/[email protected] test/expected/[email protected] && ${TEST_PASS} || (echo 'Incorrect output:' && \
if [ -f ${TMP_DIR}/[email protected] ]; then cat ${TMP_DIR}/[email protected]; fi && ${TEST_FAIL}))
${EXPECT} $@ && ${TEST_PASS} || ${TEST_FAIL})

test-4: test-%: ${CLI} ${TARGET}
@${TEST_INIT}
Expand Down Expand Up @@ -213,13 +204,21 @@ clean:
${BUILD_DIR}/objs/%.o : ${THIS_LIB_BASE}/src/%.c ${PARSER_DEPS}
${MAKE} -C ${THIS_LIB_BASE}/src CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} $@

LIBZSV=${PREFIX}/lib/libzsv.a
${LIBZSV}:
${MAKE} -C ${THIS_LIB_BASE}/src CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} install

LIBZSVUTIL=${PREFIX}/lib/libzsvutil.a
${LIBZSVUTIL}:
${MAKE} -C ${THIS_LIB_BASE}/app CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} WIN=${WIN} install-util-lib

YAJL_SRC_DIR=${THIS_MAKEFILE_DIR}/../external/yajl
YAJL_INCLUDE=-I${YAJL_SRC_DIR}/build/yajl-2.1.1/include
YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/../external/yajl_helper
${TARGET_SHEET}: LIBS="../external/sqlite3/sqlite3.c" -lzsv -lzsvutil -L${PREFIX}/lib -ljq -lutf8proc
${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${UTILS}
${TARGET_SHEET}: LIBS="../external/sqlite3/sqlite3.c" -lzsvutil -lzsv -L${PREFIX}/lib -ljq -lutf8proc
${TARGET}: LIBS=-lzsvutil -lzsv -L${PREFIX}/lib
${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${LIBZSV} ${LIBZSVUTIL}
@mkdir -p `dirname "$@"`
${CC} ${CFLAGS} ${CFLAGS_SHARED} $< ${UTILS} -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE}
${CC} ${CFLAGS} ${CFLAGS_SHARED} $< -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE}

.PHONY: all test test-% clean install
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # Country City AccentCit Region Populatio Latitude Longitude
1 ir sarmaj-e Sarmaj-e 13 34.3578 47.5207
2 ad aixirival Aixirival 06 42.466666 1.5
3 mm mokho-atw Mokho-atw 09 18.033333 96.75
? for help 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3
1 A1 B1 C1
2 A2 B2 C2
3 A3 B3 C3
? for help 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3 Column count
1 A1 B1 C1 6
2 A2 B2 C2 9
3 A3 B3 C3 12
? for help 1
36 changes: 20 additions & 16 deletions app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ static zsvsheet_status zsvsheet_find(struct zsvsheet_sheet_context *state, bool
struct zsvsheet_opts zsvsheet_opts = {0};
int prompt_footer_row = (int)(di->dimensions->rows - di->dimensions->footer_span);

if (!current_ui_buffer->filename)
if (!zsvsheet_buffer_data_filename(current_ui_buffer))
goto out;

if (!next) {
Expand Down Expand Up @@ -381,14 +381,14 @@ static zsvsheet_status zsvsheet_open_file_handler(struct zsvsheet_proc_context *
UNUSED(ctx);

if (ctx->num_params > 0) {
filename = ctx->params[0].u.string;
filename = strdup(ctx->params[0].u.string);
} else {
if (!ctx->invocation.interactive)
return zsvsheet_status_error;
get_subcommand("File to open", prompt_buffer, sizeof(prompt_buffer), prompt_footer_row);
if (*prompt_buffer == '\0')
goto no_input;
filename = prompt_buffer;
filename = strdup(prompt_buffer);
}

if ((err = zsvsheet_ui_buffer_open_file(filename, NULL, state->custom_prop_handler, di->ui_buffers.base,
Expand Down Expand Up @@ -637,6 +637,19 @@ void zsvsheet_register_builtin_procedures(void) {
}
}

static void zsvsheet_check_buffer_worker_updates(struct zsvsheet_ui_buffer *ub,
struct zsvsheet_display_dimensions *display_dims,
struct zsvsheet_sheet_context *handler_state) {
pthread_mutex_lock(&ub->mutex);
if (ub->status)
zsvsheet_priv_set_status(display_dims, 1, ub->status);
if (ub->index_ready && ub->dimensions.row_count != ub->index->row_count + 1) {
ub->dimensions.row_count = ub->index->row_count + 1;
handler_state->display_info.update_buffer = true;
}
pthread_mutex_unlock(&ub->mutex);
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
struct zsv_prop_handler *custom_prop_handler) {
if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
Expand Down Expand Up @@ -695,7 +708,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
cbreak();
set_escdelay(30);
struct zsvsheet_display_dimensions display_dims = get_display_dimensions(1, 1);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);

zsvsheet_register_builtin_procedures();

Expand All @@ -717,6 +729,9 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

zsvsheet_status status;

zsvsheet_check_buffer_worker_updates(current_ui_buffer, &display_dims, &handler_state);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);

halfdelay(2); // now ncurses getch() will fire every 2-tenths of a second so we can check for status update
//
while (true) {
Expand All @@ -734,18 +749,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
}

struct zsvsheet_ui_buffer *ub = current_ui_buffer;
pthread_mutex_lock(&ub->mutex);
if (ub->status)
zsvsheet_priv_set_status(&display_dims, 1, ub->status);
if (ub->write_progressed) {
handler_state.display_info.update_buffer = true;
ub->write_progressed = 0;
}
if (ub->index_ready && ub->dimensions.row_count != ub->index->row_count + 1) {
ub->dimensions.row_count = ub->index->row_count + 1;
handler_state.display_info.update_buffer = true;
}
pthread_mutex_unlock(&ub->mutex);
zsvsheet_check_buffer_worker_updates(ub, &display_dims, &handler_state);

if (handler_state.display_info.update_buffer && zsvsheet_buffer_data_filename(ub)) {
struct zsvsheet_opts zsvsheet_opts = {0};
Expand Down
21 changes: 16 additions & 5 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;
uint64_t line_end = zsv_cum_scanned_length(parser);

if (zsv_index_add_row(ix, parser) != zsv_index_status_ok)
if (zsv_index_add_row(ix, line_end) != zsv_index_status_ok)
zsv_abort(parser);
}

Expand Down Expand Up @@ -42,23 +43,33 @@ enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
if (!ixr.ix)
goto out;

optsp->uib->index = ixr.ix;

char cancelled = 0;
size_t committed_bytes = 0;
while (!cancelled && (zst = zsv_parse_more(ixr.parser)) == zsv_status_ok) {
if (zsv_cum_scanned_length(ixr.parser) - committed_bytes < 32 * 1024 * 1024)
continue;
committed_bytes = zsv_cum_scanned_length(ixr.parser);

pthread_mutex_lock(&optsp->uib->mutex);
if (optsp->uib->worker_cancelled) {
cancelled = 1;
zst = zsv_status_cancelled;
}
zsv_index_commit_rows(ixr.ix);
optsp->uib->index_ready = 1;
pthread_mutex_unlock(&optsp->uib->mutex);
}

zsv_finish(ixr.parser);
pthread_mutex_lock(&optsp->uib->mutex);
zsv_index_commit_rows(ixr.ix);
optsp->uib->index_ready = 1;
pthread_mutex_unlock(&optsp->uib->mutex);

if (zst == zsv_status_no_more_input || zst == zsv_status_cancelled) {
if (zst == zsv_status_no_more_input || zst == zsv_status_cancelled)
ret = zsv_index_status_ok;
optsp->uib->index = ixr.ix;
} else
zsv_index_delete(ixr.ix);

out:
if (ixr.parser)
Expand Down
1 change: 1 addition & 0 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ struct zsvsheet_index_opts {
struct zsvsheet_ui_buffer *uib;
int *errp;
struct zsv_prop_handler *custom_prop_handler;
char *old_ui_status;
};

enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp);
Expand Down
33 changes: 11 additions & 22 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ static char *zsvsheet_found_in_row(zsv_parser parser, size_t col_count, const ch
static void *get_data_index(void *d);

static void get_data_index_async(struct zsvsheet_ui_buffer *uibuffp, const char *filename, struct zsv_opts *optsp,
struct zsv_prop_handler *custom_prop_handler, pthread_mutex_t *mutexp) {
struct zsv_prop_handler *custom_prop_handler, char *old_ui_status) {
struct zsvsheet_index_opts *ixopts = calloc(1, sizeof(*ixopts));
ixopts->mutexp = mutexp;
ixopts->mutexp = &uibuffp->mutex;
ixopts->filename = filename;
ixopts->zsv_opts = *optsp;
ixopts->custom_prop_handler = custom_prop_handler;
ixopts->uib = uibuffp;
ixopts->uib->ixopts = ixopts;
ixopts->old_ui_status = old_ui_status;

if (uibuffp->worker_active)
zsvsheet_ui_buffer_join_worker(uibuffp);
Expand Down Expand Up @@ -204,16 +205,20 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_
return 0;

pthread_mutex_lock(&uibuff->mutex);
char need_index = !uibuff->index_started && (!uibuff->write_in_progress || uibuff->write_done);
char need_index = !uibuff->index_started && !uibuff->write_in_progress;
char *old_ui_status = uibuff->status;
pthread_mutex_unlock(&uibuff->mutex);

if (need_index) {
if (asprintf(&uibuff->status, "%s(building index) ", old_ui_status ? old_ui_status : "") == -1)
return -1;

uibuff->buff_used_rows = rows_read;
uibuff->dimensions.row_count = rows_read;
uibuff->index_started = 1;
if (original_row_num > 1 && rows_read > 0) {
opts.stream = NULL;
get_data_index_async(uibuff, filename, &opts, custom_prop_handler, &uibuff->mutex);
get_data_index_async(uibuff, filename, &opts, custom_prop_handler, old_ui_status);
}
} else if (rows_read > uibuff->buff_used_rows) {
uibuff->buff_used_rows = rows_read;
Expand All @@ -229,22 +234,7 @@ static void *get_data_index(void *gdi) {
pthread_mutex_t *mutexp = d->mutexp;
int *errp = d->errp;
struct zsvsheet_ui_buffer *uib = d->uib;

pthread_mutex_lock(&uib->mutex);
char *old_ui_status = uib->status;
pthread_mutex_unlock(&uib->mutex);

char *ui_status;
/* I think there was a race between this print and a "? for help" which causes
* ci to fail. Once read-file is called the main thread displays its contents
* and this thread indexes the file. There is no synchronisation between the
* two so the status we end up with is random.
*/
// asprintf(&ui_status, "%s(building index) ", old_ui_status ? old_ui_status : "");

pthread_mutex_lock(&uib->mutex);
uib->status = ui_status;
pthread_mutex_unlock(&uib->mutex);
char *ui_status = uib->status;

enum zsv_index_status ix_status = build_memory_index(d);

Expand All @@ -258,8 +248,7 @@ static void *get_data_index(void *gdi) {
}

pthread_mutex_lock(mutexp);
uib->index_ready = 1;
uib->status = old_ui_status;
uib->status = d->old_ui_status;
uib->ixopts = NULL;
pthread_mutex_unlock(mutexp);

Expand Down
Loading