diff --git a/app/Makefile b/app/Makefile index 962d0c42..db441588 100644 --- a/app/Makefile +++ b/app/Makefile @@ -316,7 +316,7 @@ ${ZSV_UTIL_A}: ${BUILD_DIR}/objs/utils/util.a @mkdir -p `dirname $@` cp -p $< $@ -UTIL_A_OBJ:=writer file dirs-no-jq os ${UTIL_A_OBJ_WIN} +UTIL_A_OBJ:=index writer file dirs-no-jq os ${UTIL_A_OBJ_WIN} UTIL_A_OBJ:=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTIL_A_OBJ})) ${BUILD_DIR}/objs/utils/util.a: ${UTIL_A_OBJ} diff --git a/app/ext_example/Makefile b/app/ext_example/Makefile index c8cd1a67..d37d8f93 100644 --- a/app/ext_example/Makefile +++ b/app/ext_example/Makefile @@ -77,7 +77,6 @@ TEST_PASS=printf "${COLOR_BLUE}$@: ${COLOR_GREEN}Passed${COLOR_NONE}\n" TEST_FAIL=(printf "${COLOR_BLUE}$@: ${COLOR_RED}Failed!${COLOR_NONE}\n" && exit 1) TEST_INIT=printf "${COLOR_PINK}$@: ${COLOR_NONE}\n" -UTILS1= CFLAGS_SHARED=-shared ifneq ($(findstring emcc,$(CC)),) # emcc CFLAGS_SHARED=-s SIDE_MODULE=1 -s LINKABLE=1 @@ -86,9 +85,6 @@ else INSTALLED_EXTENSION= endif -UTILS1+=writer -UTILS=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTILS1})) - CFLAGS+= -I${THIS_LIB_BASE}/include -I${PREFIX}/include all: ${TARGET} ${TARGET_SHEET} @@ -218,8 +214,9 @@ YAJL_SRC_DIR=${THIS_MAKEFILE_DIR}/../external/yajl YAJL_INCLUDE=-I${YAJL_SRC_DIR}/build/yajl-2.1.1/include YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/../external/yajl_helper ${TARGET_SHEET}: LIBS="../external/sqlite3/sqlite3.c" -lzsv -lzsvutil -L${PREFIX}/lib -ljq -lutf8proc +${TARGET}: LIBS=-lzsvutil -lzsv -L${PREFIX}/lib ${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${UTILS} @mkdir -p `dirname "$@"` - ${CC} ${CFLAGS} ${CFLAGS_SHARED} $< ${UTILS} -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} + ${CC} ${CFLAGS} ${CFLAGS_SHARED} $< -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} .PHONY: all test test-% clean install diff --git a/app/sheet/index.c b/app/sheet/index.c index 6f1822a5..56f5bc7a 100644 --- a/app/sheet/index.c +++ b/app/sheet/index.c @@ -12,8 +12,9 @@ static void build_memory_index_row_handler(void *ctx) { struct zsvsheet_indexer *ixr = ctx; struct zsv_index *ix = ixr->ix; zsv_parser parser = ixr->parser; + uint64_t line_end = zsv_cum_scanned_length(parser); - if (zsv_index_add_row(ix, parser) != zsv_index_status_ok) + if (zsv_index_add_row(ix, line_end) != zsv_index_status_ok) zsv_abort(parser); } diff --git a/app/sheet/transformation.c b/app/sheet/transformation.c index c876223c..1ebcbe0c 100644 --- a/app/sheet/transformation.c +++ b/app/sheet/transformation.c @@ -5,6 +5,7 @@ #include "transformation.h" #include "pthread.h" #include "zsv/utils/file.h" +#include "zsv/utils/index.h" #include "zsv/utils/prop.h" struct zsvsheet_transformation { @@ -60,6 +61,7 @@ enum zsv_status zsvsheet_transformation_new(struct zsvsheet_transformation_opts struct zsv_csv_writer_options writer_opts = { .with_bom = 0, + .index = opts.index, .write = transformation_write, .stream = trn, .table_init = NULL, @@ -153,6 +155,7 @@ static void *zsvsheet_run_buffer_transformation(void *arg) { char *buff_status_old = uib->status; uib->write_progressed = 1; uib->write_done = 1; + uib->index_ready = 1; if (buff_status_old == trn->default_status) uib->status = NULL; pthread_mutex_unlock(mutex); @@ -172,6 +175,7 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, const char *filename = zsvsheet_buffer_data_filename(buff); enum zsvsheet_status stat = zsvsheet_status_error; struct zsvsheet_buffer_info info = zsvsheet_buffer_get_info(buff); + struct zsv_index *index = NULL; // TODO: Starting a second transformation before the first ends works, but if the second is faster // than the first then it can end prematurely and read a partially written row. @@ -179,12 +183,16 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, if (info.write_in_progress && !info.write_done) return zsvsheet_status_busy; + if (!(index = zsv_index_new())) + return zsvsheet_status_memory; + // TODO: custom_prop_handler is not passed to extensions? struct zsvsheet_transformation_opts trn_opts = { .custom_prop_handler = NULL, .input_filename = filename, .on_done = opts.on_done, .ui_buffer = NULL, + .index = index, }; zsvsheet_transformation trn = NULL; struct zsv_opts zopts = zsvsheet_buffer_get_zsv_opts(buff); @@ -194,13 +202,13 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, zopts.stream = fopen(filename, "rb"); if (!zopts.stream) - goto out; + goto error; trn_opts.zsv_opts = zopts; enum zsv_status zst = zsvsheet_transformation_new(trn_opts, &trn); if (zst != zsv_status_ok) - return stat; + goto error; // Transform part of the file to initially populate the UI buffer // TODO: If the transformation is a reduction that doesn't output for some time this will caus a pause @@ -214,13 +222,13 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, case zsv_status_no_more_input: case zsv_status_cancelled: if (zsv_finish(parser) != zsv_status_ok) - goto out; + goto error; zsv_writer_flush(trn->writer); break; case zsv_status_ok: break; default: - goto out; + goto error; } struct zsvsheet_ui_buffer_opts uibopts = {0}; @@ -230,14 +238,17 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, stat = zsvsheet_open_file_opts(ctx, &uibopts); if (stat != zsvsheet_status_ok) - goto out; + goto error; struct zsvsheet_ui_buffer *nbuff = zsvsheet_buffer_current(ctx); trn->ui_buffer = nbuff; nbuff->write_progressed = 1; + nbuff->index_started = 1; + nbuff->index = index; if (zst != zsv_status_ok) { nbuff->write_done = 1; + nbuff->index_ready = 1; goto out; } @@ -247,6 +258,9 @@ enum zsvsheet_status zsvsheet_push_transformation(zsvsheet_proc_context_t ctx, zsvsheet_ui_buffer_create_worker(nbuff, zsvsheet_run_buffer_transformation, trn); return stat; +error: + free(index); + out: if (trn && trn->on_done) opts.on_done(trn); diff --git a/app/sheet/transformation.h b/app/sheet/transformation.h index 87ea7226..f5af1a8f 100644 --- a/app/sheet/transformation.h +++ b/app/sheet/transformation.h @@ -15,6 +15,7 @@ struct zsvsheet_transformation_opts { const char *input_filename; void (*on_done)(zsvsheet_transformation trn); struct zsvsheet_ui_buffer *ui_buffer; + struct zsv_index *index; }; enum zsv_status zsvsheet_transformation_new(struct zsvsheet_transformation_opts, zsvsheet_transformation *); diff --git a/app/utils/index.c b/app/utils/index.c index 5cd56635..96720969 100644 --- a/app/utils/index.c +++ b/app/utils/index.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -30,10 +29,9 @@ void zsv_index_delete(struct zsv_index *ix) { } } -enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, zsv_parser parser) { +enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, uint64_t line_end) { struct zsv_index_array *arr = ix->array; size_t len = arr->len, cap = arr->capacity; - uint64_t line_end = zsv_cum_scanned_length(parser); if (!ix->header_line_end) { ix->header_line_end = line_end; diff --git a/app/utils/writer.c b/app/utils/writer.c index 39be2c30..80b912e5 100644 --- a/app/utils/writer.c +++ b/app/utils/writer.c @@ -6,6 +6,7 @@ * https://opensource.org/licenses/MIT */ +#include "zsv/utils/index.h" #include #include #include @@ -108,6 +109,7 @@ struct zsv_output_buff { size_t (*write)(const void *restrict, size_t size, size_t nitems, void *restrict stream); void *stream; size_t used; + uint64_t written; unsigned char close_on_delete : 1; unsigned char _ : 7; }; @@ -122,6 +124,7 @@ struct zsv_writer_data { void *table_init_ctx; const char *cell_prepend; + struct zsv_index *index; unsigned char with_bom : 1; unsigned char started : 1; @@ -132,6 +135,7 @@ struct zsv_writer_data { static inline void zsv_output_buff_flush(struct zsv_output_buff *b) { b->write(b->buff, b->used, 1, b->stream); + b->written += b->used; b->used = 0; } @@ -141,6 +145,7 @@ static inline void zsv_output_buff_write(struct zsv_output_buff *b, const unsign zsv_output_buff_flush(b); if (n > ZSV_OUTPUT_BUFF_SIZE) { // n too big, so write directly b->write(s, n, 1, b->stream); + b->written += n; return; } } @@ -199,6 +204,7 @@ zsv_csv_writer zsv_writer_new(struct zsv_csv_writer_options *opts) { w->with_bom = opts->with_bom; w->table_init = opts->table_init; w->table_init_ctx = opts->table_init_ctx; + w->index = opts->index; } } return w; @@ -223,17 +229,21 @@ enum zsv_writer_status zsv_writer_delete(zsv_csv_writer w) { if (!w) return zsv_writer_status_missing_handle; + if (w->started) + zsv_output_buff_write(&w->out, (const unsigned char *)"\n", 1); + if (w->out.stream && w->out.write && w->out.buff) zsv_output_buff_flush(&w->out); - if (w->started) - w->out.write("\n", 1, 1, w->out.stream); + if (w->started && w->index) + zsv_index_add_row(w->index, w->out.written); if (w->out.buff) free(w->out.buff); if (w->out.close_on_delete && w->out.stream) fclose(w->out.stream); + free(w); return zsv_writer_status_ok; } @@ -266,9 +276,11 @@ enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row, const uns if (w->with_bom) zsv_output_buff_write(&w->out, (const unsigned char *)"\xef\xbb\xbf", 3); w->started = 1; - } else if (new_row) + } else if (new_row) { + if (w->index) + zsv_index_add_row(w->index, (uint64_t)(w->out.used + w->out.written)); zsv_output_buff_write(&w->out, (const unsigned char *)"\n", 1); - else + } else zsv_output_buff_write(&w->out, (const unsigned char *)",", 1); if (VERY_UNLIKELY(w->cell_prepend && *w->cell_prepend)) { diff --git a/include/zsv/utils/index.h b/include/zsv/utils/index.h index b9466a7d..f5805c94 100644 --- a/include/zsv/utils/index.h +++ b/include/zsv/utils/index.h @@ -2,8 +2,7 @@ #define ZSV_UTILS_INDEX_H #include -#include -#include +#include #include "zsv/common.h" @@ -37,7 +36,7 @@ struct zsv_index { struct zsv_index *zsv_index_new(void); void zsv_index_delete(struct zsv_index *ix); -enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, zsv_parser parser); +enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, uint64_t line_end); enum zsv_index_status zsv_index_row_end_offset(const struct zsv_index *ix, uint64_t row, uint64_t *offset_out, uint64_t *remaining_rows_out); enum zsv_index_status zsv_index_seek_row(const struct zsv_index *ix, struct zsv_opts *opts, uint64_t row); diff --git a/include/zsv/utils/writer.h b/include/zsv/utils/writer.h index e9a0a9d0..7727522e 100644 --- a/include/zsv/utils/writer.h +++ b/include/zsv/utils/writer.h @@ -26,6 +26,7 @@ struct zsv_csv_writer_options { void (*table_init)(void *); void *table_init_ctx; const char *output_path; // if provided, will be created by zsv_writer_new() and closed by zsv_writer_delete() + struct zsv_index *index; }; void zsv_writer_set_default_opts(struct zsv_csv_writer_options opts);