/* Longest extension id, in bytes, that may precede the '-' in "<id>-<command>" */
#define ZSV_EXTENSION_ID_MAX_LEN 8

/**
 * Extract the command portion of an extension-style argument.
 *
 * An extension argument has the form "<id>-<command>", where <id> is between
 * 1 and ZSV_EXTENSION_ID_MAX_LEN bytes long and <command> is non-empty.
 *
 * @param arg NUL-terminated argument to inspect; must not be NULL
 * @return pointer (into arg) to the first byte after the first dash, or NULL
 *         if arg does not have the form described above
 */
static const char *extension_cmd_from_arg(const char *arg) {
  const char *dash = strchr(arg, '-');
  if (!dash)
    return NULL;

  size_t id_len = (size_t)(dash - arg);
  // an empty id means arg is an option such as "-h" or "--help", not an
  // extension command; an id longer than the maximum is not a valid id
  if (id_len == 0 || id_len > ZSV_EXTENSION_ID_MAX_LEN)
    return NULL;

  // a trailing dash ("my-") names an extension but no command
  return dash[1] != '\0' ? dash + 1 : NULL;
}
any commands it implements * by naming the library file zsvext, placing it in any folder that is @@ -30,21 +30,21 @@ * * We will name our extension "my", so our shared library will be named * zsvextmy.so (non-win) or zsvextmy.dll (win). After the shared lib is built, - * a user can place it anywhere in their path or in the same folder as the zsv - * binary, and invoke our operations as follows: + * place it anywhere in the PATH or in the same folder as the zsv binary. + * Our extension commands can then be invoked by running: * `zsv my-count` * `zsv my-echo` * - * in addition, users will see a brief description of our module if they execute: + * in addition, a description of our extension is available via: * `zsv help` * - * or + * and command-specific help displayed via: * `zsv help my-` * */ /** - * *Required*: define our extension id, which must be two characters in length + * *Required*: define our extension id, of up to 8 bytes in length */ const char *zsv_ext_id(void) { return "my"; @@ -114,7 +114,7 @@ zsvsheet_status my_test_command_handler(zsvsheet_proc_context_t ctx) { * initialization routine uses `ext_add_command` to register our commands and * `ext_set_help` to set the help text. 
When we register a command, we provide a * callback-- in our cases, those will be `count_main()` and `echo_main()`-- for - * zsv to invoke when a user runs our command + * zsv to invoke when our command is run * * @param callbacks pointers to zsvlib functions that we must save for later use * @param ctx context to be passed whenever we execute a zsvlib function from our init diff --git a/app/external/sqlite3/sqlite3_csv_vtab-mem.c b/app/external/sqlite3/sqlite3_csv_vtab-mem.c new file mode 100644 index 00000000..93b9a58b --- /dev/null +++ b/app/external/sqlite3/sqlite3_csv_vtab-mem.c @@ -0,0 +1,147 @@ +#ifndef SQLITE3_CSV_VTAB_ZSV_H +#define SQLITE3_CSV_VTAB_ZSV_H + +#include + +/** + * see sqlite3_csv_vtab-mem.h for background info + */ +#if defined(_WIN32) || defined(_WIN64) +#include +#else +#include +#endif + +struct sqlite3_zsv_data { + struct sqlite3_zsv_data *next; + pid_t pid; + char *filename; + struct zsv_opts opts; + struct zsv_prop_handler custom_prop_handler; +}; + +pthread_mutex_t sqlite3_zsv_data_mutex; +struct sqlite3_zsv_data *sqlite3_zsv_data_g = NULL; + +/** + * Our shared memory structure should be locked for read/write + */ +static int sqlite3_zsv_data_lock(void) { +#ifndef NO_THREADING + pthread_mutex_lock(&sqlite3_zsv_data_mutex); +#endif + return 0; +} + +static int sqlite3_zsv_data_unlock(void) { +#ifndef NO_THREADING + pthread_mutex_unlock(&sqlite3_zsv_data_mutex); +#endif + return 0; +} + +static void sqlite3_zsv_data_delete(struct sqlite3_zsv_data *e) { + if (e) { + free(e->filename); + } + free(e); +} + +void sqlite3_zsv_list_delete(void **list) { + for (struct sqlite3_zsv_data *next, *e = *list; e; e = next) { + next = e->next; + sqlite3_zsv_data_delete(e); + } +#ifndef NO_THREADING + pthread_mutex_destroy(&sqlite3_zsv_data_mutex); +#endif + *list = NULL; +} + +static struct sqlite3_zsv_data *sqlite3_zsv_data_new(const char *filename, struct zsv_opts *opts, + struct zsv_prop_handler *custom_prop_handler) { + if (!filename) + return 
NULL; + struct sqlite3_zsv_data *e = calloc(1, sizeof(*e)); + if (e) { + e->pid = getpid(); + e->filename = strdup(filename); + if (opts) + e->opts = *opts; + if (custom_prop_handler) + e->custom_prop_handler = *custom_prop_handler; + if (e->filename) + return e; + } + sqlite3_zsv_data_delete(e); + return NULL; +} + +int sqlite3_zsv_data_add(const char *filename, struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler) { + struct sqlite3_zsv_data **list = &sqlite3_zsv_data_g; + struct sqlite3_zsv_data *e = sqlite3_zsv_data_new(filename, opts, custom_prop_handler); + if (e) { + struct sqlite3_zsv_data **next; + if (sqlite3_zsv_data_lock()) { + sqlite3_zsv_data_delete(e); + return -1; + } else { + for (next = list; *next; next = &(*next)->next) + ; + *next = e; + sqlite3_zsv_data_unlock(); + return 0; + } + } + return ENOMEM; +} + +static int sqlite3_zsv_data_cmp(struct sqlite3_zsv_data *x, const char *filename, pid_t pid) { + return strcmp(x->filename, filename) && x->pid == pid; +} + +struct sqlite3_zsv_data *sqlite3_zsv_data_find(const char *filename) { + struct sqlite3_zsv_data *list = sqlite3_zsv_data_g; + struct sqlite3_zsv_data *found = NULL; + pid_t pid = getpid(); + if (!sqlite3_zsv_data_lock()) { + for (struct sqlite3_zsv_data *e = list; e && !found; e = e->next) { + if (!sqlite3_zsv_data_cmp(e, filename, pid)) + found = e; + } + if (sqlite3_zsv_data_unlock()) + fprintf(stderr, "Error unlocking sqlite3-csv-zsv shared mem lock\n"); + } + return found; +} + +int sqlite3_zsv_list_remove(const char *filename) { + if (!filename) + return 0; + struct sqlite3_zsv_data **list = &sqlite3_zsv_data_g; + struct sqlite3_zsv_data *found = NULL; + pid_t pid = getpid(); + if (*list) { + if (!sqlite3_zsv_data_cmp(*list, filename, pid)) { + // found a match at the head of list + found = *list; + *list = found->next; + } else { + // look for a match somewhere after the first element + for (struct sqlite3_zsv_data *prior = *list; prior->next != NULL; prior = 
prior->next) { + if (!sqlite3_zsv_data_cmp(prior->next, filename, pid)) { + found = prior->next; + prior->next = prior->next->next; + break; + } + } + } + } + if (found) { + sqlite3_zsv_data_delete(found); + return 0; + } + return ENOENT; // not found +} + +#endif diff --git a/app/external/sqlite3/sqlite3_csv_vtab-mem.h b/app/external/sqlite3/sqlite3_csv_vtab-mem.h new file mode 100644 index 00000000..68350ca0 --- /dev/null +++ b/app/external/sqlite3/sqlite3_csv_vtab-mem.h @@ -0,0 +1,49 @@ +#ifndef SQLITE3_CSV_VTAB_ZSV_H +#define SQLITE3_CSV_VTAB_ZSV_H + +#include +#include + +/** + * when sqlite3 opens a CSV file using ZSV, it needs a way to know + * what options to open with (such as user-specified delimiter, header offset + * or span, etc + * + * In particular, it needs access to: + * - zsv options (struct zsv_opts) + * - custom property handler (struct zsv_prop_handler *) + * - options used (const char *) [but this can be passed via connection string] + * + * Some ways to pass this info are: + * - Embed it in the text of the URI passed to the module's xConnect function. + * This is not practical because we need to pass pointers + * - Use a single global variable that can hold only one set of data at a time. + * This was the old approach, via `zsv_set_default_opts` etc, which has the + * usual drawbacks of using a single global variable structure + * - Use a shared memory structure that can support multiple sets of data + * That is the approach implemented here. 
Data is identified by the related + * filename and caller pid + * + * sqlite3_create_module_v2 is passed the shared memory root pointer, + * but it's not really needed because there is no way for it to be + * dynamic so it always has to point to the single global location + * + * Prior to calling xConnect, the caller should save data for the related + * file via `sqlite3_zsv_data_add()`; xConnect then does a lookup to + * locate and use the saved data + */ + +struct sqlite3_zsv_data; + +void sqlite3_zsv_list_delete(struct sqlite3_zsv_data **list); + +int sqlite3_zsv_data_add(const char *filename, struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler); + +struct sqlite3_zsv_data *sqlite3_csv_vtab_zsv_find(const char *filename); + +/** + * Remove from list. Return 0 on success, non-zero on error + */ +int sqlite3_zsv_list_remove(const char *filename); + +#endif diff --git a/app/external/sqlite3/sqlite3_csv_vtab-zsv.c b/app/external/sqlite3/sqlite3_csv_vtab-zsv.c index 81f4106f..d34c51e8 100644 --- a/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +++ b/app/external/sqlite3/sqlite3_csv_vtab-zsv.c @@ -1,3 +1,4 @@ +/* clang-format off */ /* * This file has been modified from its original form, in order to use the ZSV csv parser * The preamble / disclaimer to the original file is included below @@ -51,6 +52,7 @@ SQLITE_EXTENSION_INIT1 #include #include #include +#include "sqlite3_csv_vtab-mem.c" #ifndef SQLITE_OMIT_VIRTUALTABLE @@ -102,12 +104,15 @@ typedef struct zsvTable { sqlite_int64 rowCount; } zsvTable; -struct zsvTable *zsvTable_new() { +struct zsvTable *zsvTable_new(const char *filename) { struct zsvTable *z = sqlite3_malloc(sizeof(*z)); if(z) { memset(z, 0, sizeof(*z)); - z->parser_opts = zsv_get_default_opts(); - z->custom_prop_handler = zsv_get_default_custom_prop_handler(); + struct sqlite3_zsv_data *d = sqlite3_zsv_data_find(filename); + if(d) { + z->parser_opts = d->opts; // zsv_get_default_opts(); + z->custom_prop_handler = 
d->custom_prop_handler; // zsv_get_default_custom_prop_handler(); + } } return z; } @@ -120,7 +125,6 @@ typedef struct zsvCursor { sqlite3_vtab_cursor base; /* Base class. Must be first */ } zsvCursor; - /* ** The xConnect and xCreate methods do the same thing, but they must be ** different so that the virtual table is not an eponymous virtual table. @@ -172,25 +176,20 @@ static int zsvtabConnect( sqlite3_vtab **ppVtab, char **pzErr ){ - zsvTable *pNew = NULL; + (void)(_pAux); + zsvTable pTmp = { 0 }; int rc = SQLITE_OK; /* Result code from this routine */ #define ZSVTABCONNECT_PARAM_MAX 3 static const char *azParam[ZSVTABCONNECT_PARAM_MAX] = { "filename", "options_used", "max_columns" }; char *azPValue[ZSVTABCONNECT_PARAM_MAX]; /* Parameter values */ + memset(azPValue, 0, sizeof(azPValue)); # define CSV_FILENAME (azPValue[0]) # define ZSV_OPTS_USED (azPValue[1]) char *schema = NULL; - pNew = zsvTable_new(); - if(!pNew) - return SQLITE_NOMEM; - - pNew->parser_opts.max_columns = 2000; /* default max columns */ - - (void)(_pAux); - memset(azPValue, 0, sizeof(azPValue)); + zsvTable *pNew = NULL; char *errmsg = NULL; // set parameters @@ -203,11 +202,11 @@ static int zsvtabConnect( } if( jparser_opts.max_columns = atoi(zValue); - if(pNew->parser_opts.max_columns<=0 || pNew->parser_opts.max_columns > 2000){ + pTmp.parser_opts.max_columns = atoi(zValue); + if(pTmp.parser_opts.max_columns<=0 || pTmp.parser_opts.max_columns > 2000){ asprintf(&errmsg, "max_columns= value must be > 0 and < 2000"); goto zsvtab_connect_error; } @@ -223,6 +222,14 @@ static int zsvtabConnect( goto zsvtab_connect_error; } + pNew = zsvTable_new(CSV_FILENAME); + if(!pNew) + goto zsvtab_connect_oom; + if(pTmp.parser_opts.max_columns) + pNew->parser_opts.max_columns = pTmp.parser_opts.max_columns; + else if(!pNew->parser_opts.max_columns) + pNew->parser_opts.max_columns = 2000; /* default max columns */ + if(!(pNew->parser_opts.stream = fopen(CSV_FILENAME, "rb"))) { asprintf(&errmsg, "Unable to open 
for reading: %s", CSV_FILENAME); goto zsvtab_connect_error; @@ -477,7 +484,9 @@ int sqlite3_csv_init( #ifndef SQLITE_OMIT_VIRTUALTABLE int rc; SQLITE_EXTENSION_INIT2(pApi); - rc = sqlite3_create_module(db, "csv", &CsvModule, 0); + pthread_mutex_t init = PTHREAD_MUTEX_INITIALIZER; + memcpy(&sqlite3_zsv_data_mutex, &init, sizeof(init)); + rc = sqlite3_create_module_v2(db, "csv", &CsvModule, &sqlite3_zsv_data_g, (void (*)(void *))sqlite3_zsv_list_delete); #ifdef SQLITE_TEST if( rc==SQLITE_OK ){ rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); @@ -488,3 +497,4 @@ int sqlite3_csv_init( return SQLITE_OK; #endif } +/* clang-format on */ diff --git a/app/sheet.c b/app/sheet.c index ea2d57fc..f8e1ee84 100644 --- a/app/sheet.c +++ b/app/sheet.c @@ -760,6 +760,9 @@ const char *display_cell(struct zsvsheet_screen_buffer *buff, size_t data_row, s size_t cell_display_width) { char *str = (char *)zsvsheet_screen_buffer_cell_display(buff, data_row, data_col); size_t len = str ? strlen(str) : 0; + int attrs = zsvsheet_screen_buffer_cell_attrs(buff, data_row, data_col); + if (attrs) + attron(attrs); if (len == 0 || has_multibyte_char(str, len < cell_display_width ? 
len : cell_display_width) == 0) mvprintw(row, col * cell_display_width, "%-*.*s", cell_display_width, cell_display_width - 1, str); else { @@ -783,7 +786,7 @@ const char *display_cell(struct zsvsheet_screen_buffer *buff, size_t data_row, s #endif if (wlen == (size_t)-1) { fprintf(stderr, "Unable to convert to wide chars: %s\n", str); - return str; + goto out; } // move to the desired position @@ -794,6 +797,9 @@ const char *display_cell(struct zsvsheet_screen_buffer *buff, size_t data_row, s for (size_t k = used_width; k < cell_display_width; k++) addch(' '); } +out: + if (attrs) + attroff(attrs); return str; } diff --git a/app/sheet/screen_buffer.c b/app/sheet/screen_buffer.c index f0c5585a..5112cd80 100644 --- a/app/sheet/screen_buffer.c +++ b/app/sheet/screen_buffer.c @@ -5,6 +5,7 @@ struct zsvsheet_screen_buffer { size_t long_cell_count; struct zsvsheet_screen_buffer_opts opts; unsigned char *data; + int *cell_attrs; // used for per-cell attron() and attroff() // to do: add hooks for extension }; @@ -143,6 +144,14 @@ enum zsvsheet_priv_status zsvsheet_screen_buffer_write_cell(zsvsheet_screen_buff return zsvsheet_screen_buffer_write_cell_w_len(buff, row, col, value, strlen((void *)value)); } +int zsvsheet_screen_buffer_cell_attrs(zsvsheet_screen_buffer_t buff, size_t row, size_t col) { + if (buff->cell_attrs) { + size_t offset = row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len; + return buff->cell_attrs[offset]; + } + return 0; +} + const unsigned char *zsvsheet_screen_buffer_cell_display(zsvsheet_screen_buffer_t buff, size_t row, size_t col) { if (row < buff->opts.rows && col < buff->cols) { size_t offset = row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len; diff --git a/app/sheet/screen_buffer.h b/app/sheet/screen_buffer.h index 45af8a27..b96acebf 100644 --- a/app/sheet/screen_buffer.h +++ b/app/sheet/screen_buffer.h @@ -25,6 +25,8 @@ enum zsvsheet_priv_status 
zsvsheet_screen_buffer_write_cell_w_len(zsvsheet_scree const unsigned char *zsvsheet_screen_buffer_cell_display(zsvsheet_screen_buffer_t buff, size_t row, size_t col); +int zsvsheet_screen_buffer_cell_attrs(zsvsheet_screen_buffer_t buff, size_t row, size_t col); + void zsvsheet_screen_buffer_delete(zsvsheet_screen_buffer_t); size_t zsvsheet_screen_buffer_cols(zsvsheet_screen_buffer_t); diff --git a/app/sheet/ui_buffer.c b/app/sheet/ui_buffer.c index 1273870b..cb9f7d9c 100644 --- a/app/sheet/ui_buffer.c +++ b/app/sheet/ui_buffer.c @@ -38,7 +38,8 @@ struct zsvsheet_ui_buffer { unsigned char rownum_col_offset : 1; unsigned char index_started : 1; unsigned char has_row_num : 1; - unsigned char _ : 5; + unsigned char mutex_inited : 1; + unsigned char _ : 4; }; void zsvsheet_ui_buffer_delete(struct zsvsheet_ui_buffer *ub) { @@ -46,6 +47,8 @@ void zsvsheet_ui_buffer_delete(struct zsvsheet_ui_buffer *ub) { if (ub->ext_on_close) ub->ext_on_close(ub->ext_ctx); zsvsheet_screen_buffer_delete(ub->buffer); + if (ub->mutex_inited) + pthread_mutex_destroy(&ub->mutex); if (ub->ixopts) ub->ixopts->uib = NULL; free(ub->row_filter); @@ -73,6 +76,7 @@ struct zsvsheet_ui_buffer *zsvsheet_ui_buffer_new(zsvsheet_screen_buffer_t buffe pthread_mutex_t init = PTHREAD_MUTEX_INITIALIZER; if (uib) { uib->buffer = buffer; + uib->mutex_inited = 1; memcpy(&uib->mutex, &init, sizeof(init)); if (!(uibopts && uibopts->no_rownum_col_offset)) uib->rownum_col_offset = 1; diff --git a/app/sql.c b/app/sql.c index 92d419b5..ea8107c7 100644 --- a/app/sql.c +++ b/app/sql.c @@ -9,7 +9,9 @@ #include #include #include -#include +// #include +#include "external/sqlite3/sqlite3.h" +#include "external/sqlite3/sqlite3_csv_vtab-mem.h" #define ZSV_COMMAND sql #include "zsv_command.h" @@ -20,8 +22,6 @@ #include // unlink -extern sqlite3_module CsvModule; - #ifndef STRING_LIST #define STRING_LIST struct string_list { @@ -67,6 +67,7 @@ static int zsv_sql_usage(FILE *f) { struct zsv_sql_data { FILE *in; + const char 
*input_filename; struct string_list *more_input_filenames; char *sql_dynamic; // will hold contents of sql file, if any char *join_indexes; // will hold contents of join_indexes arg, prefixed and suffixed with a comma @@ -82,6 +83,7 @@ static void zsv_sql_finalize(struct zsv_sql_data *data) { static void zsv_sql_cleanup(struct zsv_sql_data *data) { if (data->in && data->in != stdin) fclose(data->in); + sqlite3_zsv_list_remove(data->input_filename); free(data->sql_dynamic); free(data->join_indexes); if (data->join_column_names) { @@ -96,6 +98,7 @@ static void zsv_sql_cleanup(struct zsv_sql_data *data) { if (data->more_input_filenames) { struct string_list *next; for (struct string_list *tmp = data->more_input_filenames; tmp; tmp = next) { + sqlite3_zsv_list_remove(tmp->value); next = tmp->next; free(tmp); } @@ -103,37 +106,13 @@ static void zsv_sql_cleanup(struct zsv_sql_data *data) { (void)data; } -static int create_virtual_csv_table(const char *fname, sqlite3 *db, const char *opts_used, int max_columns, - char **err_msg, int table_ix) { - // TO DO: set customizable maximum number of columns to prevent - // runaway in case no line ends found - char *sql = NULL; - char table_name_suffix[64]; - - if (table_ix == 0) - *table_name_suffix = '\0'; - else if (table_ix < 0 || table_ix > 1000) - return -1; - else - snprintf(table_name_suffix, sizeof(table_name_suffix), "%i", table_ix + 1); - - if (max_columns) - sql = sqlite3_mprintf("CREATE VIRTUAL TABLE data%s USING csv(filename=%Q,options_used=%Q,max_columns=%i)", - table_name_suffix, fname, opts_used, max_columns); - else - sql = sqlite3_mprintf("CREATE VIRTUAL TABLE data%s USING csv(filename=%Q,options_used=%Q)", table_name_suffix, - fname, opts_used); - - int rc = sqlite3_exec(db, sql, NULL, NULL, err_msg); - sqlite3_free(sql); - return rc; -} - static char is_select_sql(const char *s) { return strlen(s) > strlen("select ") && !zsv_strincmp((const unsigned char *)"select ", strlen("select "), (const unsigned char *)s, 
strlen("select ")); } +#include "sql_internal.c" + int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) { /** @@ -144,11 +123,9 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op * conflict between (a) and properties of (b) * * For file path and options_used, we will pass as part of the - * CREATE VIRTUAL TABLE - * command. For everything else, rather than having to sync all the - * CREATE VIRTUAL TABLE options with all the zsv options, we will just use - * zsv_set_default_opts() here to effectively pass the options when the sql - * module calls zsv_get_default_opts() + * CREATE VIRTUAL TABLE connection string. + * For zsv opts and custom_prop_handler, we will pass via + * sqlite3_zsv_data_add() */ int err = 0; if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) @@ -156,21 +133,10 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op else { struct zsv_sql_data data = {0}; int max_cols = 0; // TO DO: remove this; use parser_opts.max_columns - const char *input_filename = NULL; const char *my_sql = NULL; struct string_list **next_input_filename = &data.more_input_filenames; - // save current default opts so that we can restore them later - struct zsv_opts original_default_opts = zsv_get_default_opts(); - struct zsv_prop_handler original_default_custom_prop_handler = zsv_get_default_custom_prop_handler(); - - // set parser opts that the sql module will get via zsv_get_default_opts() - zsv_set_default_opts(*opts); - if (custom_prop_handler) - zsv_set_default_custom_prop_handler(*custom_prop_handler); - struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts(); - int err = 0; for (int arg_i = 1; !err && arg_i < argc; arg_i++) { const char *arg = argv[arg_i]; if (!strcmp(arg, "--join-indexes")) { @@ -250,12 +216,13 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], 
struct zsv_opts *op err = 1; } } else if (*arg != '-') { - if (!input_filename) { - input_filename = arg; + if (!data.input_filename) { + data.input_filename = arg; if (!(data.in = fopen(arg, "rb"))) { fprintf(stderr, "Unable to open for reading: %s\n", arg); err = 1; - } + } else + err = sqlite3_zsv_data_add(arg, opts, custom_prop_handler); } else { // another input file FILE *tmp_f; if (!(tmp_f = fopen(arg, "rb"))) { @@ -270,6 +237,9 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op tmp->value = (char *)arg; *next_input_filename = tmp; next_input_filename = &tmp->next; + + // TO DO: option to only apply specified opts to first input? + err = sqlite3_zsv_data_add(arg, opts, custom_prop_handler); } } } @@ -279,7 +249,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op } } - if (!data.in || !input_filename) { + if (!data.in || !data.input_filename) { #ifdef NO_STDIN fprintf(stderr, "Please specify an input file\n"); err = 1; @@ -295,10 +265,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op if (err) { zsv_sql_cleanup(&data); - if (custom_prop_handler) { - zsv_set_default_opts(original_default_opts); // restore default options - zsv_set_default_custom_prop_handler(original_default_custom_prop_handler); - } return 1; } @@ -309,10 +275,10 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op FILE *f = NULL; char *tmpfn = NULL; - if (input_filename) { - f = fopen(input_filename, "rb"); + if (data.input_filename) { + f = fopen(data.input_filename, "rb"); if (!f) - fprintf(stderr, "Unable to open %s for reading\n", input_filename); + fprintf(stderr, "Unable to open %s for reading\n", data.input_filename); } else f = stdin; @@ -338,165 +304,158 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op fclose(f); // to do: don't open in the first place f = NULL; - sqlite3 *db = NULL; - int rc; - zsv_csv_writer cw = 
zsv_writer_new(&writer_opts); unsigned char cw_buff[1024]; zsv_writer_set_temp_buff(cw, cw_buff, sizeof(cw_buff)); - char *err_msg = NULL; - const char *db_url = data.in_memory ? "file::memory:" : ""; - if ((rc = sqlite3_open_v2(db_url, &db, SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE, NULL)) == SQLITE_OK && db && - (rc = sqlite3_create_module(db, "csv", &CsvModule, 0) == SQLITE_OK) && - (rc = create_virtual_csv_table(tmpfn ? tmpfn : input_filename, db, opts_used, max_cols, &err_msg, 0)) == - SQLITE_OK) { - int i = 1; - for (struct string_list *sl = data.more_input_filenames; sl; sl = sl->next) - if (create_virtual_csv_table(sl->value, db, opts_used, max_cols, &err_msg, i++) != SQLITE_OK) - rc = SQLITE_ERROR; - } + const char *csv_filename = tmpfn ? (const char *)tmpfn : data.input_filename; + struct zsv_sqlite3_db *zdb = + zsv_sqlite3_db_new(csv_filename, data.in_memory, opts_used, max_cols, SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE); + if (zdb) { + if (zdb->rc == SQLITE_OK) { + for (struct string_list *sl = data.more_input_filenames; sl; sl = sl->next) + if (zsv_sqlite3_add_csv(zdb, sl->value, opts_used, max_cols) != SQLITE_OK) + break; + } - if (data.join_indexes) { // get column names, and construct the sql - // sql template: - // select t1.*, t2.*, t3.* from t1 left join (select * from t2 group by a) t2 left join (select * from t3 group - // by a) t3 using(a); - sqlite3_stmt *stmt = NULL; - const char *prefix_search = NULL; - const char *prefix_end = NULL; - if (my_sql) { - prefix_search = " from data "; - prefix_end = strstr(my_sql, prefix_search); - if (!prefix_end) { - prefix_search = " from data"; + if (zdb->rc == SQLITE_OK && data.join_indexes) { // get column names, and construct the sql + // sql template: + // select t1.*, t2.*, t3.* from t1 left join (select * from t2 group by a) t2 left join (select * from t3 + // group by a) t3 using(a); + sqlite3_stmt *stmt = NULL; + const char *prefix_search = NULL; + const char *prefix_end = NULL; + if (my_sql) { + 
prefix_search = " from data "; prefix_end = strstr(my_sql, prefix_search); - if (prefix_end && (prefix_end + strlen(prefix_search) != my_sql + strlen(my_sql))) - prefix_end = NULL; - } - if (!prefix_end || !prefix_search) { - err = 1; - fprintf(stderr, "Invalid sql: must contain 'from data'"); + if (!prefix_end) { + prefix_search = " from data"; + prefix_end = strstr(my_sql, prefix_search); + if (prefix_end && (prefix_end + strlen(prefix_search) != my_sql + strlen(my_sql))) + prefix_end = NULL; + } + if (!prefix_end || !prefix_search) { + err = 1; + fprintf(stderr, "Invalid sql: must contain 'from data'"); + } } - } - if (!err) { - rc = sqlite3_prepare_v2(db, "select * from data", -1, &stmt, NULL); - if (rc != SQLITE_OK) { - fprintf(stderr, "%s:\n %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(err), "select * from data"); - err = 1; + if (!err) { + zdb->rc = sqlite3_prepare_v2(zdb->db, "select * from data", -1, &stmt, NULL); + if (zdb->rc != SQLITE_OK) { + fprintf(stderr, "%s:\n %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(err), "select * from data"); + err = 1; + } } - } - if (!err) { - struct string_list **next_joined_column_name = &data.join_column_names; - int col_count = sqlite3_column_count(stmt); - for (char *ix_str = data.join_indexes; !err && ix_str && *ix_str && *(++ix_str); - ix_str = strchr(ix_str + 1, ',')) { - unsigned int next_ix; - if (sscanf(ix_str, "%u,", &next_ix) == 1) { - if (next_ix == 0) - fprintf(stderr, "--join-indexes index must be greater than zero\n"); - else if (next_ix > (unsigned)col_count) - fprintf(stderr, "Column %u out of range; input has only %i columns\n", next_ix, col_count), err = 1; - else if (!sqlite3_column_name(stmt, next_ix - 1)) - fprintf(stderr, "Column %u unexpectedly missing name\n", next_ix); - else { - struct string_list *tmp = calloc(1, sizeof(**next_joined_column_name)); - if (!tmp) - fprintf(stderr, "Out of memory!\n"), err = 1; + if (!err) { + struct string_list **next_joined_column_name = 
&data.join_column_names; + int col_count = sqlite3_column_count(stmt); + for (char *ix_str = data.join_indexes; !err && ix_str && *ix_str && *(++ix_str); + ix_str = strchr(ix_str + 1, ',')) { + unsigned int next_ix; + if (sscanf(ix_str, "%u,", &next_ix) == 1) { + if (next_ix == 0) + fprintf(stderr, "--join-indexes index must be greater than zero\n"); + else if (next_ix > (unsigned)col_count) + fprintf(stderr, "Column %u out of range; input has only %i columns\n", next_ix, col_count), err = 1; + else if (!sqlite3_column_name(stmt, next_ix - 1)) + fprintf(stderr, "Column %u unexpectedly missing name\n", next_ix); else { - tmp->value = strdup(sqlite3_column_name(stmt, next_ix - 1)); - *next_joined_column_name = tmp; - next_joined_column_name = &tmp->next; + struct string_list *tmp = calloc(1, sizeof(**next_joined_column_name)); + if (!tmp) + fprintf(stderr, "Out of memory!\n"), err = 1; + else { + tmp->value = strdup(sqlite3_column_name(stmt, next_ix - 1)); + *next_joined_column_name = tmp; + next_joined_column_name = &tmp->next; + } } } } - } - if (!data.more_input_filenames) - fprintf(stderr, "--join-indexes requires more than one input\n"), err = 1; - else if (!err) { // now build the join select - sqlite3_str *select_clause = sqlite3_str_new(db); - sqlite3_str *from_clause = sqlite3_str_new(db); - sqlite3_str *group_by_clause = sqlite3_str_new(db); + if (!data.more_input_filenames) + fprintf(stderr, "--join-indexes requires more than one input\n"), err = 1; + else if (!err) { // now build the join select + sqlite3_str *select_clause = sqlite3_str_new(zdb->db); + sqlite3_str *from_clause = sqlite3_str_new(zdb->db); + sqlite3_str *group_by_clause = sqlite3_str_new(zdb->db); - sqlite3_str_appendf(select_clause, "data.*"); - sqlite3_str_appendf(from_clause, "data"); + sqlite3_str_appendf(select_clause, "data.*"); + sqlite3_str_appendf(from_clause, "data"); - for (struct string_list *sl = data.join_column_names; sl; sl = sl->next) { - if (sl != data.join_column_names) 
- sqlite3_str_appendf(group_by_clause, ","); - sqlite3_str_appendf(group_by_clause, "\"%w\"", sl->value); - } + for (struct string_list *sl = data.join_column_names; sl; sl = sl->next) { + if (sl != data.join_column_names) + sqlite3_str_appendf(group_by_clause, ","); + sqlite3_str_appendf(group_by_clause, "\"%w\"", sl->value); + } - int i = 2; - for (struct string_list *sl = data.more_input_filenames; sl; sl = sl->next, i++) { - sqlite3_str_appendf(select_clause, ", data%i.*", i); - // left join (select * from t2 group by a) t2 using(x,...) - sqlite3_str_appendf(from_clause, " left join (select * from data%i group by %s) data%i", i, - sqlite3_str_value(group_by_clause), i); - sqlite3_str_appendf(from_clause, " using (%s)", sqlite3_str_value(group_by_clause)); - } + int i = 2; + for (struct string_list *sl = data.more_input_filenames; sl; sl = sl->next, i++) { + sqlite3_str_appendf(select_clause, ", data%i.*", i); + // left join (select * from t2 group by a) t2 using(x,...) + sqlite3_str_appendf(from_clause, " left join (select * from data%i group by %s) data%i", i, + sqlite3_str_value(group_by_clause), i); + sqlite3_str_appendf(from_clause, " using (%s)", sqlite3_str_value(group_by_clause)); + } - if (!prefix_end || !prefix_search) - asprintf(&data.sql_dynamic, "select %s from %s", sqlite3_str_value(select_clause), - sqlite3_str_value(from_clause)); - else { - asprintf(&data.sql_dynamic, "%.*s from %s%s%s", (int)(prefix_end - my_sql), my_sql, - sqlite3_str_value(from_clause), strlen(prefix_end + strlen(prefix_search)) ? " " : "", - strlen(prefix_end + strlen(prefix_search)) ? prefix_end + strlen(prefix_search) : ""); - } + if (!prefix_end || !prefix_search) + asprintf(&data.sql_dynamic, "select %s from %s", sqlite3_str_value(select_clause), + sqlite3_str_value(from_clause)); + else { + asprintf(&data.sql_dynamic, "%.*s from %s%s%s", (int)(prefix_end - my_sql), my_sql, + sqlite3_str_value(from_clause), strlen(prefix_end + strlen(prefix_search)) ? 
" " : "", + strlen(prefix_end + strlen(prefix_search)) ? prefix_end + strlen(prefix_search) : ""); + } - my_sql = data.sql_dynamic; - if (opts->verbose) - fprintf(stderr, "Join sql:\n%s\n", my_sql); - sqlite3_free(sqlite3_str_finish(select_clause)); - sqlite3_free(sqlite3_str_finish(from_clause)); - sqlite3_free(sqlite3_str_finish(group_by_clause)); + my_sql = data.sql_dynamic; + if (opts->verbose) + fprintf(stderr, "Join sql:\n%s\n", my_sql); + sqlite3_free(sqlite3_str_finish(select_clause)); + sqlite3_free(sqlite3_str_finish(from_clause)); + sqlite3_free(sqlite3_str_finish(group_by_clause)); + } } + if (stmt) + sqlite3_finalize(stmt); } - if (stmt) - sqlite3_finalize(stmt); - } - - if (rc == SQLITE_OK && !err && my_sql) { - sqlite3_stmt *stmt; - err = sqlite3_prepare_v2(db, my_sql, -1, &stmt, NULL); - if (err != SQLITE_OK) - fprintf(stderr, "%s:\n %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(err), my_sql); - else { - int col_count = sqlite3_column_count(stmt); - // write header row - for (int i = 0; i < col_count; i++) { - const char *colname = sqlite3_column_name(stmt, i); - zsv_writer_cell(cw, !i, (const unsigned char *)colname, colname ? strlen(colname) : 0, 1); - } + if (zdb->rc == SQLITE_OK && !err && my_sql) { + sqlite3_stmt *stmt; + err = sqlite3_prepare_v2(zdb->db, my_sql, -1, &stmt, NULL); + if (err != SQLITE_OK) + fprintf(stderr, "%s:\n %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(err), my_sql); + else { + int col_count = sqlite3_column_count(stmt); - while (sqlite3_step(stmt) == SQLITE_ROW) { + // write header row for (int i = 0; i < col_count; i++) { - const unsigned char *text = sqlite3_column_text(stmt, i); - int len = text ? sqlite3_column_bytes(stmt, i) : 0; - zsv_writer_cell(cw, !i, text, len, 1); + const char *colname = sqlite3_column_name(stmt, i); + zsv_writer_cell(cw, !i, (const unsigned char *)colname, colname ? 
strlen(colname) : 0, 1); } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + for (int i = 0; i < col_count; i++) { + const unsigned char *text = sqlite3_column_text(stmt, i); + int len = text ? sqlite3_column_bytes(stmt, i) : 0; + zsv_writer_cell(cw, !i, text, len, 1); + } + } + sqlite3_finalize(stmt); } - sqlite3_finalize(stmt); } + err = 1; + if (zdb->err_msg) + fprintf(stderr, "Error: %s\n", zdb->err_msg); + else if (!zdb->db) + fprintf(stderr, "Error (unable to open db, code %i): %s\n", zdb->rc, sqlite3_errstr(zdb->rc)); + else if (zdb->rc) + fprintf(stderr, "Error (code %i): %s\n", zdb->rc, sqlite3_errstr(zdb->rc)); + else + err = 0; + + zsv_writer_delete(cw); + zsv_sqlite3_db_delete(zdb); } - err = 1; - if (err_msg) { - fprintf(stderr, "Error: %s\n", err_msg); - sqlite3_free(err_msg); - } else if (!db) - fprintf(stderr, "Error (unable to open db, code %i): %s\n", rc, sqlite3_errstr(rc)); - else if (rc) - fprintf(stderr, "Error (code %i): %s\n", rc, sqlite3_errstr(rc)); - else - err = 0; - - if (db) - sqlite3_close(db); - - zsv_writer_delete(cw); } if (f) fclose(f); @@ -507,9 +466,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op unlink(tmpfn); free(tmpfn); } - zsv_set_default_opts(original_default_opts); // restore default options - if (custom_prop_handler) - zsv_set_default_custom_prop_handler(original_default_custom_prop_handler); } return err; } diff --git a/app/sql_internal.c b/app/sql_internal.c new file mode 100644 index 00000000..7ba905b0 --- /dev/null +++ b/app/sql_internal.c @@ -0,0 +1,65 @@ +#include "sql_internal.h" + +struct zsv_sqlite3_db *zsv_sqlite3_db_new(const char *csv_filename, char in_memory, const char *opts_used, + + size_t max_cols, int sqlite3_flags) { + struct zsv_sqlite3_db *zdb = calloc(1, sizeof(*zdb)); + if (!zdb) { + perror(NULL); + return NULL; + } + const char *db_url = in_memory ? 
"file::memory:" : ""; + zdb->rc = sqlite3_open_v2(db_url, &zdb->db, sqlite3_flags, NULL); + if (zdb->rc == SQLITE_OK && zdb->db) { + zdb->rc = sqlite3_create_module(zdb->db, "csv", &CsvModule, 0); + if (zdb->rc == SQLITE_OK) + zsv_sqlite3_add_csv(zdb, csv_filename, opts_used, max_cols); + } + if (zdb->rc != SQLITE_OK && !zdb->err_msg) + zdb->err_msg = strdup(sqlite3_errstr(zdb->rc)); + return zdb; +} + +void zsv_sqlite3_db_delete(struct zsv_sqlite3_db *zdb) { + if (zdb && zdb->db) + sqlite3_close(zdb->db); + free(zdb); +} + +static int create_virtual_csv_table(const char *fname, sqlite3 *db, const char *opts_used, int max_columns, + char **err_msgp, int table_ix) { + // TO DO: set customizable maximum number of columns to prevent + // runaway in case no line ends found + char *sql = NULL; + char table_name_suffix[64]; + + if (table_ix == 0) + *table_name_suffix = '\0'; + else if (table_ix < 0 || table_ix > 1000) + return -1; + else + snprintf(table_name_suffix, sizeof(table_name_suffix), "%i", table_ix + 1); + + if (max_columns) + sql = sqlite3_mprintf("CREATE VIRTUAL TABLE data%s USING csv(filename=%Q,options_used=%Q,max_columns=%i)", + table_name_suffix, fname, opts_used, max_columns); + else + sql = sqlite3_mprintf("CREATE VIRTUAL TABLE data%s USING csv(filename=%Q,options_used=%Q)", table_name_suffix, + fname, opts_used); + + char *err_msg_tmp; + int rc = sqlite3_exec(db, sql, NULL, NULL, &err_msg_tmp); + if (err_msg_tmp) { + *err_msgp = strdup(err_msg_tmp); + sqlite3_free(err_msg_tmp); + } + sqlite3_free(sql); + return rc; +} + +int zsv_sqlite3_add_csv(struct zsv_sqlite3_db *zdb, const char *csv_filename, const char *opts_used, size_t max_cols) { + zdb->rc = create_virtual_csv_table(csv_filename, zdb->db, opts_used, max_cols, &zdb->err_msg, zdb->table_count); + if (zdb->rc == SQLITE_OK) + zdb->table_count++; + return zdb->rc; +} diff --git a/app/sql_internal.h b/app/sql_internal.h new file mode 100644 index 00000000..81fcf0a3 --- /dev/null +++ 
b/app/sql_internal.h @@ -0,0 +1,20 @@ +#ifndef SQL_INTERNAL_H +#define SQL_INTERNAL_H + +extern sqlite3_module CsvModule; + +struct zsv_sqlite3_db { + sqlite3 *db; + int table_count; + char *err_msg; + int rc; +}; + +struct zsv_sqlite3_db *zsv_sqlite3_db_new(const char *csv_filename, char in_memory, const char *opts_used, + size_t max_cols, int sqlite3_flags); + +void zsv_sqlite3_db_delete(struct zsv_sqlite3_db *zdb); + +int zsv_sqlite3_add_csv(struct zsv_sqlite3_db *zdb, const char *csv_filename, const char *opts_used, size_t max_cols); + +#endif