Skip to content

Commit

Permalink
add integrated overwrite support (#140)
Browse files Browse the repository at this point in the history
* Add a parser option to include overwrites in CSV format, so you can now layer removable modifications on top of your data
  • Loading branch information
liquidaty authored Oct 27, 2023
1 parent 05b9429 commit 781ddd5
Show file tree
Hide file tree
Showing 33 changed files with 991 additions and 153 deletions.
2 changes: 1 addition & 1 deletion app/2db.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ static yajl_handle zsv_2db_yajl_handle(zsv_2db_handle data) {
return data->json_parser.st.yajl;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
(void)(zsv_opts);
(void)(opts_used);
FILE *f_in = NULL;
Expand Down
4 changes: 2 additions & 2 deletions app/2json.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ static int zsv_db2json(const char *input_filename, char **tname, jsonwriter_hand
return err;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
struct zsv_2json_data data = { 0 };
data.headers_next = &data.headers;

Expand Down Expand Up @@ -357,7 +357,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
} else {
opts->row_handler = zsv_2json_row;
opts->ctx = &data;
if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) == zsv_status_ok) {
if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) == zsv_status_ok) {
zsv_handle_ctrl_c_signal();
while(!data.err
&& !zsv_signal_interrupted
Expand Down
4 changes: 2 additions & 2 deletions app/2tsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ int zsv_2tsv_usage(int rc) {
return rc;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
struct zsv_2tsv_data data = { 0 };
const char *input_path = NULL;
int err = 0;
Expand Down Expand Up @@ -194,7 +194,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

opts->row_handler = zsv_2tsv_row;
opts->ctx = &data;
if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) == zsv_status_ok) {
if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) == zsv_status_ok) {
char output[ZSV_2TSV_BUFF_SIZE];
data.out.buff = output;

Expand Down
13 changes: 9 additions & 4 deletions app/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string.h>
#include <zsv/utils/arg.h>
#include <zsv/utils/dl.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/string.h>
#include <zsv/utils/dirs.h>
#include <zsv/utils/signal.h>
Expand All @@ -32,7 +33,7 @@ static struct zsv_ext *zsv_ext_new(const char *dl_name, const char *id, char ver
#include "cli_ini.c"

typedef int (cmd_main)(int argc, const char *argv[]);
typedef int (zsv_cmd)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used);
typedef int (zsv_cmd)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used);
typedef int (*cmd_reserved)();

struct builtin_cmd {
Expand Down Expand Up @@ -292,9 +293,12 @@ static enum zsv_ext_status ext_add_command(zsv_execution_context ctx,
static enum zsv_ext_status ext_parse_all(zsv_execution_context ctx,
void *user_context,
void (*row_handler)(void *ctx),
struct zsv_opts *const custom
struct zsv_opts *const custom,
struct zsv_prop_handler *custom_prop
) {
struct zsv_opts opts = custom ? *custom : ext_parser_opts(ctx);
struct zsv_prop_handler custom_prop_handler = custom_prop ? *custom_prop : zsv_get_default_custom_prop_handler();

if(row_handler)
opts.row_handler = row_handler;
zsv_parser parser = zsv_new(&opts);
Expand Down Expand Up @@ -403,6 +407,7 @@ static enum zsv_ext_status run_extension(int argc, const char *argv[], struct zs
struct zsv_opts opts;
zsv_args_to_opts(argc, argv, &argc, argv, &opts, NULL);
zsv_set_default_opts(opts);
// TODO: is a corresponding zsv_set_default_custom_prop_handler() call needed here?
stat = cmd->main(&ctx, ctx.argc - 1, &ctx.argv[1]);
}

Expand Down Expand Up @@ -483,7 +488,7 @@ int ZSV_CLI_MAIN(int argc, const char *argv[]) {
else if(help_builtin->cmd) {
char opts_used[ZSV_OPTS_SIZE_MAX] = { 0 };
struct zsv_opts opts = { 0 };
return help_builtin->cmd(2, argv_tmp, &opts, opts_used);
return help_builtin->cmd(2, argv_tmp, &opts, NULL, opts_used);
} else
return fprintf(stderr, "Unexpected syntax!\n");
} else {
Expand All @@ -509,7 +514,7 @@ int ZSV_CLI_MAIN(int argc, const char *argv[]) {
struct zsv_opts opts;
enum zsv_status stat = zsv_args_to_opts(argc, argv, &argc, argv, &opts, opts_used);
if(stat == zsv_status_ok)
return builtin->cmd(argc - 1, argc > 1 ? &argv[1] : NULL, &opts, opts_used);
return builtin->cmd(argc - 1, argc > 1 ? &argv[1] : NULL, &opts, NULL, opts_used);
return stat;
}
}
Expand Down
19 changes: 14 additions & 5 deletions app/compare.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ static enum zsv_compare_status
input_init_unsorted(struct zsv_compare_data *data,
struct zsv_compare_input *input,
struct zsv_opts *opts,
struct zsv_prop_handler *custom_prop_handler,
const char *opts_used) {
(void)(opts_used);
if(!(input->stream = fopen(input->path, "rb"))) {
Expand All @@ -377,7 +378,7 @@ input_init_unsorted(struct zsv_compare_data *data,
}
struct zsv_opts these_opts = *opts;
these_opts.stream = input->stream;
enum zsv_status stat = zsv_new_with_properties(&these_opts, input->path, NULL, &input->parser);
enum zsv_status stat = zsv_new_with_properties(&these_opts, custom_prop_handler, input->path, NULL, &input->parser);
if(stat != zsv_status_ok)
return zsv_compare_status_error;

Expand Down Expand Up @@ -591,8 +592,7 @@ static int compare_usage() {
}

// TODO: consolidate with sql.c; move common code to utils/db.c
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
/**
* See sql.c re passing options to sqlite3 when sorting is used
*/
Expand Down Expand Up @@ -660,6 +660,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
}

struct zsv_opts original_default_opts;
struct zsv_prop_handler original_default_custom_prop_handler;
if(data->sort) {
if(!data->key_count) {
fprintf(stderr, "Error: --sort requires one or more keys\n");
Expand All @@ -668,6 +669,11 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
original_default_opts = zsv_get_default_opts();
zsv_set_default_opts(*opts);

if(custom_prop_handler) {
original_default_custom_prop_handler = zsv_get_default_custom_prop_handler();
zsv_set_default_custom_prop_handler(*custom_prop_handler);
}

if(data->status == zsv_compare_status_ok)
data->status = zsv_compare_init_sorted(data);
}
Expand All @@ -683,7 +689,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
for(unsigned ix = 0; data->status == zsv_compare_status_ok && ix < input_count; ix++) {
struct zsv_compare_input *input = &data->inputs[ix];
input->path = input_filenames[ix];
data->status = data->input_init(data, input, opts, opts_used);
data->status = data->input_init(data, input, opts, custom_prop_handler, opts_used);
}
}

Expand Down Expand Up @@ -839,8 +845,11 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

err = data->status == zsv_compare_status_ok ? 0 : 1;

if(data->sort)
if(data->sort) {
zsv_set_default_opts(original_default_opts); // restore default options
if(custom_prop_handler)
zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
}

zsv_compare_delete(data);
return err;
Expand Down
1 change: 1 addition & 0 deletions app/compare_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ struct zsv_compare_data {
enum zsv_compare_status (*input_init)(struct zsv_compare_data *data,
struct zsv_compare_input *input,
struct zsv_opts *opts,
struct zsv_prop_handler *custom_prop_handler,
const char *opts_used);

sqlite3 *sort_db; // used when --sort option was specified
Expand Down
4 changes: 2 additions & 2 deletions app/count-pull.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ static int count_usage() {
return 0;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
const char *input_path = NULL;
int err = 0;
for(int i = 1; !err && i < argc; i++) {
Expand Down Expand Up @@ -61,7 +61,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
if(!err) {
zsv_parser parser;
// if(zsv_pull_new_with_properties(opts, input_path, opts_used, &parser) != zsv_status_ok) {
if(zsv_new_with_properties(opts, input_path, opts_used, &parser) != zsv_status_ok) {
if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &parser) != zsv_status_ok) {
fprintf(stderr, "Unable to initialize parser\n");
err = 1;
} else {
Expand Down
4 changes: 2 additions & 2 deletions app/count.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static int count_usage() {
return 0;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
struct data data = { 0 };
const char *input_path = NULL;
int err = 0;
Expand Down Expand Up @@ -71,7 +71,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
if(!err) {
opts->row_handler = row;
opts->ctx = &data;
if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) != zsv_status_ok) {
if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) != zsv_status_ok) {
fprintf(stderr, "Unable to initialize parser\n");
err = 1;
} else {
Expand Down
7 changes: 4 additions & 3 deletions app/desc.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ static void zsv_desc_cleanup(struct zsv_desc_data *data) {
#define ZSV_DESC_TMPFN_TEMPLATE "zsv_desc_XXXXXXXXXXXX"

static void zsv_desc_execute(struct zsv_desc_data *data,
struct zsv_prop_handler *custom_prop_handler,
const char *input_path,
const char *opts_used) {
data->opts->cell_handler = zsv_desc_cell;
Expand All @@ -501,7 +502,7 @@ static void zsv_desc_execute(struct zsv_desc_data *data,

if(!data->max_enum)
data->max_enum = ZSV_DESC_MAX_ENUM_DEFAULT;
if(zsv_new_with_properties(data->opts, input_path, opts_used, &data->parser)
if(zsv_new_with_properties(data->opts, custom_prop_handler, input_path, opts_used, &data->parser)
== zsv_status_ok) {
FILE *input_temp_file = NULL;
enum zsv_status status;
Expand All @@ -517,7 +518,7 @@ static void zsv_desc_execute(struct zsv_desc_data *data,
}
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
if(argc < 1)
zsv_desc_usage();
else if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
Expand Down Expand Up @@ -593,7 +594,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
return 1;
}

zsv_desc_execute(&data, input_path, opts_used);
zsv_desc_execute(&data, custom_prop_handler, input_path, opts_used);
zsv_desc_finalize(&data);
zsv_desc_print(&data);
zsv_desc_cleanup(&data);
Expand Down
34 changes: 29 additions & 5 deletions app/echo.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ const char *zsv_echo_usage_msg[] = {
" --overwrite <source>: overwrite cells using given source. Source may be:",
" - sqlite3://<filename>[?sql=<query>]",
" ex: sqlite3://overwrites.db?sql=select row, column, value from overwrites order by row, column",
" - /path/to/file.csv",
" path to CSV file with columns row,col,val (in that order) and rows pre-sorted by row and column",
NULL
};

Expand All @@ -134,8 +136,9 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) {
sqlite3_close(data->o.sqlite3.db);
}

static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const char *source, size_t len) {
#define zsv_echo_sqlite3_prefix "sqlite3://"

static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const char *source, size_t len) {
size_t pfx_len;
if(len > (pfx_len = strlen(zsv_echo_sqlite3_prefix)) && !memcmp(source, zsv_echo_sqlite3_prefix, pfx_len)) {
data->o.sqlite3.filename = zsv_memdup(source + pfx_len, len - pfx_len);
Expand All @@ -149,7 +152,9 @@ static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const cha
data->o.sqlite3.sql = sql + strlen(zsv_echo_sql_prefix);
}
// open the sql connection
if(!(data->o.sqlite3.filename && *data->o.sqlite3.filename)) {
if(!(data->o.sqlite3.filename && *data->o.sqlite3.filename
&& data->o.sqlite3.sql && *data->o.sqlite3.sql)) {
free(data->o.sqlite3.filename);
fprintf(stderr, "Invalid query string");
return 1;
}
Expand Down Expand Up @@ -182,7 +187,7 @@ static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const cha
return 1;
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
if(argc < 1 || (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))) {
zsv_echo_usage();
return 0;
Expand All @@ -194,6 +199,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

int err = 0;

const char *overwrites_csv = NULL;

data.overwrite.eof = 1;
for(int arg_i = 1; !err && arg_i < argc; arg_i++) {
const char *arg = argv[arg_i];
Expand All @@ -205,7 +212,12 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
err = 1;
} else {
const char *src = argv[++arg_i];
err = zsv_echo_parse_overwrite_source(&data, src, strlen(src));
if(strlen(src) > strlen(zsv_echo_sqlite3_prefix) &&
!memcmp(zsv_echo_sqlite3_prefix, src, strlen(zsv_echo_sqlite3_prefix)))
err = zsv_echo_parse_overwrite_source(&data, src, strlen(src));
else {
overwrites_csv = src;
}
}
} else if(!data.in) {
#ifndef NO_STDIN
Expand Down Expand Up @@ -243,7 +255,19 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
opts->stream = data.in;
opts->ctx = &data;
data.csv_writer = zsv_writer_new(&writer_opts);
if(zsv_new_with_properties(opts, data.input_path, opts_used, &data.parser) != zsv_status_ok

if(overwrites_csv) {
if(!(opts->overwrite.ctx = fopen(overwrites_csv, "rb"))) {
fprintf(stderr, "Unable to open for write: %s\n", overwrites_csv);
zsv_echo_cleanup(&data);
return 1;
} else {
opts->overwrite.type = zsv_overwrite_type_csv;
opts->overwrite.close_ctx = (int (*)(void *))fclose;
}
}

if(zsv_new_with_properties(opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok
|| !data.csv_writer) {
zsv_echo_cleanup(&data);
return 1;
Expand Down
6 changes: 4 additions & 2 deletions app/external/sqlite3/sqlite3_csv_vtab-zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ typedef struct zsvTable {
sqlite3_vtab base; /* Base class. Must be first */
char *zFilename; /* Name of the CSV file */
struct zsv_opts parser_opts;
struct zsv_prop_handler custom_prop_handler;
char *opts_used;
enum zsv_status parser_status;
zsv_parser parser;
Expand All @@ -106,6 +107,7 @@ struct zsvTable *zsvTable_new() {
if(z) {
memset(z, 0, sizeof(*z));
z->parser_opts = zsv_get_default_opts();
z->custom_prop_handler = zsv_get_default_custom_prop_handler();
}
return z;
}
Expand Down Expand Up @@ -229,7 +231,7 @@ static int zsvtabConnect(
pNew->zFilename = CSV_FILENAME;
pNew->opts_used = ZSV_OPTS_USED;
CSV_FILENAME = ZSV_OPTS_USED = 0; // in use; don't free
if(zsv_new_with_properties(&pNew->parser_opts, pNew->zFilename, pNew->opts_used,
if(zsv_new_with_properties(&pNew->parser_opts, &pNew->custom_prop_handler, pNew->zFilename, pNew->opts_used,
&pNew->parser) != zsv_status_ok)
goto zsvtab_connect_error;

Expand Down Expand Up @@ -376,7 +378,7 @@ static int zsvtabFilter(
fseek(pTab->parser_opts.stream, 0, SEEK_SET);

// reload and advance header, then first data row
if(zsv_new_with_properties(&pTab->parser_opts, pTab->zFilename, pTab->opts_used,
if(zsv_new_with_properties(&pTab->parser_opts, &pTab->custom_prop_handler, pTab->zFilename, pTab->opts_used,
&pTab->parser) != zsv_status_ok
|| (pTab->parser_status = zsv_next_row(pTab->parser)) != zsv_status_row)
return SQLITE_ERROR;
Expand Down
4 changes: 2 additions & 2 deletions app/flatten.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ static void flatten_cleanup(struct flatten_data *data) {
zsv_writer_delete(data->csv_writer);
}

int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
flatten_usage();
return 0;
Expand Down Expand Up @@ -746,7 +746,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
opts->ctx = &data;

zsv_parser handle;
if(zsv_new_with_properties(opts, input_path, opts_used, &handle) != zsv_status_ok)
if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &handle) != zsv_status_ok)
err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
else {
zsv_set_scan_filter(handle, zsv_filter_write, tmp_f);
Expand Down
Loading

0 comments on commit 781ddd5

Please sign in to comment.