From bcd1625f293e05913e01d30d2bb01f62838ab5da Mon Sep 17 00:00:00 2001 From: liquidaty Date: Sun, 14 Apr 2024 17:56:02 -0700 Subject: [PATCH 1/4] echo: add --contiguous option --- app/echo.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/app/echo.c b/app/echo.c index e64fb47c..cde03356 100644 --- a/app/echo.c +++ b/app/echo.c @@ -53,7 +53,8 @@ struct zsv_echo_data { unsigned char *skip_until_prefix; size_t skip_until_prefix_len; unsigned char trim_white:1; - unsigned char _:7; + unsigned char contiguous:1; + unsigned char _:6; }; /** @@ -95,6 +96,8 @@ static void zsv_echo_row(void *hook) { cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len); zsv_writer_cell(data->csv_writer, i == 0, cell.str, cell.len, cell.quoted); } + } else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) { + zsv_abort(data->parser); } else { for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) { if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) { @@ -132,6 +135,7 @@ const char *zsv_echo_usage_msg[] = { "Options:", " -b : output with BOM", " --trim : trim whitespace", + " --contiguous : stop output upon scanning an entire row of blank values", " --skip-until : ignore all leading rows until the first row whose first column starts with the given value ", " --overwrite : overwrite cells using given source. 
Source may be:", " - sqlite3://[?sql=]", @@ -229,6 +233,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op const char *arg = argv[arg_i]; if(!strcmp(arg, "-b")) writer_opts.with_bom = 1; + else if(!strcmp(arg, "--contiguous")) + data.contiguous = 1; else if(!strcmp(arg, "--trim")) data.trim_white = 1; else if(!strcmp(arg, "--skip-until")) { From 24f15491740fe25707c77e3763d6491d3e7dc9ae Mon Sep 17 00:00:00 2001 From: liquidaty Date: Sun, 14 Apr 2024 19:03:20 -0700 Subject: [PATCH 2/4] echo: add --trim-columns, tests for --contiguous and --trim-columns --- app/echo.c | 90 +++++++++++++++++-- app/test/Makefile | 17 +++- app/test/expected/test-echo-contiguous.out | 2 + .../expected/test-echo-trim-columns-2.out | 4 + app/test/expected/test-echo-trim-columns.out | 4 + data/test/echo-contiguous.csv | 4 + data/test/echo-trim-columns.csv | 4 + 7 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 app/test/expected/test-echo-contiguous.out create mode 100644 app/test/expected/test-echo-trim-columns-2.out create mode 100644 app/test/expected/test-echo-trim-columns.out create mode 100644 data/test/echo-contiguous.csv create mode 100644 data/test/echo-trim-columns.csv diff --git a/app/echo.c b/app/echo.c index cde03356..805685a5 100644 --- a/app/echo.c +++ b/app/echo.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -52,9 +53,13 @@ struct zsv_echo_data { unsigned char *skip_until_prefix; size_t skip_until_prefix_len; + + char *tmp_fn; + unsigned max_nonempty_cols; unsigned char trim_white:1; + unsigned char trim_columns:1; unsigned char contiguous:1; - unsigned char _:6; + unsigned char _:5; }; /** @@ -87,10 +92,28 @@ void zsv_echo_get_next_overwrite(struct zsv_echo_data *data) { } } +static void zsv_echo_get_max_nonempty_cols(void *hook) { + struct zsv_echo_data *data = hook; + unsigned row_nonempty_col_count = 0; + for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) { + struct zsv_cell cell = 
zsv_get_cell(data->parser, i); + if(UNLIKELY(data->trim_white)) + cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len); + if(cell.len) + row_nonempty_col_count = i+1; + } + if(data->max_nonempty_cols < row_nonempty_col_count) + data->max_nonempty_cols = row_nonempty_col_count; +} + static void zsv_echo_row(void *hook) { struct zsv_echo_data *data = hook; + size_t j = zsv_cell_count(data->parser); + if(UNLIKELY(data->trim_columns && j > data->max_nonempty_cols)) + j = data->max_nonempty_cols; + if(VERY_UNLIKELY(data->row_ix == 0)) { // header - for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) { + for(size_t i = 0; i < j; i++) { struct zsv_cell cell = zsv_get_cell(data->parser, i); if(UNLIKELY(data->trim_white)) cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len); @@ -99,7 +122,7 @@ static void zsv_echo_row(void *hook) { } else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) { zsv_abort(data->parser); } else { - for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) { + for(size_t i = 0; i < j; i++) { if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) { zsv_writer_cell(data->csv_writer, i == 0, data->overwrite.str, data->overwrite.len, 1); zsv_echo_get_next_overwrite(data); @@ -135,6 +158,7 @@ const char *zsv_echo_usage_msg[] = { "Options:", " -b : output with BOM", " --trim : trim whitespace", + " --trim-columns : trim blank columns", " --contiguous : stop output upon scanning an entire row of blank values", " --skip-until : ignore all leading rows until the first row whose first column starts with the given value ", " --overwrite : overwrite cells using given source. 
Source may be:", @@ -161,6 +185,11 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) { fclose(data->in); if(data->o.sqlite3.db) sqlite3_close(data->o.sqlite3.db); + + if(data->tmp_fn) { + remove(data->tmp_fn); + free(data->tmp_fn); + } } #define zsv_echo_sqlite3_prefix "sqlite3://" @@ -235,6 +264,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op writer_opts.with_bom = 1; else if(!strcmp(arg, "--contiguous")) data.contiguous = 1; + else if(!strcmp(arg, "--trim-columns")) + data.trim_columns = 1; else if(!strcmp(arg, "--trim")) data.trim_white = 1; else if(!strcmp(arg, "--skip-until")) { @@ -294,10 +325,57 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op return 1; } + unsigned char buff[4096]; if(data.skip_until_prefix) opts->row_handler = zsv_echo_row_skip_until; - else + else { + if(data.trim_columns) { + // first, save the file if it is stdin + if(data.in == stdin) { + if(!(data.tmp_fn = zsv_get_temp_filename("zsv_echo_XXXXXXXX"))) { + zsv_echo_cleanup(&data); + return 1; + } + + FILE *f = fopen(data.tmp_fn, "wb"); + if(!f) { + perror(data.tmp_fn); + zsv_echo_cleanup(&data); + return 1; + } else { + size_t bytes_read; + while((bytes_read = fread(buff, 1, sizeof(buff), data.in)) > 0) + fwrite(buff, 1, bytes_read, f); + fclose(f); + if(!(data.in = fopen(data.tmp_fn, "rb"))) { + perror(data.tmp_fn); + zsv_echo_cleanup(&data); + return 1; + } + } + } + // next, determine the max number of columns from the left that contains data + struct zsv_opts tmp_opts = *opts; + tmp_opts.row_handler = zsv_echo_get_max_nonempty_cols; + tmp_opts.stream = data.in; + tmp_opts.ctx = &data; + if(zsv_new_with_properties(&tmp_opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok) { + zsv_echo_cleanup(&data); + return 1; + } else { + // find the max nonempty col count + enum zsv_status status; + while(!zsv_signal_interrupted && (status = zsv_parse_more(data.parser)) == 
zsv_status_ok) ; + zsv_finish(data.parser); + zsv_delete(data.parser); + data.parser = NULL; + + // re-open the input again + data.in = fopen(data.tmp_fn ? data.tmp_fn : data.input_path, "rb"); + } + } opts->row_handler = zsv_echo_row; + } opts->stream = data.in; opts->ctx = &data; data.csv_writer = zsv_writer_new(&writer_opts); @@ -320,8 +398,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op } // create a local csv writer buff for faster performance - unsigned char writer_buff[64]; - zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff)); + // unsigned char writer_buff[64]; + zsv_writer_set_temp_buff(data.csv_writer, buff, sizeof(buff)); // process the input data. zsv_handle_ctrl_c_signal(); diff --git a/app/test/Makefile b/app/test/Makefile index 394cd833..e25e0a42 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -100,7 +100,7 @@ test: ${TESTS} test-prop: EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test -test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until +test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2 test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE} @${TEST_INIT} @@ -124,6 +124,21 @@ test-echo-skip-until: ${BUILD_DIR}/bin/zsv_echo${EXE} @${PREFIX} $< --skip-until ASF ${TEST_DATA_DIR}/test/echo-skip-until.csv ${REDIRECT} ${TMP_DIR}/$@.out @${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} +test-echo-contiguous: ${BUILD_DIR}/bin/zsv_echo${EXE} + @${TEST_INIT} + @${PREFIX} $< --contiguous ${TEST_DATA_DIR}/test/../../data/test/echo-contiguous.csv ${REDIRECT} ${TMP_DIR}/$@.out + @${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} + +test-echo-trim-columns: ${BUILD_DIR}/bin/zsv_echo${EXE} + @${TEST_INIT} + @${PREFIX} $< 
--trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out + @${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} + +test-echo-trim-columns-2: ${BUILD_DIR}/bin/zsv_echo${EXE} + @${TEST_INIT} + @${PREFIX} $< --trim --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out + @${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} + test-echo-chars: ${BUILD_DIR}/bin/zsv_echo${EXE} @${TEST_INIT} @${PREFIX} echo '東京都' | $< -u '?' ${REDIRECT} ${TMP_DIR}/$@.out diff --git a/app/test/expected/test-echo-contiguous.out b/app/test/expected/test-echo-contiguous.out new file mode 100644 index 00000000..b1ac26e1 --- /dev/null +++ b/app/test/expected/test-echo-contiguous.out @@ -0,0 +1,2 @@ +abc,def,,,, +1,2,3,,,, diff --git a/app/test/expected/test-echo-trim-columns-2.out b/app/test/expected/test-echo-trim-columns-2.out new file mode 100644 index 00000000..a46f45b1 --- /dev/null +++ b/app/test/expected/test-echo-trim-columns-2.out @@ -0,0 +1,4 @@ +abc,def,, +1,2,3, +4,,, +5,6,7,8 diff --git a/app/test/expected/test-echo-trim-columns.out b/app/test/expected/test-echo-trim-columns.out new file mode 100644 index 00000000..b4e8c952 --- /dev/null +++ b/app/test/expected/test-echo-trim-columns.out @@ -0,0 +1,4 @@ +abc,def,,, +1,2,3,, +4,,,, +5,6,7,8, diff --git a/data/test/echo-contiguous.csv b/data/test/echo-contiguous.csv new file mode 100644 index 00000000..d1d9011f --- /dev/null +++ b/data/test/echo-contiguous.csv @@ -0,0 +1,4 @@ +abc,def,,,, +1,2,3,,,, +,,,,,,,, +5,6,7,8,,,, diff --git a/data/test/echo-trim-columns.csv b/data/test/echo-trim-columns.csv new file mode 100644 index 00000000..09e1a093 --- /dev/null +++ b/data/test/echo-trim-columns.csv @@ -0,0 +1,4 @@ +abc,def,,,, +1,2,3,, ,, +4,,,,,,, +5,6,7,8,,,, From 3d5c8cfaed6238e368413833e27bdbaa278798d6 Mon Sep 17 00:00:00 2001 From: liquidaty Date: Mon, 15 Apr 2024 21:56:22 -0700 Subject: [PATCH 3/4] updated READMEs --- 
README.md | 48 +++++++++++++++++++++++++++++++++++++----- examples/lib/README.md | 8 ++++++- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d9884e7f..7c42fe5d 100644 --- a/README.md +++ b/README.md @@ -225,17 +225,20 @@ for speed and ease of development for extending and/or customizing to your needs * `echo`: read CSV from stdin and write it back out to stdout. This is mostly useful for demonstrating how to use the API and also how to create a plug-in, - and has some limited utility beyond that e.g. for adding/removing the UTF8 - BOM, or cleaning up bad UTF8 + and has several uses beyond that including adding/removing BOM, + cleaning up bad UTF8, + whitespace or blank column trimming, + limiting output to a contiguous data block, skipping leading garbage, and even + providing substitution values without modifying the underlying source * `select`: re-shape CSV by skipping leading garbage, combining header rows into a single header, selecting or excluding specified columns, removing duplicate - columns, sampling, searching and more -* `sql`: run ad-hoc SQL query on a CSV file + columns, sampling, converting from fixed-width input, searching and more +* `sql`: treat one or more CSV files like database tables and query with SQL * `desc`: provide a quick description of your table data * `pretty`: format for console (fixed-width) display, or convert to markdown format * `2json`: convert CSV to JSON. 
Optionally, output in [database schema](docs/db.schema.json) -* `2tsv`: convert CSV to TSV +* `2tsv`: convert to TSV (tab-delimited) format * `compare`: compare two or more tables of data and output the differences * `paste` (alpha): horizontally paste two tables together (given inputs X and Y, output 1...N rows where each row all columns of X in row N, followed by all columns of Y in row N) @@ -264,6 +267,41 @@ zsv sql my_population_data.csv "select * from data where population > 100000" ### Using the API +Simple API usage examples include: + +Pull parsing: +``` +zsv_parser parser = zsv_new(...); +while(zsv_next_row(parser) == zsv_status_row) { /* for each row */ + // do something + size_t cell_count = zsv_cell_count(parser); + for(size_t i = 0; i < cell_count; i++) { + struct zsv_cell c = zsv_get_cell(parser, i); + fprintf(stderr, "Cell: %.*s\n", c.len, c.str); + ... + } +``` + +Push parsing: +``` +static void my_row_handler(void *ctx) { + zsv_parser p = ctx; + size_t cell_count = zsv_cell_count(p); + for(size_t i = 0, j = zsv_cell_count(p); i < j; i++) { + ... + } +} + +int main() { + zsv_parser p = zsv_new(NULL); + zsv_set_row_handler(p, my_row_handler); + zsv_set_context(p, p); + + enum zsv_status stat; + while((stat = zsv_parse_more(p)) == zsv_status_ok) ; + +``` + Full application code examples can be found at [examples/lib/README.md](examples/lib/README.md). 
An example of using the API, compiled to wasm and called via Javascript, diff --git a/examples/lib/README.md b/examples/lib/README.md index 19349cab..939f7ed6 100644 --- a/examples/lib/README.md +++ b/examples/lib/README.md @@ -70,7 +70,13 @@ returns `zsv_status_row` until no more rows are left to parse ``` zsv_parser parser = zsv_new(...); while(zsv_next_row(parser) == zsv_status_row) { /* for each row */ - // do something + // do something + size_t cell_count = zsv_cell_count(parser); + for(size_t i = 0; i < cell_count; i++) { + struct zsv_cell c = zsv_get_cell(parser, i); + fprintf(stderr, "Cell: %.*s\n", c.len, c.str); + ... + } } ``` From 614d672e4a3b9e8603632595092c661b382b34b7 Mon Sep 17 00:00:00 2001 From: liquidaty Date: Wed, 17 Apr 2024 10:51:47 -0700 Subject: [PATCH 4/4] add --tolerance option --- app/compare.c | 66 +++++++++++++------ app/compare_internal.h | 6 ++ app/test/Makefile | 15 +++++ app/test/expected/test-compare-tolerance.out1 | 5 ++ app/test/expected/test-compare-tolerance.out2 | 2 + app/test/expected/test-compare-tolerance.out3 | 3 + app/test/expected/test-compare-tolerance.out4 | 5 ++ app/utils/string.c | 10 +++ data/compare/tolerance1.csv | 2 + data/compare/tolerance2.csv | 2 + include/zsv/utils/string.h | 12 ++++ 11 files changed, 109 insertions(+), 19 deletions(-) create mode 100644 app/test/expected/test-compare-tolerance.out1 create mode 100644 app/test/expected/test-compare-tolerance.out2 create mode 100644 app/test/expected/test-compare-tolerance.out3 create mode 100644 app/test/expected/test-compare-tolerance.out4 create mode 100644 data/compare/tolerance1.csv create mode 100644 data/compare/tolerance2.csv diff --git a/app/compare.c b/app/compare.c index 9526ab16..de8330f0 100644 --- a/app/compare.c +++ b/app/compare.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -227,25 +228,23 @@ static void zsv_compare_print_row(struct zsv_compare_data *data, #define ZSV_COMPARE_MISSING "Missing" -// if(last_ix + 1 < 
data->input_count) { - // if we don't have data from every input, then output "Missing" for missing inputs - char got_missing = 0; - for(unsigned i = 0; i < data->input_count; i++) { - struct zsv_compare_input *input = data->inputs_to_sort[i]; - if(i > last_ix) { - got_missing = 1; - unsigned input_ix = input->index; - values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING; - values[input_ix].len = strlen(ZSV_COMPARE_MISSING); - } - } - if(got_missing) { - const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)""; - zsv_compare_output_tuple(data, key_input, key_names, values, 1); - // reset values - memset(values, 0, data->input_count * sizeof(*values)); + // if we don't have data from every input, then output "Missing" for missing inputs + char got_missing = 0; + for(unsigned i = 0; i < data->input_count; i++) { + struct zsv_compare_input *input = data->inputs_to_sort[i]; + if(i > last_ix) { + got_missing = 1; + unsigned input_ix = input->index; + values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING; + values[input_ix].len = strlen(ZSV_COMPARE_MISSING); } -// } + } + if(got_missing) { + const unsigned char *key_names = data->print_key_col_names ? 
zsv_compare_combined_key_names(data) : (const unsigned char *)""; + zsv_compare_output_tuple(data, key_input, key_names, values, 1); + // reset values + memset(values, 0, data->input_count * sizeof(*values)); + } // for each output column zsv_compare_unique_colname *output_col = data->output_colnames_first; @@ -272,8 +271,23 @@ static void zsv_compare_print_row(struct zsv_compare_data *data, if(!output_col) output_col = input->output_colnames[input_col_ix]; values[input_ix] = data->get_cell(input, input_col_ix); - if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) + if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) { different = 1; + if(data->tolerance.value + && values[first_input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN + && values[input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN) { + // check if both are numbers with a difference less than the given tolerance + double d1, d2; + memcpy(data->tolerance.str1, values[first_input_ix].str, values[first_input_ix].len); + data->tolerance.str1[values[first_input_ix].len] = '\0'; + memcpy(data->tolerance.str2, values[input_ix].str, values[input_ix].len); + data->tolerance.str2[values[input_ix].len] = '\0'; + if(!zsv_strtod_exact(data->tolerance.str1, &d1) + && !zsv_strtod_exact(data->tolerance.str2, &d2) + && fabs(d1 - d2) < data->tolerance.value) + different = 0; + } + } } } @@ -608,6 +622,10 @@ static int compare_usage() { " --sort : sort on keys before comparing", " --sort-in-memory : for sorting, use in-memory instead of temporary db", " (see https://www.sqlite.org/inmemorydb.html)", + " --tolerance : ignore differences where both values are numeric", + " strings with values differing by less than the given", + " amount e.g. 
--tolerance 0.01 will ignore differences", + " of numeric strings such as 123.45 vs 123.44", " --json : output as JSON", " --json-compact : output as compact JSON", " --json-object : output as an array of objects", @@ -695,6 +713,16 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op data->added_colcount++; } } + } else if(!strcmp(arg, "--tolerance")) { + const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err); + if(next_arg) { + if(zsv_strtod_exact(next_arg, &data->tolerance.value)) + fprintf(stderr, "Invalid numeric value: %s\n", next_arg), err = 1; + else if(data->tolerance.value < 0) + fprintf(stderr, "Tolerance must be greater than zero (got %s)\n", next_arg), err = 1; + else + data->tolerance.value = nextafterf(data->tolerance.value, INFINITY); + } } else if(!strcmp(arg, "--sort")) { data->sort = 1; } else if(!strcmp(arg, "--json")) { diff --git a/app/compare_internal.h b/app/compare_internal.h index 52bb49c3..4c3acc70 100644 --- a/app/compare_internal.h +++ b/app/compare_internal.h @@ -106,6 +106,12 @@ struct zsv_compare_data { sqlite3 *sort_db; // used when --sort option was specified + struct { + double value; +#define ZSV_COMPARE_MAX_NUMBER_BUFF_LEN 128 + char str1[ZSV_COMPARE_MAX_NUMBER_BUFF_LEN]; + char str2[ZSV_COMPARE_MAX_NUMBER_BUFF_LEN]; + } tolerance; struct { char type; // 'j' for json union { diff --git a/app/test/Makefile b/app/test/Makefile index e25e0a42..e5f62d67 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -495,6 +495,20 @@ test-desc: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} @(${PREFIX} $< < ${TEST_DATA_DIR}/test/$*-trim.csv ${REDIRECT2} ${TMP_DIR}/$@.trim && \ ${CMP} ${TMP_DIR}/$@.trim expected/$@.trim && ${TEST_PASS} || ${TEST_FAIL}) +test-compare-tolerance: ${BUILD_DIR}/bin/zsv_compare${EXE} + @(${PREFIX} $< ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out1 && \ + ${CMP} ${TMP_DIR}/$@.out1 expected/$@.out1 && ${TEST_PASS} || ${TEST_FAIL}) + + 
@(${PREFIX} $< --tolerance 0.001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out2 && \ + ${CMP} ${TMP_DIR}/$@.out2 expected/$@.out2 && ${TEST_PASS} || ${TEST_FAIL}) + + @(${PREFIX} $< --tolerance 0.0001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out3 && \ + ${CMP} ${TMP_DIR}/$@.out3 expected/$@.out3 && ${TEST_PASS} || ${TEST_FAIL}) + + @(${PREFIX} $< --tolerance 0.00001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out4 && \ + ${CMP} ${TMP_DIR}/$@.out4 expected/$@.out4 && ${TEST_PASS} || ${TEST_FAIL}) + + test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} @${TEST_INIT} @(${PREFIX} $< compare/t1.csv compare/t2.csv compare/t3.csv ${REDIRECT1} ${TMP_DIR}/$@.out && \ @@ -529,3 +543,4 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} @(${PREFIX} $< ../../data/compare/t1.csv ../../data/compare/t2.csv --add AccentCity --sort -k country -k city ${REDIRECT1} ${TMP_DIR}/$@.out10 && \ ${CMP} ${TMP_DIR}/$@.out10 expected/$@.out10 && ${TEST_PASS} || ${TEST_FAIL}) + diff --git a/app/test/expected/test-compare-tolerance.out1 b/app/test/expected/test-compare-tolerance.out1 new file mode 100644 index 00000000..c2edf924 --- /dev/null +++ b/app/test/expected/test-compare-tolerance.out1 @@ -0,0 +1,5 @@ +Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv +1,A,1,1.01 +1,B,1,1.001 +1,C,1,1.0001 +1,D,1,1.00009 diff --git a/app/test/expected/test-compare-tolerance.out2 b/app/test/expected/test-compare-tolerance.out2 new file mode 100644 index 00000000..68e7993e --- /dev/null +++ b/app/test/expected/test-compare-tolerance.out2 @@ -0,0 +1,2 @@ +Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv +1,A,1,1.01 diff --git a/app/test/expected/test-compare-tolerance.out3 b/app/test/expected/test-compare-tolerance.out3 new file mode 100644 index 00000000..bcf8af67 --- /dev/null +++ 
b/app/test/expected/test-compare-tolerance.out3 @@ -0,0 +1,3 @@ +Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv +1,A,1,1.01 +1,B,1,1.001 diff --git a/app/test/expected/test-compare-tolerance.out4 b/app/test/expected/test-compare-tolerance.out4 new file mode 100644 index 00000000..c2edf924 --- /dev/null +++ b/app/test/expected/test-compare-tolerance.out4 @@ -0,0 +1,5 @@ +Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv +1,A,1,1.01 +1,B,1,1.001 +1,C,1,1.0001 +1,D,1,1.00009 diff --git a/app/utils/string.c b/app/utils/string.c index aa96ffcc..ec37852a 100644 --- a/app/utils/string.c +++ b/app/utils/string.c @@ -330,6 +330,16 @@ size_t zsv_strunescape_backslash(unsigned char *s, size_t len) { return j; } +// zsv_strtod_exact(const char *s): return error; if 0, set value of *d +int zsv_strtod_exact(const char *s, double *d) { + if(!*s) return 1; + char *end; + *d = strtod(s, &end); + if(*end) return 1; + return 0; +} + + #ifndef ZSV_STRING_LIB_ONLY struct zsv_cell zsv_get_cell_trimmed(zsv_parser parser, size_t ix) { struct zsv_cell c = zsv_get_cell(parser, ix); diff --git a/data/compare/tolerance1.csv b/data/compare/tolerance1.csv new file mode 100644 index 00000000..77c53863 --- /dev/null +++ b/data/compare/tolerance1.csv @@ -0,0 +1,2 @@ +A,B,C,D +1,1,1,1 diff --git a/data/compare/tolerance2.csv b/data/compare/tolerance2.csv new file mode 100644 index 00000000..7eb08ea0 --- /dev/null +++ b/data/compare/tolerance2.csv @@ -0,0 +1,2 @@ +A,B,C,D +1.01,1.001,1.0001,1.00009 diff --git a/include/zsv/utils/string.h b/include/zsv/utils/string.h index 30ed5905..21131717 100644 --- a/include/zsv/utils/string.h +++ b/include/zsv/utils/string.h @@ -118,8 +118,20 @@ size_t zsv_strnext_is_sign(const unsigned char *s, size_t len); */ size_t zsv_strnext_is_currency(const unsigned char *s, size_t len); + +/* + * Convert a string to a double. 
must convert entire string, else returns error + * @param s string to convert + * @param d pointer to converted value, on success + * + * @returns 0 on success, non-zero on error + */ +int zsv_strtod_exact(const char *s, double *d); + /* * `zsv_get_cell_trimmed` is equivalent to `zsv_get_cell`, except that it * returns a value with leading and trailing whitespace removed + * @param parser parser to read the cell from + * @param ix index of the cell to fetch */ #include