From bcd1625f293e05913e01d30d2bb01f62838ab5da Mon Sep 17 00:00:00 2001
From: liquidaty <info@liquidaty.com>
Date: Sun, 14 Apr 2024 17:56:02 -0700
Subject: [PATCH 1/4] echo: add --contiguous option

---
 app/echo.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/app/echo.c b/app/echo.c
index e64fb47c..cde03356 100644
--- a/app/echo.c
+++ b/app/echo.c
@@ -53,7 +53,8 @@ struct zsv_echo_data {
   unsigned char *skip_until_prefix;
   size_t skip_until_prefix_len;
   unsigned char trim_white:1;
-  unsigned char _:7;
+  unsigned char contiguous:1;
+  unsigned char _:6;
 };
 
 /**
@@ -95,6 +96,8 @@ static void zsv_echo_row(void *hook) {
         cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);
       zsv_writer_cell(data->csv_writer, i == 0, cell.str, cell.len, cell.quoted);
     }
+  } else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) {
+    zsv_abort(data->parser);
   } else {
     for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
       if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) {
@@ -132,6 +135,7 @@ const char *zsv_echo_usage_msg[] = {
   "Options:",
   "  -b                  : output with BOM",
   "  --trim              : trim whitespace",
+  "  --contiguous        : stop output upon scanning an entire row of blank values",
   "  --skip-until <value>: ignore all leading rows until the first row whose first column starts with the given value ",
   "  --overwrite <source>: overwrite cells using given source. Source may be:",
   "                        - sqlite3://<filename>[?sql=<query>]",
@@ -229,6 +233,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
     const char *arg = argv[arg_i];
     if(!strcmp(arg, "-b"))
       writer_opts.with_bom = 1;
+    else if(!strcmp(arg, "--contiguous"))
+      data.contiguous = 1;
     else if(!strcmp(arg, "--trim"))
       data.trim_white = 1;
     else if(!strcmp(arg, "--skip-until")) {

From 24f15491740fe25707c77e3763d6491d3e7dc9ae Mon Sep 17 00:00:00 2001
From: liquidaty <info@liquidaty.com>
Date: Sun, 14 Apr 2024 19:03:20 -0700
Subject: [PATCH 2/4] echo: add --trim-columns, tests for --contiguous and
 --trim-columns

---
 app/echo.c                                    | 90 +++++++++++++++++--
 app/test/Makefile                             | 17 +++-
 app/test/expected/test-echo-contiguous.out    |  2 +
 .../expected/test-echo-trim-columns-2.out     |  4 +
 app/test/expected/test-echo-trim-columns.out  |  4 +
 data/test/echo-contiguous.csv                 |  4 +
 data/test/echo-trim-columns.csv               |  4 +
 7 files changed, 118 insertions(+), 7 deletions(-)
 create mode 100644 app/test/expected/test-echo-contiguous.out
 create mode 100644 app/test/expected/test-echo-trim-columns-2.out
 create mode 100644 app/test/expected/test-echo-trim-columns.out
 create mode 100644 data/test/echo-contiguous.csv
 create mode 100644 data/test/echo-trim-columns.csv

diff --git a/app/echo.c b/app/echo.c
index cde03356..805685a5 100644
--- a/app/echo.c
+++ b/app/echo.c
@@ -18,6 +18,7 @@
 
 #include <zsv/utils/compiler.h>
 #include <zsv/utils/writer.h>
+#include <zsv/utils/file.h>
 #include <zsv/utils/string.h>
 #include <zsv/utils/mem.h>
 
@@ -52,9 +53,13 @@ struct zsv_echo_data {
 
   unsigned char *skip_until_prefix;
   size_t skip_until_prefix_len;
+
+  char *tmp_fn;
+  unsigned max_nonempty_cols;
   unsigned char trim_white:1;
+  unsigned char trim_columns:1;
   unsigned char contiguous:1;
-  unsigned char _:6;
+  unsigned char _:5;
 };
 
 /**
@@ -87,10 +92,28 @@ void zsv_echo_get_next_overwrite(struct zsv_echo_data *data) {
   }
 }
 
+static void zsv_echo_get_max_nonempty_cols(void *hook) { // row handler for the --trim-columns pre-pass
+  struct zsv_echo_data *data = hook;
+  unsigned last_used = 0; // 1-based position of this row's right-most non-blank cell; 0 if none
+  size_t cell_total = zsv_cell_count(data->parser);
+  for(size_t col = 0; col < cell_total; col++) {
+    struct zsv_cell c = zsv_get_cell(data->parser, col);
+    if(UNLIKELY(data->trim_white)) // with --trim, a whitespace-only cell counts as blank
+      c.str = (unsigned char *)zsv_strtrim(c.str, &c.len);
+    last_used = c.len ? col + 1 : last_used;
+  }
+  // widen the running maximum kept across all rows seen so far
+  data->max_nonempty_cols = data->max_nonempty_cols < last_used ? last_used : data->max_nonempty_cols;
+}
+
 static void zsv_echo_row(void *hook) {
   struct zsv_echo_data *data = hook;
+  size_t j = zsv_cell_count(data->parser);
+  if(UNLIKELY(data->trim_columns && j > data->max_nonempty_cols))
+    j = data->max_nonempty_cols;
+  
   if(VERY_UNLIKELY(data->row_ix == 0)) { // header
-    for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
+    for(size_t i = 0; i < j; i++) {
       struct zsv_cell cell = zsv_get_cell(data->parser, i);
       if(UNLIKELY(data->trim_white))
         cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);
@@ -99,7 +122,7 @@ static void zsv_echo_row(void *hook) {
   } else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) {
     zsv_abort(data->parser);
   } else {
-    for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
+    for(size_t i = 0; i < j; i++) {
       if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) {
         zsv_writer_cell(data->csv_writer, i == 0, data->overwrite.str, data->overwrite.len, 1);
         zsv_echo_get_next_overwrite(data);
@@ -135,6 +158,7 @@ const char *zsv_echo_usage_msg[] = {
   "Options:",
   "  -b                  : output with BOM",
   "  --trim              : trim whitespace",
+  "  --trim-columns      : trim blank columns",
   "  --contiguous        : stop output upon scanning an entire row of blank values",
   "  --skip-until <value>: ignore all leading rows until the first row whose first column starts with the given value ",
   "  --overwrite <source>: overwrite cells using given source. Source may be:",
@@ -161,6 +185,11 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) {
     fclose(data->in);
   if(data->o.sqlite3.db)
     sqlite3_close(data->o.sqlite3.db);
+
+  if(data->tmp_fn) {
+    remove(data->tmp_fn);
+    free(data->tmp_fn);
+  }
 }
 
 #define zsv_echo_sqlite3_prefix "sqlite3://"
@@ -235,6 +264,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       writer_opts.with_bom = 1;
     else if(!strcmp(arg, "--contiguous"))
       data.contiguous = 1;
+    else if(!strcmp(arg, "--trim-columns"))
+      data.trim_columns = 1;
     else if(!strcmp(arg, "--trim"))
       data.trim_white = 1;
     else if(!strcmp(arg, "--skip-until")) {
@@ -294,10 +325,57 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
     return 1;
   }
 
+  unsigned char buff[4096];
   if(data.skip_until_prefix)
     opts->row_handler = zsv_echo_row_skip_until;
-  else
+  else {
+    if(data.trim_columns) {
+      // first, save the file if it is stdin
+      if(data.in == stdin) {
+        if(!(data.tmp_fn = zsv_get_temp_filename("zsv_echo_XXXXXXXX"))) {
+          zsv_echo_cleanup(&data);
+          return 1;
+        }
+          
+        FILE *f = fopen(data.tmp_fn, "wb");
+        if(!f) {
+          perror(data.tmp_fn);
+          zsv_echo_cleanup(&data);
+          return 1;
+        } else {
+          size_t bytes_read;
+          while((bytes_read = fread(buff, 1, sizeof(buff), data.in)) > 0)
+            fwrite(buff, 1, bytes_read, f);
+          fclose(f);
+          if(!(data.in = fopen(data.tmp_fn, "rb"))) {
+            perror(data.tmp_fn);
+            zsv_echo_cleanup(&data);
+            return 1;
+          }
+        }
+      }
+      // next, determine the max number of columns from the left that contains data
+      struct zsv_opts tmp_opts = *opts;
+      tmp_opts.row_handler = zsv_echo_get_max_nonempty_cols;
+      tmp_opts.stream = data.in;
+      tmp_opts.ctx = &data;
+      if(zsv_new_with_properties(&tmp_opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok) {
+        zsv_echo_cleanup(&data);
+        return 1;
+      } else {
+        // find the max nonempty col count
+        enum zsv_status status;
+        while(!zsv_signal_interrupted && (status = zsv_parse_more(data.parser)) == zsv_status_ok) ;
+        zsv_finish(data.parser);
+        zsv_delete(data.parser);
+        data.parser = NULL;
+
+        // re-open the input again
+        data.in = fopen(data.tmp_fn ? data.tmp_fn : data.input_path, "rb");
+      }
+    }
     opts->row_handler = zsv_echo_row;
+  }
   opts->stream = data.in;
   opts->ctx = &data;
   data.csv_writer = zsv_writer_new(&writer_opts);
@@ -320,8 +398,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   }
 
   // create a local csv writer buff for faster performance
-  unsigned char writer_buff[64];
-  zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
+  //  unsigned char writer_buff[64];
+  zsv_writer_set_temp_buff(data.csv_writer, buff, sizeof(buff));
 
   // process the input data.
   zsv_handle_ctrl_c_signal();
diff --git a/app/test/Makefile b/app/test/Makefile
index 394cd833..e25e0a42 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -100,7 +100,7 @@ test: ${TESTS}
 test-prop:
 	EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test
 
-test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until
+test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2
 
 test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
 	@${TEST_INIT}
@@ -124,6 +124,21 @@ test-echo-skip-until: ${BUILD_DIR}/bin/zsv_echo${EXE}
 	@${PREFIX} $< --skip-until ASF ${TEST_DATA_DIR}/test/echo-skip-until.csv ${REDIRECT} ${TMP_DIR}/$@.out
 	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
 
+test-echo-contiguous: ${BUILD_DIR}/bin/zsv_echo${EXE} # output must stop at the first all-blank row
+	@${TEST_INIT}
+	@${PREFIX} $< --contiguous ${TEST_DATA_DIR}/test/echo-contiguous.csv ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-echo-trim-columns: ${BUILD_DIR}/bin/zsv_echo${EXE} # without --trim, a whitespace-only cell still counts as data
+	@${TEST_INIT}
+	@${PREFIX} $< --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-echo-trim-columns-2: ${BUILD_DIR}/bin/zsv_echo${EXE} # with --trim, whitespace-only cells are blank, so one more column is dropped
+	@${TEST_INIT}
+	@${PREFIX} $< --trim --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
+
 test-echo-chars: ${BUILD_DIR}/bin/zsv_echo${EXE}
 	@${TEST_INIT}
 	@${PREFIX} echo '東京都' | $< -u '?' ${REDIRECT} ${TMP_DIR}/$@.out
diff --git a/app/test/expected/test-echo-contiguous.out b/app/test/expected/test-echo-contiguous.out
new file mode 100644
index 00000000..b1ac26e1
--- /dev/null
+++ b/app/test/expected/test-echo-contiguous.out
@@ -0,0 +1,2 @@
+abc,def,,,,
+1,2,3,,,,
diff --git a/app/test/expected/test-echo-trim-columns-2.out b/app/test/expected/test-echo-trim-columns-2.out
new file mode 100644
index 00000000..a46f45b1
--- /dev/null
+++ b/app/test/expected/test-echo-trim-columns-2.out
@@ -0,0 +1,4 @@
+abc,def,,
+1,2,3,
+4,,,
+5,6,7,8
diff --git a/app/test/expected/test-echo-trim-columns.out b/app/test/expected/test-echo-trim-columns.out
new file mode 100644
index 00000000..b4e8c952
--- /dev/null
+++ b/app/test/expected/test-echo-trim-columns.out
@@ -0,0 +1,4 @@
+abc,def,,,
+1,2,3,, 
+4,,,,
+5,6,7,8,
diff --git a/data/test/echo-contiguous.csv b/data/test/echo-contiguous.csv
new file mode 100644
index 00000000..d1d9011f
--- /dev/null
+++ b/data/test/echo-contiguous.csv
@@ -0,0 +1,4 @@
+abc,def,,,,
+1,2,3,,,,
+,,,,,,,,
+5,6,7,8,,,,
diff --git a/data/test/echo-trim-columns.csv b/data/test/echo-trim-columns.csv
new file mode 100644
index 00000000..09e1a093
--- /dev/null
+++ b/data/test/echo-trim-columns.csv
@@ -0,0 +1,4 @@
+abc,def,,,,
+1,2,3,, ,,
+4,,,,,,,
+5,6,7,8,,,,

From 3d5c8cfaed6238e368413833e27bdbaa278798d6 Mon Sep 17 00:00:00 2001
From: liquidaty <info@liquidaty.com>
Date: Mon, 15 Apr 2024 21:56:22 -0700
Subject: [PATCH 3/4] updated READMEs

---
 README.md              | 48 +++++++++++++++++++++++++++++++++++++-----
 examples/lib/README.md |  8 ++++++-
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index d9884e7f..7c42fe5d 100644
--- a/README.md
+++ b/README.md
@@ -225,17 +225,20 @@ for speed and ease of development for extending and/or customizing to your needs
 
 * `echo`: read CSV from stdin and write it back out to stdout. This is mostly
   useful for demonstrating how to use the API and also how to create a plug-in,
-  and has some limited utility beyond that e.g. for adding/removing the UTF8
-  BOM, or cleaning up bad UTF8
+  and has several uses beyond that including adding/removing BOM,
+  cleaning up bad UTF8,
+  whitespace or blank column trimming,
+  limiting output to a contiguous data block, skipping leading garbage, and even
+  providing substitution values without modifying the underlying source
 * `select`: re-shape CSV by skipping leading garbage, combining header rows into
   a single header, selecting or excluding specified columns, removing duplicate
-  columns, sampling, searching and more
-* `sql`: run ad-hoc SQL query on a CSV file
+  columns, sampling, converting from fixed-width input, searching and more
+* `sql`: treat one or more CSV files like database tables and query with SQL
 * `desc`: provide a quick description of your table data
 * `pretty`: format for console (fixed-width) display, or convert to markdown
   format
 * `2json`: convert CSV to JSON. Optionally, output in [database schema](docs/db.schema.json)
-* `2tsv`: convert CSV to TSV
+* `2tsv`: convert to TSV (tab-delimited) format
 * `compare`: compare two or more tables of data and output the differences
 * `paste` (alpha): horizontally paste two tables together (given inputs X and Y,
    output 1...N rows where each row all columns of X in row N, followed by all columns of Y in row N)
@@ -264,6 +267,42 @@ zsv sql my_population_data.csv "select * from data where population > 100000"
 
 ### Using the API
 
+Simple API usage examples include:
+
+Pull parsing:
+```
+zsv_parser parser = zsv_new(...);
+while(zsv_next_row(parser) == zsv_status_row) { /* for each row */
+  // do something
+  size_t cell_count = zsv_cell_count(parser);
+  for(size_t i = 0; i < cell_count; i++) {
+    struct zsv_cell c = zsv_get_cell(parser, i);
+    fprintf(stderr, "Cell: %.*s\n", (int)c.len, c.str);
+    ...
+  }
+}
+```
+
+Push parsing:
+```
+static void my_row_handler(void *ctx) {
+  zsv_parser p = ctx;
+  size_t cell_count = zsv_cell_count(p);
+  for(size_t i = 0; i < cell_count; i++) {
+    ...
+  }
+}
+
+int main() {
+  zsv_parser p = zsv_new(NULL);
+  zsv_set_row_handler(p, my_row_handler);
+  zsv_set_context(p, p);
+
+  enum zsv_status stat;
+  while((stat = zsv_parse_more(p)) == zsv_status_ok) ;
+}
+```
+
 Full application code examples can be found at [examples/lib/README.md](examples/lib/README.md).
 
 An example of using the API, compiled to wasm and called via Javascript,
diff --git a/examples/lib/README.md b/examples/lib/README.md
index 19349cab..939f7ed6 100644
--- a/examples/lib/README.md
+++ b/examples/lib/README.md
@@ -70,7 +70,13 @@ returns `zsv_status_row` until no more rows are left to parse
 ```
 zsv_parser parser = zsv_new(...);
 while(zsv_next_row(parser) == zsv_status_row) { /* for each row */
-    // do something
+  // do something
+  size_t cell_count = zsv_cell_count(parser);
+  for(size_t i = 0; i < cell_count; i++) {
+    struct zsv_cell c = zsv_get_cell(parser, i);
+    fprintf(stderr, "Cell: %.*s\n", (int)c.len, c.str);
+    ...
+  }
 }
 ```
 

From 614d672e4a3b9e8603632595092c661b382b34b7 Mon Sep 17 00:00:00 2001
From: liquidaty <info@liquidaty.com>
Date: Wed, 17 Apr 2024 10:51:47 -0700
Subject: [PATCH 4/4] add --tolerance option

---
 app/compare.c                                 | 66 +++++++++++++------
 app/compare_internal.h                        |  6 ++
 app/test/Makefile                             | 15 +++++
 app/test/expected/test-compare-tolerance.out1 |  5 ++
 app/test/expected/test-compare-tolerance.out2 |  2 +
 app/test/expected/test-compare-tolerance.out3 |  3 +
 app/test/expected/test-compare-tolerance.out4 |  5 ++
 app/utils/string.c                            | 10 +++
 data/compare/tolerance1.csv                   |  2 +
 data/compare/tolerance2.csv                   |  2 +
 include/zsv/utils/string.h                    | 12 ++++
 11 files changed, 109 insertions(+), 19 deletions(-)
 create mode 100644 app/test/expected/test-compare-tolerance.out1
 create mode 100644 app/test/expected/test-compare-tolerance.out2
 create mode 100644 app/test/expected/test-compare-tolerance.out3
 create mode 100644 app/test/expected/test-compare-tolerance.out4
 create mode 100644 data/compare/tolerance1.csv
 create mode 100644 data/compare/tolerance2.csv

diff --git a/app/compare.c b/app/compare.c
index 9526ab16..de8330f0 100644
--- a/app/compare.c
+++ b/app/compare.c
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
+#include <math.h>
 #include <jsonwriter.h>
 
 #include <sqlite3.h>
@@ -227,25 +228,23 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,
 
 #define ZSV_COMPARE_MISSING "Missing"
 
-//  if(last_ix + 1 < data->input_count) {
-    // if we don't have data from every input, then output "Missing" for missing inputs
-    char got_missing = 0;
-    for(unsigned i = 0; i < data->input_count; i++) {
-      struct zsv_compare_input *input = data->inputs_to_sort[i];
-      if(i > last_ix) {
-        got_missing = 1;
-        unsigned input_ix = input->index;
-        values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
-        values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
-      }
-    }
-    if(got_missing) {
-      const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
-      zsv_compare_output_tuple(data, key_input, key_names, values, 1);
-      // reset values
-      memset(values, 0, data->input_count * sizeof(*values));
+  // if we don't have data from every input, then output "Missing" for missing inputs
+  char got_missing = 0;
+  for(unsigned i = 0; i < data->input_count; i++) {
+    struct zsv_compare_input *input = data->inputs_to_sort[i];
+    if(i > last_ix) {
+      got_missing = 1;
+      unsigned input_ix = input->index;
+      values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
+      values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
     }
-//  }
+  }
+  if(got_missing) {
+    const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
+    zsv_compare_output_tuple(data, key_input, key_names, values, 1);
+    // reset values
+    memset(values, 0, data->input_count * sizeof(*values));
+  }
 
   // for each output column
   zsv_compare_unique_colname *output_col = data->output_colnames_first;
@@ -272,8 +271,23 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,
         if(!output_col)
           output_col = input->output_colnames[input_col_ix];
         values[input_ix] = data->get_cell(input, input_col_ix);
-        if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix))
+        if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) {
           different = 1;
+          if(data->tolerance.value
+             && values[first_input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN
+             && values[input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN) {
+            // check if both are numbers with a difference less than the given tolerance            
+            double d1, d2;
+            memcpy(data->tolerance.str1, values[first_input_ix].str, values[first_input_ix].len);
+            data->tolerance.str1[values[first_input_ix].len] = '\0';
+            memcpy(data->tolerance.str2, values[input_ix].str, values[input_ix].len);
+            data->tolerance.str2[values[input_ix].len] = '\0';
+            if(!zsv_strtod_exact(data->tolerance.str1, &d1)
+               && !zsv_strtod_exact(data->tolerance.str2, &d2)
+               && fabs(d1 - d2) < data->tolerance.value)
+              different = 0;
+          }
+        }
       }
     }
 
@@ -608,6 +622,10 @@ static int compare_usage() {
     "  --sort             : sort on keys before comparing",
     "  --sort-in-memory   : for sorting,  use in-memory instead of temporary db",
     "                       (see https://www.sqlite.org/inmemorydb.html)",
+    "  --tolerance <value>: ignore differences where both values are numeric",
+    "                       strings with values differing by less than the given",
+    "                       amount e.g. --tolerance 0.01 will ignore differences",
+    "                       of numeric strings such as 123.45 vs 123.44",
     "  --json             : output as JSON",
     "  --json-compact     : output as compact JSON",
     "  --json-object      : output as an array of objects",
@@ -695,6 +713,16 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
             data->added_colcount++;
         }
       }
+    } else if(!strcmp(arg, "--tolerance")) {
+      const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
+      if(next_arg) {
+        if(zsv_strtod_exact(next_arg, &data->tolerance.value))
+          fprintf(stderr, "Invalid numeric value: %s\n", next_arg), err = 1;
+        else if(data->tolerance.value < 0)
+          fprintf(stderr, "Tolerance must be greater than zero (got %s)\n", next_arg), err = 1;
+        else
+          data->tolerance.value = nextafterf(data->tolerance.value, INFINITY);
+      }
     } else if(!strcmp(arg, "--sort")) {
       data->sort = 1;
     } else if(!strcmp(arg, "--json")) {
diff --git a/app/compare_internal.h b/app/compare_internal.h
index 52bb49c3..4c3acc70 100644
--- a/app/compare_internal.h
+++ b/app/compare_internal.h
@@ -106,6 +106,12 @@ struct zsv_compare_data {
 
   sqlite3 *sort_db; // used when --sort option was specified
 
+  struct {
+    double value;
+#define ZSV_COMPARE_MAX_NUMBER_BUFF_LEN 128
+    char   str1[ZSV_COMPARE_MAX_NUMBER_BUFF_LEN];
+    char   str2[ZSV_COMPARE_MAX_NUMBER_BUFF_LEN];
+  } tolerance;
   struct {
     char type; // 'j' for json
     union {
diff --git a/app/test/Makefile b/app/test/Makefile
index e25e0a42..e5f62d67 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -495,6 +495,20 @@ test-desc: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 	@(${PREFIX} $< < ${TEST_DATA_DIR}/test/$*-trim.csv ${REDIRECT2} ${TMP_DIR}/$@.trim && \
 	${CMP} ${TMP_DIR}/$@.trim expected/$@.trim && ${TEST_PASS} || ${TEST_FAIL})
 
+test-compare-tolerance: ${BUILD_DIR}/bin/zsv_compare${EXE} # runs with no tolerance, then 0.001, 0.0001 and 0.00001
+	@${TEST_INIT}
+	@(${PREFIX} $< ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out1 && \
+	${CMP} ${TMP_DIR}/$@.out1 expected/$@.out1 && ${TEST_PASS} || ${TEST_FAIL})
+
+	@(${PREFIX} $< --tolerance 0.001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out2 && \
+	${CMP} ${TMP_DIR}/$@.out2 expected/$@.out2 && ${TEST_PASS} || ${TEST_FAIL})
+
+	@(${PREFIX} $< --tolerance 0.0001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out3 && \
+	${CMP} ${TMP_DIR}/$@.out3 expected/$@.out3 && ${TEST_PASS} || ${TEST_FAIL})
+
+	@(${PREFIX} $< --tolerance 0.00001 ../../data/compare/tolerance1.csv ../../data/compare/tolerance2.csv ${REDIRECT1} ${TMP_DIR}/$@.out4 && \
+	${CMP} ${TMP_DIR}/$@.out4 expected/$@.out4 && ${TEST_PASS} || ${TEST_FAIL})
+
 test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
 	@(${PREFIX} $< compare/t1.csv compare/t2.csv compare/t3.csv ${REDIRECT1} ${TMP_DIR}/$@.out && \
@@ -529,3 +543,4 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 
 	@(${PREFIX} $< ../../data/compare/t1.csv ../../data/compare/t2.csv --add AccentCity --sort -k country -k city ${REDIRECT1} ${TMP_DIR}/$@.out10 && \
 	${CMP} ${TMP_DIR}/$@.out10 expected/$@.out10 && ${TEST_PASS} || ${TEST_FAIL})
+
diff --git a/app/test/expected/test-compare-tolerance.out1 b/app/test/expected/test-compare-tolerance.out1
new file mode 100644
index 00000000..c2edf924
--- /dev/null
+++ b/app/test/expected/test-compare-tolerance.out1
@@ -0,0 +1,5 @@
+Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv
+1,A,1,1.01
+1,B,1,1.001
+1,C,1,1.0001
+1,D,1,1.00009
diff --git a/app/test/expected/test-compare-tolerance.out2 b/app/test/expected/test-compare-tolerance.out2
new file mode 100644
index 00000000..68e7993e
--- /dev/null
+++ b/app/test/expected/test-compare-tolerance.out2
@@ -0,0 +1,2 @@
+Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv
+1,A,1,1.01
diff --git a/app/test/expected/test-compare-tolerance.out3 b/app/test/expected/test-compare-tolerance.out3
new file mode 100644
index 00000000..bcf8af67
--- /dev/null
+++ b/app/test/expected/test-compare-tolerance.out3
@@ -0,0 +1,3 @@
+Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv
+1,A,1,1.01
+1,B,1,1.001
diff --git a/app/test/expected/test-compare-tolerance.out4 b/app/test/expected/test-compare-tolerance.out4
new file mode 100644
index 00000000..c2edf924
--- /dev/null
+++ b/app/test/expected/test-compare-tolerance.out4
@@ -0,0 +1,5 @@
+Row #,Column,../../data/compare/tolerance1.csv,../../data/compare/tolerance2.csv
+1,A,1,1.01
+1,B,1,1.001
+1,C,1,1.0001
+1,D,1,1.00009
diff --git a/app/utils/string.c b/app/utils/string.c
index aa96ffcc..ec37852a 100644
--- a/app/utils/string.c
+++ b/app/utils/string.c
@@ -330,6 +330,16 @@ size_t zsv_strunescape_backslash(unsigned char *s, size_t len) {
   return j;
 }
 
+// zsv_strtod_exact(): parse all of s as a double. Returns 0 and sets *d on success, else non-zero (*d untouched)
+int zsv_strtod_exact(const char *s, double *d) {
+  if(!s || !*s) return 1; // NULL or empty input is not a number
+  char *end;
+  double value = strtod(s, &end);
+  if(*end) return 1; // unparsed text remains; this also covers "nothing was parsed"
+  *d = value; // store only once the whole string is known to be valid
+  return 0;
+}
+
 #ifndef ZSV_STRING_LIB_ONLY
 struct zsv_cell zsv_get_cell_trimmed(zsv_parser parser, size_t ix) {
   struct zsv_cell c = zsv_get_cell(parser, ix);
diff --git a/data/compare/tolerance1.csv b/data/compare/tolerance1.csv
new file mode 100644
index 00000000..77c53863
--- /dev/null
+++ b/data/compare/tolerance1.csv
@@ -0,0 +1,2 @@
+A,B,C,D
+1,1,1,1
diff --git a/data/compare/tolerance2.csv b/data/compare/tolerance2.csv
new file mode 100644
index 00000000..7eb08ea0
--- /dev/null
+++ b/data/compare/tolerance2.csv
@@ -0,0 +1,2 @@
+A,B,C,D
+1.01,1.001,1.0001,1.00009
diff --git a/include/zsv/utils/string.h b/include/zsv/utils/string.h
index 30ed5905..21131717 100644
--- a/include/zsv/utils/string.h
+++ b/include/zsv/utils/string.h
@@ -118,8 +118,18 @@ size_t zsv_strnext_is_sign(const unsigned char *s, size_t len);
  */
 size_t zsv_strnext_is_currency(const unsigned char *s, size_t len);
 
+
+/*
+ * Convert a string to a double. The entire string must convert, else an error is returned
+ * @param s     string to convert
+ * @param d     pointer to converted value, on success
+ *
+ * @returns     0 on success, non-zero on error
+ */
+int zsv_strtod_exact(const char *s, double *d);
+
 /*
  * `zsv_get_cell_trimmed` is equivalent to `zsv_get_cell`, except that it
  * returns a value with leading and trailing whitespace removed
  */
 #include <zsv.h>