Skip to content

Commit

Permalink
compare: fix non-reporting of non-matching trailing rows (#143)
Browse files Browse the repository at this point in the history
and add `--print-key-colname` option
  • Loading branch information
liquidaty authored Nov 11, 2023
1 parent ab4b0b7 commit d959919
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 25 deletions.
88 changes: 70 additions & 18 deletions app/compare.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ static void zsv_compare_json_row_end(struct zsv_compare_data *data) {
static void zsv_compare_output_tuple(struct zsv_compare_data *data,
struct zsv_compare_input *key_input,
const unsigned char *colname,
struct zsv_cell *values // in original input order
struct zsv_cell *values, // in original input order
char is_key
) {
// print ID | Column | Value 1 | ... | Value N
if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
Expand All @@ -159,7 +160,7 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data,

for(unsigned i = 0; i < data->input_count; i++) {
struct zsv_compare_input *input = &data->inputs[i];
if(input->done || !input->row_loaded) { // no data for this input
if((input->done || !input->row_loaded) && !is_key) { // no data for this input
zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
} else {
struct zsv_cell *value = &values[i];
Expand All @@ -182,6 +183,34 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data,
zsv_compare_json_row_end(data);
}

// Return the names of all key columns joined with '|' (e.g. "a|b|c").
// The joined string is built on first use and cached in
// data->combined_key_names; later calls return the cached buffer.
// Keys without a name are skipped. Returns NULL if the buffer could not be
// allocated. The buffer is owned by `data`; callers must not free it.
static const unsigned char *zsv_compare_combined_key_names(struct zsv_compare_data *data) {
  if(data->combined_key_names) // already built on an earlier call
    return data->combined_key_names;

  // size the buffer: each name plus one byte for its separator, plus slack
  size_t capacity = 2;
  for(unsigned i = 0; i < data->key_count; i++) {
    struct zsv_compare_key *k = &data->keys[i];
    if(!k || !k->name)
      continue;
    capacity += strlen(k->name) + 1;
  }

  // zero-filled, so the result is always NUL-terminated
  data->combined_key_names = calloc(1, capacity);
  if(!data->combined_key_names)
    return data->combined_key_names;

  unsigned char *cursor = NULL; // NULL until the first name has been written
  for(unsigned i = 0; i < data->key_count; i++) {
    struct zsv_compare_key *k = &data->keys[i];
    if(!k || !k->name)
      continue;

    if(cursor == NULL)
      cursor = data->combined_key_names;
    else
      *cursor++ = (unsigned char)'|'; // separator before every name but the first

    size_t name_len = strlen(k->name);
    memcpy(cursor, k->name, name_len);
    cursor += name_len;
  }
  return data->combined_key_names;
}

static void zsv_compare_print_row(struct zsv_compare_data *data,
const unsigned last_ix // last input ix in inputs_to_sort
) {
Expand All @@ -198,19 +227,25 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,

#define ZSV_COMPARE_MISSING "Missing"

if(last_ix + 1 < data->input_count) {
// if(last_ix + 1 < data->input_count) {
// if we don't have data from every input, then output "Missing" for missing inputs
for(unsigned i = last_ix + 1; i < data->input_count; i++) {
char got_missing = 0;
for(unsigned i = 0; i < data->input_count; i++) {
struct zsv_compare_input *input = data->inputs_to_sort[i];
unsigned input_ix = input->index;
values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
if(i > last_ix) {
got_missing = 1;
unsigned input_ix = input->index;
values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
}
}
zsv_compare_output_tuple(data, key_input, (unsigned char *)"<key>", values);

// reset values
memset(values, 0, data->input_count * sizeof(*values));
}
if(got_missing) {
const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
zsv_compare_output_tuple(data, key_input, key_names, values, 1);
// reset values
memset(values, 0, data->input_count * sizeof(*values));
}
// }

// for each output column
zsv_compare_unique_colname *output_col = data->output_colnames_first;
Expand All @@ -237,13 +272,13 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,
if(!output_col)
output_col = input->output_colnames[input_col_ix];
values[input_ix] = data->get_cell(input, input_col_ix);
if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix]))
if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix))
different = 1;
}
}

if(different)
zsv_compare_output_tuple(data, key_input, output_col->name, values);
zsv_compare_output_tuple(data, key_input, output_col->name, values, 0);
}
free(values);
}
Expand Down Expand Up @@ -283,7 +318,8 @@ static enum zsv_compare_status zsv_compare_set_inputs(struct zsv_compare_data *d
return zsv_compare_status_ok;
}

static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2);
static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2,
void *data, unsigned col_ix);

static void zsv_compare_output_begin(struct zsv_compare_data *data) {
if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
Expand Down Expand Up @@ -390,7 +426,11 @@ input_init_unsorted(struct zsv_compare_data *data,

zsv_compare_handle zsv_compare_new() {
zsv_compare_handle z = calloc(1, sizeof(*z));
zsv_compare_set_comparison(z, zsv_compare_cell, z);
#if defined(ZSV_COMPARE_CMP_FUNC) && defined(ZSV_COMPARE_CMP_CTX)
zsv_compare_set_comparison(z, ZSV_COMPARE_CMP_FUNC, ZSV_COMPARE_CMP_CTX);
#else
zsv_compare_set_comparison(z, zsv_compare_cell, NULL);
#endif
z->output_colnames_next = &z->output_colnames;

z->next_row = zsv_compare_next_unsorted_row;
Expand Down Expand Up @@ -431,6 +471,7 @@ static void zsv_compare_data_free(struct zsv_compare_data *data) {
for(unsigned i = 0; i < data->input_count; i++)
zsv_compare_input_free(&data->inputs[i]);
free(data->inputs);
free(data->combined_key_names);
free(data->inputs_to_sort);
for(unsigned i = 0; i < data->writer.properties.used; i++)
free(data->writer.properties.names[i]);
Expand Down Expand Up @@ -466,8 +507,11 @@ void zsv_compare_set_comparison(struct zsv_compare_data *data,
data->cmp_ctx = cmp_ctx;
}

static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2) {
// Default cell comparison: hands both cells' bytes and lengths to
// zsv_strincmp() and returns its result (the row printer treats any non-zero
// result as "different"). The ctx, data and col_ix arguments exist only to
// satisfy the comparison-callback signature and are not used here.
static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2,
                            void *data, unsigned col_ix) {
  (void)ctx;
  (void)data;
  (void)col_ix;
  int result = zsv_strincmp(c1.str, c1.len, c2.str, c2.len);
  return result;
}
Expand All @@ -478,7 +522,11 @@ static enum zsv_compare_status zsv_compare_advance(struct zsv_compare_data *data
for(unsigned i = 0; i < data->input_count; i++) {
struct zsv_compare_input *input = &data->inputs[i];
if(input->done) continue;
if(input->row_loaded) continue;

if(input->row_loaded) {
got = 1;
continue;
}
if(data->next_row(input) != zsv_status_row)
input->done = 1;
else {
Expand Down Expand Up @@ -563,6 +611,8 @@ static int compare_usage() {
" --json : output as JSON",
" --json-compact : output as compact JSON",
" --json-object : output as an array of objects",
" --print-key-colname : when outputting key column diffs,",
" print column name instead of <key>",
"",
"NOTES",
"",
Expand Down Expand Up @@ -655,6 +705,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
} else if(!strcmp(arg, "--json-compact")) {
data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
data->writer.compact = 1;
} else if(!strcmp(arg, "--print-key-colname")) {
data->print_key_col_names = 1;
} else
input_filenames[input_count++] = arg;
}
Expand Down
4 changes: 3 additions & 1 deletion app/compare.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ enum zsv_compare_status {

typedef struct zsv_compare_data *zsv_compare_handle;

typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell);
typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell,
void *struct_zsv_compare_data,
unsigned input_col_ix);

zsv_compare_handle zsv_compare_new();
// enum zsv_compare_status zsv_compare_set_inputs(zsv_compare_handle, unsigned input_count, unsigned key_count);
Expand Down
7 changes: 5 additions & 2 deletions app/compare_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ struct zsv_compare_input {
sqlite3_stmt *sort_stmt;

unsigned char row_loaded:1;
unsigned char missing:1;
unsigned char done:1;
unsigned char _:6;
unsigned char _:5;
};

struct zsv_compare_key {
Expand All @@ -77,6 +78,7 @@ struct zsv_compare_data {

unsigned key_count;
struct zsv_compare_key *keys;
unsigned char *combined_key_names;

size_t row_count; // only matters if no ID columns are specified

Expand Down Expand Up @@ -125,7 +127,8 @@ struct zsv_compare_data {

unsigned char sort:1;
unsigned char sort_in_memory:1;
unsigned char _:6;
unsigned char print_key_col_names:1;
unsigned char _:5;
};

#endif
6 changes: 4 additions & 2 deletions app/compare_sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ static int zsv_compare_sort_stmt_prep(sqlite3 *db, sqlite3_stmt **stmtp,
static enum zsv_compare_status
input_init_sorted(struct zsv_compare_data *data,
struct zsv_compare_input *input,
struct zsv_opts *opts,
struct zsv_opts *_opts,
struct zsv_prop_handler *_prop_handler,
const char *opts_used
) {
(void)(opts);
(void)(_opts);
(void)(_prop_handler);
char *err_msg = NULL;
int rc = zsv_compare_sort_prep_table(data, input->path, opts_used, 0, &err_msg, input->index);
if(err_msg) {
Expand Down
3 changes: 3 additions & 0 deletions app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,6 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}

@(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --json-object -k c ${REDIRECT1} ${TMP_DIR}/[email protected] && \
${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})

@(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --print-key-colname -k c ${REDIRECT1} ${TMP_DIR}/[email protected] && \
${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})
4 changes: 2 additions & 2 deletions app/test/expected/test-compare.out5
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ C,Column,compare/t1.csv,compare/t5.csv,compare/t6-unsorted.csv
C1,<key>,,,Missing
C9-NONMATCHING,<key>,Missing,,Missing
X2,B,B2,BB,BB
C9-NONMATCHING,<key>,,,
C1,<key>,,,
C9-NONMATCHING,<key>,Missing,Missing,
C1,<key>,Missing,Missing,
6 changes: 6 additions & 0 deletions app/test/expected/test-compare.out8
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,11 @@
"Column": "B",
"compare/t7.csv": "",
"compare/t3.csv": "BB"
},
{
"c": "X2",
"Column": "<key>",
"compare/t7.csv": "Missing",
"compare/t3.csv": "Missing"
}
]
5 changes: 5 additions & 0 deletions app/test/expected/test-compare.out9
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
c,Column,compare/t1.csv,compare/t7.csv,compare/t3.csv
C1,B,X1,B1,X1
C2,c,Missing,,
C2,B,,,BB
X2,c,,Missing,Missing

0 comments on commit d959919

Please sign in to comment.