Skip to content

Commit

Permalink
sheet: Override header span and skip rows when using properties file (#266)
Browse files Browse the repository at this point in the history

* sheet: Override header span and skip rows when using properties file

The properties file was overriding the opts.rows_to_ignore and
opts.header_span values, both of which need to be zero when using an
index. This change recreates the parser with zsv_new after the opts
have been rewritten, whenever an index is active.

* sheet/index: Fix building the index with a filter

Previously the indexer was using the file offsets from the original
file when the filter was active. Instead it should use offsets in the
temporary filtered file.

The writer library doesn't record or expose the total number of bytes
written, and flushing the stream and then retrieving the offset would
be expensive and invasive. So this change splits filtering and indexing
into two separate operations.

* Update ci.yml

Add CI tests for macos-14, which should pass.

---------

Co-authored-by: liquidaty <[email protected]>
  • Loading branch information
richiejp and liquidaty authored Nov 9, 2024
1 parent 748d02a commit a6eeeee
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 31 deletions.
72 changes: 49 additions & 23 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,34 @@

#include "index.h"

static void build_memory_index_row_handler(void *ctx) {
static void save_filtered_file_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;
size_t col_count = zsv_cell_count(parser);

if (ixr->filter) {
if (col_count == 0)
return;
if (col_count == 0)
return;

if (ixr->ix->header_line_end) {
struct zsv_cell first_cell = zsv_get_cell(parser, 0);
struct zsv_cell last_cell = zsv_get_cell(parser, col_count - 1);
if (ixr->seen_header) {
struct zsv_cell first_cell = zsv_get_cell(parser, 0);
struct zsv_cell last_cell = zsv_get_cell(parser, col_count - 1);

if (!memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, ixr->filter, ixr->filter_len))
return;
}
if (!memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, ixr->filter, ixr->filter_len))
return;
} else {
ixr->seen_header = 1;
}

for (size_t i = 0; i < col_count; i++) {
struct zsv_cell cell = zsv_get_cell(parser, i);
zsv_writer_cell(ixr->writer, i == 0, cell.str, cell.len, cell.quoted);
}
for (size_t i = 0; i < col_count; i++) {
struct zsv_cell cell = zsv_get_cell(parser, i);
zsv_writer_cell(ixr->writer, i == 0, cell.str, cell.len, cell.quoted);
}
}

static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;

if (zsv_index_add_row(ix, parser) != zsv_index_status_ok)
zsv_abort(parser);
Expand All @@ -54,32 +59,53 @@ enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {

ix_zopts.ctx = &ixr;
ix_zopts.stream = fp;
ix_zopts.row_handler = build_memory_index_row_handler;

enum zsv_status zst =
zsv_new_with_properties(&ix_zopts, optsp->custom_prop_handler, optsp->filename, optsp->opts_used, &ixr.parser);
if (zst != zsv_status_ok)
goto out;

if (optsp->row_filter) {

temp_filename = zsv_get_temp_filename("zsvsheet_filter_XXXXXXXX");
if (!temp_filename)
return ret;

*optsp->temp_filename = temp_filename;

struct zsv_csv_writer_options writer_opts = {0};
if (!(writer_opts.stream = temp_f = fopen(temp_filename, "wb")))
if (!(writer_opts.stream = temp_f = fopen(temp_filename, "w+")))
return ret;
if (!(temp_file_writer = zsv_writer_new(&writer_opts)))
goto out;

zsv_writer_set_temp_buff(temp_file_writer, temp_buff, sizeof(temp_buff));
ixr.writer = temp_file_writer;
ixr.filter_stream = temp_f;
ix_zopts.row_handler = save_filtered_file_row_handler;

enum zsv_status zst =
zsv_new_with_properties(&ix_zopts, optsp->custom_prop_handler, optsp->filename, optsp->opts_used, &ixr.parser);
if (zst != zsv_status_ok)
goto out;

while ((zst = zsv_parse_more(ixr.parser)) == zsv_status_ok)
;

if (zst != zsv_status_no_more_input)
goto out;

zsv_finish(ixr.parser);
zsv_delete(ixr.parser);
zsv_writer_delete(temp_file_writer);
temp_file_writer = NULL;
if (fseek(temp_f, 0, SEEK_SET))
goto out;

ix_zopts.stream = temp_f;
}

ix_zopts.row_handler = build_memory_index_row_handler;

enum zsv_status zst =
zsv_new_with_properties(&ix_zopts, optsp->custom_prop_handler, optsp->filename, optsp->opts_used, &ixr.parser);
if (zst != zsv_status_ok)
goto out;

ixr.ix = zsv_index_new();
if (!ixr.ix)
goto out;
Expand Down
1 change: 1 addition & 0 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct zsvsheet_indexer {
size_t filter_len;
zsv_csv_writer writer;
FILE *filter_stream;
char seen_header;
};

struct zsvsheet_index_opts {
Expand Down
17 changes: 10 additions & 7 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_

opts.stream = fp; // Input file stream

zsv_parser parser = {0};
if (zsv_new_with_properties(&opts, custom_prop_handler, filename, opts_used, &parser) != zsv_status_ok) {
fclose(fp);
zsv_delete(parser);
return errno ? errno : -1;
}

if (uibuff) {
pthread_mutex_lock(&uibuff->mutex);
if (uibuff->index_ready && row_filter) {
Expand All @@ -88,6 +95,9 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_
opts.rows_to_ignore = 0;
zst = zsv_index_seek_row(uibuff->index, &opts, start_row);

zsv_delete(parser);
parser = zsv_new(&opts);

remaining_header_to_skip = 0;
remaining_rows_to_skip = 0;
original_row_num = header_span + start_row;
Expand All @@ -99,13 +109,6 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_

size_t rows_read = header_span;

zsv_parser parser = {0};
if (zsv_new_with_properties(&opts, custom_prop_handler, filename, opts_used, &parser) != zsv_status_ok) {
fclose(fp);
zsv_delete(parser);
return errno ? errno : -1;
}

size_t find_len = zsvsheet_opts->find ? strlen(zsvsheet_opts->find) : 0;
size_t rows_searched = 0;
zsvsheet_buffer_t buffer = uibuff ? uibuff->buffer : NULL;
Expand Down
27 changes: 26 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ test-sheet: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} worldcitiespop_mil.csv test-she
test-sheet-cleanup:
@rm -f tmux-*.log

test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5 test-sheet-6 test-sheet-7
test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5 test-sheet-6 test-sheet-7 test-sheet-8
@(for SESSION in $^; do ! tmux kill-session -t "$$SESSION" 2>/dev/null; done && ${TEST_PASS} || ${TEST_FAIL})

test-sheet-1: ${BUILD_DIR}/bin/zsv_sheet${EXE}
Expand Down Expand Up @@ -654,3 +654,28 @@ test-sheet-7: ${BUILD_DIR}/bin/zsv_sheet${EXE}
tmux capture-pane -t $@ -p ${REDIRECT1} ${TMP_DIR}/$@.out && \
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})

# test-sheet-7: tmux-driven screen test of zsv_sheet on mixed-line-endings.csv.
#
# NOTE(review): a target named test-sheet-7 is already defined earlier in this
# Makefile, so this second definition overrides that recipe (GNU make emits an
# "overriding recipe" warning). Confirm whether this duplicate is intentional
# or should be removed/renamed.
#
# Steps performed by the recipe:
#   1. Runs ${TEST_INIT} and overwrites ~/.tmux.conf with a single
#      default-terminal setting.
#   2. Starts a detached 80x5 tmux session named after the target ($@) running
#      the first prerequisite ($<, the zsv_sheet binary) with "-d 3" on
#      ${TEST_DATA_DIR}/test/mixed-line-endings.csv.
#      (presumably "-d 3" sets a header/row option -- TODO confirm)
#   3. After 0.5s, sends the keys G, g, g, C-u, "/", "1234", Enter
#      (presumably navigation followed by a search for "1234" -- TODO confirm).
#   4. After another 0.5s, captures the pane into ${TMP_DIR}/$@.out, sends "q",
#      and compares the capture against expected/$@.out using ${CMP}.
#   5. Runs ${TEST_PASS} if the whole chain succeeded, otherwise ${TEST_FAIL}.
test-sheet-7: ${BUILD_DIR}/bin/zsv_sheet${EXE}
@${TEST_INIT}
@echo 'set-option default-terminal "tmux-256color"' > ~/.tmux.conf
@(tmux new-session -x 80 -y 5 -d -s $@ "${PREFIX} $< -d 3 ${TEST_DATA_DIR}/test/mixed-line-endings.csv" && \
sleep 0.5 && \
tmux send-keys -t $@ "G" "g" "g" "C-u" "/" "1234" "Enter" && \
sleep 0.5 && \
tmux capture-pane -t $@ -p ${REDIRECT1} ${TMP_DIR}/$@.out && \
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})

# test-sheet-8: tmux-driven screen test of zsv_sheet on worldcitiespop_mil.csv.
#
# Steps performed by the recipe:
#   1. Runs ${TEST_INIT} and overwrites ~/.tmux.conf with a single
#      default-terminal setting.
#   2. Starts a detached 160x5 tmux session named after the target ($@) running
#      the first prerequisite ($<, the zsv_sheet binary) on
#      worldcitiespop_mil.csv.
#      NOTE(review): the CSV is opened by relative path but is not listed as a
#      prerequisite of this target -- confirm it is guaranteed to exist.
#   3. After 0.5s, sends the keys f, e, Enter
#      (presumably applies a row filter for "e" -- TODO confirm).
#   4. After another 0.5s, sends G, C-u, k
#      (presumably jump to the end, then move back up -- TODO confirm).
#   5. After another 0.5s, captures the pane into ${TMP_DIR}/$@.out, sends "q",
#      and compares the capture against expected/$@.out using ${CMP}.
#   6. Runs ${TEST_PASS} if the whole chain succeeded, otherwise ${TEST_FAIL}.
test-sheet-8: ${BUILD_DIR}/bin/zsv_sheet${EXE}
@${TEST_INIT}
@echo 'set-option default-terminal "tmux-256color"' > ~/.tmux.conf
@(tmux new-session -x 160 -y 5 -d -s $@ "${PREFIX} $< worldcitiespop_mil.csv" && \
sleep 0.5 && \
tmux send-keys -t $@ "f" "e" "Enter" && \
sleep 0.5 && \
tmux send-keys -t $@ "G" "C-u" "k" && \
sleep 0.5 && \
tmux capture-pane -t $@ -p ${REDIRECT1} ${TMP_DIR}/$@.out && \
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})

5 changes: 5 additions & 0 deletions app/test/expected/test-sheet-8.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # Country City AccentCity Region Population Latitude Longitude
493034 gb ruthven Ruthven V3 57.066667 -4.033333
493035 gb rutlandshire Rutlandshire L4 52.666667 -.666667
493036 gb rutupiae Rutupiæ G5 51.283333 1.333333
(493039 filtered rows) 493035

0 comments on commit a6eeeee

Please sign in to comment.