Skip to content

Commit

Permalink
sheet/index: Fix and test multi-line headings and line endings (#264)
Browse files Browse the repository at this point in the history
* sheet: Fix multi-line headers and ignored rows when using the index
* index: Handle \r\n correctly when seeking a row. This makes line-ending handling more consistent by always checking for a newline when seeking. It also makes sure that the offset contributed by the newline at the initial jump is added.
* sheet: Add tests for mixed line endings and multi-line headings
  • Loading branch information
richiejp authored Nov 6, 2024
1 parent 522e5d4 commit 7086ed0
Show file tree
Hide file tree
Showing 6 changed files with 2,850 additions and 12 deletions.
2 changes: 2 additions & 0 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_

enum zsv_index_status zst;
if (uibuff->index_ready) {
opts.header_span = 0;
opts.rows_to_ignore = 0;
zst = zsv_index_seek_row(uibuff->index, &opts, start_row);

remaining_header_to_skip = 0;
Expand Down
23 changes: 22 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}

test-sheet: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} worldcitiespop_mil.csv test-sheet-all

test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5
test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5 test-sheet-6 test-sheet-7
@(for SESSION in $^; do ! tmux kill-session -t "$$SESSION" 2>/dev/null; done && ${TEST_PASS} || ${TEST_FAIL})

test-sheet-1: ${BUILD_DIR}/bin/zsv_sheet${EXE}
Expand Down Expand Up @@ -628,3 +628,24 @@ test-sheet-5: ${BUILD_DIR}/bin/zsv_sheet${EXE}
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})

# test-sheet-6: screen-capture test of zsv_sheet on mixed-line-endings.csv.
# Starts a detached 80x50 tmux session named after the target, running
# zsv_sheet with `-d 3` on the test file, waits 0.5s, sends the keys "G" and
# then Ctrl-u three times, waits another 0.5s, captures the pane to
# ${TMP_DIR}/test-sheet-6.out, sends "q", and compares the capture with
# expected/test-sheet-6.out.
# NOTE(review): `-d 3` presumably selects a 3-row header span and "G"/Ctrl-u
# presumably jump to the end and page up -- confirm against zsv_sheet's
# option and key-binding documentation.
# NOTE: the second recipe line overwrites ~/.tmux.conf of the invoking user.
test-sheet-6: ${BUILD_DIR}/bin/zsv_sheet${EXE}
@${TEST_INIT}
@echo 'set-option default-terminal "tmux-256color"' > ~/.tmux.conf
@(tmux new-session -x 80 -y 50 -d -s $@ "${PREFIX} $< -d 3 ${TEST_DATA_DIR}/test/mixed-line-endings.csv" && \
sleep 0.5 && \
tmux send-keys -t $@ "G" "C-u" "C-u" "C-u" && \
sleep 0.5 && \
tmux capture-pane -t $@ -p ${REDIRECT1} ${TMP_DIR}/$@.out && \
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})

# test-sheet-7: screen-capture test of zsv_sheet on mixed-line-endings.csv
# in a small (80x5) detached tmux session named after the target.
# Runs zsv_sheet with `-d 3`, waits 0.5s, sends the keys "G", "g", "g",
# Ctrl-u, "/", "1234", Enter, waits another 0.5s, captures the pane to
# ${TMP_DIR}/test-sheet-7.out, sends "q", and compares the capture with
# expected/test-sheet-7.out.
# NOTE(review): the key sequence presumably jumps to the end, returns to the
# top, and then searches for "1234" -- confirm against zsv_sheet's key
# bindings.
# NOTE: the second recipe line overwrites ~/.tmux.conf of the invoking user.
test-sheet-7: ${BUILD_DIR}/bin/zsv_sheet${EXE}
@${TEST_INIT}
@echo 'set-option default-terminal "tmux-256color"' > ~/.tmux.conf
@(tmux new-session -x 80 -y 5 -d -s $@ "${PREFIX} $< -d 3 ${TEST_DATA_DIR}/test/mixed-line-endings.csv" && \
sleep 0.5 && \
tmux send-keys -t $@ "G" "g" "g" "C-u" "/" "1234" "Enter" && \
sleep 0.5 && \
tmux capture-pane -t $@ -p ${REDIRECT1} ${TMP_DIR}/$@.out && \
tmux send-keys -t $@ "q" && \
${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})
50 changes: 50 additions & 0 deletions app/test/expected/test-sheet-6.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3
3905 A3905 B3905 C3905
3906 A3906 B3906 C3906
3907 A3907 B3907 C3907
3908 A3908 B3908 C3908
3909 A3909 B3909 C3909
3910 A3910 B3910 C3910
3911 A3911 B3911 C3911
3912 A3912 B3912 C3912
3913 A3913 B3913 C3913
3914 A3914 B3914 C3914
3915 A3915 B3915 C3915
3916 A3916 B3916 C3916
3917 A3917 B3917 C3917
3918 A3918 B3918 C3918
3919 A3919 B3919 C3919
3920 A3920 B3920 C3920
3921 A3921 B3921 C3921
3922 A3922 B3922 C3922
3923 A3923 B3923 C3923
3924 A3924 B3924 C3924
3925 A3925 B3925 C3925
3926 A3926 B3926 C3926
3927 A3927 B3927 C3927
3928 A3928 B3928 C3928
3929 A3929 B3929 C3929
3930 A3930 B3930 C3930
3931 A3931 B3931 C3931
3932 A3932 B3932 C3932
3933 A3933 B3933 C3933
3934 A3934 B3934 C3934
3935 A3935 B3935 C3935
3936 A3936 B3936 C3936
3937 A3937 B3937 C3937
3938 A3938 B3938 C3938
3939 A3939 B3939 C3939
3940 A3940 B3940 C3940
3941 A3941 B3941 C3941
3942 A3942 B3942 C3942
3943 A3943 B3943 C3943
3944 A3944 B3944 C3944
3945 A3945 B3945 C3945
3946 A3946 B3946 C3946
3947 A3947 B3947 C3947
3948 A3948 B3948 C3948
3949 A3949 B3949 C3949
3950 A3950 B3950 C3950
3951 A3951 B3951 C3951
3952 A3952 B3952 C3952
? for help 3952
5 changes: 5 additions & 0 deletions app/test/expected/test-sheet-7.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Row # HA1 HA2 HA3 HB1 HB2 HB3 HC1 HC2 HC3
1234 A1234 B1234 C1234
1235 A1235 B1235 C1235
1236 A1236 B1236 C1236
? for help 1234
35 changes: 24 additions & 11 deletions app/utils/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct zsv_index *zsv_index_new(void) {
enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, zsv_parser parser) {
struct zsv_index_array *arr = ix->array;
size_t len = arr->len, cap = arr->capacity;
uint64_t line_end = zsv_cum_scanned_length(parser) + 1;
uint64_t line_end = zsv_cum_scanned_length(parser);

if (!ix->header_line_end) {
ix->header_line_end = line_end;
Expand Down Expand Up @@ -89,8 +89,8 @@ static void seek_row_handler(void *ctx) {
zsv_abort(c->parser);
}

static enum zsv_index_status seek_and_check_newline(long offset, struct zsv_opts *opts) {
char maybe_space;
static enum zsv_index_status seek_and_check_newline(long *offset, struct zsv_opts *opts) {
char new_line[2];
zsv_generic_read read = (zsv_generic_read)fread;
zsv_generic_seek seek = (zsv_generic_seek)fseek;
FILE *stream = opts->stream;
Expand All @@ -101,17 +101,30 @@ static enum zsv_index_status seek_and_check_newline(long offset, struct zsv_opts
if (opts->read)
read = opts->read;

if (seek(stream, offset, SEEK_SET))
if (seek(stream, *offset, SEEK_SET))
return zsv_index_status_error;

if (read(&maybe_space, 1, 1, stream) != 1)
size_t nmemb = read(new_line, 1, 2, stream);

if (nmemb < 1)
return zsv_index_status_error;

if (!isspace(maybe_space)) {
if (seek(stream, offset, SEEK_SET))
return zsv_index_status_error;
if (new_line[0] == '\n') {
*offset += 1;
} else if (new_line[0] == '\r') {
if (new_line[1] == '\n') {
*offset += 1;
return zsv_index_status_ok;
}

*offset += 1;
} else {
return zsv_index_status_error;
}

if (seek(stream, *offset, SEEK_SET))
return zsv_index_status_error;

return zsv_index_status_ok;
}

Expand All @@ -123,7 +136,7 @@ enum zsv_index_status zsv_index_seek_row(const struct zsv_index *ix, struct zsv_
if (zist != zsv_index_status_ok)
return zist;

if ((zist = seek_and_check_newline(offset, opts)) != zsv_index_status_ok)
if ((zist = seek_and_check_newline((long *)&offset, opts)) != zsv_index_status_ok)
return zist;

if (!remaining_rows)
Expand All @@ -146,9 +159,9 @@ enum zsv_index_status zsv_index_seek_row(const struct zsv_index *ix, struct zsv_
if (zst != zsv_status_cancelled)
return zsv_index_status_error;

offset += zsv_cum_scanned_length(parser) + 1;
offset += zsv_cum_scanned_length(parser);

zsv_delete(parser);

return seek_and_check_newline(offset, opts);
return seek_and_check_newline((long *)&offset, opts);
}
Loading

0 comments on commit 7086ed0

Please sign in to comment.