Skip to content

Commit

Permalink
Cleanup and modularize index (#261)
Browse files Browse the repository at this point in the history
Move the basic in-memory index functionality into utils and rework the
API to make it easier to use. There is now a seek function which moves
the file offset to the exact line-end requested by the user.

The temp file generation has not been moved into utils because
generalising it started to introduce a lot more callbacks and other
complications.

This also changes the sheet display code so that (building index) is
displayed in the status area while indexing is in progress.
  • Loading branch information
richiejp authored Nov 5, 2024
1 parent 65e27ef commit 923870d
Show file tree
Hide file tree
Showing 10 changed files with 278 additions and 155 deletions.
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ CFLAGS+= -I${PREFIX}/include
THIS_LIB_BASE=$(shell cd .. && pwd)
INCLUDE_DIR=${THIS_LIB_BASE}/include
BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN}
UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq os overwrite
UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq os overwrite index

ZSV_EXTRAS ?=

Expand Down
39 changes: 19 additions & 20 deletions app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -605,35 +605,32 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

zsvsheet_handler_status status;

halfdelay(2); // now ncurses getch() will fire every 2-tenths of a second so we can check for status update

while (true) {
char *status_msg = NULL;
ch = getch();
if (ch == ERR) {
pthread_mutex_lock(&current_ui_buffer->mutex);
if (current_ui_buffer && current_ui_buffer->status) {
zsvsheet_set_status(&display_dims, 1, current_ui_buffer->status);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);
cbreak();
}
pthread_mutex_unlock(&current_ui_buffer->mutex);
continue;
}

zsvsheet_set_status(&display_dims, 1, "");
handler_state.display_info.update_buffer = false;

pthread_mutex_lock(&current_ui_buffer->mutex);
status_msg = current_ui_buffer->status;
if (current_ui_buffer->index_ready &&
current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count) {
current_ui_buffer->dimensions.row_count = current_ui_buffer->index->row_count;
current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count + 1) {
current_ui_buffer->dimensions.row_count = current_ui_buffer->index->row_count + 1;
handler_state.display_info.update_buffer = true;
}
pthread_mutex_unlock(&current_ui_buffer->mutex);

status = zsvsheet_key_press(ch, &handler_state);
if (status == zsvsheet_handler_status_exit)
break;
if (status != zsvsheet_handler_status_ok)
continue;
zsvsheet_set_status(&display_dims, 1, "");

if (ch != ERR) {
status = zsvsheet_key_press(ch, &handler_state);
if (status == zsvsheet_handler_status_exit)
break;
if (status != zsvsheet_handler_status_ok)
continue;
}

if (handler_state.display_info.update_buffer && current_ui_buffer->filename) {
struct zsvsheet_opts zsvsheet_opts = {0};
Expand All @@ -643,8 +640,10 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
continue;
}
}
if (current_ui_buffer->status)
zsvsheet_set_status(&display_dims, 1, current_ui_buffer->status);

if (status_msg)
zsvsheet_set_status(&display_dims, 1, status_msg);

display_buffer_subtable(current_ui_buffer, header_span, &display_dims);
}

Expand Down
87 changes: 19 additions & 68 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,85 +3,51 @@
#include <unistd.h>
#include <zsv.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/index.h>
#include <zsv/utils/file.h>
#include <zsv/utils/writer.h>

#include "index.h"
#include "zsv/utils/file.h"
#include "zsv/utils/writer.h"

static struct zsvsheet_index *add_line_end(struct zsvsheet_index *ix, uint64_t end) {
size_t len = ix->line_end_len, cap = ix->line_end_capacity;

if (len >= cap) {
cap *= 2;
ix = realloc(ix, sizeof(*ix) + cap * sizeof(ix->line_ends[0]));
if (!ix)
return NULL;

ix->line_end_capacity = cap;
}

ix->line_ends[len] = end;
ix->line_end_len++;

return ix;
}

static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsvsheet_index *ix = ixr->ix;
uint64_t line_end = zsv_cum_scanned_length(ixr->parser) + 1;
size_t col_count = zsv_cell_count(ixr->parser);
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;
size_t col_count = zsv_cell_count(parser);

if (ixr->filter) {
if (col_count == 0)
return;

if (ixr->ix->header_line_end) {
struct zsv_cell first_cell = zsv_get_cell(ixr->parser, 0);
struct zsv_cell last_cell = zsv_get_cell(ixr->parser, col_count - 1);
struct zsv_cell first_cell = zsv_get_cell(parser, 0);
struct zsv_cell last_cell = zsv_get_cell(parser, col_count - 1);

if (!memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, ixr->filter, ixr->filter_len))
return;
}

for (size_t i = 0; i < col_count; i++) {
struct zsv_cell cell = zsv_get_cell(ixr->parser, i);
struct zsv_cell cell = zsv_get_cell(parser, i);
zsv_writer_cell(ixr->writer, i == 0, cell.str, cell.len, cell.quoted);
}
}

if (!ixr->ix->header_line_end) {
ix->header_line_end = line_end;
} else if ((ix->row_count & (LINE_END_N - 1)) == 0) {
if (ixr->filter) {
if (zsv_writer_flush(ixr->writer) != zsv_writer_status_ok) {
zsv_abort(ixr->parser);
return;
}
line_end = ftell(ixr->filter_stream);
}

ix = add_line_end(ix, line_end);
if (!ix) {
zsv_abort(ixr->parser);
return;
}

ixr->ix = ix;
}

ix->row_count++;
if (zsv_index_add_row(ix, parser) != zsv_index_status_ok)
zsv_abort(parser);
}

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
struct zsvsheet_indexer ixr = {0};
ixr.filter = optsp->row_filter;
ixr.filter_len = optsp->row_filter ? strlen(optsp->row_filter) : 0;

enum zsvsheet_index_status ret = zsvsheet_index_status_error;
enum zsv_index_status ret = zsv_index_status_error;
struct zsv_opts ix_zopts = optsp->zsv_opts;
unsigned char temp_buff[8196];
char *temp_filename;
FILE *temp_f = NULL;
zsv_csv_writer temp_file_writer = NULL;
FILE *fp = fopen(optsp->filename, "rb");
if (!fp)
return ret;
Expand All @@ -96,8 +62,6 @@ enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp)
goto out;

if (optsp->row_filter) {
zsv_csv_writer temp_file_writer = NULL;
unsigned char temp_buff[8196];

temp_filename = zsv_get_temp_filename("zsvsheet_filter_XXXXXXXX");
if (!temp_filename)
Expand All @@ -116,41 +80,28 @@ enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp)
ixr.filter_stream = temp_f;
}

const size_t initial_cap = 256;
ixr.ix = malloc(sizeof(*ixr.ix) + initial_cap * sizeof(size_t));
ixr.ix = zsv_index_new();
if (!ixr.ix)
goto out;
memset(ixr.ix, 0, sizeof(*ixr.ix));
ixr.ix->line_end_capacity = initial_cap;

while ((zst = zsv_parse_more(ixr.parser)) == zsv_status_ok)
;

zsv_finish(ixr.parser);

if (zst == zsv_status_no_more_input) {
ret = zsvsheet_index_status_ok;
ret = zsv_index_status_ok;
*optsp->index = ixr.ix;
} else
free(ixr.ix);

out:
zsv_delete(ixr.parser);
fclose(fp);
if (temp_file_writer)
zsv_writer_delete(temp_file_writer);
if (temp_f)
fclose(temp_f);

return ret;
}

void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out) {
if (!row || row - 1 < LINE_END_N) {
*offset_out = (off_t)ix->header_line_end;
*remaining_rows_out = row;
return;
}

const size_t i = (row - LINE_END_N) >> LINE_END_SHIFT;
*offset_out = (off_t)ix->line_ends[i];
*remaining_rows_out = row & (LINE_END_N - 1);
}
27 changes: 3 additions & 24 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,9 @@
#include "zsv.h"
#include "zsv/utils/writer.h"

// Decides the number of rows we skip when storing the line end
// 1 << 10 = 1024 means that we store every 1024th line end
#define LINE_END_SHIFT 10
#define LINE_END_N (1 << LINE_END_SHIFT)

enum zsvsheet_index_status {
zsvsheet_index_status_ok = 0,
zsvsheet_index_status_memory,
zsvsheet_index_status_error,
zsvsheet_index_status_utf8,
};

struct zsvsheet_index {
uint64_t header_line_end;
uint64_t row_count;
size_t line_end_capacity;
size_t line_end_len;
uint64_t line_ends[];
};

struct zsvsheet_indexer {
zsv_parser parser;
struct zsvsheet_index *ix;
struct zsv_index *ix;
const char *filter;
size_t filter_len;
zsv_csv_writer writer;
Expand All @@ -43,15 +23,14 @@ struct zsvsheet_index_opts {
char **temp_filename;
const char *row_filter;
struct zsv_opts zsv_opts;
struct zsvsheet_index **index;
struct zsv_index **index;
unsigned char *index_ready;
struct zsvsheet_ui_buffer *uib;
int *errp;
struct zsv_prop_handler *custom_prop_handler;
const char *opts_used;
};

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp);
void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out);
enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp);

#endif
Loading

0 comments on commit 923870d

Please sign in to comment.