Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup and modularize index #261

Merged
merged 1 commit into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ CFLAGS+= -I${PREFIX}/include
THIS_LIB_BASE=$(shell cd .. && pwd)
INCLUDE_DIR=${THIS_LIB_BASE}/include
BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN}
UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq os overwrite
UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq os overwrite index

ZSV_EXTRAS ?=

Expand Down
39 changes: 19 additions & 20 deletions app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -605,35 +605,32 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op

zsvsheet_handler_status status;

halfdelay(2); // now ncurses getch() will fire every 2-tenths of a second so we can check for status update

while (true) {
char *status_msg = NULL;
ch = getch();
if (ch == ERR) {
pthread_mutex_lock(&current_ui_buffer->mutex);
if (current_ui_buffer && current_ui_buffer->status) {
zsvsheet_set_status(&display_dims, 1, current_ui_buffer->status);
display_buffer_subtable(current_ui_buffer, header_span, &display_dims);
cbreak();
}
pthread_mutex_unlock(&current_ui_buffer->mutex);
continue;
}

zsvsheet_set_status(&display_dims, 1, "");
handler_state.display_info.update_buffer = false;

pthread_mutex_lock(&current_ui_buffer->mutex);
status_msg = current_ui_buffer->status;
if (current_ui_buffer->index_ready &&
current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count) {
current_ui_buffer->dimensions.row_count = current_ui_buffer->index->row_count;
current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count + 1) {
current_ui_buffer->dimensions.row_count = current_ui_buffer->index->row_count + 1;
handler_state.display_info.update_buffer = true;
}
pthread_mutex_unlock(&current_ui_buffer->mutex);

status = zsvsheet_key_press(ch, &handler_state);
if (status == zsvsheet_handler_status_exit)
break;
if (status != zsvsheet_handler_status_ok)
continue;
zsvsheet_set_status(&display_dims, 1, "");

if (ch != ERR) {
status = zsvsheet_key_press(ch, &handler_state);
if (status == zsvsheet_handler_status_exit)
break;
if (status != zsvsheet_handler_status_ok)
continue;
}

if (handler_state.display_info.update_buffer && current_ui_buffer->filename) {
struct zsvsheet_opts zsvsheet_opts = {0};
Expand All @@ -643,8 +640,10 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
continue;
}
}
if (current_ui_buffer->status)
zsvsheet_set_status(&display_dims, 1, current_ui_buffer->status);

if (status_msg)
zsvsheet_set_status(&display_dims, 1, status_msg);

display_buffer_subtable(current_ui_buffer, header_span, &display_dims);
}

Expand Down
87 changes: 19 additions & 68 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,85 +3,51 @@
#include <unistd.h>
#include <zsv.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/index.h>
#include <zsv/utils/file.h>
#include <zsv/utils/writer.h>

#include "index.h"
#include "zsv/utils/file.h"
#include "zsv/utils/writer.h"

/*
 * Append `end` to ix->line_ends, growing the allocation when the array is full.
 *
 * Returns the index to use from now on (it may have moved), or NULL if the
 * array could not be grown. When NULL is returned the block passed in has been
 * neither freed nor modified, so the caller's existing pointer remains valid
 * and is still the caller's to free.
 */
static struct zsvsheet_index *add_line_end(struct zsvsheet_index *ix, uint64_t end) {
  const size_t len = ix->line_end_len;
  const size_t cap = ix->line_end_capacity;

  if (len >= cap) {
    const size_t elem_size = sizeof(ix->line_ends[0]);
    // Largest element count whose total allocation size still fits in a size_t
    const size_t max_cap = ((size_t)-1 - sizeof(*ix)) / elem_size;

    // Doubling a capacity of 0 would stay at 0 and the store below would land
    // outside the allocation, so start from 1 in that case
    if (cap > max_cap / 2)
      return NULL;
    const size_t new_cap = cap ? cap * 2 : 1;

    // Keep the original pointer untouched until realloc is known to have succeeded
    struct zsvsheet_index *grown = realloc(ix, sizeof(*ix) + new_cap * elem_size);
    if (!grown)
      return NULL;

    ix = grown;
    ix->line_end_capacity = new_cap;
  }

  ix->line_ends[len] = end;
  ix->line_end_len++;

  return ix;
}

static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsvsheet_index *ix = ixr->ix;
uint64_t line_end = zsv_cum_scanned_length(ixr->parser) + 1;
size_t col_count = zsv_cell_count(ixr->parser);
struct zsv_index *ix = ixr->ix;
zsv_parser parser = ixr->parser;
size_t col_count = zsv_cell_count(parser);

if (ixr->filter) {
if (col_count == 0)
return;

if (ixr->ix->header_line_end) {
struct zsv_cell first_cell = zsv_get_cell(ixr->parser, 0);
struct zsv_cell last_cell = zsv_get_cell(ixr->parser, col_count - 1);
struct zsv_cell first_cell = zsv_get_cell(parser, 0);
struct zsv_cell last_cell = zsv_get_cell(parser, col_count - 1);

if (!memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, ixr->filter, ixr->filter_len))
return;
}

for (size_t i = 0; i < col_count; i++) {
struct zsv_cell cell = zsv_get_cell(ixr->parser, i);
struct zsv_cell cell = zsv_get_cell(parser, i);
zsv_writer_cell(ixr->writer, i == 0, cell.str, cell.len, cell.quoted);
}
}

if (!ixr->ix->header_line_end) {
ix->header_line_end = line_end;
} else if ((ix->row_count & (LINE_END_N - 1)) == 0) {
if (ixr->filter) {
if (zsv_writer_flush(ixr->writer) != zsv_writer_status_ok) {
zsv_abort(ixr->parser);
return;
}
line_end = ftell(ixr->filter_stream);
}

ix = add_line_end(ix, line_end);
if (!ix) {
zsv_abort(ixr->parser);
return;
}

ixr->ix = ix;
}

ix->row_count++;
if (zsv_index_add_row(ix, parser) != zsv_index_status_ok)
zsv_abort(parser);
}

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
struct zsvsheet_indexer ixr = {0};
ixr.filter = optsp->row_filter;
ixr.filter_len = optsp->row_filter ? strlen(optsp->row_filter) : 0;

enum zsvsheet_index_status ret = zsvsheet_index_status_error;
enum zsv_index_status ret = zsv_index_status_error;
struct zsv_opts ix_zopts = optsp->zsv_opts;
unsigned char temp_buff[8196];
char *temp_filename;
FILE *temp_f = NULL;
zsv_csv_writer temp_file_writer = NULL;
FILE *fp = fopen(optsp->filename, "rb");
if (!fp)
return ret;
Expand All @@ -96,8 +62,6 @@ enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp)
goto out;

if (optsp->row_filter) {
zsv_csv_writer temp_file_writer = NULL;
unsigned char temp_buff[8196];

temp_filename = zsv_get_temp_filename("zsvsheet_filter_XXXXXXXX");
if (!temp_filename)
Expand All @@ -116,41 +80,28 @@ enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp)
ixr.filter_stream = temp_f;
}

const size_t initial_cap = 256;
ixr.ix = malloc(sizeof(*ixr.ix) + initial_cap * sizeof(size_t));
ixr.ix = zsv_index_new();
if (!ixr.ix)
goto out;
memset(ixr.ix, 0, sizeof(*ixr.ix));
ixr.ix->line_end_capacity = initial_cap;

while ((zst = zsv_parse_more(ixr.parser)) == zsv_status_ok)
;

zsv_finish(ixr.parser);

if (zst == zsv_status_no_more_input) {
ret = zsvsheet_index_status_ok;
ret = zsv_index_status_ok;
*optsp->index = ixr.ix;
} else
free(ixr.ix);

out:
zsv_delete(ixr.parser);
fclose(fp);
if (temp_file_writer)
zsv_writer_delete(temp_file_writer);
if (temp_f)
fclose(temp_f);

return ret;
}

/*
 * Translate a row number into a stored offset plus a count of rows left over.
 *
 * For row <= LINE_END_N, *offset_out is ix->header_line_end and
 * *remaining_rows_out is row itself. For larger rows, *offset_out is the
 * line_ends entry at (row - LINE_END_N) >> LINE_END_SHIFT and
 * *remaining_rows_out is row modulo LINE_END_N.
 *
 * The computed entry is not checked against ix->line_end_len; the caller is
 * responsible for passing a row that the index covers.
 */
void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out) {
  if (row > LINE_END_N) {
    const size_t slot = (row - LINE_END_N) >> LINE_END_SHIFT;

    *offset_out = (off_t)ix->line_ends[slot];
    *remaining_rows_out = row % LINE_END_N;
  } else {
    // No stored line end applies yet: hand back the header's line end and the full row count
    *offset_out = (off_t)ix->header_line_end;
    *remaining_rows_out = row;
  }
}
27 changes: 3 additions & 24 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,9 @@
#include "zsv.h"
#include "zsv/utils/writer.h"

// Decides the number of rows we skip when storing the line end
// 1 << 10 = 1024 means that we store every 1024th line end
#define LINE_END_SHIFT 10
#define LINE_END_N (1 << LINE_END_SHIFT)

// Status codes returned by the index-building code (see build_memory_index()).
enum zsvsheet_index_status {
// The index was built successfully
zsvsheet_index_status_ok = 0,
// NOTE(review): presumably an allocation failure -- confirm against the code that returns it
zsvsheet_index_status_memory,
// Generic failure; build_memory_index() starts with this value and only replaces it on success
zsvsheet_index_status_error,
// NOTE(review): presumably invalid UTF-8 in the input -- confirm against the code that returns it
zsvsheet_index_status_utf8,
};

// Sparse table of line-end values collected while parsing a file.
// get_memory_index() maps a row number to one of the stored values plus a
// count of remaining rows. The struct is allocated as a single block with
// line_ends as its trailing flexible array, so growing the array may move
// the whole struct.
struct zsvsheet_index {
// Line-end value recorded for the first row seen by the indexer; 0 means not yet set
uint64_t header_line_end;
// Number of rows counted by the indexer so far
uint64_t row_count;
// Number of line_ends elements the current allocation has room for
size_t line_end_capacity;
// Number of line_ends elements currently in use
size_t line_end_len;
// Stored line-end values; one entry is added every LINE_END_N rows rather than per row
uint64_t line_ends[];
};

struct zsvsheet_indexer {
zsv_parser parser;
struct zsvsheet_index *ix;
struct zsv_index *ix;
const char *filter;
size_t filter_len;
zsv_csv_writer writer;
Expand All @@ -43,15 +23,14 @@ struct zsvsheet_index_opts {
char **temp_filename;
const char *row_filter;
struct zsv_opts zsv_opts;
struct zsvsheet_index **index;
struct zsv_index **index;
unsigned char *index_ready;
struct zsvsheet_ui_buffer *uib;
int *errp;
struct zsv_prop_handler *custom_prop_handler;
const char *opts_used;
};

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp);
void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out);
enum zsv_index_status build_memory_index(struct zsvsheet_index_opts *optsp);

#endif
Loading