Skip to content

Commit

Permalink
Rework temporary filter file into the index
Browse files Browse the repository at this point in the history
  • Loading branch information
richiejp committed Nov 1, 2024
1 parent 416220b commit 1a770f4
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 96 deletions.
11 changes: 6 additions & 5 deletions app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ struct zsvsheet_opts {
#include "sheet/read-data.c"
#include "sheet/key-bindings.c"

static void display_buffer_subtable(struct zsvsheet_ui_buffer *ui_buffer, size_t rownum_col_offset, size_t input_header_span,
struct zsvsheet_display_dimensions *ddims);
static void display_buffer_subtable(struct zsvsheet_ui_buffer *ui_buffer, size_t rownum_col_offset,
size_t input_header_span, struct zsvsheet_display_dimensions *ddims);

struct zsvsheet_display_info {
int update_buffer;
Expand Down Expand Up @@ -609,7 +609,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
handler_state.display_info.update_buffer = false;

pthread_mutex_lock(&current_ui_buffer->mutex);
if (current_ui_buffer->index_ready && current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count) {
if (current_ui_buffer->index_ready &&
current_ui_buffer->dimensions.row_count != current_ui_buffer->index->row_count) {
current_ui_buffer->dimensions.row_count = current_ui_buffer->index->row_count;
handler_state.display_info.update_buffer = true;
}
Expand Down Expand Up @@ -683,8 +684,8 @@ const char *display_cell(struct zsvsheet_buffer *buff, size_t data_row, size_t d
return str;
}

static void display_buffer_subtable(struct zsvsheet_ui_buffer *ui_buffer, size_t rownum_col_offset, size_t input_header_span,
struct zsvsheet_display_dimensions *ddims) {
static void display_buffer_subtable(struct zsvsheet_ui_buffer *ui_buffer, size_t rownum_col_offset,
size_t input_header_span, struct zsvsheet_display_dimensions *ddims) {
struct zsvsheet_buffer *buffer = ui_buffer->buffer;
size_t start_row = ui_buffer->buff_offset.row;
size_t buffer_used_row_count = ui_buffer->buff_used_rows;
Expand Down
108 changes: 81 additions & 27 deletions app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
#include <zsv/utils/prop.h>

#include "index.h"
#include "zsv/utils/file.h"
#include "zsv/utils/writer.h"

static struct zsvsheet_index *add_line_end(struct zsvsheet_index *ix, uint64_t end) {
size_t len = ix->line_end_len, cap = ix->line_end_capacity;

if (len >= cap) {
cap *= 2;
ix = realloc(ix, sizeof(*ix) + cap*sizeof(ix->line_ends[0]));
ix = realloc(ix, sizeof(*ix) + cap * sizeof(ix->line_ends[0]));
if (!ix)
return NULL;

ix->line_end_capacity = cap;
}

Expand All @@ -28,74 +30,126 @@ static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
struct zsvsheet_index *ix = ixr->ix;
uint64_t line_end = zsv_cum_scanned_length(ixr->parser) + 1;

if(!ixr->ix->header_line_end) {
size_t col_count = zsv_cell_count(ixr->parser);

if (ixr->filter) {
if (col_count == 0)
return;

if (ixr->ix->header_line_end) {
struct zsv_cell first_cell = zsv_get_cell(ixr->parser, 0);
struct zsv_cell last_cell = zsv_get_cell(ixr->parser, col_count - 1);

if (!memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, ixr->filter, ixr->filter_len))
return;
}

for (size_t i = 0; i < col_count; i++) {
struct zsv_cell cell = zsv_get_cell(ixr->parser, i);
zsv_writer_cell(ixr->writer, i == 0, cell.str, cell.len, cell.quoted);
}
}

if (!ixr->ix->header_line_end) {
ix->header_line_end = line_end;
} else if((ix->row_count & ((1 << LINE_END_SHIFT) - 1)) == 0) {
} else if ((ix->row_count & ((1 << LINE_END_SHIFT) - 1)) == 0) {
if (ixr->filter) {
if (zsv_writer_flush(ixr->writer) != zsv_writer_status_ok) {
zsv_abort(ixr->parser);
return;
}
line_end = ftell(ixr->filter_stream);
}

ix = add_line_end(ix, line_end);
if (!ix) {
zsv_abort(ixr->parser);
return;
}

ixr->ix = ix;
}

ix->row_count++;
}

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp, struct zsvsheet_index **index_out) {
struct zsvsheet_indexer ixr = {0};
enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp) {
struct zsvsheet_indexer ixr = {
.filter = optsp->row_filter,
.filter_len = optsp->row_filter ? strlen(optsp->row_filter) : 0,
};
enum zsvsheet_index_status ret = zsvsheet_index_status_error;
struct zsv_opts *zopts = optsp->zsv_optsp;
struct zsv_opts ix_zopts = {0};
char *temp_filename;
FILE *temp_f = NULL;

memcpy(&ix_zopts, zopts, sizeof(ix_zopts));

FILE *fp = fopen(optsp->filename, "rb");
if (!fp)
goto close_file;
return ret;

ix_zopts.ctx = &ixr;
ix_zopts.stream = fp;
ix_zopts.row_handler = build_memory_index_row_handler;

enum zsv_status zst = zsv_new_with_properties(&ix_zopts, optsp->custom_prop_handler,
optsp->filename, optsp->opts_used,
&ixr.parser);
enum zsv_status zst =
zsv_new_with_properties(&ix_zopts, optsp->custom_prop_handler, optsp->filename, optsp->opts_used, &ixr.parser);
if (zst != zsv_status_ok)
goto free_parser;

goto out;

if (optsp->row_filter) {
zsv_csv_writer temp_file_writer = NULL;
unsigned char temp_buff[8196];

temp_filename = zsv_get_temp_filename("zsvsheet_filter_XXXXXXXX");
if (!temp_filename)
return ret;

*optsp->temp_filename = temp_filename;

struct zsv_csv_writer_options writer_opts = {0};
if (!(writer_opts.stream = temp_f = fopen(temp_filename, "wb")))
return ret;
if (!(temp_file_writer = zsv_writer_new(&writer_opts)))
goto out;

zsv_writer_set_temp_buff(temp_file_writer, temp_buff, sizeof(temp_buff));
ixr.writer = temp_file_writer;
ixr.filter_stream = temp_f;
}

const size_t initial_cap = 256;
ixr.ix = malloc(sizeof(*ixr.ix) + initial_cap * sizeof(size_t));
if (!ixr.ix)
goto free_parser;
goto out;
memset(ixr.ix, 0, sizeof(*ixr.ix));
ixr.ix->line_end_capacity = initial_cap;
while ((zst = zsv_parse_more(ixr.parser)) == zsv_status_ok)
;

while ((zst = zsv_parse_more(ixr.parser)) == zsv_status_ok)
;

zsv_finish(ixr.parser);
if (zst == zsv_status_no_more_input) {

if (zst == zsv_status_no_more_input) {
ret = zsvsheet_index_status_ok;
*index_out = ixr.ix;
*optsp->index = ixr.ix;
} else
free(ixr.ix);

free_parser:
out:
zsv_delete(ixr.parser);

close_file:
fclose(fp);
if (temp_f)
fclose(temp_f);

return ret;
}

/*
 * Look up the stored line-end checkpoint that covers `row`.
 *
 * Only one line end per (1 << LINE_END_SHIFT) rows is kept in the index, so
 * the result comes in two parts:
 *   *offset_out          the stored line end for the checkpoint slot of `row`
 *   *remaining_rows_out  the position of `row` within that slot, i.e. the low
 *                        LINE_END_SHIFT bits of `row`
 *
 * No bounds check is made on the slot; the caller must pass a row that the
 * index covers.
 */
void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out) {
  const size_t slot = row >> LINE_END_SHIFT;
  const uint64_t low_bits_mask = (1 << LINE_END_SHIFT) - 1;
  const uint64_t checkpoint = ix->line_ends[slot];

  *offset_out = (off_t)checkpoint;
  *remaining_rows_out = row & low_bits_mask;
}
10 changes: 8 additions & 2 deletions app/sheet/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
#include <pthread.h>

#include "zsv.h"
#include "zsv/utils/writer.h"

// Decides the number of rows we skip when storing the line end
// 1 << 10 = 1024 means that we store every 1024th line end
#define LINE_END_SHIFT 10
#define LINE_END_N (1 << LINE_END_SHIFT)

enum zsvsheet_index_status{
enum zsvsheet_index_status {
zsvsheet_index_status_ok = 0,
zsvsheet_index_status_memory,
zsvsheet_index_status_error,
Expand All @@ -30,11 +31,16 @@ struct zsvsheet_index {
// Working state shared between build_memory_index() and its row handler
// while a single file is being indexed.
struct zsvsheet_indexer {
// Parser that drives the row handler; created by build_memory_index()
zsv_parser parser;
// Index under construction; replaced whenever growing it reallocates
struct zsvsheet_index *ix;
// Text a data row must contain to be kept, or NULL when not filtering
const char *filter;
// Length of `filter` in bytes (0 when `filter` is NULL)
size_t filter_len;
// CSV writer that receives the header and matching rows when filtering
zsv_csv_writer writer;
// Stream behind `writer`; its ftell() position is recorded as the line end
// when filtering
FILE *filter_stream;
};

struct zsvsheet_index_opts {
pthread_mutex_t *mutexp;
const char *filename;
char **temp_filename;
const char *row_filter;
struct zsv_opts *zsv_optsp;
struct zsvsheet_index **index;
Expand All @@ -44,7 +50,7 @@ struct zsvsheet_index_opts {
const char *opts_used;
};

enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp, struct zsvsheet_index **index_out);
enum zsvsheet_index_status build_memory_index(struct zsvsheet_index_opts *optsp);
void get_memory_index(struct zsvsheet_index *ix, uint64_t row, off_t *offset_out, size_t *remaining_rows_out);

#endif
Loading

0 comments on commit 1a770f4

Please sign in to comment.