Skip to content

Commit

Permalink
sheet extension and index cleanup (#350)
Browse files Browse the repository at this point in the history
* sheet extension and index cleanup
* on extension build, rebuild libzsvutil if stale
* remove index.h from external api
* review writer.c for performance

Reviewed benchmark performance
  • Loading branch information
liquidaty authored Dec 19, 2024
1 parent 3dd0422 commit 79e769b
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 23 deletions.
7 changes: 5 additions & 2 deletions app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -316,14 +316,13 @@ uninstall:
rm -rf ${ZSV}

ZSV_UTIL_A=${LIBDIR}/libzsvutil.a

${ZSV_UTIL_A}:SQLITE_EXT=
${ZSV_UTIL_A}:SQLITE_EXT_INCLUDE=
${ZSV_UTIL_A}: ${BUILD_DIR}/objs/utils/util.a
@mkdir -p `dirname $@`
cp -p $< $@

UTIL_A_OBJ:=index writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=writer file dirs-no-jq os ${UTIL_A_OBJ_WIN}
UTIL_A_OBJ:=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTIL_A_OBJ}))

${BUILD_DIR}/objs/utils/util.a: ${UTIL_A_OBJ}
Expand Down Expand Up @@ -375,6 +374,10 @@ ${CLEANS}: clean-%:
.SUFFIXES:
.SUFFIXES: .o .c .a

${BUILD_DIR}/objs/utils/index.o: ${BUILD_DIR}/objs/utils/%.o : utils/%.c utils/%.h
@mkdir -p `dirname "$@"`
${CC} ${CFLAGS} -I${INCLUDE_DIR} -o $@ -c $<

${BUILD_DIR}/objs/utils/%.o : utils/%.c ${INCLUDE_DIR}/zsv/utils/%.h ${JQ_LIB}
@mkdir -p `dirname "$@"`
${CC} ${CFLAGS} -I${INCLUDE_DIR} -I${UTF8PROC_INCLUDE} -DINCLUDE_SRC -o $@ -c utils/$*.c ${MORE_SOURCE}
Expand Down
2 changes: 1 addition & 1 deletion app/ext_example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -221,4 +221,4 @@ ${TARGET} ${TARGET_SHEET}: ${BUILD_DIR}/bin/zsvext%.${SO} : %_extension.c ${LIBZ
@mkdir -p `dirname "$@"`
${CC} ${CFLAGS} ${CFLAGS_SHARED} $< -o $@ ${LIBS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE}

.PHONY: all test test-% clean install
.PHONY: all test test-% clean install ${PREFIX}/lib/libzsvutil.a
2 changes: 1 addition & 1 deletion app/sheet/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#include <unistd.h>
#include <zsv.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/index.h>
#include <zsv/utils/file.h>
#include <zsv/utils/writer.h>

#include "../utils/index.h"
#include "index.h"
static void build_memory_index_row_handler(void *ctx) {
struct zsvsheet_indexer *ixr = ctx;
Expand Down
5 changes: 3 additions & 2 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#include <string.h>
#include <pthread.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/index.h>
#include "sheet_internal.h"
#include "screen_buffer.h"
#include "../utils/index.h"
#include "index.h"

#if defined(WIN32) || defined(_WIN32)
Expand Down Expand Up @@ -207,6 +207,8 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_
pthread_mutex_lock(&uibuff->mutex);
char need_index = !uibuff->index_started && !uibuff->write_in_progress;
char *old_ui_status = uibuff->status;
if (need_index)
uibuff->index_started = 1;
pthread_mutex_unlock(&uibuff->mutex);

if (need_index) {
Expand All @@ -215,7 +217,6 @@ static int read_data(struct zsvsheet_ui_buffer **uibufferp, // a new zsvsheet_

uibuff->buff_used_rows = rows_read;
uibuff->dimensions.row_count = rows_read;
uibuff->index_started = 1;
if (original_row_num > 1 && rows_read > 0) {
opts.stream = NULL;
get_data_index_async(uibuff, filename, &opts, custom_prop_handler, old_ui_status);
Expand Down
41 changes: 36 additions & 5 deletions app/sheet/transformation.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <zsv/utils/file.h>
#include <zsv/utils/prop.h>

#include "handlers_internal.h"
#include "transformation.h"
#include "pthread.h"
#include "zsv/utils/file.h"
#include "zsv/utils/index.h"
#include "zsv/utils/prop.h"
#include "../utils/index.h"

struct zsvsheet_transformation {
zsv_parser parser;
Expand All @@ -33,11 +33,31 @@ static size_t transformation_write(const void *restrict ptr, size_t size, size_t
return count > 0 ? count : 0;
}

struct transformation_writer_index_ctx {
void *index;
zsv_csv_writer writer;
};

static void transformation_writer_index_on_row(void *p) {
struct transformation_writer_index_ctx *ctx = p;
uint64_t written = zsv_writer_cum_bytes_written(ctx->writer);
zsv_index_add_row(ctx->index, written);
}

static void transformation_writer_index_delete(void *p) {
struct transformation_writer_index_ctx *ctx = p;
uint64_t written = zsv_writer_cum_bytes_written(ctx->writer);
if (written)
zsv_index_add_row(ctx->index, written);
free(ctx);
}

enum zsv_status zsvsheet_transformation_new(struct zsvsheet_transformation_opts opts, zsvsheet_transformation *out) {
unsigned char *temp_buff = NULL;
char *temp_filename = NULL;
FILE *temp_f = NULL;
zsv_csv_writer temp_file_writer = NULL;
struct transformation_writer_index_ctx *ctx = NULL;
enum zsv_status zst = zsv_status_memory;

struct zsvsheet_transformation *trn = calloc(1, sizeof(*trn));
Expand All @@ -59,17 +79,26 @@ enum zsv_status zsvsheet_transformation_new(struct zsvsheet_transformation_opts
goto free;
trn->output_stream = temp_f;

ctx = calloc(1, sizeof(*ctx));
if (!ctx)
goto free;
ctx->index = opts.index;

struct zsv_csv_writer_options writer_opts = {
.with_bom = 0,
.index = opts.index,
.write = transformation_write,
.stream = trn,
.table_init = NULL,
.table_init_ctx = NULL,
.on_row = transformation_writer_index_on_row,
.on_row_ctx = ctx,
.on_delete = transformation_writer_index_delete,
.on_delete_ctx = ctx,
};
if (!(temp_file_writer = zsv_writer_new(&writer_opts)))
goto free;

ctx->writer = temp_file_writer;
const size_t temp_buff_size = 2 * 1024 * 1024;
temp_buff = malloc(temp_buff_size);
if (!temp_buff)
Expand Down Expand Up @@ -99,6 +128,8 @@ enum zsv_status zsvsheet_transformation_new(struct zsvsheet_transformation_opts
zsv_writer_delete(temp_file_writer);
if (temp_buff)
free(temp_buff);
if (ctx)
transformation_writer_index_delete(ctx);

return zst;
}
Expand Down
2 changes: 1 addition & 1 deletion app/sheet/ui_buffer.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <unistd.h> // unlink()
#include <pthread.h>
#include <zsv/utils/index.h>
#include "../utils/index.h"
#include "index.h"

struct zsvsheet_ui_buffer {
Expand Down
2 changes: 1 addition & 1 deletion app/utils/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <unistd.h>
#include <zsv.h>
#include <zsv/utils/prop.h>
#include <zsv/utils/index.h>
#include "index.h"

struct zsv_index *zsv_index_new(void) {
struct zsv_index *ix = calloc(1, sizeof(*ix));
Expand Down
File renamed without changes.
31 changes: 22 additions & 9 deletions app/utils/writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
* https://opensource.org/licenses/MIT
*/

#include "zsv/utils/index.h"
#include <zsv/utils/writer.h>
#include <zsv/utils/compiler.h>
#include <stdio.h>
Expand Down Expand Up @@ -124,7 +123,12 @@ struct zsv_writer_data {
void *table_init_ctx;

const char *cell_prepend;
struct zsv_index *index;

void (*on_row)(void *);
void *on_row_ctx;

void (*on_delete)(void *);
void *on_delete_ctx;

unsigned char with_bom : 1;
unsigned char started : 1;
Expand Down Expand Up @@ -204,7 +208,10 @@ zsv_csv_writer zsv_writer_new(struct zsv_csv_writer_options *opts) {
w->with_bom = opts->with_bom;
w->table_init = opts->table_init;
w->table_init_ctx = opts->table_init_ctx;
w->index = opts->index;
w->on_row = opts->on_row;
w->on_row_ctx = opts->on_row_ctx;
w->on_delete = opts->on_delete;
w->on_delete_ctx = opts->on_delete_ctx;
}
}
return w;
Expand Down Expand Up @@ -235,8 +242,8 @@ enum zsv_writer_status zsv_writer_delete(zsv_csv_writer w) {
if (w->out.stream && w->out.write && w->out.buff)
zsv_output_buff_flush(&w->out);

if (w->started && w->index)
zsv_index_add_row(w->index, w->out.written);
if (w->on_delete)
w->on_delete(w->on_delete_ctx);

if (w->out.buff)
free(w->out.buff);
Expand Down Expand Up @@ -266,6 +273,10 @@ static inline enum zsv_writer_status zsv_writer_cell_aux(zsv_csv_writer w, const
return zsv_writer_status_ok;
}

uint64_t zsv_writer_cum_bytes_written(zsv_csv_writer w) {
return (uint64_t)(w->out.used + w->out.written);
}

enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row, const unsigned char *s, size_t len,
char check_if_needs_quoting) {
if (!w)
Expand All @@ -277,17 +288,19 @@ enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row, const uns
zsv_output_buff_write(&w->out, (const unsigned char *)"\xef\xbb\xbf", 3);
w->started = 1;
} else if (new_row) {
if (w->index)
zsv_index_add_row(w->index, (uint64_t)(w->out.used + w->out.written));
if (VERY_UNLIKELY(w->on_row != NULL))
w->on_row(w->on_row_ctx);
zsv_output_buff_write(&w->out, (const unsigned char *)"\n", 1);
} else
zsv_output_buff_write(&w->out, (const unsigned char *)",", 1);

if (VERY_UNLIKELY(w->cell_prepend && *w->cell_prepend)) {
char *tmp = NULL;
asprintf(&tmp, "%s%.*s", w->cell_prepend, (int)len, s ? s : (const unsigned char *)"");
if (!tmp)
return zsv_writer_status_error; // zsv_writer_status_memory;
if (!tmp) {
perror(NULL);
return zsv_writer_status_error;
}
s = (const unsigned char *)tmp;
len = len + strlen(w->cell_prepend);
enum zsv_writer_status stat = zsv_writer_cell_aux(w, s, len, 1);
Expand Down
12 changes: 11 additions & 1 deletion include/zsv/utils/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define ZSV_WRITER_H

#include <stdio.h>
#include <stdint.h> // uint64_t

#define ZSV_WRITER_NEW_ROW 1
#define ZSV_WRITER_SAME_ROW 0
Expand All @@ -26,7 +27,11 @@ struct zsv_csv_writer_options {
void (*table_init)(void *);
void *table_init_ctx;
const char *output_path; // if provided, will be created by zsv_writer_new() and closed by zsv_writer_delete()
struct zsv_index *index;
void (*on_row)(void *);
void *on_row_ctx;

void (*on_delete)(void *);
void *on_delete_ctx;
};

void zsv_writer_set_default_opts(struct zsv_csv_writer_options opts);
Expand Down Expand Up @@ -58,6 +63,11 @@ enum zsv_writer_status zsv_writer_cell(zsv_csv_writer,
char new_row, // ZSV_WRITER_NEW_ROW or ZSV_WRITER_SAME_ROW
const unsigned char *s, size_t len, char check_if_needs_quoting);

/*
* Get total bytes that have been written (to disk and buffer)
*/
uint64_t zsv_writer_cum_bytes_written(zsv_csv_writer);

unsigned char *zsv_writer_str_to_csv(const unsigned char *s, size_t len);

/*
Expand Down
1 change: 1 addition & 0 deletions scripts/test-expect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ cleanup() {
tmux send-keys -t "$TARGET" "q"
echo 'Incorrect output:'
cat "$CAPTURE"
echo "${CMP} -s $CAPTURE $EXPECTED"
${CMP} -s "$CAPTURE" "$EXPECTED"
exit 1
}
Expand Down

0 comments on commit 79e769b

Please sign in to comment.