Skip to content

Commit

Permalink
sheet: Add pivot builtin command
Browse files Browse the repository at this point in the history
The user can either pivot on the column under the cursor with 'v' or
enter an SQL group-by expression with 'V'.
  • Loading branch information
richiejp committed Dec 13, 2024
1 parent 8cd97b6 commit ac6859a
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 2 deletions.
1 change: 1 addition & 0 deletions app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ ${CLI} ${STANDALONE_PFX}2json${EXE}: MORE_OBJECTS+= ${BUILD_DIR}/objs/utils/db.o
# pretty uses termcap
${CLI} ${STANDALONE_PFX}pretty${EXE}: MORE_LIBS+=${LDFLAGS_TERMCAP}

${CLI} ${STANDALONE_PFX}sheet${EXE} ${STANDALONE_PFX}sql${EXE}: ${SQL_INTERNAL_OBJECT}
${CLI} ${STANDALONE_PFX}sheet${EXE} ${STANDALONE_PFX}sql${EXE}: MORE_OBJECTS+=${SQL_INTERNAL_OBJECT}

${STANDALONE_PFX}%${EXE}: %.c ${OBJECTS} ${MORE_OBJECTS} ${LIBZSV_INSTALL} ${UTF8PROC_OBJECT}
Expand Down
5 changes: 4 additions & 1 deletion app/sheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ static zsvsheet_status zsvsheet_help_handler(struct zsvsheet_proc_context *ctx)
return stat;
}

#include "sheet/pivot.c"
#include "sheet/newline_handler.c"

/* We do most procedures in one handler. More complex procedures can be
Expand Down Expand Up @@ -624,7 +625,9 @@ struct builtin_proc_desc {
{ zsvsheet_builtin_proc_filter, "filter", "Hide rows that do not contain the specified text", zsvsheet_filter_handler },
{ zsvsheet_builtin_proc_subcommand, "subcommand", "Editor subcommand", zsvsheet_subcommand_handler },
{ zsvsheet_builtin_proc_help, "help", "Display a list of actions and key-bindings", zsvsheet_help_handler },
{ zsvsheet_builtin_proc_newline, "<Enter>","Follow hyperlink (if any)", zsvsheet_newline_handler },
{ zsvsheet_builtin_proc_newline, "<Enter>","Follow hyperlink (if any) or drill down", zsvsheet_newline_handler },
{ zsvsheet_builtin_proc_pivot_cur_col, "pivotcur","Group rows by the column under the cursor", zsvsheet_pivot_handler },
{ zsvsheet_builtin_proc_pivot_expr, "pivotexpr","Group rows with group-by SQL expression", zsvsheet_pivot_handler },
{ -1, NULL, NULL, NULL }
};
/* clang-format on */
Expand Down
1 change: 1 addition & 0 deletions app/sheet/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ int zsvsheet_ui_buffer_open_file(const char *filename, const struct zsv_opts *zs
struct zsvsheet_ui_buffer **ui_buffer_stack_top);

zsvsheet_status zsvsheet_open_file_opts(struct zsvsheet_proc_context *ctx, struct zsvsheet_ui_buffer_opts *opts);
zsvsheet_status zsvsheet_open_file(struct zsvsheet_proc_context *ctx, const char *filepath, struct zsv_opts *zopts);

#endif
2 changes: 2 additions & 0 deletions app/sheet/key-bindings.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ struct zsvsheet_key_binding zsvsheet_vim_key_bindings[] = {
{ .ch = '?', .proc_id = zsvsheet_builtin_proc_help, },
{ .ch = '\n', .proc_id = zsvsheet_builtin_proc_newline, },
{ .ch = '\r', .proc_id = zsvsheet_builtin_proc_newline, },
{ .ch = 'v', .proc_id = zsvsheet_builtin_proc_pivot_cur_col, },
{ .ch = 'V', .proc_id = zsvsheet_builtin_proc_pivot_expr, },

{ .ch = -1 }
};
Expand Down
303 changes: 303 additions & 0 deletions app/sheet/pivot.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
/*
* Copyright (C) 2021 Liquidaty and zsv contributors. All rights reserved.
* This file is part of zsv/lib, distributed under the license defined at
* https://opensource.org/licenses/MIT
*/

#include <assert.h>
#include <errno.h>
#include "../external/sqlite3/sqlite3.h"
#include <zsv/ext/implementation.h>
#include <zsv/ext/sheet.h>
#include <zsv/utils/writer.h>
#include <zsv/utils/file.h>
#include <zsv/utils/prop.h>
#include "file.h"
#include "handlers_internal.h"
#include "../curses.h"
#include "../sql_internal.h"

/* Bookkeeping kept for one row of a pivot buffer. */
struct pivot_row {
  /* Heap-allocated, NUL-terminated copy of the row's first-column value, or
   * NULL when no value was stored for the row.
   * To do: this will be the drill-down criteria */
  char *value;
};

/* Context attached to a pivot buffer; created by pivot_data_new() and
 * released by pivot_data_delete(). */
struct pivot_data {
  /* owned copy of the sql expression entered by the user e.g. City */
  char *value_sql;
  /* owned copy of the name of the data file the pivot was computed from */
  char *data_filename;
  /* growable array with one entry per row of the pivot result */
  struct {
    /* for each row, the value of the sql expression e.g. New York */
    struct pivot_row *data;
    /* number of entries allocated in `data` */
    size_t capacity;
    /* number of entries of `data` currently in use */
    size_t used;
  } rows;
};

/*
 * Release a struct pivot_data and everything it owns: each stored row value,
 * the row array, both string copies and the structure itself.
 * Takes a void * so it can be registered as a generic context destructor.
 * A NULL argument is ignored.
 */
static void pivot_data_delete(void *h) {
  struct pivot_data *pivot = h;
  if (!pivot)
    return;

  size_t remaining = pivot->rows.used;
  while (remaining > 0)
    free(pivot->rows.data[--remaining].value);

  free(pivot->rows.data);
  free(pivot->value_sql);
  free(pivot->data_filename);
  free(pivot);
}

/*
 * Allocate a zero-initialised struct pivot_data holding private copies of
 * `value_sql` and `data_filename`.
 *
 * Returns the new object, or NULL if any allocation failed (in which case
 * whatever was already allocated is released). Free with pivot_data_delete().
 */
static struct pivot_data *pivot_data_new(const char *data_filename, const char *value_sql) {
  struct pivot_data *fresh = calloc(1, sizeof(*fresh));
  if (fresh) {
    fresh->value_sql = strdup(value_sql);
    if (fresh->value_sql) {
      fresh->data_filename = strdup(data_filename);
      if (fresh->data_filename)
        return fresh;
    }
  }
  /* partial construction: pivot_data_delete() copes with NULL members and a NULL object */
  pivot_data_delete(fresh);
  return NULL;
}

/* Number of row slots allocated the first time the row array is grown */
#define ZSV_MYSHEET_PIVOT_DATA_ROWS_INITIAL 32

/*
 * Make sure the row array has room for at least one more entry.
 * Nothing happens while used != capacity; otherwise the capacity becomes
 * ZSV_MYSHEET_PIVOT_DATA_ROWS_INITIAL (if it was 0) or doubles.
 *
 * Returns 0 on success, or ENOMEM if realloc() failed, in which case the
 * existing array and capacity are left untouched.
 */
static int pivot_data_grow(struct pivot_data *pd) {
  if (pd->rows.used != pd->rows.capacity)
    return 0;

  size_t wanted = pd->rows.capacity ? pd->rows.capacity * 2 : ZSV_MYSHEET_PIVOT_DATA_ROWS_INITIAL;
  struct pivot_row *resized = realloc(pd->rows.data, wanted * sizeof(*pd->rows.data));
  if (!resized)
    return ENOMEM;

  pd->rows.capacity = wanted;
  pd->rows.data = resized;
  return 0;
}

/*
 * Append one row to `pd`, storing a NUL-terminated copy of the first `len`
 * bytes of `value`. A NULL `value` or a zero `len` stores a NULL value.
 *
 * Returns 0 on success or ENOMEM on allocation failure:
 *  - if the row array could not be grown, nothing is appended (the previous
 *    code wrote past the end of the array / through a NULL pointer here);
 *  - if only the value copy could not be allocated, the row is still appended
 *    with a NULL value so that row indexes stay aligned with the rows already
 *    emitted by the caller, and ENOMEM is reported.
 */
static int add_pivot_row(struct pivot_data *pd, const char *value, size_t len) {
  int err = pivot_data_grow(pd);
  if (err)
    return err; // no free slot: must not touch pd->rows.data

  char *value_dup = NULL;
  if (value && len) {
    value_dup = malloc(len + 1);
    if (value_dup) {
      memcpy(value_dup, value, len);
      value_dup[len] = '\0';
    } else {
      err = ENOMEM;
    }
  }
  pd->rows.data[pd->rows.used++].value = value_dup;
  return err;
}

/*
 * Look up the stored row at index `row_ix`.
 * Returns NULL when `pd` is NULL or `row_ix` is not below the number of rows in use.
 */
static struct pivot_row *get_pivot_row_data(struct pivot_data *pd, size_t row_ix) {
  if (!pd || row_ix >= pd->rows.used)
    return NULL;
  return pd->rows.data + row_ix;
}

// TO DO: return zsvsheet_status
static enum zsv_ext_status get_cell_attrs(void *pdh, int *attrs, size_t start_row, size_t row_count, size_t cols) {
struct pivot_data *pd = pdh;
size_t end_row = start_row + row_count;
int attr = 0;

#ifdef A_BOLD
attr |= A_BOLD;
#endif
// Absent on Mac OSX 13
#ifdef A_ITALIC
attr |= A_ITALIC;
#endif

if (end_row > pd->rows.used)
end_row = pd->rows.used;
for (size_t i = start_row; i < end_row; i++)
attrs[i * cols] = attr;
return zsv_ext_status_ok;
}

/*
 * Header-cell callback: when the first column of the header row is seen,
 * append a row with no value so that stored rows include an entry for the header.
 * `ctx` is the struct pivot_data being filled; `colname` is unused.
 */
static void pivot_on_header_cell(void *ctx, size_t col_ix, const char *colname) {
  (void)colname;
  if (col_ix != 0)
    return;
  add_pivot_row(ctx, NULL, 0);
}

/*
 * Data-cell callback: record the first column's text (`len` bytes) as the
 * value of a new pivot row; cells of other columns are ignored.
 * `ctx` is the struct pivot_data being filled.
 */
static void pivot_on_data_cell(void *ctx, size_t col_ix, const char *text, size_t len) {
  if (col_ix != 0)
    return;
  add_pivot_row(ctx, text, len);
}

/**
 * Run `sql` against `zdb`, write the result set as CSV to a temporary file
 * and open that file as a new sheet buffer.
 *
 * @param pctx           procedure context; used to open the new buffer and to
 *                       display error messages in the status line
 * @param zdb            database handle; zdb->rc receives the result of
 *                       sqlite3_prepare_v2()
 * @param sql            NUL-terminated SQL statement
 * @param ctx            opaque pointer passed through to the two callbacks
 *                       (callers may pass NULL when no callbacks are given)
 * @param on_header_cell optional; called with (ctx, column index, column name)
 *                       for each column of the header row
 * @param on_data_cell   optional; called with (ctx, column index, text, length)
 *                       for each cell of each result row
 * @return zsvsheet_status_ok when the result was written and opened,
 *         zsvsheet_status_memory on allocation failure, otherwise
 *         zsvsheet_status_error (or whatever zsvsheet_open_file_opts returned)
 *
 * On failure the temporary file name is freed here. On success it is not.
 * NOTE(review): presumably the opened buffer keeps referring to it -- confirm
 * ownership in zsvsheet_open_file_opts().
 */
static zsvsheet_status zsv_sqlite3_to_csv(zsvsheet_proc_context_t pctx, struct zsv_sqlite3_db *zdb, const char *sql,
                                          void *ctx, void (*on_header_cell)(void *, size_t, const char *),
                                          void (*on_data_cell)(void *, size_t, const char *, size_t len)) {
  const char *err_msg = NULL;
  zsvsheet_status zst = zsvsheet_status_error;
  sqlite3_stmt *stmt = NULL;

  if ((zdb->rc = sqlite3_prepare_v2(zdb->db, sql, -1, &stmt, NULL)) != SQLITE_OK) {
    // previously a statement that failed to compile was not reported at all
    err_msg = sqlite3_errmsg(zdb->db);
  } else {
    char *tmp_fn = zsv_get_temp_filename("zsv_mysheet_ext_XXXXXXXX");
    struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
    zsv_csv_writer cw = NULL;
    if (!tmp_fn)
      zst = zsvsheet_status_memory;
    else if (!(writer_opts.stream = fopen(tmp_fn, "wb"))) {
      zst = zsvsheet_status_error;
      err_msg = strerror(errno);
    } else if (!(cw = zsv_writer_new(&writer_opts)))
      zst = zsvsheet_status_memory;
    else {
      zst = zsvsheet_status_ok;
      unsigned char cw_buff[1024];
      zsv_writer_set_temp_buff(cw, cw_buff, sizeof(cw_buff));

      int col_count = sqlite3_column_count(stmt);
      // write header row
      for (int i = 0; i < col_count; i++) {
        const char *colname = sqlite3_column_name(stmt, i);
        zsv_writer_cell(cw, !i, (const unsigned char *)colname, colname ? strlen(colname) : 0, 1);
        if (on_header_cell)
          on_header_cell(ctx, i, colname);
      }

      // write sql results
      while (sqlite3_step(stmt) == SQLITE_ROW) {
        for (int i = 0; i < col_count; i++) {
          const unsigned char *text = sqlite3_column_text(stmt, i);
          int len = text ? sqlite3_column_bytes(stmt, i) : 0;
          zsv_writer_cell(cw, !i, text, len, 1);
          if (on_data_cell)
            on_data_cell(ctx, i, (const char *)text, len);
        }
      }
    }
    if (cw)
      zsv_writer_delete(cw);
    if (writer_opts.stream)
      fclose(writer_opts.stream);

    // Only open the file when it was actually written: opening it after a
    // failure above would replace the failure status with the open result.
    if (zst == zsvsheet_status_ok) {
      if (zsv_file_exists(tmp_fn)) {
        struct zsvsheet_ui_buffer_opts uibopts = {0};
        uibopts.data_filename = tmp_fn;
        zst = zsvsheet_open_file_opts(pctx, &uibopts);
      } else {
        zst = zsvsheet_status_error; // to do: make this more specific
        if (!err_msg && zdb->rc != SQLITE_OK)
          err_msg = sqlite3_errmsg(zdb->db);
      }
    }
    if (zst != zsvsheet_status_ok)
      free(tmp_fn);
  }

  // Report through the proc context (`ctx` is only the callbacks' opaque
  // pointer), and do so before sqlite3_finalize(): a string returned by
  // sqlite3_errmsg() may be invalidated by later SQLite calls.
  if (err_msg)
    zsvsheet_set_status(pctx, "Error: %s", err_msg);
  if (stmt)
    sqlite3_finalize(stmt);
  return zst;
}

/**
 * Newline handler installed on pivot buffers: open a new buffer listing the
 * rows of the original data file whose pivot column equals the value stored
 * for the currently selected pivot row.
 *
 * @param ctx procedure context of the key press
 * @return zsvsheet_status_ok when the drill-down buffer was opened, or when
 *         the current buffer has no usable pivot data / row (nothing to do);
 *         zsvsheet_status_memory on allocation failure;
 *         zsvsheet_status_error when the buffer context or selected cell
 *         cannot be obtained, or the data cannot be loaded or queried
 *
 * NOTE(review): pd->value_sql is inserted as a double-quoted identifier (%w),
 * and a row whose stored value is NULL produces `= NULL`, which matches no
 * rows -- confirm whether empty-valued groups should be drillable.
 */
zsvsheet_status pivot_drill_down(zsvsheet_proc_context_t ctx) {
  enum zsvsheet_status zst = zsvsheet_status_ok;
  zsvsheet_buffer_t buff = zsvsheet_buffer_current(ctx);
  struct pivot_data *pd = NULL; // initialised in case the getter leaves it untouched
  struct zsvsheet_rowcol rc;
  if (zsvsheet_buffer_get_ctx(buff, (void **)&pd) != zsv_ext_status_ok ||
      zsvsheet_buffer_get_selected_cell(buff, &rc) != zsvsheet_status_ok) {
    return zsvsheet_status_error;
  }
  struct pivot_row *pr = get_pivot_row_data(pd, rc.row);
  if (pd && pd->data_filename && pd->value_sql && pr) {
    struct zsv_sqlite3_dbopts dbopts = {0};
    sqlite3_str *sql_str = NULL;
    struct zsv_sqlite3_db *zdb = zsv_sqlite3_db_new(&dbopts);

    if (!zdb || !(sql_str = sqlite3_str_new(zdb->db)))
      zst = zsvsheet_status_memory;
    else if (zdb->rc != SQLITE_OK || zsv_sqlite3_add_csv(zdb, pd->data_filename, NULL, NULL) != SQLITE_OK) {
      // previously this fell through with zst == ok, so the failure was
      // silent and the diagnostics below were never reached
      zst = zsvsheet_status_error;
    } else {
      if (zsvsheet_buffer_info(buff).has_row_num)
        sqlite3_str_appendf(sql_str, "select *");
      else
        sqlite3_str_appendf(sql_str, "select rowid as [Row #], *");
      sqlite3_str_appendf(sql_str, " from data where \"%w\" = %Q", pd->value_sql, pr->value);
      zst = zsv_sqlite3_to_csv(ctx, zdb, sqlite3_str_value(sql_str), NULL, NULL, NULL);
    }

    if (sql_str)
      sqlite3_free(sqlite3_str_finish(sql_str));
    if (zdb) {
      if (zst != zsvsheet_status_ok) {
        // to do: consolidate this with same code in sql.c
        if (zdb->err_msg)
          fprintf(stderr, "Error: %s\n", zdb->err_msg);
        else if (!zdb->db)
          fprintf(stderr, "Error (unable to open db, code %i): %s\n", zdb->rc, sqlite3_errstr(zdb->rc));
        else if (zdb->rc != SQLITE_OK)
          fprintf(stderr, "Error (code %i): %s\n", zdb->rc, sqlite3_errstr(zdb->rc));
      }
      zsv_sqlite3_db_delete(zdb);
    }
  }
  return zst;
}

/**
* Here we define a custom command for the zsv `sheet` feature
*/
static zsvsheet_status zsvsheet_pivot_handler(struct zsvsheet_proc_context *ctx) {
char result_buffer[256] = {0};
const char *expr;
struct zsvsheet_rowcol rc;
int ch = zsvsheet_ext_keypress(ctx);
if (ch < 0)
return zsvsheet_status_error;

zsvsheet_buffer_t buff = zsvsheet_buffer_current(ctx);
const char *data_filename = NULL;
if (buff)
data_filename = zsvsheet_buffer_data_filename(buff);

if (!data_filename) { // TO DO: check that the underlying data is a tabular file and we know how to parse
zsvsheet_set_status(ctx, "Pivot table only available for tabular data buffers");
return zsvsheet_status_ok;
}

switch (ctx->proc_id) {
case zsvsheet_builtin_proc_pivot_expr:
zsvsheet_ext_prompt(ctx, result_buffer, sizeof(result_buffer), "Pivot table: Enter group-by SQL expr");
if (*result_buffer == '\0')
return zsvsheet_status_ok;
expr = result_buffer;
break;
case zsvsheet_builtin_proc_pivot_cur_col:
if (zsvsheet_buffer_get_selected_cell(buff, &rc) != zsvsheet_status_ok)
return zsvsheet_status_error;
expr = zsvsheet_ui_buffer_get_header(buff, rc.col);
assert(expr);
break;
default:
assert(0);
return zsvsheet_status_error;
}

enum zsvsheet_status zst = zsvsheet_status_ok;
struct zsv_sqlite3_dbopts dbopts = {0};
struct zsv_opts zopts = zsvsheet_buffer_get_zsv_opts(buff);
struct zsv_sqlite3_db *zdb = zsv_sqlite3_db_new(&dbopts);
sqlite3_str *sql_str = NULL;
struct pivot_data *pd = NULL;
if (!zdb || !(sql_str = sqlite3_str_new(zdb->db)))
zst = zsvsheet_status_memory;
else if (zdb->rc == SQLITE_OK && zsv_sqlite3_add_csv(zdb, data_filename, &zopts, NULL) == SQLITE_OK) {
sqlite3_str_appendf(sql_str, "select \"%w\" as value, count(1) as Count from data group by \"%w\"", expr, expr);
if (!(pd = pivot_data_new(data_filename, expr)))
zst = zsvsheet_status_memory;
else {
zst = zsv_sqlite3_to_csv(ctx, zdb, sqlite3_str_value(sql_str), pd, pivot_on_header_cell, pivot_on_data_cell);
if (zst == zsvsheet_status_ok) {
buff = zsvsheet_buffer_current(ctx);
zsvsheet_buffer_set_ctx(buff, pd, pivot_data_delete);
zsvsheet_buffer_set_cell_attrs(buff, get_cell_attrs);
zsvsheet_buffer_on_newline(buff, pivot_drill_down);
pd = NULL; // so that it isn't cleaned up below
}
}
// TO DO: add param to ext_sheet_open_file to set filename vs data_filename, and set buffer type or proc owner
// TO DO: add way to attach custom context, and custom context destructor, to the new buffer
// TO DO: add cell highlighting
}

zsv_sqlite3_db_delete(zdb);
if (sql_str)
sqlite3_free(sqlite3_str_finish(sql_str));
pivot_data_delete(pd);
return zst;
}
2 changes: 2 additions & 0 deletions app/sheet/procedure.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ enum {
zsvsheet_builtin_proc_help,
zsvsheet_builtin_proc_vim_g_key_binding_dmux,
zsvsheet_builtin_proc_newline,
zsvsheet_builtin_proc_pivot_expr,
zsvsheet_builtin_proc_pivot_cur_col,
};

#define ZSVSHEET_PROC_INVALID 0
Expand Down
6 changes: 6 additions & 0 deletions app/sheet/ui_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,9 @@ int zsvsheet_ui_buffer_pop(struct zsvsheet_ui_buffer **base, struct zsvsheet_ui_
}
return 0;
}

/*
 * Return the display text of the cell at row 0, column `col` of the screen
 * buffer behind `uib` (used by callers as the column's header text).
 */
static const char *zsvsheet_ui_buffer_get_header(struct zsvsheet_ui_buffer *uib, size_t col) {
  struct zsvsheet_screen_buffer *screen = uib->buffer;
  const char *header = (char *)zsvsheet_screen_buffer_cell_display(screen, 0, col);
  return header;
}
16 changes: 15 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ test-sheet-cleanup:
@rm -f tmux-*.log
@tmux kill-server || printf ''

test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5 test-sheet-6 test-sheet-7 test-sheet-8 test-sheet-9 test-sheet-10 test-sheet-11 test-sheet-12 test-sheet-13 test-sheet-14 test-sheet-subcommand test-sheet-prop-cmd-opt
test-sheet-all: test-sheet-1 test-sheet-2 test-sheet-3 test-sheet-4 test-sheet-5 test-sheet-6 test-sheet-7 test-sheet-8 test-sheet-9 test-sheet-10 test-sheet-11 test-sheet-12 test-sheet-13 test-sheet-14 test-sheet-subcommand test-sheet-prop-cmd-opt test-sheet-pivot
@(for SESSION in $^; do ! tmux kill-session -t "$$SESSION" 2>/dev/null; done && ${TEST_PASS} || ${TEST_FAIL})

TMUX_TERM=xterm-256color
Expand Down Expand Up @@ -802,6 +802,20 @@ test-sheet-prop-cmd-opt: ${BUILD_DIR}/bin/zsv_sheet${EXE} ${BUILD_DIR}/bin/zsv_p
# @tmux send-keys -t $@ "q"
# @${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || (echo 'Incorrect output:' && cat ${TMP_DIR}/$@.out && ${TEST_FAIL})

# Aggregate target for the `sheet` pivot-table tests.
test-sheet-pivot: test-sheet-pivot-1

# Pivot test 1: runs zsv_sheet on worldcitiespop_mil.csv in a detached 80x25
# tmux session named after the target, then drives it with key presses:
#   1. ${EXPECT} "indexed" stage
#   2. send `l` then `v` (`v` is bound to the pivot-on-current-column command);
#      ${EXPECT} "groups" stage
#   3. send `j`, `j`, `Enter`; ${EXPECT} "drilldown" stage
#   4. send `G`; ${EXPECT} the final, unnamed stage
# The whole sequence is one &&-chained shell command, so the first failing
# step short-circuits to ${TEST_FAIL}.
# NOTE(review): ${EXPECT} is defined elsewhere; presumably it compares the
# captured tmux screen with an expected file for the given stage -- confirm.
test-sheet-pivot-1: ${BUILD_DIR}/bin/zsv_sheet${EXE}
@${TEST_INIT}
@echo 'set-option default-terminal "${TMUX_TERM}"' > ~/.tmux.conf
@(tmux new-session -x 80 -y 25 -d -s "$@" "${PREFIX} $< worldcitiespop_mil.csv" && \
${EXPECT} $@ indexed && \
tmux send-keys -t $@ l v && \
${EXPECT} $@ groups && \
tmux send-keys -t $@ j j Enter && \
${EXPECT} $@ drilldown && \
tmux send-keys -t $@ G && \
${EXPECT} $@ && ${TEST_PASS} || ${TEST_FAIL})

benchmark-sheet-index: ${BUILD_DIR}/bin/zsv_sheet${EXE} ${TIMINGS_CSV}
@${TEST_INIT}
@if [ "${BIG_FILE}" = "none" ]; then \
Expand Down
Loading

0 comments on commit ac6859a

Please sign in to comment.