Skip to content

Commit

Permalink
sheet: update to use heap memory, fix zsv_new mem leak (#214)
Browse files Browse the repository at this point in the history
* sheet: update to use heap memory, fix zsv_new mem leak
* Use ubuntu-22.04 runner for clang-format
* use conditional includes for curses.h
* break out app/test/Makefile test-sheet into separate entries
---------

Co-authored-by: Azeem Sajid <[email protected]>
  • Loading branch information
liquidaty and iamazeem authored Oct 9, 2024
1 parent b81d5d8 commit ae83b79
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 183 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
echo "TAG=$TAG" >> "$GITHUB_OUTPUT"
clang-format:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- name: Checkout
Expand Down
223 changes: 116 additions & 107 deletions app/sheet.c

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions app/sheet/buffer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "buffer.h"

struct zsv_sheet_buffer {
size_t cols;
size_t long_cell_count;
struct zsv_sheet_buffer_opts opts;
unsigned char *data;
// to do: add hooks for extension
};

static inline size_t buffer_data_offset(zsv_sheet_buffer_t buff, size_t row, size_t col) {
assert(row < buff->opts.rows && col < buff->cols);
return row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len;
}

static void set_long_cell(zsv_sheet_buffer_t buff, size_t offset, char *heap) {
char **target = (char **)(buff->data + offset);
*target = heap;
// memcpy(buff->data + offset, heap, sizeof(heap));
// set flag indicating that this is long cell
*(buff->data + offset + buff->opts.cell_buff_len - 1) = (char)1;
}

static char *get_long_cell(zsv_sheet_buffer_t buff, size_t offset) {
char **valuep = (char **)(buff->data + offset);
if (valuep)
return *valuep;
return NULL;
}

static inline int is_long_cell(const unsigned char *mem, size_t cell_buff_len) {
return *(mem + cell_buff_len - 1) != '\0';
}

size_t zsv_sheet_buffer_cols(zsv_sheet_buffer_t buff) {
return buff->cols;
}

size_t zsv_sheet_buffer_rows(zsv_sheet_buffer_t buff) {
return buff->opts.rows;
}

static void free_long_cell(zsv_sheet_buffer_t buff, size_t offset) {
if (is_long_cell(buff->data + offset, buff->opts.cell_buff_len)) {
char *value_copy = get_long_cell(buff, offset);
free(value_copy);
memset(buff->data + offset, 0, buff->opts.cell_buff_len);
buff->long_cell_count--;
}
}

void zsv_sheet_buffer_delete(zsv_sheet_buffer_t buff) {
if (buff) {
for (size_t i = 0; i < buff->opts.rows && buff->long_cell_count > 0; i++) {
for (size_t j = 0; j < buff->cols && buff->long_cell_count > 0; j++) {
size_t offset = buffer_data_offset(buff, i, j);
free_long_cell(buff, offset);
}
}
free(buff->data);
free(buff);
}
}

zsv_sheet_buffer_t zsv_sheet_buffer_new(size_t cols, struct zsv_sheet_buffer_opts *opts,
enum zsv_sheet_buffer_status *stat) {
if (opts->rows == 0)
opts->rows = ZSV_SHEET_BUFFER_DEFAULT_ROW_COUNT;
else if (opts->rows < 256)
opts->rows = 256;
if (opts->cell_buff_len == 0)
opts->cell_buff_len = ZSV_SHEET_BUFFER_DEFAULT_CELL_BUFF_LEN;
if (opts->max_cell_len == 0)
opts->max_cell_len = ZSV_SHEET_BUFFER_DEFAULT_MAX_CELL_LEN;
if (opts->cell_buff_len < sizeof(void *) * 2)
*stat = zsv_sheet_buffer_status_error;
else {
if (!opts->no_rownum_column)
cols++;
void *data = calloc(opts->rows, cols * opts->cell_buff_len);
if (!data)
*stat = zsv_sheet_buffer_status_memory;
else {
struct zsv_sheet_buffer *buff = calloc(1, sizeof(*buff));
if (!buff)
*stat = zsv_sheet_buffer_status_memory;
else {
buff->opts.rows = opts->rows;
buff->cols = cols;
buff->data = data;
buff->opts = *opts;
return buff;
}
}
free(data);
}
return NULL;
}

#ifndef UTF8_NOT_FIRST_CHAR
#define UTF8_NOT_FIRST_CHAR(x) ((x & 0xC0) == 0x80)
#endif

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell_w_len(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value, size_t len) {
enum zsv_sheet_buffer_status stat = zsv_sheet_buffer_status_ok;
size_t offset = buffer_data_offset(buff, row, col);
free_long_cell(buff, offset);
if (len < buff->opts.cell_buff_len) {
if (len)
// copy into fixed-size buff
memcpy(buff->data + offset, value, len);
*(buff->data + offset + len) = '\0';
} else {
// we have a long cell
if (len > buff->opts.max_cell_len) {
len = buff->opts.max_cell_len;
while (len > 0 && value[len] >= 128 && UTF8_NOT_FIRST_CHAR(value[len]))
// we are in the middle of a multibyte char, so back up
len--;
}
if (!len) // the only reason len could be 0 is if our input was not valid utf8, but check to make sure anyway
stat = zsv_sheet_buffer_status_utf8;
else {
char *value_copy = malloc(1 + len);
if (value_copy) {
memcpy(value_copy, value, len);
value_copy[len] = '\0';
set_long_cell(buff, offset, value_copy);
buff->long_cell_count++;
} else
stat = zsv_sheet_buffer_status_memory;
}
}
return stat;
}

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value) {
return zsv_sheet_buffer_write_cell_w_len(buff, row, col, value, strlen((void *)value));
}

const unsigned char *zsv_sheet_buffer_cell_display(zsv_sheet_buffer_t buff, size_t row, size_t col) {
if (row < buff->opts.rows && col < buff->cols) {
size_t offset = row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len;
const unsigned char *cell = &buff->data[offset];
if (!is_long_cell(cell, buff->opts.cell_buff_len))
return cell;

// if the fixed-length cell memory does not end with NULL,
// then the cell value is a pointer to memory holding the value
return (unsigned char *)get_long_cell(buff, offset);
}
return NULL;
}
40 changes: 40 additions & 0 deletions app/sheet/buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef ZSV_SHEET_BUFFER_H
#define ZSV_SHEET_BUFFER_H

#define ZSV_SHEET_BUFFER_DEFAULT_CELL_BUFF_LEN 16
#define ZSV_SHEET_BUFFER_DEFAULT_MAX_CELL_LEN 32768 - 1
#define ZSV_SHEET_BUFFER_DEFAULT_ROW_COUNT 1000

typedef struct zsv_sheet_buffer *zsv_sheet_buffer_t;

enum zsv_sheet_buffer_status {
zsv_sheet_buffer_status_ok = 0,
zsv_sheet_buffer_status_memory,
zsv_sheet_buffer_status_error, // generic error
zsv_sheet_buffer_status_utf8
};

struct zsv_sheet_buffer_opts {
size_t cell_buff_len; // default = 16. must be >= 2 * sizeof(void *)
size_t max_cell_len; // length in bytes; defaults to 32767
size_t rows; // rows to buffer. cannot be < 256
char no_rownum_column; // reserved. TO DO: if set, omit row num column
};

zsv_sheet_buffer_t zsv_sheet_buffer_new(size_t cols, struct zsv_sheet_buffer_opts *opts,
enum zsv_sheet_buffer_status *stat);

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value);

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell_w_len(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value, size_t len);

const unsigned char *zsv_sheet_buffer_cell_display(zsv_sheet_buffer_t buff, size_t row, size_t col);

void zsv_sheet_buffer_delete(zsv_sheet_buffer_t);

size_t zsv_sheet_buffer_cols(zsv_sheet_buffer_t);
size_t zsv_sheet_buffer_rows(zsv_sheet_buffer_t buff);

#endif
14 changes: 8 additions & 6 deletions app/sheet/cursor.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ static void set_window_to_cursor(struct ztv_rowcol *buff_offset, size_t target_r
buff_offset->row = 0;
}

static int ztv_goto_input_raw_row(size_t input_raw_num, size_t input_header_span, struct ztv_rowcol *input_offset,
struct ztv_rowcol *buff_offset, struct input_dimensions *input_dims,
size_t *cursor_rowp, struct display_dims *ddims, size_t final_cursor_position) {
static int ztv_goto_input_raw_row(zsv_sheet_buffer_t buffer, size_t input_raw_num, size_t input_header_span,
struct ztv_rowcol *input_offset, struct ztv_rowcol *buff_offset,
struct input_dimensions *input_dims, size_t *cursor_rowp, struct display_dims *ddims,
size_t final_cursor_position) {
size_t buffer_rows = zsv_sheet_buffer_rows(buffer);
int update_buffer = 0;
if (input_raw_num < input_offset->row // move the buffer up
|| input_raw_num + input_header_span + 1 > ZTV_BUFFER_ROWS // move the buffer down
if (input_raw_num < input_offset->row // move the buffer up
|| input_raw_num + input_header_span + 1 > buffer_rows // move the buffer down
) {
input_offset->row = input_offset_centered(input_dims, ZTV_BUFFER_ROWS, input_raw_num);
input_offset->row = input_offset_centered(input_dims, buffer_rows, input_raw_num);
update_buffer = 1;
} else if (!(input_raw_num >= input_offset->row + buff_offset->row &&
input_raw_num <=
Expand Down
70 changes: 38 additions & 32 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <errno.h>
#include <zsv/utils/prop.h>
#include "sheet_internal.h"
#include "buffer.h"

#if defined(WIN32) || defined(_WIN32)
#define NO_MEMMEM
Expand All @@ -17,10 +18,12 @@ static char *ztv_found_in_row(zsv_parser parser, size_t col_count, const char *t
return memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, target, target_len);
}

static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN], const char *filename,
struct zsv_opts *zsv_optsp, size_t *max_col_countp, const char *row_filter, size_t start_row,
size_t start_col, size_t header_span, void *index, struct ztv_opts *ztv_opts,
struct zsv_prop_handler *custom_prop_handler, const char *opts_used, size_t *rows_readp) {
static int read_data(
zsv_sheet_buffer_t *bufferp,
struct zsv_sheet_buffer_opts *buff_opts, // if buff_opts is provided, then a new *buffer will be allocated
const char *filename, const struct zsv_opts *zsv_optsp, size_t *max_col_countp, const char *row_filter,
size_t start_row, size_t start_col, size_t header_span, void *index, struct ztv_opts *ztv_opts,
struct zsv_prop_handler *custom_prop_handler, const char *opts_used, size_t *rows_readp) {
(void)(index); // to do
FILE *fp = fopen(filename, "rb");
if (!fp)
Expand All @@ -35,6 +38,7 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],
zsv_parser parser = {0};
if (zsv_new_with_properties(&opts, custom_prop_handler, filename, opts_used, &parser) != zsv_status_ok) {
fclose(fp);
zsv_delete(parser);
return errno ? errno : -1;
}

Expand All @@ -44,7 +48,18 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],
size_t row_filter_len = row_filter ? strlen(row_filter) : 0;
size_t find_len = ztv_opts->find ? strlen(ztv_opts->find) : 0;
size_t rows_searched = 0;
while (zsv_next_row(parser) == zsv_status_row && rows_read < ZTV_BUFFER_ROWS) { // for each row
zsv_sheet_buffer_t buffer = bufferp ? *bufferp : NULL;
while (zsv_next_row(parser) == zsv_status_row &&
(rows_read == 0 || rows_read < zsv_sheet_buffer_rows(buffer))) { // for each row
if (buffer == NULL && buff_opts && zsv_cell_count(parser) > 0) {
enum zsv_sheet_buffer_status stat;
buffer = zsv_sheet_buffer_new(zsv_cell_count(parser), buff_opts, &stat);
if (buffer == NULL && stat != zsv_sheet_buffer_status_ok) {
zsv_delete(parser);
return -1;
}
*bufferp = buffer;
}
original_row_num++;
if (remaining_header_to_skip > 0) {
remaining_header_to_skip--;
Expand Down Expand Up @@ -74,42 +89,31 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],

// row number
size_t rownum_column_offset = 0;
if (ztv_opts->hide_row_nums == 0) {
if (rows_read == 0) // header
snprintf(dest[rows_read][0], ZTV_MAX_CELL_LEN, "Row #");
else if (snprintf(dest[rows_read][0], ZTV_MAX_CELL_LEN, "%zu", original_row_num - 1) >=
ZTV_MAX_CELL_LEN) // unlikely!
sprintf(dest[rows_read][0], "########");
if (ztv_opts->hide_row_nums == 0) { // to do: merge w zsv_sheet_buffer_opts.no_rownum_column
if (rows_read == 0) // header
zsv_sheet_buffer_write_cell(buffer, 0, 0, (const unsigned char *)"Row #");
/////
else {
char buff[32];
int n = snprintf(buff, sizeof(buff), "%zu", original_row_num - 1);
if (!(n > 0 && n < (int)sizeof(buff)))
sprintf(buff, "########");
zsv_sheet_buffer_write_cell(buffer, rows_read, 0, (unsigned char *)buff);
}
rownum_column_offset = 1;
}

for (size_t i = start_col; i < col_count + rownum_column_offset && i < ZTV_MAX_COLS; i++) {
for (size_t i = start_col; i < col_count && i + rownum_column_offset < zsv_sheet_buffer_cols(buffer); i++) {
struct zsv_cell c = zsv_get_cell(parser, i);
if (c.len) {
char done = 0;
if (c.len >= ZTV_MAX_CELL_LEN && c.str[c.len - 1] >= 128) { // we may have to truncate a multi-byte char
int err = 0;
size_t used_width;
size_t bytes_to_copy =
utf8_bytes_up_to_max_width_and_replace_newlines(c.str, c.len, ZTV_MAX_CELL_LEN, &used_width, &err);
if (!err) {
memcpy(dest[rows_read][i + rownum_column_offset], c.str, bytes_to_copy);
dest[rows_read][i + rownum_column_offset][bytes_to_copy] = '\0';
done = 1;
}
}
if (!done) {
memcpy(dest[rows_read][i + rownum_column_offset], c.str,
c.len < ZTV_MAX_CELL_LEN ? c.len : ZTV_MAX_CELL_LEN - 1);
dest[rows_read][i + rownum_column_offset][c.len] = '\0';
}
}
if (c.len)
zsv_sheet_buffer_write_cell_w_len(buffer, rows_read, i + rownum_column_offset, c.str, c.len);
}
rows_read++;
}
fclose(fp);
if (rows_readp)
*rows_readp = rows_read;
zsv_delete(parser);
return 0;
}

Expand Down Expand Up @@ -170,6 +174,7 @@ static void *get_data_index(struct get_data_index_data *d) {
#ifdef ZTV_USE_THREADS
pthread_mutex_unlock(mutexp);
#endif
zsv_delete(parser);
return NULL;
}

Expand All @@ -196,6 +201,7 @@ static void *get_data_index(struct get_data_index_data *d) {
#ifdef ZTV_USE_THREADS
pthread_mutex_unlock(mutexp);
#endif
zsv_delete(parser);
return NULL;
}

Expand Down Expand Up @@ -233,7 +239,7 @@ static size_t ztv_find_next(const char *filename, const char *row_filter, const
ztv_opts->find = needle;
ztv_opts->found_rownum = 0;
// TO DO: check if it exists in current row, later column (and change 'cursor_row - 1' below to 'cursor_row')
read_data(NULL, filename, zsv_opts, &input_dims->col_count, row_filter,
read_data(NULL, NULL, filename, zsv_opts, &input_dims->col_count, row_filter,
input_offset->row + buff_offset->row + header_span + cursor_row - 1, 0, header_span, NULL, ztv_opts,
custom_prop_handler, opts_used, NULL);
ztv_opts->find = NULL;
Expand Down
Loading

0 comments on commit ae83b79

Please sign in to comment.