Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sheet: update to use heap memory, fix zsv_new mem leak #214

Merged
merged 13 commits into from
Oct 9, 2024
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
echo "TAG=$TAG" >> "$GITHUB_OUTPUT"

clang-format:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- name: Checkout
Expand Down
223 changes: 116 additions & 107 deletions app/sheet.c

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions app/sheet/buffer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "buffer.h"

struct zsv_sheet_buffer {
size_t cols;
size_t long_cell_count;
struct zsv_sheet_buffer_opts opts;
unsigned char *data;
// to do: add hooks for extension
};

static inline size_t buffer_data_offset(zsv_sheet_buffer_t buff, size_t row, size_t col) {
assert(row < buff->opts.rows && col < buff->cols);
return row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len;
}

static void set_long_cell(zsv_sheet_buffer_t buff, size_t offset, char *heap) {
char **target = (char **)(buff->data + offset);
*target = heap;
// memcpy(buff->data + offset, heap, sizeof(heap));
// set flag indicating that this is long cell
*(buff->data + offset + buff->opts.cell_buff_len - 1) = (char)1;
}

static char *get_long_cell(zsv_sheet_buffer_t buff, size_t offset) {
char **valuep = (char **)(buff->data + offset);
if (valuep)
return *valuep;
return NULL;
}

static inline int is_long_cell(const unsigned char *mem, size_t cell_buff_len) {
return *(mem + cell_buff_len - 1) != '\0';
}

size_t zsv_sheet_buffer_cols(zsv_sheet_buffer_t buff) {
return buff->cols;
}

size_t zsv_sheet_buffer_rows(zsv_sheet_buffer_t buff) {
return buff->opts.rows;
}

static void free_long_cell(zsv_sheet_buffer_t buff, size_t offset) {
if (is_long_cell(buff->data + offset, buff->opts.cell_buff_len)) {
char *value_copy = get_long_cell(buff, offset);
free(value_copy);
memset(buff->data + offset, 0, buff->opts.cell_buff_len);
buff->long_cell_count--;
}
}

void zsv_sheet_buffer_delete(zsv_sheet_buffer_t buff) {
if (buff) {
for (size_t i = 0; i < buff->opts.rows && buff->long_cell_count > 0; i++) {
for (size_t j = 0; j < buff->cols && buff->long_cell_count > 0; j++) {
size_t offset = buffer_data_offset(buff, i, j);
free_long_cell(buff, offset);
}
}
free(buff->data);
free(buff);
}
}

zsv_sheet_buffer_t zsv_sheet_buffer_new(size_t cols, struct zsv_sheet_buffer_opts *opts,
enum zsv_sheet_buffer_status *stat) {
if (opts->rows == 0)
opts->rows = ZSV_SHEET_BUFFER_DEFAULT_ROW_COUNT;
else if (opts->rows < 256)
opts->rows = 256;
if (opts->cell_buff_len == 0)
opts->cell_buff_len = ZSV_SHEET_BUFFER_DEFAULT_CELL_BUFF_LEN;
if (opts->max_cell_len == 0)
opts->max_cell_len = ZSV_SHEET_BUFFER_DEFAULT_MAX_CELL_LEN;
if (opts->cell_buff_len < sizeof(void *) * 2)
*stat = zsv_sheet_buffer_status_error;
else {
if (!opts->no_rownum_column)
cols++;
void *data = calloc(opts->rows, cols * opts->cell_buff_len);
if (!data)
*stat = zsv_sheet_buffer_status_memory;
else {
struct zsv_sheet_buffer *buff = calloc(1, sizeof(*buff));
if (!buff)
*stat = zsv_sheet_buffer_status_memory;
else {
buff->opts.rows = opts->rows;
buff->cols = cols;
buff->data = data;
buff->opts = *opts;
return buff;
}
}
free(data);
}
return NULL;
}

#ifndef UTF8_NOT_FIRST_CHAR
#define UTF8_NOT_FIRST_CHAR(x) ((x & 0xC0) == 0x80)
#endif

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell_w_len(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value, size_t len) {
enum zsv_sheet_buffer_status stat = zsv_sheet_buffer_status_ok;
size_t offset = buffer_data_offset(buff, row, col);
free_long_cell(buff, offset);
if (len < buff->opts.cell_buff_len) {
if (len)
// copy into fixed-size buff
memcpy(buff->data + offset, value, len);
*(buff->data + offset + len) = '\0';
} else {
// we have a long cell
if (len > buff->opts.max_cell_len) {
len = buff->opts.max_cell_len;
while (len > 0 && value[len] >= 128 && UTF8_NOT_FIRST_CHAR(value[len]))
// we are in the middle of a multibyte char, so back up
len--;
}
if (!len) // the only reason len could be 0 is if our input was not valid utf8, but check to make sure anyway
stat = zsv_sheet_buffer_status_utf8;
else {
char *value_copy = malloc(1 + len);
if (value_copy) {
memcpy(value_copy, value, len);
value_copy[len] = '\0';
set_long_cell(buff, offset, value_copy);
buff->long_cell_count++;
} else
stat = zsv_sheet_buffer_status_memory;
}
}
return stat;
}

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value) {
return zsv_sheet_buffer_write_cell_w_len(buff, row, col, value, strlen((void *)value));
}

const unsigned char *zsv_sheet_buffer_cell_display(zsv_sheet_buffer_t buff, size_t row, size_t col) {
if (row < buff->opts.rows && col < buff->cols) {
size_t offset = row * buff->cols * buff->opts.cell_buff_len + col * buff->opts.cell_buff_len;
const unsigned char *cell = &buff->data[offset];
if (!is_long_cell(cell, buff->opts.cell_buff_len))
return cell;

// if the fixed-length cell memory does not end with NULL,
// then the cell value is a pointer to memory holding the value
return (unsigned char *)get_long_cell(buff, offset);
}
return NULL;
}
40 changes: 40 additions & 0 deletions app/sheet/buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef ZSV_SHEET_BUFFER_H
#define ZSV_SHEET_BUFFER_H

#define ZSV_SHEET_BUFFER_DEFAULT_CELL_BUFF_LEN 16
#define ZSV_SHEET_BUFFER_DEFAULT_MAX_CELL_LEN 32768 - 1
#define ZSV_SHEET_BUFFER_DEFAULT_ROW_COUNT 1000

typedef struct zsv_sheet_buffer *zsv_sheet_buffer_t;

enum zsv_sheet_buffer_status {
zsv_sheet_buffer_status_ok = 0,
zsv_sheet_buffer_status_memory,
zsv_sheet_buffer_status_error, // generic error
zsv_sheet_buffer_status_utf8
};

struct zsv_sheet_buffer_opts {
size_t cell_buff_len; // default = 16. must be >= 2 * sizeof(void *)
size_t max_cell_len; // length in bytes; defaults to 32767
size_t rows; // rows to buffer. cannot be < 256
char no_rownum_column; // reserved. TO DO: if set, omit row num column
};

zsv_sheet_buffer_t zsv_sheet_buffer_new(size_t cols, struct zsv_sheet_buffer_opts *opts,
enum zsv_sheet_buffer_status *stat);

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value);

enum zsv_sheet_buffer_status zsv_sheet_buffer_write_cell_w_len(zsv_sheet_buffer_t buff, size_t row, size_t col,
const unsigned char *value, size_t len);

const unsigned char *zsv_sheet_buffer_cell_display(zsv_sheet_buffer_t buff, size_t row, size_t col);

void zsv_sheet_buffer_delete(zsv_sheet_buffer_t);

size_t zsv_sheet_buffer_cols(zsv_sheet_buffer_t);
size_t zsv_sheet_buffer_rows(zsv_sheet_buffer_t buff);

#endif
14 changes: 8 additions & 6 deletions app/sheet/cursor.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ static void set_window_to_cursor(struct ztv_rowcol *buff_offset, size_t target_r
buff_offset->row = 0;
}

static int ztv_goto_input_raw_row(size_t input_raw_num, size_t input_header_span, struct ztv_rowcol *input_offset,
struct ztv_rowcol *buff_offset, struct input_dimensions *input_dims,
size_t *cursor_rowp, struct display_dims *ddims, size_t final_cursor_position) {
static int ztv_goto_input_raw_row(zsv_sheet_buffer_t buffer, size_t input_raw_num, size_t input_header_span,
struct ztv_rowcol *input_offset, struct ztv_rowcol *buff_offset,
struct input_dimensions *input_dims, size_t *cursor_rowp, struct display_dims *ddims,
size_t final_cursor_position) {
size_t buffer_rows = zsv_sheet_buffer_rows(buffer);
int update_buffer = 0;
if (input_raw_num < input_offset->row // move the buffer up
|| input_raw_num + input_header_span + 1 > ZTV_BUFFER_ROWS // move the buffer down
if (input_raw_num < input_offset->row // move the buffer up
|| input_raw_num + input_header_span + 1 > buffer_rows // move the buffer down
) {
input_offset->row = input_offset_centered(input_dims, ZTV_BUFFER_ROWS, input_raw_num);
input_offset->row = input_offset_centered(input_dims, buffer_rows, input_raw_num);
update_buffer = 1;
} else if (!(input_raw_num >= input_offset->row + buff_offset->row &&
input_raw_num <=
Expand Down
70 changes: 38 additions & 32 deletions app/sheet/read-data.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <errno.h>
#include <zsv/utils/prop.h>
#include "sheet_internal.h"
#include "buffer.h"

#if defined(WIN32) || defined(_WIN32)
#define NO_MEMMEM
Expand All @@ -17,10 +18,12 @@ static char *ztv_found_in_row(zsv_parser parser, size_t col_count, const char *t
return memmem(first_cell.str, last_cell.str - first_cell.str + last_cell.len, target, target_len);
}

static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN], const char *filename,
struct zsv_opts *zsv_optsp, size_t *max_col_countp, const char *row_filter, size_t start_row,
size_t start_col, size_t header_span, void *index, struct ztv_opts *ztv_opts,
struct zsv_prop_handler *custom_prop_handler, const char *opts_used, size_t *rows_readp) {
static int read_data(
zsv_sheet_buffer_t *bufferp,
struct zsv_sheet_buffer_opts *buff_opts, // if buff_opts is provided, then a new *buffer will be allocated
const char *filename, const struct zsv_opts *zsv_optsp, size_t *max_col_countp, const char *row_filter,
size_t start_row, size_t start_col, size_t header_span, void *index, struct ztv_opts *ztv_opts,
struct zsv_prop_handler *custom_prop_handler, const char *opts_used, size_t *rows_readp) {
(void)(index); // to do
FILE *fp = fopen(filename, "rb");
if (!fp)
Expand All @@ -35,6 +38,7 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],
zsv_parser parser = {0};
if (zsv_new_with_properties(&opts, custom_prop_handler, filename, opts_used, &parser) != zsv_status_ok) {
fclose(fp);
zsv_delete(parser);
return errno ? errno : -1;
}

Expand All @@ -44,7 +48,18 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],
size_t row_filter_len = row_filter ? strlen(row_filter) : 0;
size_t find_len = ztv_opts->find ? strlen(ztv_opts->find) : 0;
size_t rows_searched = 0;
while (zsv_next_row(parser) == zsv_status_row && rows_read < ZTV_BUFFER_ROWS) { // for each row
zsv_sheet_buffer_t buffer = bufferp ? *bufferp : NULL;
while (zsv_next_row(parser) == zsv_status_row &&
(rows_read == 0 || rows_read < zsv_sheet_buffer_rows(buffer))) { // for each row
if (buffer == NULL && buff_opts && zsv_cell_count(parser) > 0) {
enum zsv_sheet_buffer_status stat;
buffer = zsv_sheet_buffer_new(zsv_cell_count(parser), buff_opts, &stat);
if (buffer == NULL && stat != zsv_sheet_buffer_status_ok) {
zsv_delete(parser);
return -1;
}
*bufferp = buffer;
}
original_row_num++;
if (remaining_header_to_skip > 0) {
remaining_header_to_skip--;
Expand Down Expand Up @@ -74,42 +89,31 @@ static int read_data(char dest[ZTV_BUFFER_ROWS][ZTV_MAX_COLS][ZTV_MAX_CELL_LEN],

// row number
size_t rownum_column_offset = 0;
if (ztv_opts->hide_row_nums == 0) {
if (rows_read == 0) // header
snprintf(dest[rows_read][0], ZTV_MAX_CELL_LEN, "Row #");
else if (snprintf(dest[rows_read][0], ZTV_MAX_CELL_LEN, "%zu", original_row_num - 1) >=
ZTV_MAX_CELL_LEN) // unlikely!
sprintf(dest[rows_read][0], "########");
if (ztv_opts->hide_row_nums == 0) { // to do: merge w zsv_sheet_buffer_opts.no_rownum_column
if (rows_read == 0) // header
zsv_sheet_buffer_write_cell(buffer, 0, 0, (const unsigned char *)"Row #");
/////
else {
char buff[32];
int n = snprintf(buff, sizeof(buff), "%zu", original_row_num - 1);
if (!(n > 0 && n < (int)sizeof(buff)))
sprintf(buff, "########");
zsv_sheet_buffer_write_cell(buffer, rows_read, 0, (unsigned char *)buff);
}
rownum_column_offset = 1;
}

for (size_t i = start_col; i < col_count + rownum_column_offset && i < ZTV_MAX_COLS; i++) {
for (size_t i = start_col; i < col_count && i + rownum_column_offset < zsv_sheet_buffer_cols(buffer); i++) {
struct zsv_cell c = zsv_get_cell(parser, i);
if (c.len) {
char done = 0;
if (c.len >= ZTV_MAX_CELL_LEN && c.str[c.len - 1] >= 128) { // we may have to truncate a multi-byte char
int err = 0;
size_t used_width;
size_t bytes_to_copy =
utf8_bytes_up_to_max_width_and_replace_newlines(c.str, c.len, ZTV_MAX_CELL_LEN, &used_width, &err);
if (!err) {
memcpy(dest[rows_read][i + rownum_column_offset], c.str, bytes_to_copy);
dest[rows_read][i + rownum_column_offset][bytes_to_copy] = '\0';
done = 1;
}
}
if (!done) {
memcpy(dest[rows_read][i + rownum_column_offset], c.str,
c.len < ZTV_MAX_CELL_LEN ? c.len : ZTV_MAX_CELL_LEN - 1);
dest[rows_read][i + rownum_column_offset][c.len] = '\0';
}
}
if (c.len)
zsv_sheet_buffer_write_cell_w_len(buffer, rows_read, i + rownum_column_offset, c.str, c.len);
}
rows_read++;
}
fclose(fp);
if (rows_readp)
*rows_readp = rows_read;
zsv_delete(parser);
return 0;
}

Expand Down Expand Up @@ -170,6 +174,7 @@ static void *get_data_index(struct get_data_index_data *d) {
#ifdef ZTV_USE_THREADS
pthread_mutex_unlock(mutexp);
#endif
zsv_delete(parser);
return NULL;
}

Expand All @@ -196,6 +201,7 @@ static void *get_data_index(struct get_data_index_data *d) {
#ifdef ZTV_USE_THREADS
pthread_mutex_unlock(mutexp);
#endif
zsv_delete(parser);
return NULL;
}

Expand Down Expand Up @@ -233,7 +239,7 @@ static size_t ztv_find_next(const char *filename, const char *row_filter, const
ztv_opts->find = needle;
ztv_opts->found_rownum = 0;
// TO DO: check if it exists in current row, later column (and change 'cursor_row - 1' below to 'cursor_row')
read_data(NULL, filename, zsv_opts, &input_dims->col_count, row_filter,
read_data(NULL, NULL, filename, zsv_opts, &input_dims->col_count, row_filter,
input_offset->row + buff_offset->row + header_span + cursor_row - 1, 0, header_span, NULL, ztv_opts,
custom_prop_handler, opts_used, NULL);
ztv_opts->find = NULL;
Expand Down
Loading