Skip to content

Commit

Permalink
Added serialization of the trie into a memory buffer.
Browse files Browse the repository at this point in the history
  • Loading branch information
KOLANICH committed Jan 23, 2021
1 parent 3a37e64 commit fa19e1a
Show file tree
Hide file tree
Showing 15 changed files with 360 additions and 12 deletions.
5 changes: 5 additions & 0 deletions datrie/alpha-map-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ AlphaMap * alpha_map_fread_bin (FILE *file);

int alpha_map_fwrite_bin (const AlphaMap *alpha_map, FILE *file);

/* Size in bytes of the binary form of alpha_map: signature, range count,
 * then a begin/end pair for every range. */
size_t alpha_map_get_serialized_size (const AlphaMap *alpha_map);

/* Write the binary form of alpha_map at *ptr as big-endian 32-bit fields,
 * advancing *ptr past every field written. */
void alpha_map_serialize_bin (const AlphaMap *alpha_map, uint8 **ptr);


TrieIndex alpha_map_char_to_trie (const AlphaMap *alpha_map,
AlphaChar ac);

Expand Down
24 changes: 24 additions & 0 deletions datrie/alpha-map.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,30 @@ alpha_map_fwrite_bin (const AlphaMap *alpha_map, FILE *file)
return 0;
}

/**
 * @brief Get the serialized size of an alphabet map
 *
 * @param alpha_map : the alphabet map
 *
 * @return size in bytes of the binary form of @a alpha_map
 *
 * The binary form is the signature, the range count and a begin/end pair
 * per range.  Every one of these fields is emitted through
 * serialize_int32_be_incr(), which always advances by exactly 4 bytes, so
 * the widths used here are the serialized widths rather than the sizeof of
 * the in-memory types.
 */
size_t
alpha_map_get_serialized_size (const AlphaMap *alpha_map)
{
    int32   ranges_count = alpha_map_get_total_ranges (alpha_map);
    size_t  size = 4 + 4;   /* ALPHAMAP_SIGNATURE + range count */

    /* Guard the int32 -> size_t conversion: a non-positive count adds
     * nothing instead of wrapping around to a huge size. */
    if (ranges_count > 0)
        size += (size_t) ranges_count * (4 + 4);   /* begin + end */

    return size;
}

/**
 * @brief Serialize an alphabet map into a memory buffer
 *
 * @param alpha_map : the alphabet map to write
 * @param ptr       : in/out cursor into the destination buffer
 *
 * Emits the signature, the total range count and then the begin/end of
 * every range in list order.  Each field goes through
 * serialize_int32_be_incr(), so @a *ptr ends up just past the last field.
 */
void
alpha_map_serialize_bin (const AlphaMap *alpha_map, uint8 **ptr)
{
    AlphaRange *cur;

    /* header: signature followed by the number of ranges */
    serialize_int32_be_incr (ptr, ALPHAMAP_SIGNATURE);
    serialize_int32_be_incr (ptr, alpha_map_get_total_ranges (alpha_map));

    /* body: one begin/end pair per range */
    cur = alpha_map->first_range;
    while (cur) {
        serialize_int32_be_incr (ptr, cur->begin);
        serialize_int32_be_incr (ptr, cur->end);
        cur = cur->next;
    }
}

static int
alpha_map_add_range_only (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
{
Expand Down
20 changes: 20 additions & 0 deletions datrie/darray.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,26 @@ da_fwrite (const DArray *d, FILE *file)
}


/**
 * @brief Get the serialized size of a double-array
 *
 * @param d : the double-array
 *
 * @return number of bytes da_serialize() writes for @a d, or 0 when it
 *         has no cells
 *
 * Each cell is written as two 4-byte big-endian fields (base and check).
 */
size_t
da_get_serialized_size (const DArray *d)
{
    if (d->num_cells <= 0)
        return 0;

    /* Widen before multiplying: in signed 32-bit arithmetic the product
     * overflows once num_cells exceeds 2^28 - 1. */
    return (size_t) d->num_cells * (4 + 4);   /* base + check */
}


/**
 * @brief Serialize a double-array into a memory buffer
 *
 * @param d   : the double-array to write
 * @param ptr : in/out cursor into the destination buffer
 *
 * Writes base then check of every cell, in index order, as big-endian
 * 32-bit values; @a *ptr is advanced past each value written.
 */
void
da_serialize (const DArray *d, uint8 **ptr)
{
    TrieIndex cell = 0;

    while (cell < d->num_cells) {
        serialize_int32_be_incr (ptr, d->cells[cell].base);
        serialize_int32_be_incr (ptr, d->cells[cell].check);
        cell++;
    }
}


/**
* @brief Get root state
*
Expand Down
5 changes: 5 additions & 0 deletions datrie/darray.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ DArray * da_fread (FILE *file);

void da_free (DArray *d);

/* Size in bytes of the serialized cells (base and check of every cell);
 * 0 when the array has no cells. */
size_t da_get_serialized_size(const DArray *d);


/* Write base and check of every cell at *ptr as big-endian 32-bit values,
 * advancing *ptr past each value written. */
void da_serialize (const DArray *d, uint8 **ptr);

int da_fwrite (const DArray *d, FILE *file);


Expand Down
65 changes: 53 additions & 12 deletions datrie/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,53 +35,94 @@
* FUNCTIONS IMPLEMENTATIONS *
*--------------------------------*/

static int32
parse_int32_be (const uint8 *buff)
{
return (buff[0] << 24) | (buff[1] << 16) | (buff[2] << 8) | buff[3];
}

/*
 * Read a big-endian 32-bit integer from file.
 *
 * On success the decoded value is stored in *o_val and TRUE is returned.
 * If 4 bytes cannot be read, *o_val is left untouched and FALSE is
 * returned.
 */
Bool
file_read_int32 (FILE *file, int32 *o_val)
{
    uint8 buff[4];

    if (fread (buff, 4, 1, file) != 1)
        return FALSE;

    *o_val = parse_int32_be (buff);
    return TRUE;
}

Bool
file_write_int32 (FILE *file, int32 val)
void
serialize_int32_be (uint8 *buff, int32 val)
{
unsigned char buff[4];

buff[0] = (val >> 24) & 0xff;
buff[1] = (val >> 16) & 0xff;
buff[2] = (val >> 8) & 0xff;
buff[3] = val & 0xff;
}


/*
 * Store val big-endian at the cursor *buff, then move the cursor forward.
 *
 * The step is the serialized width (4 bytes), deliberately not the
 * sizeof of any in-memory type.
 */
void
serialize_int32_be_incr (uint8 **buff, int32 val)
{
    uint8 *dst = *buff;

    serialize_int32_be (dst, val);
    *buff += 4;
}


/*
 * Write val to file as 4 big-endian bytes.
 *
 * Returns TRUE when the whole 4-byte item was written, FALSE otherwise.
 */
Bool
file_write_int32 (FILE *file, int32 val)
{
    uint8   be_bytes[4];
    size_t  n_items;

    serialize_int32_be (be_bytes, val);
    n_items = fwrite (be_bytes, 4, 1, file);

    return (n_items == 1);
}


int16
parse_int16_be (uint8 *buff)
{
return (buff[0] << 8) | buff[1];
}


/*
 * Read a big-endian 16-bit integer from file.
 *
 * On success the decoded value is stored in *o_val and TRUE is returned.
 * If 2 bytes cannot be read, *o_val is left untouched and FALSE is
 * returned.
 */
Bool
file_read_int16 (FILE *file, int16 *o_val)
{
    uint8 buff[2];

    if (fread (buff, 2, 1, file) != 1)
        return FALSE;

    *o_val = parse_int16_be (buff);
    return TRUE;
}

Bool
file_write_int16 (FILE *file, int16 val)
{
unsigned char buff[2];

void
serialize_int16_be (uint8 *buff, int16 val)
{
buff[0] = val >> 8;
buff[1] = val & 0xff;
}


/*
 * Store val big-endian at the cursor *buff, then move the cursor forward.
 *
 * The step is the serialized width (2 bytes), deliberately not the
 * sizeof of any in-memory type.
 */
void
serialize_int16_be_incr (uint8 **buff, int16 val)
{
    uint8 *dst = *buff;

    serialize_int16_be (dst, val);
    *buff += 2;
}


/*
 * Write val to file as 2 big-endian bytes.
 *
 * Returns TRUE when the whole 2-byte item was written, FALSE otherwise.
 */
Bool
file_write_int16 (FILE *file, int16 val)
{
    uint8   be_bytes[2];
    size_t  n_items;

    serialize_int16_be (be_bytes, val);
    n_items = fwrite (be_bytes, 2, 1, file);

    return (n_items == 1);
}

Expand Down
2 changes: 2 additions & 0 deletions datrie/fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
#include <stdio.h>
#include <datrie/typedefs.h>

/* Store val as 4 big-endian bytes at *buff, then advance *buff by 4. */
void serialize_int32_be_incr (uint8 **buff, int32 val);
/* Read 4 big-endian bytes from file into *o_val; TRUE on success. */
Bool file_read_int32 (FILE *file, int32 *o_val);
/* Write val to file as 4 big-endian bytes; TRUE on success. */
Bool file_write_int32 (FILE *file, int32 val);

/* Store val as 2 big-endian bytes at *buff, then advance *buff by 2. */
void serialize_int16_be_incr (uint8 **buff, int16 val);
/* Read 2 big-endian bytes from file into *o_val; TRUE on success. */
Bool file_read_int16 (FILE *file, int16 *o_val);
/* Write val to file as 2 big-endian bytes; TRUE on success. */
Bool file_write_int16 (FILE *file, int16 val);

Expand Down
2 changes: 2 additions & 0 deletions datrie/libdatrie.def
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ trie_new
trie_new_from_file
trie_fread
trie_free
trie_get_serialized_size
trie_serialize
trie_save
trie_fwrite
trie_is_dirty
Expand Down
4 changes: 4 additions & 0 deletions datrie/libdatrie.map
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ DATRIE_0.2.7 {
alpha_char_strcmp;
} DATRIE_0.2.6;

DATRIE_0.2.13 {
trie_get_serialized_size;
trie_serialize;
} DATRIE_0.2.7;
54 changes: 54 additions & 0 deletions datrie/tail.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,60 @@ tail_fwrite (const Tail *t, FILE *file)
}


/**
 * @brief Get the serialized size of a tail pool
 *
 * @param t : the tail data
 *
 * @return size in bytes of the binary form of @a t
 *
 * The total is a fixed header (signature, first_free, num_tails) plus,
 * for every tail entry, its next_free, data and 16-bit suffix length,
 * plus the byte length of each non-NULL suffix.
 */
size_t
tail_get_serialized_size (const Tail *t)
{
    size_t    total;
    TrieIndex idx;

    /* fixed header */
    total = sizeof (int32)              /* TAIL_SIGNATURE */
            + sizeof (t->first_free)
            + sizeof (t->num_tails);

    if (t->num_tails <= 0)
        return total;

    /* fixed part of every entry */
    total += (sizeof (t->tails[0].next_free) + sizeof (t->tails[0].data)
              + sizeof (int16)          /* suffix length */
             ) * t->num_tails;

    /* variable part: the suffix bytes themselves */
    for (idx = 0; idx < t->num_tails; idx++) {
        if (t->tails[idx].suffix)
            total += trie_byte_strlen (t->tails[idx].suffix);
    }

    return total;
}


/**
 * @brief Serialize a tail pool into a memory buffer
 *
 * @param t   : the tail data to write
 * @param ptr : in/out cursor into the destination buffer
 *
 * @return 0 on success, -1 if a suffix is too long for the 16-bit
 *         length field
 *
 * Writes the signature, first_free and num_tails, then for every entry
 * its next_free, data, suffix length and the suffix bytes (nothing for a
 * NULL or empty suffix).  @a *ptr is advanced past everything written.
 *
 * On failure the entries preceding the offending one have already been
 * written and @a *ptr points just past them.
 */
int
tail_serialize (const Tail *t, uint8 **ptr)
{
    TrieIndex i;

    serialize_int32_be_incr (ptr, TAIL_SIGNATURE);
    serialize_int32_be_incr (ptr, t->first_free);
    serialize_int32_be_incr (ptr, t->num_tails);

    for (i = 0; i < t->num_tails; i++) {
        size_t length = t->tails[i].suffix
                        ? trie_byte_strlen (t->tails[i].suffix) : 0;

        /* The length is stored as a signed 16-bit field.  A longer suffix
         * would be truncated (possibly to a negative value) and then used
         * as the memcpy size, so refuse it up front. */
        if (length > 0x7fff)
            return -1;

        serialize_int32_be_incr (ptr, t->tails[i].next_free);
        serialize_int32_be_incr (ptr, t->tails[i].data);
        serialize_int16_be_incr (ptr, (int16) length);

        if (length) {
            memcpy (*ptr, t->tails[i].suffix, length);
            *ptr += length;
        }
    }

    return 0;
}


/**
* @brief Get suffix
*
Expand Down
4 changes: 4 additions & 0 deletions datrie/tail.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ Tail * tail_fread (FILE *file);

void tail_free (Tail *t);

/* Size in bytes of the binary form of t: header, fixed per-entry fields
 * and the bytes of every non-NULL suffix. */
size_t tail_get_serialized_size (const Tail *t);

/* Write the binary form of t at *ptr, advancing *ptr past the bytes
 * written. */
int tail_serialize (const Tail *t, uint8 **ptr);

int tail_fwrite (const Tail *t, FILE *file);


Expand Down
6 changes: 6 additions & 0 deletions datrie/trie-string.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ trie_char_strlen (const TrieChar *str)
return len;
}

/*
 * Length of a TrieChar string in bytes: the character count reported by
 * trie_char_strlen() scaled by the size of one TrieChar.
 */
size_t
trie_byte_strlen (const TrieChar *str)
{
    size_t n_chars = trie_char_strlen (str);

    return n_chars * sizeof (TrieChar);
}

TrieChar *
trie_char_strdup (const TrieChar *str)
{
Expand Down
2 changes: 2 additions & 0 deletions datrie/trie-string.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@

size_t trie_char_strlen (const TrieChar *str);

/* Length of str in bytes: trie_char_strlen(str) * sizeof(TrieChar). */
size_t trie_byte_strlen (const TrieChar *str);

TrieChar * trie_char_strdup (const TrieChar *str);

/*-----------------------------*
Expand Down
44 changes: 44 additions & 0 deletions datrie/trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,50 @@ trie_save (Trie *trie, const char *path)
return res;
}


/**
 * @brief Get trie serialized size
 *
 * @param trie : the trie
 *
 * @return size of the serialized trie in bytes
 *
 * Returns the number of bytes the trie would occupy if it were
 * serialized into a binary blob or file: the sum of the sizes of its
 * alphabet map, double-array and tail parts.
 *
 * Available since: 0.2.13
 */
size_t
trie_get_serialized_size (Trie *trie)
{
    size_t alpha_map_size = alpha_map_get_serialized_size (trie->alpha_map);
    size_t da_size = da_get_serialized_size (trie->da);
    size_t tail_size = tail_get_serialized_size (trie->tail);

    return alpha_map_size + da_size + tail_size;
}


/**
 * @brief Serialize trie data into a memory buffer (including mapping)
 *
 * @param trie : the trie
 * @param ptr  : position inside a preallocated buffer at which to start
 *               writing
 *
 * Writes @a trie data into the buffer starting at @a ptr: the alphabet
 * map first, then the double-array, then the tail.  This can be useful
 * for embedding a trie index as part of other file data.  The number of
 * bytes the trie will occupy can be obtained beforehand with
 * trie_get_serialized_size().
 *
 * @a ptr is passed by value, so the caller's pointer is not advanced.
 * The trie is marked as no longer dirty only when the tail part reports
 * success.
 *
 * Available since: 0.2.13
 */
void
trie_serialize (Trie *trie, uint8 *ptr)
{
    uint8 *cursor = ptr;

    alpha_map_serialize_bin (trie->alpha_map, &cursor);
    da_serialize (trie->da, &cursor);

    /* tail_serialize() is the only step that reports a status; do not
     * claim the trie is saved if it did not complete. */
    if (tail_serialize (trie->tail, &cursor) != 0)
        return;

    trie->is_dirty = FALSE;
}


/**
* @brief Write trie data to an open file
*
Expand Down
4 changes: 4 additions & 0 deletions datrie/trie.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ Trie * trie_fread (FILE *file);

void trie_free (Trie *trie);

/* Size in bytes of the serialized trie (alphabet map + double-array +
 * tail). */
size_t trie_get_serialized_size (Trie *trie);

/* Serialize the trie into the preallocated buffer starting at ptr. */
void trie_serialize (Trie *trie, uint8 *ptr);

int trie_save (Trie *trie, const char *path);

int trie_fwrite (Trie *trie, FILE *file);
Expand Down
Loading

0 comments on commit fa19e1a

Please sign in to comment.