diff --git a/atomic.h b/atomic.h index b3f7841..aa4d977 100644 --- a/atomic.h +++ b/atomic.h @@ -62,6 +62,10 @@ typedef _Atomic long FL2_atomic; #else /* No atomics */ +# ifndef FL2_SINGLETHREAD +# error No atomic operations available. Change compiler config or define FL2_SINGLETHREAD for the entire build. +# endif + typedef long FL2_atomic; #define ATOMIC_INITIAL_VALUE 0 #define FL2_atomic_increment(n) (n++) diff --git a/fast-lzma2.h b/fast-lzma2.h index a0c9b93..44ffd92 100644 --- a/fast-lzma2.h +++ b/fast-lzma2.h @@ -55,7 +55,7 @@ Introduction /*------ Version ------*/ #define FL2_VERSION_MAJOR 0 #define FL2_VERSION_MINOR 9 -#define FL2_VERSION_RELEASE 1 +#define FL2_VERSION_RELEASE 2 #define FL2_VERSION_NUMBER (FL2_VERSION_MAJOR *100*100 + FL2_VERSION_MINOR *100 + FL2_VERSION_RELEASE) FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void); /**< useful to check dll version */ @@ -156,7 +156,13 @@ typedef struct { typedef int (FL2LIB_CALL *FL2_progressFn)(size_t done, void* opaque); +/* Get the size of the overlap section. */ +FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* ctx); + +/* Copy the overlap section to the start to prepare for more data */ FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(const FL2_CCtx* ctx, FL2_blockBuffer *block); +/* Copy the overlap to a different buffer. This allows a dual-buffer configuration where + * data is read into one block while the other is compressed. */ FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(const FL2_CCtx* ctx, FL2_blockBuffer *block, unsigned char *dst); FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx); diff --git a/fl2_compress.c b/fl2_compress.c index 57fbb94..2825b25 100644 --- a/fl2_compress.c +++ b/fl2_compress.c @@ -12,6 +12,7 @@ #include #include "fast-lzma2.h" #include "fl2_internal.h" +#include "platform.h" #include "mem.h" #include "util.h" #include "fl2_compress_internal.h" @@ -22,6 +23,12 @@ #define MIN_BYTES_PER_THREAD 0x10000 +#ifdef __64BIT__ +#define ALIGNMENT_MASK ~((size_t)7) +#else +#define ALIGNMENT_MASK ~((size_t)3) +#endif + /*-===== Pre-defined compression levels =====-*/ #define FL2_CLEVEL_DEFAULT 9 @@ -443,16 +450,25 @@ FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* cctx, return dstBuf - (BYTE*)dst; } +FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* cctx) +{ + return OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); +} + FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(const FL2_CCtx* cctx, FL2_blockBuffer *block) { FL2_shiftBlock_switch(cctx, block, NULL); } -FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(const FL2_CCtx* cctx, FL2_blockBuffer *block, BYTE *dst) +FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(const FL2_CCtx* cctx, FL2_blockBuffer *block, unsigned char *dst) { size_t const block_overlap = OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); - if (block->end > block_overlap) { + if (block_overlap == 0) { + block->start = 0; + block->end = 0; + } + else if (block->end > block_overlap) { size_t const from = (block->end - block_overlap) & ~(sizeof(size_t) - 1); size_t const overlap = block->end - from; @@ -460,7 +476,7 @@ FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(const FL2_CCtx* cctx, FL2_bloc DEBUGLOG(5, "Copy overlap data : %u bytes", (U32)overlap); memcpy(dst ? dst : block->data, block->data + from, overlap); } - else { + else if (from != 0) { DEBUGLOG(5, "Move overlap data : %u bytes", (U32)overlap); memmove(block->data, block->data + from, overlap); } diff --git a/radix_mf.c b/radix_mf.c index 5fa40df..c722c7b 100644 --- a/radix_mf.c +++ b/radix_mf.c @@ -153,6 +153,7 @@ FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const { int isStruct; size_t dictionary_size; + size_t table_bytes; FL2_matchTable* tbl; RMF_parameters params = RMF_clampParams(*p); @@ -162,8 +163,10 @@ FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const DEBUGLOG(3, "RMF_createMatchTable : isStruct %d, dict %u", isStruct, (U32)dictionary_size); + table_bytes = isStruct ? ((dictionary_size + 3U) / 4U) * sizeof(RMF_unit) + : dictionary_size * sizeof(U32); tbl = (FL2_matchTable*)malloc( - sizeof(FL2_matchTable) + (isStruct ? dictionary_size * sizeof(RMF_unit) : dictionary_size * sizeof(U32)) - sizeof(U32)); + sizeof(FL2_matchTable) + table_bytes - sizeof(U32)); if (!tbl) return NULL; tbl->isStruct = isStruct;