diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile index baa1f24c6a7..f80ee8653cf 100644 --- a/contrib/linux-kernel/Makefile +++ b/contrib/linux-kernel/Makefile @@ -34,7 +34,6 @@ libzstd: -DFSE_STATIC_LINKING_ONLY \ -DHUF_STATIC_LINKING_ONLY \ -DXXH_STATIC_LINKING_ONLY \ - -DMEM_FORCE_MEMORY_ACCESS=0 \ -D__GNUC__ \ -D__linux__=1 \ -DSTATIC_BMI2=0 \ diff --git a/lib/common/mem.h b/lib/common/mem.h index 4b10f7c1f06..493782f6f76 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -133,21 +133,15 @@ MEM_STATIC size_t MEM_swapST(size_t in); /*-************************************************************** * Memory I/O Implementation *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -190,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; +typedef __attribute__((aligned(1))) size_t unalignArch; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index 23caaef5647..3a8b6bdddc9 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -190,21 +190,15 @@ typedef signed long long S64; /**************************************************************** * Memory I/O *****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* FSE_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define FSE_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -229,13 +223,13 @@ static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +static U16 FSE_read16(const void* ptr) { return *(const unalign16*)ptr; } +static U32 FSE_read32(const void* ptr) { return *(const unalign32*)ptr; } +static U64 FSE_read64(const void* ptr) { return *(const unalign64*)ptr; } #else diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 6eadd3948b7..05766d203c9 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -115,21 +115,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -155,15 +149,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 72fc9140d4a..15cf5f3d7fe 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -116,21 +116,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -156,15 +150,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index a2d281eb97d..d4594292de5 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -87,21 +87,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -127,15 +121,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 216c67d0194..06aec20a031 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -106,21 +106,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -148,17 +142,17 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index b224355d437..66f256a53eb 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -108,21 +108,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -148,15 +142,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index c12a091cb55..71c99ab63a7 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -268,21 +268,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -308,15 +302,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 810daa2ce86..6f3fb299490 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -26,8 +26,7 @@ * @param MEM_FORCE_MEMORY_ACCESS: * This flag controls how the zstd library accesses unaligned memory. * It can be undefined, or 0 through 2. If it is undefined, it selects - * the method to use based on the compiler. If testing with UBSAN set - * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * the method to use based on the compiler. * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION * This is the canonical flag to enable deterministic builds for fuzzing. * Changes to zstd for fuzzing are gated behind this define.