Skip to content

Commit

Permalink
Fixes for SSE detection and propagation
Browse files Browse the repository at this point in the history
On a machine without SSE 4.1, the HAVE_SSE flag would still be set.
Because CFLAGS was set to include -msse4.2, the compiler happily
generated SSE 4.2 instructions. Running any resulting SSE-enabled
binary would then fail with an illegal instruction error.

The HAVE_SSE check now tests for one of the SSE 4.1 instructions
that is used in the SSE-enabled convolutional decoder.
The check must be compiled with -march=native so that it reflects the
capabilities of the host machine.

The HAVE_SSE definition is now propagated to downstream targets that
depend on libcorrect. This means they can now use #ifdef HAVE_SSE to
decide whether or not to include libcorrect's SSE-specific header.

Confirmed that the HAVE_SSE check now fails on an old machine without
SSE 4.1 (but with SSE 3 and SSSE 3).
  • Loading branch information
pietern committed Oct 10, 2018
1 parent 01d358b commit c72c5c4
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ project(Correct)
include(CheckLibraryExists)
include(CheckIncludeFiles)
include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

if(MSVC)
set(LIBM "")
Expand All @@ -27,15 +28,24 @@ endif(MSVC)

# Search for a library named "fec"; the result is stored in the FEC variable.
find_library(FEC fec)
# Probe whether the dotprod symbol links; the outcome lands in HAVE_LIBFEC.
# NOTE(review): the first argument is the literal name FEC rather than
# ${FEC} -- confirm this is the intended library name.
check_library_exists(FEC dotprod "" HAVE_LIBFEC)
check_cxx_source_compiles("

if(NOT CMAKE_CROSSCOMPILING)
  # Check if the host machine can compile an SSE 4.1 intrinsic. The probe is
  # built with -march=native, so it is only attempted when not cross-compiling.
  # The result is stored in HAVE_SSE.
  cmake_push_check_state(RESET)
  # -march=native is a compiler flag, not a preprocessor definition, so it
  # goes in CMAKE_REQUIRED_FLAGS (CMAKE_REQUIRED_DEFINITIONS is meant for
  # -DNAME=value style arguments).
  set(CMAKE_REQUIRED_FLAGS "-march=native")
  # The operands are initialized and the result is consumed so that warning
  # flags (e.g. -Werror) present in the user's compiler flags cannot fail the
  # probe for reasons unrelated to SSE support.
  check_cxx_source_compiles("
#include <x86intrin.h>
int main() {
__m128i a = _mm_setzero_si128();
__m128i b = _mm_setzero_si128();
__m128i c = _mm_min_epu16(a, b);
(void)c;
return 0;
}" HAVE_SSE)
  cmake_pop_check_state()
endif()

# Append SSE instruction-set flags to the C compiler flags when HAVE_SSE is true.
# NOTE(review): both -msse4.2 and -msse4.1 are appended below; this looks like
# the old and the new line of a change shown together -- confirm that only
# -msse4.1 is intended.
if(HAVE_SSE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
endif()

set(CMAKE_CXX_VISIBILITY_PRESET hidden)
Expand All @@ -54,13 +64,16 @@ if(HAVE_SSE)
set(correct_obj_files $<TARGET_OBJECTS:correct-reed-solomon> $<TARGET_OBJECTS:correct-convolutional> $<TARGET_OBJECTS:correct-convolutional-sse>)
set(INSTALL_HEADERS ${INSTALL_HEADERS} ${PROJECT_BINARY_DIR}/include/correct-sse.h)
add_custom_target(correct-sse-h ALL COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/include/correct-sse.h ${PROJECT_BINARY_DIR}/include/correct-sse.h)
add_definitions(-DHAVE_SSE=1)
else()
set(correct_obj_files $<TARGET_OBJECTS:correct-reed-solomon> $<TARGET_OBJECTS:correct-convolutional>)
endif()
# Build the shared and the static flavour of the library from the same set of
# object files; the static one is also emitted under the name "correct".
add_library(correct SHARED ${correct_obj_files})
add_library(correct_static ${correct_obj_files})
set_target_properties(correct_static PROPERTIES OUTPUT_NAME "correct")

# When SSE support was detected, publish HAVE_SSE=1 on both library targets.
# PUBLIC applies it to the libraries themselves and to targets linking them.
if(HAVE_SSE)
  foreach(sse_lib_target correct correct_static)
    target_compile_definitions(${sse_lib_target} PUBLIC HAVE_SSE=1)
  endforeach()
endif()

add_subdirectory(util)
add_subdirectory(tests)
Expand Down

0 comments on commit c72c5c4

Please sign in to comment.