From c72c5c41f7d3707e6b10bc24acbb73c0e06269b1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 9 Oct 2018 20:39:02 -0700 Subject: [PATCH] Fixes for SSE detection and propagation On a machine without SSE 4.1 the HAVE_SSE flag would still be set. Because CFLAGS is set to include -msse4.2, the compiler happily generates SSE 4.2 instructions. Running any resulting SSE-enabled binary would then result in an illegal instruction error. The HAVE_SSE check now checks for the presence of one of the SSE 4.1 instructions that is used in the SSE-enabled convolutional decoder. The check must run with -march=native to ensure it checks against the host machine's capabilities. The HAVE_SSE definition is now propagated to downstream targets that depend on libcorrect. This means they can now ifdef on HAVE_SSE to decide whether or not to include libcorrect's SSE-specific header. Confirmed that the HAVE_SSE check now fails on an old machine without SSE 4.1 (but with SSE 3 and SSSE 3). --- CMakeLists.txt | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bef9687..f1c3022 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ project(Correct) include(CheckLibraryExists) include(CheckIncludeFiles) include(CheckCXXSourceCompiles) +include(CMakePushCheckState) if(MSVC) set(LIBM "") @@ -27,15 +28,24 @@ endif(MSVC) find_library(FEC fec) CHECK_LIBRARY_EXISTS(FEC dotprod "" HAVE_LIBFEC) -check_cxx_source_compiles(" + +if(NOT CMAKE_CROSSCOMPILING) + # Check if host machine can compile with SSE 4.1 intrinsic + cmake_push_check_state(RESET) + set(CMAKE_REQUIRED_DEFINITIONS -march=native) + check_cxx_source_compiles(" #include int main() { - __m128i vec; + __m128i a; + __m128i b; + __m128i c = _mm_min_epu16(a, b); return 0; }" HAVE_SSE) + cmake_pop_check_state() +endif() if(HAVE_SSE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1") endif() 
set(CMAKE_CXX_VISIBILITY_PRESET hidden) @@ -54,13 +64,16 @@ if(HAVE_SSE) set(correct_obj_files $ $ $) set(INSTALL_HEADERS ${INSTALL_HEADERS} ${PROJECT_BINARY_DIR}/include/correct-sse.h) add_custom_target(correct-sse-h ALL COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/include/correct-sse.h ${PROJECT_BINARY_DIR}/include/correct-sse.h) - add_definitions(-DHAVE_SSE=1) else() set(correct_obj_files $ $) endif() add_library(correct SHARED ${correct_obj_files}) add_library(correct_static ${correct_obj_files}) set_target_properties(correct_static PROPERTIES OUTPUT_NAME "correct") +if(HAVE_SSE) + target_compile_definitions(correct PUBLIC HAVE_SSE=1) + target_compile_definitions(correct_static PUBLIC HAVE_SSE=1) +endif() add_subdirectory(util) add_subdirectory(tests)