# Patch summary (this span): build the callback-using hipFFT/rocFFT client
# targets as relocatable device code (RDC).
#   - hipfft clients/samples: when hipfft_callback is in sample_list, it gets
#     -dc if BUILD_WITH_LIB is "CUDA", otherwise -fgpu-rdc on both its compile
#     and link options. The removed rule only handled the CUDA case.
#   - hipfft clients/tests: hipfft-test gets the same CUDA / non-CUDA split.
#   - rocfft clients/samples: rocfft_example_callback gets -fgpu-rdc
#     (compile and link options).
#   - rocfft clients/tests: rocfft-test gets -fgpu-rdc (compile and link options).
#   - rocfft docs/how-to/load-store-callbacks.rst: adds a note that callback
#     functions must be built with -fgpu-rdc passed to compiler and linker.
# NOTE(review): hipfft_callback is guarded by an IN_LIST check on sample_list,
# but rocfft_example_callback is referenced unconditionally -- confirm that
# sample is always created by the foreach over sample_list.
# NOTE(review): the non-CUDA branches assume a compiler that accepts
# -fgpu-rdc -- TODO confirm for every supported BUILD_WITH_LIB value.
# NOTE(review): the hunks below appear with their line breaks collapsed;
# presumably the original line structure has to be restored before this
# patch can be applied.
diff --git a/projects/hipfft/clients/samples/CMakeLists.txt b/projects/hipfft/clients/samples/CMakeLists.txt index 488e09de85d..6eb7c92f3f6 100644 --- a/projects/hipfft/clients/samples/CMakeLists.txt +++ b/projects/hipfft/clients/samples/CMakeLists.txt @@ -124,8 +124,12 @@ foreach( sample ${sample_list} ) endforeach() -# cuFFT callback code must be compiled with -dc to enable relocatable -# device code -if( BUILD_WITH_LIB STREQUAL "CUDA" AND hipfft_callback IN_LIST sample_list ) - target_compile_options( hipfft_callback PRIVATE -dc ) +# callback code must be compiled as relocatable device code +if( hipfft_callback IN_LIST sample_list ) + if( BUILD_WITH_LIB STREQUAL "CUDA" ) + target_compile_options( hipfft_callback PRIVATE -dc ) + else() + target_compile_options( hipfft_callback PRIVATE -fgpu-rdc ) + target_link_options( hipfft_callback PRIVATE -fgpu-rdc ) + endif() endif() diff --git a/projects/hipfft/clients/tests/CMakeLists.txt b/projects/hipfft/clients/tests/CMakeLists.txt index e49c87b6f62..989a91c4353 100644 --- a/projects/hipfft/clients/tests/CMakeLists.txt +++ b/projects/hipfft/clients/tests/CMakeLists.txt @@ -193,6 +193,14 @@ else() target_link_libraries( hipfft-test PRIVATE ${GTEST_LIBRARIES} ) endif() +# tests have callback functions, which need to be built as relocatable device code +if( BUILD_WITH_LIB STREQUAL "CUDA" ) + target_compile_options( hipfft-test PRIVATE -dc ) +else() + target_compile_options( hipfft-test PRIVATE -fgpu-rdc ) + target_link_options( hipfft-test PRIVATE -fgpu-rdc ) +endif() + if(FFTW_MULTITHREAD) target_compile_options( hipfft-test PRIVATE -DFFTW_MULTITHREAD ) endif( ) diff --git a/projects/rocfft/clients/samples/rocfft/CMakeLists.txt b/projects/rocfft/clients/samples/rocfft/CMakeLists.txt index d07b22c9226..bfea7cea574 100644 --- a/projects/rocfft/clients/samples/rocfft/CMakeLists.txt +++ b/projects/rocfft/clients/samples/rocfft/CMakeLists.txt @@ -116,3 +116,7 @@ foreach( sample ${sample_list} ) target_link_libraries( ${sample} 
PRIVATE ${ROCFFT_CLIENTS_HOST_LINK_LIBS} ${ROCFFT_CLIENTS_DEVICE_LINK_LIBS} ) endforeach( ) + +# callback functions need to be built as relocatable device code +target_compile_options( rocfft_example_callback PRIVATE -fgpu-rdc ) +target_link_options( rocfft_example_callback PRIVATE -fgpu-rdc ) diff --git a/projects/rocfft/clients/tests/CMakeLists.txt b/projects/rocfft/clients/tests/CMakeLists.txt index cfec9a283dc..3bc6c40a6b6 100644 --- a/projects/rocfft/clients/tests/CMakeLists.txt +++ b/projects/rocfft/clients/tests/CMakeLists.txt @@ -100,6 +100,10 @@ add_executable( rtc_helper_crash rtc_helper_crash.cpp ) # of a mismatch target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 ) +# callback functions need to be built as relocatable device code +target_compile_options( rocfft-test PRIVATE -fgpu-rdc ) +target_link_options( rocfft-test PRIVATE -fgpu-rdc ) + find_package( Boost REQUIRED ) set( Boost_DEBUG ON ) set( Boost_DETAILED_FAILURE_MSG ON ) diff --git a/projects/rocfft/docs/how-to/load-store-callbacks.rst b/projects/rocfft/docs/how-to/load-store-callbacks.rst index 9d379954436..a8ee6ed253b 100644 --- a/projects/rocfft/docs/how-to/load-store-callbacks.rst +++ b/projects/rocfft/docs/how-to/load-store-callbacks.rst @@ -17,6 +17,11 @@ to the library using :cpp:func:`rocfft_execution_info_set_load_callback` and :cpp:func:`rocfft_execution_info_set_store_callback`. +.. note:: + + Callback functions must be built as relocatable device code by + passing the ``-fgpu-rdc`` option to the compiler and linker. + Device functions supplied as callbacks must load and store element data types appropriate for the transform being executed. 
# Patch summary (this span): projects/rocfft/library/src/CMakeLists.txt --
# adds -fgpu-rdc to the PRIVATE compile options and PRIVATE link options of
# the rocfft target, placed after rocfft_add_coverage_flags( rocfft ) and
# before the ROCFFT_MPI_ENABLE block. The added CMake comment gives the
# reason: rocFFT contains default implementations of callback functions that
# need to be built as relocatable device code.
# NOTE(review): the flag is added unconditionally -- presumably the library
# is always built with a compiler that accepts -fgpu-rdc; confirm.
# NOTE(review): the hunk below appears with its line breaks collapsed;
# presumably the original line structure has to be restored before this
# patch can be applied.
diff --git a/projects/rocfft/library/src/CMakeLists.txt b/projects/rocfft/library/src/CMakeLists.txt index 0f840e2b04a..6b8d97030fe 100644 --- a/projects/rocfft/library/src/CMakeLists.txt +++ b/projects/rocfft/library/src/CMakeLists.txt @@ -394,6 +394,11 @@ add_library( rocfft ) rocfft_add_coverage_flags( rocfft ) +# rocFFT contains default implementations of callback functions that +# need to be built as relocatable device code +target_compile_options( rocfft PRIVATE -fgpu-rdc ) +target_link_options( rocfft PRIVATE -fgpu-rdc ) + if( ROCFFT_MPI_ENABLE ) target_compile_definitions(rocfft PRIVATE ROCFFT_MPI_ENABLE) include_directories(SYSTEM ${MPI_INCLUDE_PATH})