Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions projects/hipfft/clients/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,12 @@ foreach( sample ${sample_list} )

endforeach()

# cuFFT callback code must be compiled with -dc to enable relocatable
# device code
if( BUILD_WITH_LIB STREQUAL "CUDA" AND hipfft_callback IN_LIST sample_list )
target_compile_options( hipfft_callback PRIVATE -dc )
# callback code must be compiled as relocatable device code
if( hipfft_callback IN_LIST sample_list )
if( BUILD_WITH_LIB STREQUAL "CUDA" )
target_compile_options( hipfft_callback PRIVATE -dc )
else()
target_compile_options( hipfft_callback PRIVATE -fgpu-rdc )
target_link_options( hipfft_callback PRIVATE -fgpu-rdc )
endif()
endif()
8 changes: 8 additions & 0 deletions projects/hipfft/clients/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ else()
target_link_libraries( hipfft-test PRIVATE ${GTEST_LIBRARIES} )
endif()

# tests have callback functions, which need to be built as relocatable device code
if( BUILD_WITH_LIB STREQUAL "CUDA" )
target_compile_options( hipfft-test PRIVATE -dc )
else()
target_compile_options( hipfft-test PRIVATE -fgpu-rdc )
target_link_options( hipfft-test PRIVATE -fgpu-rdc )
endif()

if(FFTW_MULTITHREAD)
target_compile_options( hipfft-test PRIVATE -DFFTW_MULTITHREAD )
endif( )
Expand Down
4 changes: 4 additions & 0 deletions projects/rocfft/clients/samples/rocfft/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,7 @@ foreach( sample ${sample_list} )
target_link_libraries( ${sample} PRIVATE ${ROCFFT_CLIENTS_HOST_LINK_LIBS} ${ROCFFT_CLIENTS_DEVICE_LINK_LIBS} )

endforeach( )

# callback functions need to be built as relocatable device code
target_compile_options( rocfft_example_callback PRIVATE -fgpu-rdc )
target_link_options( rocfft_example_callback PRIVATE -fgpu-rdc )
4 changes: 4 additions & 0 deletions projects/rocfft/clients/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ add_executable( rtc_helper_crash rtc_helper_crash.cpp )
# of a mismatch
target_compile_options( rocfft-test PRIVATE -Xarch_device -O3 )

# callback functions need to be built as relocatable device code
target_compile_options( rocfft-test PRIVATE -fgpu-rdc )
target_link_options( rocfft-test PRIVATE -fgpu-rdc )

find_package( Boost REQUIRED )
set( Boost_DEBUG ON )
set( Boost_DETAILED_FAILURE_MSG ON )
Expand Down
5 changes: 5 additions & 0 deletions projects/rocfft/docs/how-to/load-store-callbacks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ to the library using
:cpp:func:`rocfft_execution_info_set_load_callback` and
:cpp:func:`rocfft_execution_info_set_store_callback`.

.. note::

Callback functions must be built as relocatable device code by
passing the ``-fgpu-rdc`` option to the compiler and linker.

Device functions supplied as callbacks must load and store element
data types appropriate for the transform being executed.

Expand Down
5 changes: 5 additions & 0 deletions projects/rocfft/library/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,11 @@ add_library( rocfft
)
rocfft_add_coverage_flags( rocfft )

# rocFFT contains default implementations of callback functions that
# need to be built as relocatable device code
target_compile_options( rocfft PRIVATE -fgpu-rdc )
target_link_options( rocfft PRIVATE -fgpu-rdc )
Comment on lines +397 to +400
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Windows-CI - Failure that is not related to this code change, would not normally block this team merge

This doesn't hold true. @ScottTodd identified adding -fgpu-rdc as a root cause for rocFFT being broken on Windows when building with TheRock (we don't build tests thus hipFFT wasn't impacted so far). I can confirm that reverting this commit unbreaks building via TheRock on Windows.

I'm not sure if that "WindowsCI - Internal" would have shown this error. I can't view the logs there (maybe they expired). We are seeing these errors in TheRock on ROCm/TheRock#1327 after trying to sync past this commit: https://github.com/ROCm/TheRock/actions/runs/17240878248/job/48917478820#step:11:360

[rocFFT] FAILED: staging/rocfft.dll library/src/rocfft.lib 
[rocFFT] C:\Windows\system32\cmd.exe /C "cd . && B:\build\core\clr\dist\lib\llvm\bin\clang++.exe -nostartfiles -nostdlib -DWIN32 -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS -DNOMINMAX -fms-extensions -fms-compatibility -D_ENABLE_EXTENDED_ALIGNED_STORAGE  -Wno-documentation-unknown-command -Wno-documentation-pedantic -Wno-unused-command-line-argument -Wno-explicit-specialization-storage-class -Wno-ignored-attributes -Wno-unknown-attributes -Wno-duplicate-decl-specifier --hip-path=B:/build/core/clr/dist --hip-device-lib-path=B:/build/core/clr/dist/lib/llvm/amdgcn/bitcode -O3 -DNDEBUG -D_DLL -D_MT -Xclang --dependent-lib=msvcrt  -L B:/build/third-party/sysdeps/windows/zlib/build/stage/lib/rocm_sysdeps/lib  -L B:/build/third-party/sysdeps/windows/zstd/build/stage/lib/rocm_sysdeps/lib  -L B:/build/compiler/amd-llvm/stage/lib/llvm/lib  -L B:/build/core/clr/stage/lib    -fgpu-rdc -fuse-ld=lld-link -shared -o staging\rocfft.dll  -Xlinker /MANIFEST:EMBED -Xlinker /implib:library\src\rocfft.lib -Xlinker /pdb:staging\rocfft.pdb -Xlinker /version:0.0 library/src/CMakeFiles/rocfft-rtc-cache.dir/rtc_cache.cpp.obj library/src/CMakeFiles/sqlite3.dir/__/__/_deps/sqlite_local-src/sqlite3.c.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_bluestein_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_realcomplex_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_stockham_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_transpose_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_twiddle_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_chirp_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/rtc_test_harness.cpp.obj library/src/CMakeFiles/rocfft-rtc-gen.dir/load_store_ops_gen.cpp.obj library/src/CMakeFiles/rocfft-rtc-compile.dir/rtc_compile.cpp.obj library/src/CMakeFiles/rocfft-rtc-subprocess.dir/rtc_subprocess.cpp.obj library/src/CMakeFiles/rocfft-rtc-common.dir/device/kernel-generator-embed.cpp.obj library/src/CMakeFiles/rocfft-rtc-common.dir/compute_scheme.cpp.obj library/src/CMakeFiles/rocfft-rtc-common.dir/rocfft_ostream.cpp.obj library/src/device/generator/CMakeFiles/generator.dir/generator.cpp.obj library/src/device/generator/CMakeFiles/generator.dir/fftgenerator.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_0.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_1.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_2.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_3.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_4.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_5.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_6.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool_init_7.cpp.obj library/src/device/CMakeFiles/rocfft-function-pool.dir/function_pool.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_bluestein_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_realcomplex_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_stockham_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_transpose_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_twiddle_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/rtc_chirp_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/load_store_ops_kernel.cpp.obj library/src/CMakeFiles/rocfft-rtc-launch.dir/tree_node_callback.cpp.obj library/src/CMakeFiles/rocfft-solution-map.dir/solution_map.cpp.obj library/src/CMakeFiles/rocfft-solution-map.dir/solutions.cpp.obj library/src/CMakeFiles/rocfft-tuning-helper.dir/tuning_helper.cpp.obj library/src/CMakeFiles/rocfft.dir/auxiliary.cpp.obj library/src/CMakeFiles/rocfft.dir/plan.cpp.obj library/src/CMakeFiles/rocfft.dir/transform.cpp.obj library/src/CMakeFiles/rocfft.dir/repo.cpp.obj library/src/CMakeFiles/rocfft.dir/powX.cpp.obj library/src/CMakeFiles/rocfft.dir/chirp.cpp.obj library/src/CMakeFiles/rocfft.dir/twiddles.cpp.obj library/src/CMakeFiles/rocfft.dir/kargs.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node_1D.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node_2D.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node_3D.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node_bluestein.cpp.obj library/src/CMakeFiles/rocfft.dir/tree_node_real.cpp.obj library/src/CMakeFiles/rocfft.dir/fuse_shim.cpp.obj library/src/CMakeFiles/rocfft.dir/assignment_policy.cpp.obj library/src/CMakeFiles/rocfft.dir/node_factory.cpp.obj library/src/CMakeFiles/rocfft.dir/enum_printer.cpp.obj library/src/CMakeFiles/rocfft.dir/rtc_exports.cpp.obj library/src/CMakeFiles/rocfft.dir/tuning_kernel_tuner.cpp.obj library/src/CMakeFiles/rocfft.dir/tuning_plan_tuner.cpp.obj  --hip-link  --offload-arch=gfx1100  --offload-arch=gfx1101  --offload-arch=gfx1102  C:/008985C5-A238-4B36-958A-4D5EFCDBF07E/build/core/clr/dist/lib/llvm/lib/clang/20/lib/windows/clang_rt.builtins-x86_64.lib  B:/build/core/clr/dist/lib/amdhip64.lib  B:/build/core/clr/dist/lib/hiprtc.lib  -lkernel32 -luser32 -lgdi32 -lwinspool -lshell32 -lole32 -loleaut32 -luuid -lcomdlg32 -ladvapi32 -loldnames  && cd ."
[rocFFT] clang++: error: invalid linker name in argument '-fuse-ld=lld-link'

[rocFFT] clang++: error: invalid linker name in argument '-fuse-ld=lld-link'

[rocFFT] clang++: error: invalid linker name in argument '-fuse-ld=lld-link'


[rocFFT] ninja: build stopped: subcommand failed.

We can confirm that reverting these changed lines that add the -fgpu-rdc flag in this commit does fix the build (locally and on CI).

@davidd-amd suggests that this -fgpu-rdc flag is used by other subprojects so it might be an issue with the flag ordering in the linker command.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, -fuse-ld=lld-link is used by other projects without error however the other projects do not use -fgpu-rdc. I wonder if this should be guarded by if(NOT WIN32). Let me spend 20 minutes looking at if/how we can form a link command that will work.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, okay. I also looked through https://clang.llvm.org/docs/HIPSupport.html#device-code-compilation and the related code in LLVM to see if that is exclusive to Linux but nothing really stood out to me. You might want to reach out to members of the compiler team or the upstream LLVM developers to weigh in as well.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I opened #1402 to unblock this issue (hopefully temporarily) while it is analyzed. @davidd-amd and @ScottTodd, please let me/us know if you found anything relevant to this issue.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I opened #1402 to unblock this issue (hopefully temporarily) while it is analyzed. @davidd-amd and @ScottTodd, please let me/us know if you found anything relevant to this issue.

The patch seems to fix the Windows build issue in TheRock and allows to drop the local revert.


if( ROCFFT_MPI_ENABLE )
target_compile_definitions(rocfft PRIVATE ROCFFT_MPI_ENABLE)
include_directories(SYSTEM ${MPI_INCLUDE_PATH})
Expand Down