Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
36e1c57
stub rocm into all CMakeLists.txt
jeffdaily Feb 7, 2023
d44ff2f
add faiss/gpu/hipify.sh
jeffdaily Feb 20, 2023
506f619
various updates
jeffdaily Feb 21, 2023
1056988
USE_ROCM section of faiss/gpu/utils/WarpShuffles.cuh
jeffdaily Feb 21, 2023
7c7cb03
USE_ROCM section of faiss/gpu/utils/MergeNetworkWarp.cuh
jeffdaily Feb 21, 2023
f19b7bc
more updates
jeffdaily Feb 21, 2023
b8f48a7
allow kWarpSize 64 in BinaryDistance.cu
jeffdaily Feb 21, 2023
3dcf89d
fix error: use of undeclared identifier 'half'
jeffdaily Feb 21, 2023
ed8fa1a
fix error: use of undeclared identifier 'cublasSgemmEx'
jeffdaily Feb 22, 2023
6473115
warp 64 fixes for IVFInterleaved
jeffdaily Feb 22, 2023
97fd7e4
stub out asm as TODO
jeffdaily Feb 22, 2023
8151f75
warp 64 fixes for IVFUtilsSelect1 IVFUtilsSelect2
jeffdaily Feb 22, 2023
2431d8f
fix error: use of undeclared identifier 'half'
jeffdaily Feb 22, 2023
c072ed6
warp 64 fixes for L2Select.cu
jeffdaily Feb 22, 2023
cc5c384
missing half, math_constants.h in VectorResidual
jeffdaily Feb 22, 2023
fdc80e8
only compile warp 32 functions if warp size is 32
jeffdaily Feb 22, 2023
5210714
cmake updates
jeffdaily Feb 23, 2023
9764491
build warp==32 dummy symbols to fix linking errors
jeffdaily Feb 23, 2023
8ecc1f6
gpu-rocm and python binding
Mar 26, 2023
f2d7665
fix error: #include <faiss/gpu/*>
Mar 27, 2023
115a0c5
Rewrite some asm code for ROCm in LoadStoreOperators
Apr 12, 2023
c6bec4a
Resolved TODO's in PQCodeLoad.cuh
May 2, 2023
96da5fe
Fix some bugs in LoadStoreOperators.cuh
May 2, 2023
a6700a9
Port the code to Navi 2x/3x, whose warp size is 32.
xinyazhang May 16, 2023
729e929
Properly implement getBitfield and GET_BITFIELD_U32/64 on ROCM.
xinyazhang May 16, 2023
46f2b0a
Fix LoadCode32<56> in ROCM, and put runtime safeguards in other speci…
xinyazhang May 16, 2023
ad74736
Fix the misuse of hip header in gpu/
xinyazhang May 17, 2023
53bdd9e
ROCM/Navi 2x: Fix LoadStore32 template and Float16 support in LoadStore.
xinyazhang May 18, 2023
4d5be04
Merge branch 'main_upstream' into xinyazhang/navi-21
jeffdaily Sep 12, 2023
e1b7aa9
fix build
jeffdaily Sep 12, 2023
1778639
partial revert of using kWarpSize*2 etc
jeffdaily Sep 13, 2023
ebc1701
relax warp size 32 constraint
jeffdaily Sep 13, 2023
72ab993
add and use getWarpSizeCurrentDevice()
jeffdaily Sep 13, 2023
7e3b7ed
compiles for warpSize 64, however failing tests
jeffdaily Sep 13, 2023
021722e
TestGpuSelect passes for warpSize 64
jeffdaily Sep 13, 2023
5bd71d9
modify __CUDA_ARCH__ or CUDA_VERSION checks with USE_ROCM
jeffdaily Oct 12, 2023
2d38b95
fix GeneralDistance for both 32 and 64 warp sizes
jeffdaily Oct 12, 2023
cff550e
fix launch bounds for PQCodeDistances-inl.cuh
jeffdaily Oct 12, 2023
bf472e3
fix hammin20 read past end of array
jeffdaily Nov 2, 2023
da5a818
interleaved based on kWarpSize, not hard-coded to 32
jeffdaily Nov 2, 2023
ededed6
cannot use kWarpSize in host code
jeffdaily Nov 3, 2023
f3c963e
cannot use kLanes aka kWarpSize in host code
jeffdaily Nov 3, 2023
9551699
additional uses of kWarpSize found in host code
jeffdaily Nov 3, 2023
9cd52cf
kWarpSize==64 fixes for WarpPackedBits 6bit and 4bit
jeffdaily Nov 3, 2023
ee8aea9
fix cuda build
jeffdaily Nov 6, 2023
8672522
clang-format
jeffdaily Nov 6, 2023
0b1751e
fix raft build due to TILE_SIZE redefined
jeffdaily Nov 7, 2023
14cdf80
Merge branch 'main' into jeffdaily/rocm2
jeffdaily Nov 7, 2023
97523ff
fix include statement so it hipifies properly
jeffdaily Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
set(FAISS_LANGUAGES CXX)

if(FAISS_ENABLE_GPU)
list(APPEND FAISS_LANGUAGES CUDA)
# if ROCm install detected, assume ROCm/HIP is GPU device
if (EXISTS /opt/rocm)
set(USE_ROCM TRUE)
list(APPEND FAISS_LANGUAGES HIP)
else()
list(APPEND FAISS_LANGUAGES CUDA)
endif()
endif()

if(FAISS_ENABLE_RAFT)
Expand Down Expand Up @@ -58,8 +64,13 @@ option(FAISS_ENABLE_PYTHON "Build Python extension." ON)
option(FAISS_ENABLE_C_API "Build C API." OFF)

if(FAISS_ENABLE_GPU)
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
enable_language(CUDA)
if(USE_ROCM)
enable_language(HIP)
add_definitions(-DUSE_ROCM)
else ()
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
enable_language(CUDA)
endif()
endif()

if(FAISS_ENABLE_RAFT)
Expand All @@ -69,7 +80,11 @@ endif()
add_subdirectory(faiss)

if(FAISS_ENABLE_GPU)
add_subdirectory(faiss/gpu)
if(USE_ROCM)
add_subdirectory(faiss/gpu-rocm)
else()
add_subdirectory(faiss/gpu)
endif()
endif()

if(FAISS_ENABLE_PYTHON)
Expand All @@ -90,6 +105,10 @@ if(BUILD_TESTING)
add_subdirectory(tests)

if(FAISS_ENABLE_GPU)
add_subdirectory(faiss/gpu/test)
if(USE_ROCM)
add_subdirectory(faiss/gpu-rocm/test)
else()
add_subdirectory(faiss/gpu/test)
endif()
endif()
endif()
6 changes: 6 additions & 0 deletions c_api/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ target_sources(faiss_c PRIVATE
file(GLOB FAISS_C_API_GPU_HEADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.h")
faiss_install_headers("${FAISS_C_API_GPU_HEADERS}" c_api/gpu)

if (USE_ROCM)
find_package(HIP REQUIRED)
find_package(hipBLAS REQUIRED)
target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas)
else()
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
endif()

add_executable(example_gpu_c EXCLUDE_FROM_ALL example_gpu_c.c)
target_link_libraries(example_gpu_c PRIVATE faiss_c)
21 changes: 19 additions & 2 deletions faiss/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ function(generate_ivf_interleaved_code)
"64|2048|8"
)

if(USE_ROCM)
set(CU_OR_HIP "hip")
else()
set(CU_OR_HIP "cu")
endif()

# Traverse through the Cartesian product of X and Y
foreach(sub_codec ${SUB_CODEC_TYPE})
foreach(metric_type ${SUB_METRIC_TYPE})
Expand All @@ -210,10 +216,10 @@ function(generate_ivf_interleaved_code)
set(filename "template_${sub_codec}_${metric_type}_${sub_threads}_${sub_num_warp_q}_${sub_num_thread_q}")
# Remove illegal characters from filename
string(REGEX REPLACE "[^A-Za-z0-9_]" "" filename ${filename})
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.cu")
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${CU_OR_HIP}")

# Read the template file
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/impl/scan/IVFInterleavedScanKernelTemplate.cu" template_content)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/impl/scan/IVFInterleavedScanKernelTemplate.${CU_OR_HIP}" template_content)

# Replace the placeholders
string(REPLACE "SUB_CODEC_TYPE" "${sub_codec}" template_content "${template_content}")
Expand Down Expand Up @@ -248,6 +254,10 @@ if(FAISS_ENABLE_RAFT)
target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1)
endif()

if (USE_ROCM)
list(TRANSFORM FAISS_GPU_SRC REPLACE cu$ hip)
endif()

# Export FAISS_GPU_HEADERS variable to parent scope.
set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE)

Expand All @@ -261,8 +271,15 @@ foreach(header ${FAISS_GPU_HEADERS})
)
endforeach()

if (USE_ROCM)
find_package(HIP REQUIRED)
find_package(hipBLAS REQUIRED)
target_link_libraries(faiss PRIVATE hip::host roc::hipblas)
target_link_libraries(faiss_avx2 PRIVATE hip::host roc::hipblas)
else()
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_link_libraries(faiss_avx2 PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_compile_options(faiss PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
endif()
2 changes: 1 addition & 1 deletion faiss/gpu/GpuFaissAssert.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
/// Assertions
///

#ifdef __CUDA_ARCH__
#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
#define GPU_FAISS_ASSERT(X) assert(X)
#define GPU_FAISS_ASSERT_MSG(X, MSG) assert(X)
#define GPU_FAISS_ASSERT_FMT(X, FMT, ...) assert(X)
Expand Down
4 changes: 2 additions & 2 deletions faiss/gpu/StandardGpuResources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,8 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {

// Our code is pre-built with and expects warpSize == 32, validate that
FAISS_ASSERT_FMT(
prop.warpSize == 32,
"Device id %d does not have expected warpSize of 32",
prop.warpSize == 32 || prop.warpSize == 64,
"Device id %d does not have expected warpSize of 32 or 64",
device);

// Create streams
Expand Down
101 changes: 101 additions & 0 deletions faiss/gpu/hipify.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/bin/bash

# go one level up from faiss/gpu
top=$(dirname "${BASH_SOURCE[0]}")/..
echo "top=$top"
cd $top
echo "pwd=`pwd`"

# create all destination directories for hipified files into sibling 'gpu-rocm' directory
for src in $(find ./gpu -type d)
do
dst=$(echo $src | sed 's/gpu/gpu-rocm/')
echo "Creating $dst"
mkdir -p $dst
done

# run hipify-perl against all *.cu *.cuh *.h *.cpp files, no renaming
# run all files in parallel to speed up
for ext in cu cuh h cpp
do
for src in $(find ./gpu -name "*.$ext")
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
hipify-perl -o=$dst.tmp $src &
done
done
wait

# rename all hipified *.cu files to *.hip
for src in $(find ./gpu-rocm -name "*.cu.tmp")
do
dst=${src%.cu.tmp}.hip.tmp
mv $src $dst
done

# replace header include statements "<faiss/gpu/" with "<faiss/gpu-rocm"
# replace thrust::cuda::par with thrust::hip::par
# adjust header path location for hipblas.h to avoid unnecessary deprecation warnings
# adjust header path location for hiprand_kernel.h to avoid unnecessary deprecation warnings
for ext in hip cuh h cpp
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
do
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' $src
sed -i 's@thrust::cuda::par@thrust::hip::par@' $src
sed -i 's@#include <hipblas.h>@#include <hipblas/hipblas.h>@' $src
sed -i 's@#include <hiprand_kernel.h>@#include <hiprand/hiprand_kernel.h>@' $src
done
done

# hipify was run in parallel above
# don't copy the tmp file if it is unchanged
for ext in hip cuh h cpp
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
do
dst=${src%.tmp}
if test -f $dst
then
if diff -q $src $dst >& /dev/null
then
echo "$dst [unchanged]"
rm $src
else
echo "$dst"
mv $src $dst
fi
else
echo "$dst"
mv $src $dst
fi
done
done

# copy over CMakeLists.txt
for src in $(find ./gpu -name "CMakeLists.txt")
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
if test -f $dst
then
if diff -q $src $dst >& /dev/null
then
echo "$dst [unchanged]"
else
echo "$dst"
cp $src $dst
fi
else
echo "$dst"
cp $src $dst
fi
done

# run hipify-perl against python/swigfaiss.swig
# replace header include statements "<faiss/gpu/" with "<faiss/gpu-rocm" in python
for src in ./python/swigfaiss.swig
do
hipify-perl -inplace $src
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' $src
sed -i 's@thrust::cuda::par@thrust::hip::par@' $src
done
Loading