Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions faiss/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,9 @@ foreach(header ${FAISS_GPU_HEADERS})
endforeach()

if (USE_ROCM)
target_link_libraries(faiss_gpu PRIVATE $<$<BOOL:${USE_ROCM}>:hip::host> $<$<BOOL:${USE_ROCM}>:roc::hipblas>)
target_link_libraries(faiss_gpu PRIVATE
$<$<BOOL:${USE_ROCM}>:hip::host>
$<$<BOOL:${USE_ROCM}>:roc::hipblas>)
target_compile_options(faiss_gpu PRIVATE)
else()
# Prepares a host linker script and enables host linker to support
Expand All @@ -333,6 +335,13 @@ else()
target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")

find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass> $<$<BOOL:${FAISS_ENABLE_RAFT}>:OpenMP::OpenMP_CXX>)
target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$<BOOL:${FAISS_ENABLE_RAFT}>:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas
$<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
$<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>
$<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>
$<$<BOOL:${FAISS_ENABLE_RAFT}>:OpenMP::OpenMP_CXX>)
target_compile_options(faiss_gpu PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all
--expt-extended-lambda --expt-relaxed-constexpr
$<$<BOOL:${FAISS_ENABLE_RAFT}>:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
endif()
150 changes: 75 additions & 75 deletions faiss/gpu/hipify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,117 +3,117 @@
# go one level up from faiss/gpu
top=$(dirname "${BASH_SOURCE[0]}")/..
echo "top=$top"
cd $top
echo "pwd=`pwd`"
cd "$top" || exit
echo "pwd=$(pwd)"

# create all destination directories for hipified files into sibling 'gpu-rocm' directory
for src in $(find ./gpu -type d)
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's/gpu/gpu-rocm/')
dst="${src//gpu/gpu-rocm}"
echo "Creating $dst"
mkdir -p $dst
done
mkdir -p "$dst"
done < <(find ./gpu -type d -print0)

# run hipify-perl against all *.cu *.cuh *.h *.cpp files, no renaming
# run all files in parallel to speed up
for ext in cu cuh h cpp
do
for src in $(find ./gpu -name "*.$ext")
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
hipify-perl -o=$dst.tmp $src &
done
dst="${src//\.\/gpu/\.\/gpu-rocm}"
hipify-perl -o="$dst.tmp" "$src" &
done < <(find ./gpu -name "*.$ext" -print0)
done
wait

# rename all hipified *.cu files to *.hip
for src in $(find ./gpu-rocm -name "*.cu.tmp")
while IFS= read -r -d '' src
do
dst=${src%.cu.tmp}.hip.tmp
mv $src $dst
done
mv "$src" "$dst"
done < <(find ./gpu-rocm -name "*.cu.tmp" -print0)

# replace header include statements "<faiss/gpu/" with "<faiss/gpu-rocm"
# replace thrust::cuda::par with thrust::hip::par
# adjust header path location for hipblas.h to avoid unnecessary deprecation warnings
# adjust header path location for hiprand_kernel.h to avoid unnecessary deprecation warnings
for ext in hip cuh h cpp
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
while IFS= read -r -d '' src
do
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' $src
sed -i 's@thrust::cuda::par@thrust::hip::par@' $src
sed -i 's@#include <hipblas.h>@#include <hipblas/hipblas.h>@' $src
sed -i 's@#include <hiprand_kernel.h>@#include <hiprand/hiprand_kernel.h>@' $src
done
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' "$src"
sed -i 's@thrust::cuda::par@thrust::hip::par@' "$src"
sed -i 's@#include <hipblas.h>@#include <hipblas/hipblas.h>@' "$src"
sed -i 's@#include <hiprand_kernel.h>@#include <hiprand/hiprand_kernel.h>@' "$src"
done < <(find ./gpu-rocm -name "*.$ext.tmp" -print0)
done

# hipify was run in parallel above
# don't copy the tmp file if it is unchanged
for ext in hip cuh h cpp
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
while IFS= read -r -d '' src
do
dst=${src%.tmp}
if test -f $dst
if test -f "$dst"
then
if diff -q $src $dst >& /dev/null
if diff -q "$src" "$dst" >& /dev/null
then
echo "$dst [unchanged]"
rm $src
rm "$src"
else
echo "$dst"
mv $src $dst
mv "$src" "$dst"
fi
else
echo "$dst"
mv $src $dst
mv "$src" "$dst"
fi
done
done < <(find ./gpu-rocm -name "*.$ext.tmp" -print0)
done

# copy over CMakeLists.txt
for src in $(find ./gpu -name "CMakeLists.txt")
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
if test -f $dst
dst="${src//\.\/gpu/\.\/gpu-rocm}"
if test -f "$dst"
then
if diff -q $src $dst >& /dev/null
if diff -q "$src" "$dst" >& /dev/null
then
echo "$dst [unchanged]"
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
done
done < <(find ./gpu -name "CMakeLists.txt" -print0)

# Copy over other files
for ext in py
other_exts="py"
for ext in $other_exts
do
for src in $(find ./gpu -name "*.$ext")
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
if test -f $dst
dst="${src//\.\/gpu/\.\/gpu-rocm}"
if test -f "$dst"
then
if diff -q $src $dst >& /dev/null
if diff -q "$src" "$dst" >& /dev/null
then
echo "$dst [unchanged]"
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
done
done < <(find ./gpu -name "*.$ext" -print0)
done


###################################################################################
# C_API Support
###################################################################################
Expand All @@ -122,91 +122,91 @@ done
# This points to the faiss/c_api dir
top_c_api=$(dirname "${BASH_SOURCE[0]}")/../../c_api
echo "top=$top_c_api"
cd ../$top_c_api
echo "pwd=`pwd`"
cd "../$top_c_api" || exit
echo "pwd=$(pwd)"


# create all destination directories for hipified files into sibling 'gpu-rocm' directory
for src in $(find ./gpu -type d)
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's/gpu/gpu-rocm/')
dst="${src//gpu/gpu-rocm}"
echo "Creating $dst"
mkdir -p $dst
done
mkdir -p "$dst"
done < <(find ./gpu -type d -print0)

# run hipify-perl against all *.cu *.cuh *.h *.cpp files, no renaming
# run all files in parallel to speed up
for ext in cu cuh h cpp c
do
for src in $(find ./gpu -name "*.$ext")
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
hipify-perl -o=$dst.tmp $src &
done
dst="${src//\.\/gpu/\.\/gpu-rocm}"
hipify-perl -o="$dst.tmp" "$src" &
done < <(find ./gpu -name "*.$ext" -print0)
done
wait

# rename all hipified *.cu files to *.hip
for src in $(find ./gpu-rocm -name "*.cu.tmp")
while IFS= read -r -d '' src
do
dst=${src%.cu.tmp}.hip.tmp
mv $src $dst
done
mv "$src" "$dst"
done < <(find ./gpu-rocm -name "*.cu.tmp" -print0)

# replace header include statements "<faiss/gpu/" with "<faiss/gpu-rocm"
# replace thrust::cuda::par with thrust::hip::par
# adjust header path location for hipblas.h to avoid unnecessary deprecation warnings
# adjust header path location for hiprand_kernel.h to avoid unnecessary deprecation warnings
for ext in hip cuh h cpp c
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
while IFS= read -r -d '' src
do
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' $src
sed -i 's@thrust::cuda::par@thrust::hip::par@' $src
sed -i 's@#include <hipblas.h>@#include <hipblas/hipblas.h>@' $src
sed -i 's@#include <hiprand_kernel.h>@#include <hiprand/hiprand_kernel.h>@' $src
done
sed -i 's@#include <faiss/gpu/@#include <faiss/gpu-rocm/@' "$src"
sed -i 's@thrust::cuda::par@thrust::hip::par@' "$src"
sed -i 's@#include <hipblas.h>@#include <hipblas/hipblas.h>@' "$src"
sed -i 's@#include <hiprand_kernel.h>@#include <hiprand/hiprand_kernel.h>@' "$src"
done < <(find ./gpu-rocm -name "*.$ext.tmp" -print0)
done

# hipify was run in parallel above
# don't copy the tmp file if it is unchanged
for ext in hip cuh h cpp c
do
for src in $(find ./gpu-rocm -name "*.$ext.tmp")
while IFS= read -r -d '' src
do
dst=${src%.tmp}
if test -f $dst
if test -f "$dst"
then
if diff -q $src $dst >& /dev/null
if diff -q "$src" "$dst" >& /dev/null
then
echo "$dst [unchanged]"
rm $src
rm "$src"
else
echo "$dst"
mv $src $dst
mv "$src" "$dst"
fi
else
echo "$dst"
mv $src $dst
mv "$src" "$dst"
fi
done
done < <(find ./gpu-rocm -name "*.$ext.tmp" -print0)
done

# copy over CMakeLists.txt
for src in $(find ./gpu -name "CMakeLists.txt")
while IFS= read -r -d '' src
do
dst=$(echo $src | sed 's@./gpu@./gpu-rocm@')
if test -f $dst
dst="${src//\.\/gpu/\.\/gpu-rocm}"
if test -f "$dst"
then
if diff -q $src $dst >& /dev/null
if diff -q "$src" "$dst" >& /dev/null
then
echo "$dst [unchanged]"
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
else
echo "$dst"
cp $src $dst
cp "$src" "$dst"
fi
done
done < <(find ./gpu -name "CMakeLists.txt" -print0)
3 changes: 2 additions & 1 deletion faiss/gpu/impl/IVFAppend.cu
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ __global__ void ivfInterleavedAppend(
EncodeT* listStart = ((EncodeT*)listData[listId]);

// Each warp within the block handles a different chunk of kWarpSize
auto warpVec = alignedListVecStart + warpId * kWarpSize;
auto warpVec = alignedListVecStart +
(faiss::gpu::Tensor<long, 1, true>::DataType)warpId * kWarpSize;

// The warp data starts here
EncodeT* warpData = listStart + (warpVec / kWarpSize) * wordsPerVectorBlock;
Expand Down
4 changes: 2 additions & 2 deletions faiss/gpu/impl/InterleavedCodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ void unpackInterleavedWord(
int dims,
int bitsPerCode) {
int warpSize = getWarpSizeCurrentDevice();
int wordsPerDimBlock = warpSize * bitsPerCode / (8 * sizeof(T));
int wordsPerDimBlock = (size_t)warpSize * bitsPerCode / (8 * sizeof(T));
int wordsPerBlock = wordsPerDimBlock * dims;
int numBlocks = utils::divUp(numVecs, warpSize);

Expand Down Expand Up @@ -446,7 +446,7 @@ void packInterleavedWord(
int dims,
int bitsPerCode) {
int warpSize = getWarpSizeCurrentDevice();
int wordsPerDimBlock = warpSize * bitsPerCode / (8 * sizeof(T));
int wordsPerDimBlock = (size_t)warpSize * bitsPerCode / (8 * sizeof(T));
int wordsPerBlock = wordsPerDimBlock * dims;
int numBlocks = utils::divUp(numVecs, warpSize);

Expand Down
8 changes: 6 additions & 2 deletions faiss/gpu/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@
include(GoogleTest)
add_library(faiss_gpu_test_helper TestUtils.cpp)
if(USE_ROCM)
target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest $<$<BOOL:${USE_ROCM}>:hip::host>)
target_link_libraries(faiss_gpu_test_helper PUBLIC
faiss gtest $<$<BOOL:${USE_ROCM}>:hip::host>)
else()
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>)
target_link_libraries(faiss_gpu_test_helper PUBLIC
faiss gtest CUDA::cudart
$<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
$<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>)
endif()

macro(faiss_gpu_test file)
Expand Down
Loading