diff --git a/faiss/gpu/impl/DistanceUtils.cuh b/faiss/gpu/impl/DistanceUtils.cuh index fd894ae3bd..4ea899c8ec 100644 --- a/faiss/gpu/impl/DistanceUtils.cuh +++ b/faiss/gpu/impl/DistanceUtils.cuh @@ -303,7 +303,7 @@ __global__ void incrementIndex( int k, idx_t increment) { for (idx_t i = blockIdx.y; i < indices.getSize(0); i += gridDim.y) { - for (int j = threadIdx.x; j < k; j += blockDim.x) { + for (auto j = threadIdx.x; j < k; j += blockDim.x) { indices[i][idx_t(blockIdx.x) * k + j] += blockIdx.x * increment; } } diff --git a/faiss/gpu/impl/GpuScalarQuantizer.cuh b/faiss/gpu/impl/GpuScalarQuantizer.cuh index c2d781419d..186ecac1c2 100644 --- a/faiss/gpu/impl/GpuScalarQuantizer.cuh +++ b/faiss/gpu/impl/GpuScalarQuantizer.cuh @@ -377,7 +377,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / (2^bits - 1) @@ -587,7 +587,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / (2^bits - 1) @@ -753,7 +753,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / (2^bits - 1) diff --git a/faiss/gpu/impl/IVFAppend.cu b/faiss/gpu/impl/IVFAppend.cu index ba5cedf3c7..dd1c9073b5 100644 --- a/faiss/gpu/impl/IVFAppend.cu +++ b/faiss/gpu/impl/IVFAppend.cu @@ -368,9 +368,9 @@ __global__ void ivfInterleavedAppend( // The set of addresses for each of the lists void** listData) { // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? - int laneId = threadIdx.x % kWarpSize; - int warpId = threadIdx.x / kWarpSize; - int warpsPerBlock = blockDim.x / kWarpSize; + auto laneId = threadIdx.x % kWarpSize; + auto warpId = threadIdx.x / kWarpSize; + auto warpsPerBlock = blockDim.x / kWarpSize; // Each block is dedicated to a separate list idx_t listId = uniqueLists[blockIdx.x]; diff --git a/faiss/gpu/impl/IVFFlatScan.cu b/faiss/gpu/impl/IVFFlatScan.cu index 457d0afeb6..5c6307b032 100644 --- a/faiss/gpu/impl/IVFFlatScan.cu +++ b/faiss/gpu/impl/IVFFlatScan.cu @@ -65,9 +65,9 @@ struct IVFFlatScan { int limit = utils::divDown(dim, Codec::kDimPerIter); // Each warp handles a separate chunk of vectors - int warpId = threadIdx.x / kWarpSize; + auto warpId = threadIdx.x / kWarpSize; // FIXME: why does getLaneId() not work when we write out below!?!?! - int laneId = threadIdx.x % kWarpSize; // getLaneId(); + auto laneId = threadIdx.x % kWarpSize; // getLaneId(); // Divide the set of vectors among the warps idx_t vecsPerWarp = utils::divUp(numVecs, kIVFFlatScanWarps); diff --git a/faiss/gpu/impl/IVFInterleaved.cu b/faiss/gpu/impl/IVFInterleaved.cu index e5b13f3aa8..fc99a49163 100644 --- a/faiss/gpu/impl/IVFInterleaved.cu +++ b/faiss/gpu/impl/IVFInterleaved.cu @@ -27,7 +27,7 @@ __global__ void ivfInterleavedScan2( Tensor distanceOut, Tensor indicesOut) { if constexpr ((NumWarpQ == 1 && NumThreadQ == 1) || NumWarpQ >= kWarpSize) { - int queryId = blockIdx.x; + auto queryId = blockIdx.x; constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; @@ -99,7 +99,7 @@ __global__ void ivfInterleavedScan2( // Merge all final results heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { // Re-adjust the value we are selecting based on the sorting order distanceOut[queryId][i] = smemK[i] * adj; auto packedIndex = smemV[i]; diff --git a/faiss/gpu/impl/IVFInterleaved.cuh b/faiss/gpu/impl/IVFInterleaved.cuh index f1da8342d4..1b7fbbe7a1 100644 --- a/faiss/gpu/impl/IVFInterleaved.cuh +++ b/faiss/gpu/impl/IVFInterleaved.cuh @@ -56,7 +56,7 @@ __global__ void ivfInterleavedScan( for (idx_t queryId = blockIdx.y; queryId < queries.getSize(0); queryId += gridDim.y) { - int probeId = blockIdx.x; + auto probeId = blockIdx.x; idx_t listId = listIds[queryId][probeId]; // Safety guard in case NaNs in input cause no list ID to be @@ -69,8 +69,8 @@ __global__ void ivfInterleavedScan( int dim = queries.getSize(1); // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? - int laneId = threadIdx.x % kWarpSize; - int warpId = threadIdx.x / kWarpSize; + auto laneId = threadIdx.x % kWarpSize; + auto warpId = threadIdx.x / kWarpSize; using EncodeT = typename Codec::EncodeT; @@ -215,7 +215,7 @@ __global__ void ivfInterleavedScan( auto distanceOutBase = distanceOut[queryId][probeId].data(); auto indicesOutBase = indicesOut[queryId][probeId].data(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { distanceOutBase[i] = smemK[i]; indicesOutBase[i] = smemV[i]; } diff --git a/faiss/gpu/impl/IVFUtilsSelect1.cu b/faiss/gpu/impl/IVFUtilsSelect1.cu index 3cb88bd9c7..c4f65bab8f 100644 --- a/faiss/gpu/impl/IVFUtilsSelect1.cu +++ b/faiss/gpu/impl/IVFUtilsSelect1.cu @@ -90,7 +90,7 @@ __global__ void pass1SelectLists( // Write out the final k-selected values; they should be all // together - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { heapDistances[queryId][sliceId][i] = smemK[i]; heapIndices[queryId][sliceId][i] = idx_t(smemV[i]); } diff --git a/faiss/gpu/impl/IVFUtilsSelect2.cu b/faiss/gpu/impl/IVFUtilsSelect2.cu index 3a94101bdb..2dbf3c0f00 100644 --- a/faiss/gpu/impl/IVFUtilsSelect2.cu +++ b/faiss/gpu/impl/IVFUtilsSelect2.cu @@ -100,7 +100,7 @@ __global__ void pass2SelectLists( // Merge all final results heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { outDistances[queryId][i] = smemK[i]; // `v` is the index in `heapIndices` diff --git a/faiss/gpu/impl/IcmEncoder.cu b/faiss/gpu/impl/IcmEncoder.cu index 20ff36e2d2..b86e390f85 100644 --- a/faiss/gpu/impl/IcmEncoder.cu +++ b/faiss/gpu/impl/IcmEncoder.cu @@ -46,8 +46,8 @@ __global__ void runIcmEncodeStep( int m) { using KVPair = Pair; - int id = blockIdx.x; // each block takes care of one vector - int code = threadIdx.x; // each thread takes care of one possible code + auto id = blockIdx.x; // each block takes care of one vector + auto code = threadIdx.x; // each thread takes care of one possible code // compute the objective value by look-up tables KVPair obj(0.0f, code); @@ -94,8 +94,8 @@ __global__ void runEvaluation( int M, int K, int dims) { - int id = blockIdx.x; // each block takes care of one vector - int d = threadIdx.x; // each thread takes care of one dimension + auto id = blockIdx.x; // each block takes care of one vector + auto d = threadIdx.x; // each thread takes care of one dimension float acc = 0.0f; #pragma unroll @@ -136,7 +136,7 @@ __global__ void runCodesPerturbation( int K, int nperts) { // each thread takes care of one vector - int id = blockIdx.x * blockDim.x + threadIdx.x; + auto id = blockIdx.x * blockDim.x + threadIdx.x; if (id >= n) { return; @@ -173,7 +173,7 @@ __global__ void runCodesSelection( int n, int M) { // each thread takes care of one vector - int id = blockIdx.x * blockDim.x + threadIdx.x; + auto id = blockIdx.x * blockDim.x + threadIdx.x; if (id >= n || objs[id] >= bestObjs[id]) { return; @@ -195,8 +195,8 @@ __global__ void runCodesSelection( * @param K number of codewords in a codebook */ __global__ void runNormAddition(float* uterm, const float* norm, int K) { - int id = blockIdx.x; - int code = threadIdx.x; + auto id = blockIdx.x; + auto code = threadIdx.x; uterm[id * K + code] += norm[code]; } diff --git a/faiss/gpu/impl/L2Norm.cu b/faiss/gpu/impl/L2Norm.cu index e76a0831ff..0e65015e44 100644 --- a/faiss/gpu/impl/L2Norm.cu +++ b/faiss/gpu/impl/L2Norm.cu @@ -40,7 +40,7 @@ __global__ void l2NormRowMajor( // these are fine to be int (just based on block dimensions) int numWarps = utils::divUp(blockDim.x, kWarpSize); int laneId = getLaneId(); - int warpId = threadIdx.x / kWarpSize; + auto warpId = threadIdx.x / kWarpSize; bool lastRowTile = (blockIdx.x == (gridDim.x - 1)); idx_t rowStart = idx_t(blockIdx.x) * RowTileSize;