diff --git a/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp b/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp
index 3bc1bcc5a5048..2afaddbb18d71 100644
--- a/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp
+++ b/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/SYCLLowerIR/LocalAccessorToSharedMemory.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
@@ -248,10 +249,15 @@ class LocalAccessorToSharedMemory : public ModulePass {
     if (!NvvmMetadata)
       return;
 
+    // It is possible that the annotations node contains multiple pointers to
+    // the same metadata, recognise visited ones.
+    SmallSet<MDNode *, 4> Visited;
     for (auto *MetadataNode : NvvmMetadata->operands()) {
-      if (MetadataNode->getNumOperands() != 3)
+      if (Visited.contains(MetadataNode) || MetadataNode->getNumOperands() != 3)
         continue;
 
+      Visited.insert(MetadataNode);
+
       // NVPTX identifies kernel entry points using metadata nodes of the form:
       //   !X = !{<function>, !"kernel", i32 1}
       const MDOperand &TypeOperand = MetadataNode->getOperand(1);
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll
new file mode 100644
index 0000000000000..458a00bab5c28
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll
@@ -0,0 +1,35 @@
+; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
+; ModuleID = 'multiple-annotations.bc'
+source_filename = "multiple-annotations.ll"
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; This test checks that the transformation is applied in the basic case with multiple identical annotations nodes.
+
+; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
+
+; Function Attrs: noinline
+define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
+entry:
+; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
+  %0 = load i32, i32 addrspace(3)* %a
+; CHECK: %2 = load i32, i32 addrspace(3)* %a
+  %1 = load i32, i32 addrspace(1)* %b
+; CHECK: %3 = load i32, i32 addrspace(1)* %b
+  %2 = add i32 %c, %c
+; CHECK: %4 = add i32 %c, %c
+  ret void
+}
+
+!nvvm.annotations = !{!0, !0}
+!nvvmir.version = !{!5}
+
+!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!1 = !{null, !"align", i32 8}
+!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
+!3 = !{null, !"align", i32 16}
+!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
+!5 = !{i32 1, i32 4}