15 changes: 14 additions & 1 deletion numba_cuda/numba/cuda/compiler.py
@@ -40,6 +40,7 @@
from numba.cuda.cudadrv import nvvm
from numba.cuda.descriptor import cuda_target
from numba.cuda.target import CUDACABICallConv
from numba.cuda import lowering


def _nvvm_options_type(x):
@@ -163,6 +164,18 @@ def run_pass(self, state):
        return True


@register_pass(mutates_CFG=True, analysis_only=False)
class CUDANativeLowering(NativeLowering):
"""Lowering pass for a CUDA native function IR described solely in terms of
Numba's standard `numba.core.ir` nodes."""

_name = "cuda_native_lowering"

@property
def lowering_class(self):
return lowering.CUDALower


class CUDABytecodeInterpreter(Interpreter):
    # Based on the superclass implementation, but names the resulting variable
    # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
@@ -251,7 +264,7 @@ def define_cuda_lowering_pipeline(self, state):

        # lower
        pm.add_pass(CreateLibrary, "create library")
-        pm.add_pass(NativeLowering, "native lowering")
+        pm.add_pass(CUDANativeLowering, "cuda native lowering")
        pm.add_pass(CUDABackend, "cuda backend")

        pm.finalize()
94 changes: 92 additions & 2 deletions numba_cuda/numba/cuda/debuginfo.py
@@ -1,14 +1,20 @@
from llvmlite import ir
-from numba.core import types
+from numba.core import types, cgutils
from numba.core.debuginfo import DIBuilder
from numba.cuda.types import GridGroup

_BYTE_SIZE = 8


class CUDADIBuilder(DIBuilder):
    def __init__(self, module, filepath, cgctx, directives_only):
        super().__init__(module, filepath, cgctx, directives_only)
        # Cache used to deduplicate local variable metadata by type and line
        self._vartypelinemap = {}

    def _var_type(self, lltype, size, datamodel=None):
        is_bool = False
        is_int_literal = False
        is_grid_group = False

        if isinstance(lltype, ir.IntType):
@@ -20,15 +26,23 @@ def _var_type(self, lltype, size, datamodel=None):
                name = str(datamodel.fe_type)
                if isinstance(datamodel.fe_type, types.Boolean):
                    is_bool = True
                    if isinstance(datamodel.fe_type, types.BooleanLiteral):
                        name = "bool"
                elif isinstance(datamodel.fe_type, types.Integer):
                    if isinstance(datamodel.fe_type, types.IntegerLiteral):
                        name = f"int{_BYTE_SIZE * size}"
                        is_int_literal = True
                elif isinstance(datamodel.fe_type, GridGroup):
                    is_grid_group = True

-        if is_bool or is_grid_group:
+        if is_bool or is_int_literal or is_grid_group:
            m = self.module
            bitsize = _BYTE_SIZE * size
            # Boolean type workaround until upstream Numba is fixed
            if is_bool:
                ditok = "DW_ATE_boolean"
            elif is_int_literal:
                ditok = "DW_ATE_signed"
            # GridGroup type should use numba.cuda implementation
            elif is_grid_group:
                ditok = "DW_ATE_unsigned"
@@ -44,3 +58,79 @@

        # For other cases, use upstream Numba implementation
        return super()._var_type(lltype, size, datamodel=datamodel)

    def mark_variable(
        self,
        builder,
        allocavalue,
        name,
        lltype,
        size,
        line,
        datamodel=None,
        argidx=None,
    ):
        if name.startswith("$") or "." in name:
            # Do not emit llvm.dbg.declare for user variable aliases
            return
        else:
            int_type = (ir.IntType,)
            real_type = ir.FloatType, ir.DoubleType
            if isinstance(lltype, int_type + real_type):
                # Start with scalar variables, switching llvm.dbg.declare
                # to llvm.dbg.value
                return
            else:
                return super().mark_variable(
                    builder,
                    allocavalue,
                    name,
                    lltype,
                    size,
                    line,
                    datamodel,
                    argidx,
                )

    def update_variable(
        self,
        builder,
        value,
        name,
        lltype,
        size,
        line,
        datamodel=None,
        argidx=None,
    ):
        m = self.module
        fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3)
        decl = cgutils.get_or_insert_function(m, fnty, "llvm.dbg.value")

        mdtype = self._var_type(lltype, size, datamodel)
        index = name.find(".")
        if index >= 0:
            name = name[:index]
        # Merge DILocalVariable nodes that share a name and type but differ
        # in line: use the cached [(name, lltype) -> line] mapping to
        # deduplicate the metadata, with the lltype as part of the key.
        key = (name, lltype)
        if key in self._vartypelinemap:
            line = self._vartypelinemap[key]
        else:
            self._vartypelinemap[key] = line
        arg_index = 0 if argidx is None else argidx
        mdlocalvar = m.add_debug_info(
            "DILocalVariable",
            {
                "name": name,
                "arg": arg_index,
                "scope": self.subprograms[-1],
                "file": self.difile,
                "line": line,
                "type": mdtype,
            },
        )
        mdexpr = m.add_debug_info("DIExpression", {})

        return builder.call(decl, [value, mdlocalvar, mdexpr])
43 changes: 43 additions & 0 deletions numba_cuda/numba/cuda/lowering.py
@@ -0,0 +1,43 @@
from numba.core.lowering import Lower
from llvmlite import ir


class CUDALower(Lower):
    def storevar(self, value, name, argidx=None):
        """
        Store the value into the given variable.
        """
        super().storevar(value, name, argidx)

        # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
        # variables immediately after a store instruction.
        if (
            self.context.enable_debuginfo
            # Conditions used to elide stores in parent method
            and (
                name not in self._singly_assigned_vars
                or self._disable_sroa_like_opt
            )
            # No emission of debuginfo for internal names
            and not name.startswith("$")
        ):
            # Emit debug value for user variable
            fetype = self.typeof(name)
            lltype = self.context.get_value_type(fetype)
            int_type = (ir.IntType,)
            real_type = ir.FloatType, ir.DoubleType
            if isinstance(lltype, int_type + real_type):
                # Emit debug value for scalar variable
                sizeof = self.context.get_abi_sizeof(lltype)
                datamodel = self.context.data_model_manager[fetype]
                line = self.loc.line if argidx is None else self.defn_loc.line
                self.debuginfo.update_variable(
                    self.builder,
                    value,
                    name,
                    lltype,
                    sizeof,
                    line,
                    datamodel,
                    argidx,
                )
46 changes: 46 additions & 0 deletions numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -310,6 +310,52 @@ def test_kernel_args_types_dump(self):
        with captured_stdout():
            self._test_kernel_args_types()

    def test_llvm_dbg_value(self):
        sig = (types.int32, types.int32)

        @cuda.jit("void(int32, int32)", debug=True, opt=False)
        def f(x, y):
            z = x  # noqa: F841
            z = 100  # noqa: F841
            z = y  # noqa: F841
            z = True  # noqa: F841

        llvm_ir = f.inspect_llvm(sig)
        # Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value
        pat1 = r'call void @"llvm.dbg.declare"'
        match = re.compile(pat1).search(llvm_ir)
        self.assertIsNone(match, msg=llvm_ir)
        pat2 = r'call void @"llvm.dbg.value"'
        match = re.compile(pat2).search(llvm_ir)
        self.assertIsNotNone(match, msg=llvm_ir)

    def test_no_user_var_alias(self):
        sig = (types.int32, types.int32)

        @cuda.jit("void(int32, int32)", debug=True, opt=False)
        def f(x, y):
            z = x  # noqa: F841
            z = y  # noqa: F841

        llvm_ir = f.inspect_llvm(sig)
        pat = r'!DILocalVariable.*name:\s+"z\$1".*'
        match = re.compile(pat).search(llvm_ir)
        self.assertIsNone(match, msg=llvm_ir)

    def test_no_literal_type(self):
        sig = (types.int32,)

        @cuda.jit("void(int32)", debug=True, opt=False)
        def f(x):
            z = x  # noqa: F841
            z = 100  # noqa: F841
            z = True  # noqa: F841

        llvm_ir = f.inspect_llvm(sig)
        pat = r'!DIBasicType.*name:\s+"Literal.*'
        match = re.compile(pat).search(llvm_ir)
        self.assertIsNone(match, msg=llvm_ir)


if __name__ == "__main__":
    unittest.main()