diff --git a/numba_cuda/numba/cuda/debuginfo.py b/numba_cuda/numba/cuda/debuginfo.py
index d0f8dd506..99e27bf9f 100644
--- a/numba_cuda/numba/cuda/debuginfo.py
+++ b/numba_cuda/numba/cuda/debuginfo.py
@@ -4,6 +4,7 @@
 import abc
 import os
 from contextlib import contextmanager
+from enum import IntEnum
 
 import llvmlite
 from llvmlite import ir
@@ -71,6 +72,16 @@ def _check_polymorphic_debug_info_support():
     config.CUDA_DEBUG_POLY_USE_TYPED_CONST = DEBUG_POLY_USE_TYPED_CONST
 
 
+class DwarfAddressClass(IntEnum):
+    GENERIC = 0x00
+    GLOBAL = 0x01
+    REGISTER = 0x02
+    CONSTANT = 0x05
+    LOCAL = 0x06
+    PARAMETER = 0x07
+    SHARED = 0x08
+
+
 @contextmanager
 def suspend_emission(builder):
     """Suspends the emission of debug_metadata for the duration of the context
@@ -179,6 +190,19 @@ def initialize(self):
         # constructing subprograms
         self.dicompileunit = self._di_compile_unit()
 
+    def get_dwarf_address_class(self, addrspace):
+        """Map an NVVM address space to a DWARF address class (or None)."""
+        from numba.cuda.cudadrv import nvvm
+
+        addrspace_to_addrclass_dict = {
+            nvvm.ADDRSPACE_GENERIC: None,
+            nvvm.ADDRSPACE_GLOBAL: DwarfAddressClass.GLOBAL,
+            nvvm.ADDRSPACE_SHARED: DwarfAddressClass.SHARED,
+            nvvm.ADDRSPACE_CONSTANT: DwarfAddressClass.CONSTANT,
+            nvvm.ADDRSPACE_LOCAL: DwarfAddressClass.LOCAL,
+        }
+        return addrspace_to_addrclass_dict.get(addrspace)
+
     def _var_type(self, lltype, size, datamodel=None):
         if self._DEBUG:
             print(
@@ -622,6 +646,11 @@ def __init__(self, module, filepath, cgctx, directives_only):
         super().__init__(module, filepath, cgctx, directives_only)
         # Cache for local variable metadata type and line deduplication
         self._vartypelinemap = {}
+        # Variable address space dictionary
+        self._var_addrspace_map = {}
+
+    def _set_addrspace_map(self, map):
+        self._var_addrspace_map = map
 
     def _var_type(self, lltype, size, datamodel=None):
         is_bool = False
@@ -796,6 +825,65 @@ def _var_type(self, lltype, size, datamodel=None):
                 },
                 is_distinct=True,
             )
+
+        # Check if there's actually address space info to handle
+        addrspace = getattr(self, "_addrspace", None)
+        if (
+            isinstance(lltype, ir.LiteralStructType)
+            and datamodel is not None
+            and datamodel.inner_models()
+            and addrspace not in (None, 0)
+        ):
+            # Process struct with datamodel that has address space info
+            meta = []
+            offset = 0
+            for element, field, model in zip(
+                lltype.elements, datamodel._fields, datamodel.inner_models()
+            ):
+                size_field = self.cgctx.get_abi_sizeof(element)
+                if isinstance(element, ir.PointerType) and field == "data":
+                    # Create pointer type with correct address space
+                    pointee_size = self.cgctx.get_abi_sizeof(element.pointee)
+                    pointee_model = getattr(model, "_pointee_model", None)
+                    pointee_type = self._var_type(
+                        element.pointee, pointee_size, datamodel=pointee_model
+                    )
+                    meta_ptr = {
+                        "tag": ir.DIToken("DW_TAG_pointer_type"),
+                        "baseType": pointee_type,
+                        "size": _BYTE_SIZE * size_field,
+                    }
+                    dwarf_addrclass = self.get_dwarf_address_class(addrspace)
+                    if dwarf_addrclass is not None:
+                        meta_ptr["dwarfAddressSpace"] = int(dwarf_addrclass)
+                    basetype = m.add_debug_info("DIDerivedType", meta_ptr)
+                else:
+                    basetype = self._var_type(
+                        element, size_field, datamodel=model
+                    )
+                derived_type = m.add_debug_info(
+                    "DIDerivedType",
+                    {
+                        "tag": ir.DIToken("DW_TAG_member"),
+                        "name": field,
+                        "baseType": basetype,
+                        "size": _BYTE_SIZE * size_field,
+                        "offset": offset,
+                    },
+                )
+                meta.append(derived_type)
+                offset += _BYTE_SIZE * size_field
+
+            return m.add_debug_info(
+                "DICompositeType",
+                {
+                    "tag": ir.DIToken("DW_TAG_structure_type"),
+                    "name": f"{datamodel.fe_type}",
+                    "elements": m.add_metadata(meta),
+                    "size": offset,
+                },
+                is_distinct=True,
+            )
 
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)
@@ -848,16 +936,22 @@ def mark_variable(
                 # to llvm.dbg.value
                 return
             else:
-                return super().mark_variable(
-                    builder,
-                    allocavalue,
-                    name,
-                    lltype,
-                    size,
-                    line,
-                    datamodel,
-                    argidx,
-                )
+                # Look up address space for this variable
+                self._addrspace = self._var_addrspace_map.get(name)
+                try:
+                    return super().mark_variable(
+                        builder,
+                        allocavalue,
+                        name,
+                        lltype,
+                        size,
+                        line,
+                        datamodel,
+                        argidx,
+                    )
+                finally:
+                    # Clean up address space info
+                    self._addrspace = None
 
     def update_variable(
         self,
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
index 63fe5ba24..56d417c6e 100644
--- a/numba_cuda/numba/cuda/lowering.py
+++ b/numba_cuda/numba/cuda/lowering.py
@@ -11,6 +11,7 @@
 from numba.cuda import HAS_NUMBA
 from numba.cuda.core import ir
 from numba.cuda import debuginfo, cgutils, utils, typing, types
+from numba import cuda
 from numba.cuda.core import (
     ir_utils,
     targetconfig,
@@ -1677,10 +1678,31 @@ def decref(self, typ, val):
 
 
 class CUDALower(Lower):
+    def _is_shared_array_call(self, fnty):
+        """Return True if fnty's typing key is cuda.shared.array."""
+        if not hasattr(fnty, "typing_key"):
+            return False
+        return fnty.typing_key is cuda.shared.array
+
+    def _lower_call_normal(self, fnty, expr, signature):
+        # Set flag for subsequent store to track shared address space
+        if self.context.enable_debuginfo and self._is_shared_array_call(fnty):
+            self._pending_shared_store = True
+
+        return super()._lower_call_normal(fnty, expr, signature)
+
     def storevar(self, value, name, argidx=None):
         """
         Store the value into the given variable.
         """
+        # Track address space for debug info
+        if self.context.enable_debuginfo and self._pending_shared_store:
+            from numba.cuda.cudadrv import nvvm
+
+            self._addrspace_map[name] = nvvm.ADDRSPACE_SHARED
+            if not name.startswith("$") and not name.startswith("."):
+                self._pending_shared_store = False
+
         # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
         if config.CUDA_DEBUG_POLY:
             src_name = name.split(".")[0]
@@ -1806,6 +1828,13 @@ def pre_lower(self):
         """
         super().pre_lower()
 
+        # Track address space for debug info
+        self._addrspace_map = {}
+        self._pending_shared_store = False
+        if self.context.enable_debuginfo:
+            self.debuginfo._set_addrspace_map(self._addrspace_map)
+
+        # Track polymorphic variables for debug info
         self.poly_var_typ_map = {}
         self.poly_var_loc_map = {}
         self.poly_var_set = set()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 6063b229f..308040ff7 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -6,6 +6,7 @@
 from numba.cuda.testing import skip_on_cudasim
 from numba import cuda
 from numba.cuda import types
+from numba.cuda.np import numpy_support
 from numba.cuda.testing import CUDATestCase
 from numba.cuda.core import config
 from textwrap import dedent
@@ -884,6 +885,94 @@ def foo():
             """,
         )
 
+    # shared_arr -> composite -> elements[4] (data field at index 4) -> pointer with dwarfAddressSpace: 8
+    # local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace: 8
+    address_class_filechecks = r"""
+    CHECK-DAG: [[SHARED_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "shared_arr"{{.*}}type: [[SHARED_COMPOSITE:![0-9]+]]
+    CHECK-DAG: [[SHARED_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[SHARED_ELEMENTS:![0-9]+]]
+    CHECK-DAG: [[SHARED_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[SHARED_DATA:![0-9]+]], {{.*}}, {{.*}}}
+    CHECK-DAG: [[SHARED_DATA]] = !DIDerivedType(baseType: [[SHARED_PTR:![0-9]+]], name: "data"
+    CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}dwarfAddressSpace: 8{{.*}}tag: DW_TAG_pointer_type
+
+    CHECK-DAG: [[LOCAL_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "local_arr"{{.*}}type: [[LOCAL_COMPOSITE:![0-9]+]]
+    CHECK-DAG: [[LOCAL_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[LOCAL_ELEMENTS:![0-9]+]]
+    CHECK-DAG: [[LOCAL_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[LOCAL_DATA:![0-9]+]], {{.*}}, {{.*}}}
+    CHECK-DAG: [[LOCAL_DATA]] = !DIDerivedType(baseType: [[LOCAL_PTR:![0-9]+]], name: "data"
+    CHECK-DAG: [[LOCAL_PTR]] = !DIDerivedType(baseType: {{.*}}tag: DW_TAG_pointer_type
+    CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace: 8
+    """
+
+    def _test_shared_memory_address_class(self, dtype):
+        """Test that shared memory arrays have correct DWARF address class.
+
+        Shared memory pointers should have dwarfAddressSpace: 8 (the DWARF
+        address class for CUDA shared memory) in their debug metadata, while
+        regular local arrays should not have this annotation.
+        """
+        sig = (numpy_support.from_dtype(dtype),)
+
+        @cuda.jit(sig, debug=True, opt=False)
+        def kernel_with_shared(data):
+            shared_arr = cuda.shared.array(32, dtype=dtype)
+            local_arr = cuda.local.array(32, dtype=dtype)
+            idx = cuda.grid(1)
+            if idx < 32:
+                shared_arr[idx] = data + idx
+                local_arr[idx] = data * 2 + idx
+            cuda.syncthreads()
+            if idx == 0:
+                result = dtype(0)
+                for i in range(32):
+                    result += shared_arr[i] + local_arr[i]
+
+        llvm_ir = kernel_with_shared.inspect_llvm(sig)
+
+        self.assertFileCheckMatches(llvm_ir, self.address_class_filechecks)
+
+    def test_shared_memory_address_class_int32(self):
+        self._test_shared_memory_address_class(np.int32)
+
+    def test_shared_memory_address_class_complex64(self):
+        self._test_shared_memory_address_class(np.complex64)
+
+    def test_shared_memory_address_class_boolean(self):
+        self._test_shared_memory_address_class(np.bool_)
+
+    def test_shared_memory_address_class_float16(self):
+        self._test_shared_memory_address_class(np.float16)
+
+    def test_shared_memory_address_class_record(self):
+        dtype = np.dtype(
+            [
+                ("a", np.int32),
+                ("b", np.float32),
+            ]
+        )
+        sig = (numpy_support.from_dtype(dtype),)
+
+        @cuda.jit(sig, debug=True, opt=False)
+        def kernel_with_shared(data):
+            shared_arr = cuda.shared.array(32, dtype=dtype)
+            local_arr = cuda.local.array(32, dtype=dtype)
+            result = cuda.local.array(1, dtype=dtype)
+            idx = cuda.grid(1)
+            if idx < 32:
+                shared_arr[idx].a = data.a + idx
+                local_arr[idx].a = data.a * 2 + idx
+                shared_arr[idx].b = data.b + idx
+                local_arr[idx].b = data.b * 2 + idx
+            cuda.syncthreads()
+            if idx == 0:
+                result[0].a = 0
+                result[0].b = 0.0
+                for i in range(32):
+                    result[0].a += shared_arr[i].a + local_arr[i].a
+                    result[0].b += shared_arr[i].b + local_arr[i].b
+
+        llvm_ir = kernel_with_shared.inspect_llvm(sig)
+
+        self.assertFileCheckMatches(llvm_ir, self.address_class_filechecks)
+
 
 if __name__ == "__main__":
     unittest.main()