diff --git a/numba_cuda/numba/cuda/debuginfo.py b/numba_cuda/numba/cuda/debuginfo.py
index 2f83ffa10..d0f8dd506 100644
--- a/numba_cuda/numba/cuda/debuginfo.py
+++ b/numba_cuda/numba/cuda/debuginfo.py
@@ -5,12 +5,70 @@
 import os
 from contextlib import contextmanager
 
+import llvmlite
 from llvmlite import ir
 from numba.cuda import types
 from numba.cuda.core import config
 from numba.cuda import cgutils
 from numba.cuda.datamodel.models import ComplexModel, UnionModel, UniTupleModel
 from numba.cuda.types.ext_types import GridGroup
+from cuda.bindings import runtime
+
+
+# Check if CUDA Toolkit and llvmlite support polymorphic debug info
+def _get_llvmlite_version():
+    """Get llvmlite version as tuple (major, minor)."""
+    try:
+        version_str = llvmlite.__version__
+        # Parse version string like "0.46.0" or "0.46.0dev"
+        parts = version_str.split(".")
+        if len(parts) < 2:
+            return (0, 0)
+        major = int(parts[0])
+        minor = int(parts[1])
+        return (major, minor)
+    except (IndexError, AttributeError, ValueError):
+        return (0, 0)
+
+
+def _check_polymorphic_debug_info_support():
+    """Check if CTK and llvmlite support polymorphic debug info.
+
+    Returns:
+        tuple: (supported: bool, use_typed_const: bool)
+          - supported: Whether feature is supported at all
+          - use_typed_const: True for typed constant,
+            False for node reference
+    """
+    # runtime.getLocalRuntimeVersion() returns (cudaError_t, version_int)
+    # Example: 13010 = CTK 13.1, 13020 = CTK 13.2
+    _, ctk_version_number = runtime.getLocalRuntimeVersion()
+    ctk_major = ctk_version_number // 1000
+    ctk_minor = (ctk_version_number % 1000) // 10
+    ctk_version = (ctk_major, ctk_minor)
+
+    llvmlite_version = _get_llvmlite_version()
+
+    # Support not available with CTK 13.1 or older
+    if ctk_version <= (13, 1):
+        return (False, False)
+
+    # llvmlite > 0.45: use typed constant
+    # llvmlite <= 0.45: use node reference
+    use_typed_const = llvmlite_version > (0, 45)
+    return (True, use_typed_const)
+
+
+# Check support and determine mode
+(DEBUG_POLY_SUPPORTED, DEBUG_POLY_USE_TYPED_CONST) = (
+    _check_polymorphic_debug_info_support()
+)
+
+# Set config based on polymorphic debug info support
+if not hasattr(config, "CUDA_DEBUG_POLY"):
+    config.CUDA_DEBUG_POLY = DEBUG_POLY_SUPPORTED
+if not hasattr(config, "CUDA_DEBUG_POLY_USE_TYPED_CONST"):
+    config.CUDA_DEBUG_POLY_USE_TYPED_CONST = DEBUG_POLY_USE_TYPED_CONST
 
 
 @contextmanager
@@ -619,7 +677,10 @@ def _var_type(self, lltype, size, datamodel=None):
             # Ignore the "tag" field, focus on the "payload" field which
             # contains the data types in memory
             if field == "payload":
-                for mod in model.inner_models():
+                # Store metadata dictionaries to create members later
+                member_metadata_dicts = []
+
+                for index, mod in enumerate(model.inner_models()):
                     dtype = mod.get_value_type()
                     membersize = self.cgctx.get_abi_sizeof(dtype)
                     basetype = self._var_type(
@@ -632,33 +693,109 @@
                     # Use a prefix "_" on type names as field names
                     membername = "_" + typename
                     memberwidth = _BYTE_SIZE * membersize
+                    # Build the metadata dictionary
+                    metadata_dict = {
+                        "tag": ir.DIToken("DW_TAG_member"),
+                        "name": membername,
+                        "baseType": basetype,
+                        # DW_TAG_member size is in bits
+                        "size": memberwidth,
+                    }
+                    if config.CUDA_DEBUG_POLY:
+                        # Polymorphic debug info with DW_TAG_variant
+                        # extraData depends on llvmlite version
+                        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
+                            metadata_dict["extraData"] = ir.IntType(8)(
+                                index
+                            )
+                        else:
+                            # Use metadata node reference
+                            metadata_dict["extraData"] = m.add_metadata(
+                                [ir.IntType(8)(index)]
+                            )
+                        # Add offset to each variant member
+                        # Offset equals the element's own width
+                        metadata_dict["offset"] = memberwidth
+                    member_metadata_dicts.append(metadata_dict)
+                    if memberwidth > maxwidth:
+                        maxwidth = memberwidth
+
+                # Create the member DIDerivedTypes
+                for metadata_dict in member_metadata_dicts:
                     derived_type = m.add_debug_info(
-                        "DIDerivedType",
-                        {
-                            "tag": ir.DIToken("DW_TAG_member"),
-                            "name": membername,
-                            "baseType": basetype,
-                            # DW_TAG_member size is in bits
-                            "size": memberwidth,
-                        },
+                        "DIDerivedType", metadata_dict
                     )
                     meta.append(derived_type)
-                    if memberwidth > maxwidth:
-                        maxwidth = memberwidth
-            fake_union_name = "dbg_poly_union"
-            return m.add_debug_info(
-                "DICompositeType",
-                {
-                    "file": self.difile,
-                    "tag": ir.DIToken("DW_TAG_union_type"),
-                    "name": fake_union_name,
-                    "identifier": str(lltype),
-                    "elements": m.add_metadata(meta),
-                    "size": maxwidth,
-                },
-                is_distinct=True,
-            )
+            if config.CUDA_DEBUG_POLY:
+                # Polymorphic variable debug info generation
+                wrapper_struct_size = 2 * maxwidth
+                # Generate unique discriminator name based on composite type
+                variant_elements_metadata = m.add_metadata(meta)
+                discriminator_unique_id = str(id(variant_elements_metadata))
+                discriminator_name = f"discriminator-{discriminator_unique_id}"
+                discriminator = m.add_debug_info(
+                    "DIDerivedType",
+                    {
+                        "tag": ir.DIToken("DW_TAG_member"),
+                        "name": discriminator_name,
+                        "baseType": m.add_debug_info(
+                            "DIBasicType",
+                            {
+                                "name": "int",
+                                "size": _BYTE_SIZE,
+                                "encoding": ir.DIToken("DW_ATE_unsigned"),
+                            },
+                        ),
+                        "size": _BYTE_SIZE,
+                        "flags": ir.DIToken("DIFlagArtificial"),
+                    },
+                )
+                # Create the final variant_part with actual members
+                variant_unique_identifier = discriminator_unique_id
+                variant_part_type = m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_variant_part"),
+                        "name": "variant_part",
+                        "identifier": variant_unique_identifier,
+                        "elements": variant_elements_metadata,
+                        "size": maxwidth,
+                        "discriminator": discriminator,
+                    },
+                )
+                # Create elements metadata for wrapper struct
+                elements_metadata = m.add_metadata(
+                    [discriminator, variant_part_type]
+                )
+                unique_identifier = str(id(elements_metadata))
+                wrapper_struct = m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_structure_type"),
+                        "name": "variant_wrapper_struct",
+                        "identifier": unique_identifier,
+                        "elements": elements_metadata,
+                        "size": wrapper_struct_size,
+                    },
+                )
+                return wrapper_struct
+            else:
+                fake_union_name = "dbg_poly_union"
+                return m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_union_type"),
+                        "name": fake_union_name,
+                        "identifier": str(lltype),
+                        "elements": m.add_metadata(meta),
+                        "size": maxwidth,
+                    },
+                    is_distinct=True,
+                )
 
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
index 0f47ea6b9..63fe5ba24 100644
--- a/numba_cuda/numba/cuda/lowering.py
+++ b/numba_cuda/numba/cuda/lowering.py
@@ -1681,6 +1681,51 @@ def storevar(self, value, name, argidx=None):
         """
         Store the value into the given variable.
         """
+        # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
+        if config.CUDA_DEBUG_POLY:
+            src_name = name.split(".")[0]
+            if src_name in self.poly_var_typ_map:
+                # Ensure allocation happens first (if needed)
+                fetype = self.typeof(name)
+                self._alloca_var(name, fetype)
+                # Discriminant and data are located in the same union
+                ptr = self.poly_var_loc_map[src_name]
+                # Firstly write discriminant to the beginning of union as i8
+                dtype = types.UnionType(self.poly_var_typ_map[src_name])
+                # Compute discriminant = index of type in sorted union
+                if isinstance(fetype, types.Literal):
+                    lookup_type = fetype.literal_type
+                else:
+                    lookup_type = fetype
+                discriminant_val = list(dtype.types).index(lookup_type)
+                # Bitcast union pointer directly to i8* and write
+                # discriminant at offset 0
+                discriminant_ptr = self.builder.bitcast(
+                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                discriminant_i8 = llvm_ir.Constant(
+                    llvm_ir.IntType(8), discriminant_val
+                )
+                self.builder.store(discriminant_i8, discriminant_ptr)
+                # Secondly write data at offset = sizeof(fetype) in bytes
+                lltype = self.context.get_value_type(fetype)
+                sizeof_bytes = self.context.get_abi_sizeof(lltype)
+                # Bitcast to i8* and use byte-level GEP
+                byte_ptr = self.builder.bitcast(
+                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                data_byte_ptr = self.builder.gep(
+                    byte_ptr,
+                    [llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
+                )
+                # Cast to the correct type pointer
+                castptr = self.builder.bitcast(
+                    data_byte_ptr, llvm_ir.PointerType(lltype)
+                )
+                self.builder.store(value, castptr)
+                return
+
+        # For non-polymorphic variables, use parent implementation
         super().storevar(value, name, argidx)
 
         # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
@@ -1806,8 +1851,13 @@ def _alloca_var(self, name, fetype):
             datamodel = self.context.data_model_manager[dtype]
             # UnionType has sorted set of types, max at last index
             maxsizetype = dtype.types[-1]
-            # Create a single element aggregate type
-            aggr_type = types.UniTuple(maxsizetype, 1)
+            if config.CUDA_DEBUG_POLY:
+                # allocate double the max element size to house
+                # [discriminant + data]
+                aggr_type = types.UniTuple(maxsizetype, 2)
+            else:
+                # allocate single element for data only
+                aggr_type = types.UniTuple(maxsizetype, 1)
             lltype = self.context.get_value_type(aggr_type)
             ptr = self.alloca_lltype(src_name, lltype, datamodel)
             # save the location of the union type for polymorphic var
@@ -1858,9 +1908,27 @@ def getvar(self, name):
             src_name = name.split(".")[0]
             fetype = self.typeof(name)
             lltype = self.context.get_value_type(fetype)
-            castptr = self.builder.bitcast(
-                self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
-            )
+            ptr = self.poly_var_loc_map[src_name]
+
+            if config.CUDA_DEBUG_POLY:
+                # With CUDA_DEBUG_POLY enabled, read value at
+                # offset = sizeof(fetype) in bytes
+                sizeof_bytes = self.context.get_abi_sizeof(lltype)
+                # Bitcast to i8* and use byte-level GEP
+                byte_ptr = self.builder.bitcast(
+                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                value_byte_ptr = self.builder.gep(
+                    byte_ptr,
+                    [llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
+                )
+                # Cast to the correct type pointer
+                castptr = self.builder.bitcast(
+                    value_byte_ptr, llvm_ir.PointerType(lltype)
+                )
+            else:
+                # Otherwise, just bitcast to the correct type
+                castptr = self.builder.bitcast(ptr, llvm_ir.PointerType(lltype))
             return castptr
         else:
             return super().getvar(name)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 4b1d33514..6063b229f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -7,6 +7,7 @@
 from numba import cuda
 from numba.cuda import types
 from numba.cuda.testing import CUDATestCase
+from numba.cuda.core import config
 from textwrap import dedent
 import math
 import itertools
@@ -403,6 +404,9 @@ def f(x):
         match = re.compile(pat).search(llvm_ir)
         self.assertIsNone(match, msg=llvm_ir)
 
+    @unittest.skipIf(
+        config.CUDA_DEBUG_POLY, "Uses old union format, not variant_part"
+    )
     def test_union_poly_types(self):
         sig = (types.int32, types.int32)
 
@@ -460,6 +464,46 @@ def a_union_use_case(arg, results):
         expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
         self.assertIn(expected, out.getvalue())
 
+    @unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
+    def test_poly_variant_part(self):
+        """Test polymorphic variables with DW_TAG_variant_part.
+
+        This test verifies that when CUDA_DEBUG_POLY is enabled,
+        polymorphic variables generate proper DWARF5 variant_part
+        debug information with discriminator and variant members.
+        """
+        # Typed constant: i8 0, i8 1, etc. | Node reference: !123, !456
+        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
+            extradata_pattern = "i8 {{[0-9]+}}"
+        else:
+            extradata_pattern = "{{![0-9]+}}"
+
+        @cuda.jit("void()", debug=True, opt=False)
+        def f():
+            foo = 100  # noqa: F841
+            foo = 3.14  # noqa: F841
+            foo = True  # noqa: F841
+            foo = np.int32(42)  # noqa: F841
+
+        llvm_ir = f.inspect_llvm()[tuple()]
+
+        # Build FileCheck pattern dynamically based on config
+        # Capture node IDs and verify the hierarchical structure
+        check_pattern = """
+            CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
+            CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
+            CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
+            CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator-{{[0-9]+}}"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
+            CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
+            CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
+        """.replace("EXTRADATA", extradata_pattern)
+
+        self.assertFileCheckMatches(llvm_ir, check_pattern)
+
     def test_DW_LANG(self):
         @cuda.jit(debug=True, opt=False)
         def foo():