Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 161 additions & 24 deletions numba_cuda/numba/cuda/debuginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,70 @@
import os
from contextlib import contextmanager

import llvmlite
from llvmlite import ir
from numba.cuda import types
from numba.cuda.core import config
from numba.cuda import cgutils
from numba.cuda.datamodel.models import ComplexModel, UnionModel, UniTupleModel
from numba.cuda.types.ext_types import GridGroup
from cuda.bindings import runtime


# Check if CUDA Toolkit and llvmlite support polymorphic debug info
def _get_llvmlite_version():
"""Get llvmlite version as tuple (major, minor)."""
try:
version_str = llvmlite.__version__
# Parse version string like "0.46.0" or "0.46.0dev"
parts = version_str.split(".")
if len(parts) < 2:
return (0, 0)
major = int(parts[0])
minor = int(parts[1])
return (major, minor)
except (IndexError, AttributeError, ValueError):
return (0, 0)


def _check_polymorphic_debug_info_support():
    """Check if CTK and llvmlite support polymorphic debug info.

    Returns:
        tuple: (supported: bool, use_typed_const: bool)
            - supported: Whether the feature is supported at all
            - use_typed_const: True for typed constant,
              False for node reference
    """
    # runtime.getLocalRuntimeVersion() returns (cudaError_t, version_int);
    # the version is encoded as e.g. 13010 for CTK 13.1, 13020 for 13.2.
    # The error code is ignored here.
    _, raw_version = runtime.getLocalRuntimeVersion()
    ctk_version = (raw_version // 1000, (raw_version % 1000) // 10)

    # The feature is unavailable on CTK 13.1 and older.
    if ctk_version <= (13, 1):
        return (False, False)

    # llvmlite > 0.45 takes a typed constant for extraData; older
    # releases take a metadata node reference instead.
    return (True, _get_llvmlite_version() > (0, 45))


# Probe feature support once at import time and expose the results as
# module-level constants.
DEBUG_POLY_SUPPORTED, DEBUG_POLY_USE_TYPED_CONST = (
    _check_polymorphic_debug_info_support()
)

# Mirror the detection results onto the config module, but never clobber
# a value that was already set elsewhere.
for _attr, _default in (
    ("CUDA_DEBUG_POLY", DEBUG_POLY_SUPPORTED),
    ("CUDA_DEBUG_POLY_USE_TYPED_CONST", DEBUG_POLY_USE_TYPED_CONST),
):
    if not hasattr(config, _attr):
        setattr(config, _attr, _default)
del _attr, _default


@contextmanager
Expand Down Expand Up @@ -619,7 +677,10 @@ def _var_type(self, lltype, size, datamodel=None):
# Ignore the "tag" field, focus on the "payload" field which
# contains the data types in memory
if field == "payload":
for mod in model.inner_models():
# Store metadata dictionaries to create members later
member_metadata_dicts = []

for index, mod in enumerate(model.inner_models()):
dtype = mod.get_value_type()
membersize = self.cgctx.get_abi_sizeof(dtype)
basetype = self._var_type(
Expand All @@ -632,33 +693,109 @@ def _var_type(self, lltype, size, datamodel=None):
# Use a prefix "_" on type names as field names
membername = "_" + typename
memberwidth = _BYTE_SIZE * membersize
# Build the metadata dictionary
metadata_dict = {
"tag": ir.DIToken("DW_TAG_member"),
"name": membername,
"baseType": basetype,
# DW_TAG_member size is in bits
"size": memberwidth,
}
if config.CUDA_DEBUG_POLY:
# Polymorphic debug info with DW_TAG_variant
# extraData depends on llvmlite version
if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
metadata_dict["extraData"] = ir.IntType(8)(
index
)
else:
# Use metadata node reference
metadata_dict["extraData"] = m.add_metadata(
[ir.IntType(8)(index)]
)
# Add offset to each variant member
# Offset equals the element's own width
metadata_dict["offset"] = memberwidth
member_metadata_dicts.append(metadata_dict)
if memberwidth > maxwidth:
maxwidth = memberwidth

# Create the member DIDerivedTypes
for metadata_dict in member_metadata_dicts:
derived_type = m.add_debug_info(
"DIDerivedType",
{
"tag": ir.DIToken("DW_TAG_member"),
"name": membername,
"baseType": basetype,
# DW_TAG_member size is in bits
"size": memberwidth,
},
"DIDerivedType", metadata_dict
)
meta.append(derived_type)
if memberwidth > maxwidth:
maxwidth = memberwidth

fake_union_name = "dbg_poly_union"
return m.add_debug_info(
"DICompositeType",
{
"file": self.difile,
"tag": ir.DIToken("DW_TAG_union_type"),
"name": fake_union_name,
"identifier": str(lltype),
"elements": m.add_metadata(meta),
"size": maxwidth,
},
is_distinct=True,
)
if config.CUDA_DEBUG_POLY:
# Polymorphic variable debug info generation
wrapper_struct_size = 2 * maxwidth
# Generate unique discriminator name based on composite type
variant_elements_metadata = m.add_metadata(meta)
discriminator_unique_id = str(id(variant_elements_metadata))
discriminator_name = f"discriminator-{discriminator_unique_id}"
discriminator = m.add_debug_info(
"DIDerivedType",
{
"tag": ir.DIToken("DW_TAG_member"),
"name": discriminator_name,
"baseType": m.add_debug_info(
"DIBasicType",
{
"name": "int",
"size": _BYTE_SIZE,
"encoding": ir.DIToken("DW_ATE_unsigned"),
},
),
"size": _BYTE_SIZE,
"flags": ir.DIToken("DIFlagArtificial"),
},
)
# Create the final variant_part with actual members
variant_unique_identifier = discriminator_unique_id
variant_part_type = m.add_debug_info(
"DICompositeType",
{
"file": self.difile,
"tag": ir.DIToken("DW_TAG_variant_part"),
"name": "variant_part",
"identifier": variant_unique_identifier,
"elements": variant_elements_metadata,
"size": maxwidth,
"discriminator": discriminator,
},
)
# Create elements metadata for wrapper struct
elements_metadata = m.add_metadata(
[discriminator, variant_part_type]
)
unique_identifier = str(id(elements_metadata))
wrapper_struct = m.add_debug_info(
"DICompositeType",
{
"file": self.difile,
"tag": ir.DIToken("DW_TAG_structure_type"),
"name": "variant_wrapper_struct",
"identifier": unique_identifier,
"elements": elements_metadata,
"size": wrapper_struct_size,
},
)
return wrapper_struct
else:
fake_union_name = "dbg_poly_union"
return m.add_debug_info(
"DICompositeType",
{
"file": self.difile,
"tag": ir.DIToken("DW_TAG_union_type"),
"name": fake_union_name,
"identifier": str(lltype),
"elements": m.add_metadata(meta),
"size": maxwidth,
},
is_distinct=True,
)
# For other cases, use upstream Numba implementation
return super()._var_type(lltype, size, datamodel=datamodel)

Expand Down
78 changes: 73 additions & 5 deletions numba_cuda/numba/cuda/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,51 @@ def storevar(self, value, name, argidx=None):
"""
Store the value into the given variable.
"""
# Handle polymorphic variables with CUDA_DEBUG_POLY enabled
if config.CUDA_DEBUG_POLY:
src_name = name.split(".")[0]
if src_name in self.poly_var_typ_map:
# Ensure allocation happens first (if needed)
fetype = self.typeof(name)
self._alloca_var(name, fetype)
# Discriminant and data are located in the same union
ptr = self.poly_var_loc_map[src_name]
# Firstly write discriminant to the beginning of union as i8
dtype = types.UnionType(self.poly_var_typ_map[src_name])
# Compute discriminant = index of type in sorted union
if isinstance(fetype, types.Literal):
lookup_type = fetype.literal_type
else:
lookup_type = fetype
discriminant_val = list(dtype.types).index(lookup_type)
# Bitcast union pointer directly to i8* and write
# discriminant at offset 0
discriminant_ptr = self.builder.bitcast(
ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
)
discriminant_i8 = llvm_ir.Constant(
llvm_ir.IntType(8), discriminant_val
)
self.builder.store(discriminant_i8, discriminant_ptr)
# Secondly write data at offset = sizeof(fetype) in bytes
lltype = self.context.get_value_type(fetype)
sizeof_bytes = self.context.get_abi_sizeof(lltype)
# Bitcast to i8* and use byte-level GEP
byte_ptr = self.builder.bitcast(
ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
)
data_byte_ptr = self.builder.gep(
byte_ptr,
[llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
)
# Cast to the correct type pointer
castptr = self.builder.bitcast(
data_byte_ptr, llvm_ir.PointerType(lltype)
)
self.builder.store(value, castptr)
return

# For non-polymorphic variables, use parent implementation
super().storevar(value, name, argidx)

# Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
Expand Down Expand Up @@ -1806,8 +1851,13 @@ def _alloca_var(self, name, fetype):
datamodel = self.context.data_model_manager[dtype]
# UnionType has sorted set of types, max at last index
maxsizetype = dtype.types[-1]
# Create a single element aggregate type
aggr_type = types.UniTuple(maxsizetype, 1)
if config.CUDA_DEBUG_POLY:
# allocate double the max element size to house
# [discriminant + data]
aggr_type = types.UniTuple(maxsizetype, 2)
else:
# allocate single element for data only
aggr_type = types.UniTuple(maxsizetype, 1)
lltype = self.context.get_value_type(aggr_type)
ptr = self.alloca_lltype(src_name, lltype, datamodel)
# save the location of the union type for polymorphic var
Expand Down Expand Up @@ -1858,9 +1908,27 @@ def getvar(self, name):
src_name = name.split(".")[0]
fetype = self.typeof(name)
lltype = self.context.get_value_type(fetype)
castptr = self.builder.bitcast(
self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
)
ptr = self.poly_var_loc_map[src_name]

if config.CUDA_DEBUG_POLY:
# With CUDA_DEBUG_POLY enabled, read value at
# offset = sizeof(fetype) in bytes
sizeof_bytes = self.context.get_abi_sizeof(lltype)
# Bitcast to i8* and use byte-level GEP
byte_ptr = self.builder.bitcast(
ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
)
value_byte_ptr = self.builder.gep(
byte_ptr,
[llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
)
# Cast to the correct type pointer
castptr = self.builder.bitcast(
value_byte_ptr, llvm_ir.PointerType(lltype)
)
else:
# Otherwise, just bitcast to the correct type
castptr = self.builder.bitcast(ptr, llvm_ir.PointerType(lltype))
return castptr
else:
return super().getvar(name)
Expand Down
44 changes: 44 additions & 0 deletions numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from numba import cuda
from numba.cuda import types
from numba.cuda.testing import CUDATestCase
from numba.cuda.core import config
from textwrap import dedent
import math
import itertools
Expand Down Expand Up @@ -403,6 +404,9 @@ def f(x):
match = re.compile(pat).search(llvm_ir)
self.assertIsNone(match, msg=llvm_ir)

@unittest.skipIf(
config.CUDA_DEBUG_POLY, "Uses old union format, not variant_part"
)
def test_union_poly_types(self):
sig = (types.int32, types.int32)

Expand Down Expand Up @@ -460,6 +464,46 @@ def a_union_use_case(arg, results):
expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
self.assertIn(expected, out.getvalue())

@unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
def test_poly_variant_part(self):
"""Test polymorphic variables with DW_TAG_variant_part.

This test verifies that when CUDA_DEBUG_POLY is enabled,
polymorphic variables generate proper DWARF5 variant_part
debug information with discriminator and variant members.
"""
# Typed constant: i8 0, i8 1, etc. | Node reference: !123, !456
if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
extradata_pattern = "i8 {{[0-9]+}}"
else:
extradata_pattern = "{{![0-9]+}}"

@cuda.jit("void()", debug=True, opt=False)
def f():
foo = 100 # noqa: F841
foo = 3.14 # noqa: F841
foo = True # noqa: F841
foo = np.int32(42) # noqa: F841

llvm_ir = f.inspect_llvm()[tuple()]

# Build FileCheck pattern dynamically based on config
# Capture node IDs and verify the hierarchical structure
check_pattern = """
CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator-{{[0-9]+}}"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
""".replace("EXTRADATA", extradata_pattern)

self.assertFileCheckMatches(llvm_ir, check_pattern)

def test_DW_LANG(self):
@cuda.jit(debug=True, opt=False)
def foo():
Expand Down