Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions misc/gdb_print_extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

"""gdb printing extension for Numba types."""

import re

try:
import gdb.printing
import gdb
except ImportError:
raise ImportError("GDB python support is not available.")


class NumbaArrayPrinter:
    """Pretty-printer for Numba array values.

    Decodes the DWARF representation of a Numba array struct (members
    ``data``, ``itemsize``, ``shape`` and ``strides``) and, when NumPy is
    available, reads the backing memory out of the inferior and renders it
    as a real ndarray via ``as_strided``.
    """

    def __init__(self, val):
        # val: the gdb.Value being printed.
        self.val = val

    def to_string(self):
        # NumPy is optional; without it only a summary string is produced.
        try:
            import numpy as np

            HAVE_NUMPY = True
        except ImportError:
            HAVE_NUMPY = False

        # Any failure below is reported in-band as the printed string so a
        # broken printer never crashes the gdb session.
        try:
            NULL = 0x0

            # Raw data references, these need unpacking/interpreting.

            # Member "data" is...
            # DW_TAG_member of DIDerivedType, tag of DW_TAG_pointer_type
            # encoding e.g. DW_ATE_float
            data = self.val["data"]

            # Member "itemsize" is...
            # DW_TAG_member of DIBasicType encoding DW_ATE_signed
            itemsize = self.val["itemsize"]

            # Members "shape" and "strides" are...
            # DW_TAG_member of DIDerivedType, the type is a DICompositeType
            # (it's a Numba UniTuple) with tag: DW_TAG_array_type, i.e. it's
            # an array repr, it has a basetype of e.g. DW_ATE_unsigned and
            # also "elements" which are referenced with a
            # DISubrange(count: <const>) to say how many elements are in the
            # array.
            rshp = self.val["shape"]
            rstrides = self.val["strides"]

            # bool on whether the data is aligned.
            is_aligned = False

            # Type information decode. Record dtypes carry their fields in
            # the type string; simple dtypes are parsed directly.
            ty_str = str(self.val.type)
            if HAVE_NUMPY and ("aligned" in ty_str or "Record" in ty_str):
                ty_str = ty_str.replace("unaligned ", "").strip()
                matcher = re.compile(r"array\((Record.*), (.*), (.*)\)\ \(.*")
                # NOTE: need to deal with "Alignment" else dtype size is wrong
                matched = matcher.match(ty_str)
                if matched is None:
                    # Explicit error beats AttributeError on None.groups();
                    # the outer handler turns this into a readable message.
                    raise ValueError(
                        "Unable to parse array type string: %s" % ty_str)
                arr_info = [x.strip() for x in matched.groups()]
                dtype_str, ndim_str, order_str = arr_info
                rstr = r"Record\((.*\[.*\]);([0-9]+);(True|False)"
                rstr_match = re.match(rstr, dtype_str)
                if rstr_match is None:
                    raise ValueError(
                        "Unable to parse Record dtype string: %s" % dtype_str)
                # balign is unused, it's the alignment
                fields, balign, is_aligned_str = rstr_match.groups()
                is_aligned = is_aligned_str == "True"
                field_dts = fields.split(",")
                struct_entries = []
                for f in field_dts:
                    splitted = f.split("[")
                    if len(splitted) < 2:
                        # A field with no "[...]" part cannot be decoded.
                        raise ValueError(
                            "Invalid field format, missing '[': %s" % f)
                    name = splitted[0]
                    dt_part = splitted[1:]
                    if len(dt_part) > 1:
                        raise TypeError("Unsupported sub-type: %s" % f)
                    else:
                        dt_part = dt_part[0]
                        if "nestedarray" in dt_part:
                            raise TypeError("Unsupported sub-type: %s" % f)
                        dt_as_str = dt_part.split(";")[0].split("=")[1]
                        dtype = np.dtype(dt_as_str)
                        struct_entries.append((name, dtype))
                # The dtype is actually a record of some sort; np.dtype
                # accepts the (name, dtype) tuple list directly.
                dtype_str = struct_entries
            else:  # simple type
                matcher = re.compile(r"array\((.*),(.*),(.*)\)\ \(.*")
                matched = matcher.match(ty_str)
                if matched is None:
                    raise ValueError(
                        "Unable to parse array type string: %s" % ty_str)
                arr_info = [x.strip() for x in matched.groups()]
                dtype_str, ndim_str, order_str = arr_info
                # fix up unichr dtype
                if "unichr x " in dtype_str:
                    dtype_str = dtype_str[1:-1].replace("unichr x ", "<U")

            def dwarr2inttuple(dwarr):
                # Converts a gdb handle to a dwarf array to a tuple of ints.
                # The first field's range() gives the inclusive index bounds.
                fields = dwarr.type.fields()
                lo, hi = fields[0].type.range()
                return tuple([int(dwarr[x]) for x in range(lo, hi + 1)])

            # shape/strides extraction
            shape = dwarr2inttuple(rshp)
            strides = dwarr2inttuple(rstrides)

            # if data is not NULL
            if data != NULL:
                if HAVE_NUMPY:
                    # The data extent in bytes is:
                    # sum((shape - 1) * strides) + itemsize
                    # i.e. the offset of the last element plus one item;
                    # get the data, then wire to as_strided.
                    shp_arr = np.array([max(0, x - 1) for x in shape])
                    strd_arr = np.array(strides)
                    extent = np.sum(shp_arr * strd_arr)
                    extent += int(itemsize)
                    dtype = np.dtype(dtype_str, align=is_aligned)
                    this_proc = gdb.selected_inferior()
                    mem = this_proc.read_memory(int(data), extent)
                    arr_data = np.frombuffer(mem, dtype=dtype)
                    new_arr = np.lib.stride_tricks.as_strided(
                        arr_data,
                        shape=shape,
                        strides=strides,
                    )
                    return "\n" + str(new_arr)
                # Catch all for no NumPy
                return "array([...], dtype=%s, shape=%s)" % (dtype_str, shape)
            else:
                # Not yet initialized or NULLed out data
                return "array([NULL/Uninitialized])"
        except Exception as e:
            return "array[Exception: Failed to parse. %s]" % e


class NumbaComplexPrinter:
    """Pretty-printer for Numba complex values.

    Renders the ``real``/``imag`` struct members in Python-like
    ``a+bj`` / ``a-bj`` form.
    """

    def __init__(self, val):
        # val: the gdb.Value being printed.
        self.val = val

    def to_string(self):
        # Stringify the members so sign inspection works uniformly for
        # gdb.Value and plain numbers.
        real = self.val["real"]
        imag = str(self.val["imag"])
        # A negative imaginary part already carries its own "-"; inserting
        # "+" unconditionally would render e.g. "3+-5j".
        sign = "" if imag.startswith("-") else "+"
        return "%s%s%sj" % (real, sign, imag)


class NumbaTuplePrinter:
    """Pretty-printer for Numba Tuple values.

    Renders each member of the underlying struct, joined in Python
    tuple style, e.g. ``(1, 2.0)``.
    """

    def __init__(self, val):
        # val: the gdb.Value being printed.
        self.val = val

    def to_string(self):
        # One rendered element per struct field, in declaration order.
        parts = []
        for member in self.val.type.fields():
            parts.append(str(self.val[member.name]))
        return "({})".format(", ".join(parts))


class NumbaUniTuplePrinter:
    """Pretty-printer for Numba UniTuple values.

    UniTuples are represented as DWARF arrays; the elements are rendered
    joined in Python tuple style.
    """

    def __init__(self, val):
        # val: the gdb.Value being printed.
        self.val = val

    def to_string(self):
        # The first field's range() gives the inclusive index bounds of
        # the underlying DWARF array.
        first_field = self.val.type.fields()[0]
        lo, hi = first_field.type.range()
        rendered = ", ".join(str(self.val[idx]) for idx in range(lo, hi + 1))
        return "(%s)" % rendered


class NumbaUnicodeTypePrinter:
    """Pretty-printer for Numba unicode_type values.

    Reads ``length * kind`` bytes from the inferior at ``data`` and
    renders them as a quoted string.
    """

    def __init__(self, val):
        # val: the gdb.Value being printed.
        self.val = val

    def to_string(self):
        NULL = 0x0
        data = self.val["data"]
        nitems = self.val["length"]
        kind = self.val["kind"]
        if data == NULL:
            # Not yet initialized or NULLed out data: show the raw pointer.
            return "'%s'" % str(data)
        # TODO: This needs sorting out, encoding is wrong - `kind` looks
        # like the per-character width, but the bytes are decoded as UTF-8
        # regardless.
        inferior = gdb.selected_inferior()
        raw = inferior.read_memory(int(data), nitems * kind)
        if isinstance(raw, memoryview):
            text = bytes(raw).decode()
        else:
            text = raw.decode("utf-8")
        return "'%s'" % text


def _create_printers():
    """Build the collection of Numba pretty-printers.

    Returns a ``RegexpCollectionPrettyPrinter`` with one sub-printer per
    Numba type-name pattern, in registration order.
    """
    # (description, type-name regexp, printer class) triples.
    specs = (
        ("Numba unaligned array printer", "^unaligned array\\(",
         NumbaArrayPrinter),
        ("Numba array printer", "^array\\(", NumbaArrayPrinter),
        ("Numba complex printer", "^complex[0-9]+\\ ", NumbaComplexPrinter),
        ("Numba Tuple printer", "^Tuple\\(", NumbaTuplePrinter),
        ("Numba UniTuple printer", "^UniTuple\\(", NumbaUniTuplePrinter),
        ("Numba unicode_type printer", "^unicode_type\\s+\\(",
         NumbaUnicodeTypePrinter),
    )
    collection = gdb.printing.RegexpCollectionPrettyPrinter("Numba")
    for description, pattern, printer_cls in specs:
        collection.add_printer(description, pattern, printer_cls)
    return collection


# Register the Numba pretty printers for the current objfile.
# NOTE(review): `gdb.current_objfile()` is presumably None when this script
# is sourced outside an objfile auto-load, which would register the
# printers globally - confirm the intended scope.
gdb.printing.register_pretty_printer(gdb.current_objfile(), _create_printers())
Loading