2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -12,6 +12,7 @@ repos:
- id: check-yaml
exclude: ^conda/recipes/numba-cuda/meta.yaml
- id: debug-statements
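# hello.py intentionally calls breakpoint() to set device-side breakpoints, so it is excluded from this check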
exclude: examples/debugging/hello.py
- id: end-of-file-fixer
- id: trailing-whitespace
- id: mixed-line-ending
@@ -30,6 +31,7 @@ repos:
language: python
additional_dependencies:
- pathspec==0.12.1
exclude: examples/debugging/debugging.code-workspace
- repo: https://github.com/sphinx-contrib/sphinx-lint
rev: v1.0.2
hooks:
Binary file added docs/source/_static/kernel-entry.png
Binary file added docs/source/_static/launch-json.png
Binary file added docs/source/_static/run-control.png
Binary file added docs/source/_static/starting-debugging.png
374 changes: 374 additions & 0 deletions docs/source/user/debugging.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/source/user/index.rst
@@ -22,6 +22,7 @@ User guide
random.rst
device-management.rst
examples.rst
debugging.rst
simulator.rst
reduction.rst
ufunc.rst
70 changes: 70 additions & 0 deletions examples/debugging/.vscode/launch.json
@@ -0,0 +1,70 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Numba: hello.py Example",
"type": "cuda-gdb",
"request": "launch",
"cwd": "${workspaceFolder}",

// Use cuda-gdb to debug the Python interpreter executable
// The "${command:python.interpreterPath}" variable is provided by the Python extension
// for VS Code, and can be set by selecting the Python interpreter in the VS Code
// status bar. This allows the user to select the Python virtual environment to use for
// the debugging session.
"program": "${command:python.interpreterPath}",

// The Numba CUDA Python script to debug
"args": "${workspaceFolder}/hello.py",

// Break on kernel launch - we have explicit breakpoint() calls in the code,
// but this is a convenient shortcut to start the program running and stop on the first
// source line of any launched kernel.
// Setting "breakOnLaunch" to true in the launch.json file is equivalent to
// running "set cuda break_on_launch application" from the cuda-gdb command line.
"breakOnLaunch": true,

// Load the Numba CUDA cuda-gdb pretty-printer extension (if available).
// This is used to format Numba CUDA arrays in the debugger.
"setupCommands": [
{
"description": "Load the Numba CUDA cuda-gdb pretty-printer extension",
"text": "python import gdb_print_extension",
// Don't raise an error if the extension cannot be found. This is usually
// due to the directory holding the extension not being in the PYTHONPATH
// environment variable, but should not cause the debugging session to fail.
"ignoreFailures": true
}
],
"environment": [
{
// Add the directory containing the Numba CUDA cuda-gdb pretty-printer extension
// to the PYTHONPATH environment variable. This is usually ``numba-cuda/misc`` and
// is required for proper array formatting in the debugger.
"name": "PYTHONPATH",
"value": "${workspaceFolder}/../../misc:${env:PYTHONPATH}"
},
{
// Don't buffer stdout and stderr - we want to see the output immediately.
"name": "PYTHONUNBUFFERED",
"value": "1"
},
{
// Add the CUDA tools bin directory to the PATH environment variable.
// This is used to find the cuda-gdb executable.
"name": "PATH",
"value": "/usr/local/cuda/bin:${env:PATH}"
},
{
// Add the CUDA libraries lib64 directory to the LD_LIBRARY_PATH environment variable.
// This is used to find the CUDA libraries used by the Numba CUDA program.
"name": "LD_LIBRARY_PATH",
"value": "/usr/local/cuda/lib64:${env:LD_LIBRARY_PATH}"
}
]
}
]
}
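
The setupCommands entry above imports gdb_print_extension into cuda-gdb's embedded Python interpreter. For orientation, a gdb pretty-printer module of this kind is typically structured as in the sketch below; the class, field, and type-matching names are illustrative assumptions, not the contents of the actual extension shipped in numba-cuda/misc.

# Hypothetical sketch of a cuda-gdb pretty-printer module (illustrative names only;
# the real gdb_print_extension in numba-cuda/misc differs).
# cuda-gdb embeds a Python interpreter exposing the standard "gdb" module,
# so the ordinary gdb pretty-printer API applies.
import gdb  # only importable inside (cuda-)gdb's embedded Python


class DeviceArrayPrinter:
    """Render a device array structure as a one-line summary."""

    def __init__(self, val):
        self.val = val  # a gdb.Value wrapping the variable being printed

    def to_string(self):
        # "shape" is a hypothetical member name used for illustration.
        return "device array, type {}, shape {}".format(self.val.type, self.val["shape"])


def lookup_printer(val):
    # Return a printer for types we recognise; None lets gdb fall back to its default.
    if "array" in str(val.type):
        return DeviceArrayPrinter(val)
    return None


# Register the lookup so gdb (and cuda-gdb) consult it when printing values.
gdb.pretty_printers.append(lookup_printer)

With the module's directory on PYTHONPATH, as arranged by the environment block above, the setupCommands import makes these printers available when values are printed in the debug session.
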
13 changes: 13 additions & 0 deletions examples/debugging/.vscode/settings.json
@@ -0,0 +1,13 @@
{
// Add the path to the Anaconda environments to the python.venvPath environment variable.
// If your Anaconda environments are not in the home directory, you need to change this to
// the path to your Anaconda environments directory.
"python.venvPath": "${env:HOME}/anaconda3/envs",
"python-envs.pythonProjects": [
{
"path": "",
"envManager": "ms-python.python:conda",
"packageManager": "ms-python.python:conda"
}
]
}
8 changes: 8 additions & 0 deletions examples/debugging/debugging.code-workspace
@@ -0,0 +1,8 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
}
99 changes: 99 additions & 0 deletions examples/debugging/hello.py
@@ -0,0 +1,99 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

import numpy as np
from numba import cuda

num_entries = 8


#
# In the following code, we use explicit breakpoint function calls to insert
# breakpoints.
#
# Breakpoints can also be set by clicking to the left of the source line numbers in the
# editor window, where they appear as red dots. They can also be set from the cuda-gdb
# command line using the filename:line-number syntax, or by kernel or function name.
#
@cuda.jit(debug=True, opt=False)
def hello(input, output):
gid = cuda.grid(1)
size = len(input)
if gid >= size:
return

# Print the value of the array at the current thread's index
# Output will likely not happen until the kernel is finished executing
# because host synchronization is needed to print the output.
# Output is not guaranteed to be printed in thread order.
print("input[", gid, "] =", input[gid])

# Reverse the input array
output[gid] = input[size - gid - 1]

# Synchronize all threads in the block
cuda.syncthreads()

# Have the first thread print a message to indicate that all threads
# have synchronized
if gid == 0:
print("All threads have synchronized (local memory array)")

# Print the value of the output array at the current thread's index
print("output[", gid, "] =", output[gid])

# Allocate a new array in shared memory
shared_array = cuda.shared.array(num_entries, dtype=np.int64)

# Hit a manually-inserted breakpoint here
breakpoint()

# Fill the shared array with the negated input values in reverse order
shared_array[gid] = -input[size - gid - 1]

# Synchronize all threads in the block
cuda.syncthreads()

# Have the first thread print a message to indicate that all threads
# have synchronized
if gid == 0:
print("All threads have synchronized (shared memory array)")

# Print the value of the shared array at the current thread's index
print("shared_array[", gid, "] =", shared_array[gid])

# Demonstrate polymorphic variables by assigning values of different types to the same variable.
# The print() calls are used to ensure that the variable is not optimized out.
# Stepping through the code will show how the variable changes value and type with each assignment.
# Only let the first thread do this to reduce clutter in the output.
if gid == 0:
# Hit another manually-inserted breakpoint here
breakpoint()

# Set the variable to different values and types.
variable = True
print("variable =", variable)
variable = 0x80000000
print("variable =", variable)
variable = 0x8000000000000000
print("variable =", variable)
variable = 3.141592653589793
print("variable =", variable)
variable = 2.718281828459045
print("variable =", variable)


if __name__ == "__main__":
# Generate data
input = cuda.to_device(np.array(range(num_entries), dtype=np.int64))
print(f"input: {input.copy_to_host()}")

# Create a vector to hold the results (same size as the input)
output = cuda.to_device(np.zeros(len(input), dtype=np.int64))

# Launch the kernel
hello[1, len(input)](input, output)

# Print the results
print(f"output: {output.copy_to_host()}")
print("All Done!")