Merged
60 commits
987617d
update config file with use separate registries
isVoid Aug 4, 2025
feb8a09
regenerate bfloat16 bindings with lakshayg/Numbast@6282df4
isVoid Aug 4, 2025
ae6de8c
remove re-import of bfloat16 type
isVoid Aug 4, 2025
1c8de89
Merge branch 'main' of github.com:NVIDIA/numba-cuda into imprv-bf16-t…
isVoid Aug 5, 2025
8498a99
implement custom bfloat16 type object; insert type registry into cuda…
isVoid Aug 5, 2025
f79f0bf
update bfloat16 bindings
isVoid Aug 7, 2025
1b3598f
export typing and target registries in bf16
isVoid Aug 7, 2025
efc32f0
manually implement the lower_cast for float16 to bfloat16
isVoid Aug 7, 2025
b0f76e9
add converting rules and unify rules
isVoid Aug 7, 2025
0418625
choose irType based on compute capability
isVoid Aug 14, 2025
6ffa696
vend ctk13 code
isVoid Aug 14, 2025
834a905
regenerate with ctk13
isVoid Aug 14, 2025
577f00a
explicitly test against bfloat16 type
isVoid Aug 14, 2025
765f8ee
hand write lower cast fp16->bf16
isVoid Aug 15, 2025
c443e4d
ptx test for several basic ptx
isVoid Aug 15, 2025
166c9ae
add double underscore intrinsics
isVoid Aug 15, 2025
6d8fd66
regenerate with globals
isVoid Aug 15, 2025
c647ae3
apply binding patches
isVoid Aug 15, 2025
4b262e9
generate the bindings
isVoid Aug 15, 2025
9e79d37
apply binding patches
isVoid Aug 15, 2025
667d9fa
generate bindings
isVoid Aug 15, 2025
c4cf685
apply binding patches
isVoid Aug 15, 2025
7a89d3e
generate the bindings
isVoid Aug 15, 2025
da312aa
apply binding patches
isVoid Aug 15, 2025
bc7dbaa
re-imports the bf16 intrinsics
isVoid Aug 15, 2025
04823e8
Add documentation for arithmetic operations
isVoid Aug 15, 2025
b7e0e8b
add logical intrinsics
isVoid Aug 15, 2025
3407e19
make bfloat16 usable on host if ml_dtypes is installed
isVoid Aug 15, 2025
2ce64ed
add comparison operators
isVoid Aug 15, 2025
7d289b2
add basic conversion: float, int bidirectional conversion intrinsics
isVoid Aug 15, 2025
9317e7a
add numerical precision cast and tests
isVoid Aug 15, 2025
55d2220
add documentation for conversions
isVoid Aug 15, 2025
702b8ca
removing cuda_bf16 vended headers
isVoid Aug 15, 2025
8b569c6
update format constant method for BfloatType
isVoid Aug 15, 2025
2148be9
implement printing support for bfloat16
isVoid Aug 18, 2025
07b9c1e
implement to int conversion tests
isVoid Aug 18, 2025
0834f6d
add from integer conversion test
isVoid Aug 18, 2025
264f069
testing bitcast operations
isVoid Aug 18, 2025
abaa44d
Merge branch 'main' of github.com:NVIDIA/numba-cuda into imprv-bf16-t…
isVoid Aug 19, 2025
88ac53e
add fp16, bf16 vended headers
isVoid Aug 19, 2025
e33c7cc
update doc
isVoid Aug 19, 2025
bc3b27d
remove bfloattype custom impl
isVoid Aug 19, 2025
edea3c3
add print tests
isVoid Aug 19, 2025
0fa0174
add documentation for bfloat16 type
isVoid Aug 19, 2025
e09ffc6
update ci script and pyproject toml to make ml_dtypes a test time dep…
isVoid Aug 19, 2025
14b9fd5
add manual implementation of bf16->fp64, litint->bf16
isVoid Aug 20, 2025
b7b70c6
Maintain original overload resolution for all native operations
isVoid Aug 20, 2025
220287a
remove operator function exposure (a numbast bug)
isVoid Aug 20, 2025
8a504d8
Merge branch 'main' of github.com:NVIDIA/numba-cuda into imprv-bf16-t…
isVoid Aug 20, 2025
0f6683e
remove ml_dtypes dependency in core
isVoid Aug 21, 2025
c309442
use builtin not old_builtin
isVoid Aug 21, 2025
94bf193
Merge branch 'main' of github.com:NVIDIA/numba-cuda into imprv-bf16-t…
isVoid Aug 21, 2025
6ed0e81
add ml_dtypes to simulator ci
isVoid Aug 21, 2025
c0250cb
fix sub-sub section headers
isVoid Aug 21, 2025
13d7cb7
skip simulator for roundtrip
isVoid Aug 21, 2025
d41c67c
use numba typing templates
isVoid Aug 21, 2025
7d77ada
skip lto test without nvjitlink
isVoid Aug 21, 2025
212f4f0
skip cuda sim for bfloat16 tests
isVoid Aug 21, 2025
ac576be
update simulator tests
isVoid Aug 21, 2025
f3946db
skip simulator test on host
isVoid Aug 22, 2025
1 change: 1 addition & 0 deletions ci/test_conda.sh
@@ -36,6 +36,7 @@ DEPENDENCIES=(
   "pytest"
   "pytest-xdist"
   "cffi"
+  "ml_dtypes"
   "python=${RAPIDS_PY_VERSION}"
 )
 # Constrain oldest supported dependencies for testing
1 change: 1 addition & 0 deletions ci/test_conda_ctypes_binding.sh
@@ -26,6 +26,7 @@ DEPENDENCIES=(
   "pytest"
   "pytest-xdist"
   "cffi"
+  "ml_dtypes"
   "python=${RAPIDS_PY_VERSION}"
   "numba-cuda"
 )
1 change: 1 addition & 0 deletions ci/test_simulator.sh
@@ -13,6 +13,7 @@ DEPENDENCIES=(
   "pytest"
   "pytest-xdist"
   "cffi"
+  "ml_dtypes"
   "python=${RAPIDS_PY_VERSION}"
   "numba-cuda"
 )
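The CI script changes above add ml_dtypes as a test-time dependency only (the commit "remove ml_dtypes dependency in core" keeps it out of the runtime requirements), so the package has to cope with its absence. A minimal sketch of such an optional-dependency guard — the `HAS_ML_DTYPES` and `host_bfloat16_dtype` names are illustrative, not taken from the PR:

```python
import importlib.util

# Detect the optional host-side bfloat16 provider without importing it.
HAS_ML_DTYPES = importlib.util.find_spec("ml_dtypes") is not None

def host_bfloat16_dtype():
    """Return ml_dtypes.bfloat16 when available, else None."""
    if not HAS_ML_DTYPES:
        return None
    import ml_dtypes
    return ml_dtypes.bfloat16
```

In a pytest suite the same effect is commonly achieved with `pytest.importorskip("ml_dtypes")` at the top of the bfloat16 host tests.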
12 changes: 6 additions & 6 deletions configs/cuda_bf16.yml
@@ -1,10 +1,12 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 Name: Numba Bfloat16
-Version: 0.0.1
-Entry Point: ./numba_cuda/numba/cuda/include/12/cuda_bf16.h
+Version: 0.0.2
+GPU Arch:
+  - sm_80 # sm_80 is the first CUDA architecture that supports bfloat16
+Entry Point: ./numba_cuda/numba/cuda/include/13/cuda_bf16.h
 File List:
-  - ./numba_cuda/numba/cuda/include/12/cuda_bf16.h
+  - ./numba_cuda/numba/cuda/include/13/cuda_bf16.h
 Exclude: {}
 Types:
   __nv_bfloat16_raw: Number
@@ -21,6 +23,4 @@ Data Models:
   __nv_bfloat162: StructModel
   nv_bfloat162: StructModel
 Shim Include Override: "\"cuda_bf16.h\""
-Additional Import:
-  - os
-Require Pynvjitlink: False
+Use Separate Registry: True
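Applying the two hunks above, the changed portions of configs/cuda_bf16.yml end up as follows (unchanged neighbors reproduced from the diff context; lines between the hunks elided as `# ...`):

```yaml
Name: Numba Bfloat16
Version: 0.0.2
GPU Arch:
  - sm_80 # sm_80 is the first CUDA architecture that supports bfloat16
Entry Point: ./numba_cuda/numba/cuda/include/13/cuda_bf16.h
File List:
  - ./numba_cuda/numba/cuda/include/13/cuda_bf16.h
Exclude: {}
# ...
Shim Include Override: "\"cuda_bf16.h\""
Use Separate Registry: True
```

The net effect matches the commit messages: the vended header moves from the CTK 12 to the CTK 13 copy, the config declares sm_80 as the minimum architecture, and the Numbast-specific `Use Separate Registry` flag replaces the old `Additional Import` / `Require Pynvjitlink` keys.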