diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 000000000..9754f1be4
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,172 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+# Build-and-test pipeline that runs on every push to the "main" branch.
+name: ci
+
+on:
+  push:
+    branches:
+      - "main"
+
+# NOTE(review): every reusable workflow below is called with
+# `build_type: pull-request` although this workflow only runs on pushes to
+# "main" - confirm that this is the value those workflows expect here.
+jobs:
+  # Run the pre-commit hooks against every file in the repository.
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
+      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
+        with:
+          extra_args: --all-files --show-diff-on-failure
+  # Read the build, simulator and test matrices from ci/matrix.yml and expose
+  # them as job outputs for the jobs below.
+  compute-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      BUILD_MATRIX: ${{ steps.compute-matrix.outputs.BUILD_MATRIX }}
+      SIMULATOR_MATRIX: ${{ steps.compute-matrix.outputs.SIMULATOR_MATRIX }}
+      TEST_MATRIX: ${{ steps.compute-matrix.outputs.TEST_MATRIX }}
+    steps:
+      - uses: actions/checkout@v5
+      - name: Compute Matrices
+        id: compute-matrix
+        # Each value is written with the `NAME<<EOF` ... `EOF` delimiter form so
+        # that it may span several lines.
+        run: |
+          BUILD_MATRIX="$(yq '.build-matrix' ci/matrix.yml)"
+          SIMULATOR_MATRIX="$(yq '.simulator-matrix' ci/matrix.yml)"
+          TEST_MATRIX="$(yq '.test-matrix' ci/matrix.yml)"
+          {
+            echo 'BUILD_MATRIX<<EOF'
+            echo "${BUILD_MATRIX}"
+            echo 'EOF'
+            echo 'SIMULATOR_MATRIX<<EOF'
+            echo "${SIMULATOR_MATRIX}"
+            echo 'EOF'
+            echo 'TEST_MATRIX<<EOF'
+            echo "${TEST_MATRIX}"
+            echo 'EOF'
+          } >> "${GITHUB_OUTPUT}"
+  # Run ci/build_conda.sh over BUILD_MATRIX.
+  build-conda:
+    needs:
+      - compute-matrix
+    uses: ./.github/workflows/conda-python-build.yaml
+    with:
+      build_type: pull-request
+      script: "ci/build_conda.sh"
+      matrix: ${{ needs.compute-matrix.outputs.BUILD_MATRIX }}
+  # Run ci/test_conda.sh over TEST_MATRIX. Waits for build-conda, mirroring
+  # the way every wheel test job below waits for build-wheels.
+  test-conda:
+    needs:
+      - build-conda
+      - compute-matrix
+    uses: ./.github/workflows/conda-python-tests.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_conda.sh"
+      run_codecov: false
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+  # Run ci/test_simulator.sh over SIMULATOR_MATRIX.
+  # NOTE(review): this job does not wait for build-conda - confirm that
+  # ci/test_simulator.sh does not consume the built conda packages.
+  test-simulator:
+    needs:
+      - compute-matrix
+    uses: ./.github/workflows/simulator-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_simulator.sh"
+      matrix: ${{ needs.compute-matrix.outputs.SIMULATOR_MATRIX }}
+  # Run ci/build_wheel.sh over BUILD_MATRIX.
+  build-wheels:
+    needs:
+      - compute-matrix
+    uses: ./.github/workflows/wheels-build.yaml
+    with:
+      build_type: pull-request
+      script: "ci/build_wheel.sh"
+      matrix: ${{ needs.compute-matrix.outputs.BUILD_MATRIX }}
+  # The Windows wheel build and test workflows take no inputs from this file.
+  build-wheels-windows:
+    uses: ./.github/workflows/wheel-windows-build.yaml
+  test-wheels-windows:
+    needs:
+      - build-wheels-windows
+    uses: ./.github/workflows/wheel-windows-tests.yaml
+  # Run ci/test_wheel.sh over the full TEST_MATRIX.
+  test-wheels:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_wheel.sh false"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+  # The matrix_filter below keeps amd64 entries with CUDA major >= 12, groups
+  # them by CUDA major version and retains, per group, the entry with the
+  # highest (PY_VER, CUDA_VER).
+  test-wheels-deps-wheels:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_wheel_deps_wheels.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+  # The three third-party jobs use the same filter restricted to CUDA major 12.
+  test-thirdparty-cudf:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_thirdparty_cudf.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      # TODO: Enable for CUDA 13 when a supporting version of cuDF is available
+      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+  test-thirdparty-nvmath:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_thirdparty_nvmath.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      # TODO: Enable for CUDA 13 when a supporting version of nvmath-python is available
+      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+  test-thirdparty-awkward:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_thirdparty_awkward.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      # TODO: Enable for CUDA 13 in future
+      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+
+  build-docs:
+    uses: ./.github/workflows/docs-build.yaml
+  # Run ci/coverage_report.sh on the first amd64 / CUDA 12.9.1 / Python 3.12 entry.
+  coverage-report:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/coverage_report.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.12")) | .[0:1]'
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 620b2f84a..0dac198f4 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -7,7 +7,6 @@ on:
   push:
     branches:
       - "pull-request/[0-9]+"
-      - "main"
 
 concurrency:
   group: >-
@@ -31,7 +30,8 @@ jobs:
       - test-wheels-windows
       - test-wheels
       - test-wheels-deps-wheels
-      - test-thirdparty
+      - test-thirdparty-cudf
+      - test-thirdparty-awkward
       - build-docs
       - coverage-report
     secrets: inherit
@@ -127,17 +127,29 @@ jobs:
       script: "ci/test_wheel_deps_wheels.sh"
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
       matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
-  test-thirdparty:
+  test-thirdparty-cudf:
     needs:
       - build-wheels
       - compute-matrix
     uses: ./.github/workflows/wheels-test.yaml
     with:
       build_type: pull-request
-      script: "ci/test_thirdparty.sh"
+      script: "ci/test_thirdparty_cudf.sh"
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
       # TODO: Enable for CUDA 13 when a supporting version of cuDF is available
       matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+  test-thirdparty-awkward:
+    needs:
+      - build-wheels
+      - compute-matrix
+    uses: ./.github/workflows/wheels-test.yaml
+    with:
+      build_type: pull-request
+      script: "ci/test_thirdparty_awkward.sh"
+      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+      # TODO: Enable for CUDA 13 in future
+      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+
   build-docs:
     uses: ./.github/workflows/docs-build.yaml
   coverage-report:
diff --git a/ci/test_thirdparty_awkward.sh b/ci/test_thirdparty_awkward.sh
new file mode 100755
index 000000000..8e3c09ea8
--- /dev/null
+++ b/ci/test_thirdparty_awkward.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+# Third-party integration test: installs the numba-cuda wheel found under
+# wheel/ next to a pinned Awkward Array release, then runs Awkward's CUDA
+# test directories. Requires CUDA_VER to be set in the environment.
+
+set -euo pipefail
+
+CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
+AWKWARD_VERSION="2.8.10"
+
+rapids-logger "Install awkward and related libraries"
+
+# Use "python -m pip" for every install so that all packages land in the
+# environment of the interpreter that later runs pytest.
+python -m pip install "awkward==${AWKWARD_VERSION}" cupy-cuda12x pyarrow pandas nox
+
+rapids-logger "Install wheel with test dependencies"
+package=$(realpath wheel/numba_cuda*.whl)
+echo "Package path: ${package}"
+# ${CUDA_VER_MAJOR_MINOR%.*} drops the minor component, so cuda-python is
+# pinned to the CUDA major version only.
+python -m pip install \
+    "${package}" \
+    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
+    "cuda-core==0.3.*" \
+    "nvidia-nvjitlink-cu12" \
+    --group test
+
+
+rapids-logger "Clone awkward repository"
+# Clone the release tag directly so that the submodules fetched by
+# --recursive match the pinned version rather than the default branch.
+git clone --recursive --branch "v${AWKWARD_VERSION}" https://github.com/scikit-hep/awkward.git
+pushd awkward
+
+rapids-logger "Generate awkward tests"
+nox -s prepare -- --tests
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+rapids-logger "Show Numba system info"
+python -m numba --sysinfo
+
+rapids-logger "Run Awkward CUDA tests"
+python -m pytest -n auto --benchmark-disable tests-cuda tests-cuda-kernels tests-cuda-kernels-explicit
+
+popd
diff --git a/ci/test_thirdparty.sh b/ci/test_thirdparty_cudf.sh
similarity index 100%
rename from ci/test_thirdparty.sh
rename to ci/test_thirdparty_cudf.sh
diff --git a/ci/test_thirdparty_nvmath.sh b/ci/test_thirdparty_nvmath.sh
new file mode 100755
index 000000000..e062b5d93
--- /dev/null
+++ b/ci/test_thirdparty_nvmath.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+# Third-party integration test: installs a pinned nvmath-python release,
+# removes any numba-cuda installed alongside it, installs the wheel found
+# under wheel/ instead, and runs nvmath-python's device tests from a checkout
+# of the matching commit. Requires CUDA_VER to be set in the environment.
+
+set -euo pipefail
+
+CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
+
+NVMATH_PYTHON_VERSION="0.6.0"
+# The commit on GitHub corresponding to 0.6.0
+NVMATH_PYTHON_SHA="6bddfa71c39c07804127adeb23f5b0d2168ae38c"
+
+rapids-logger "Install nvmath-python"
+
+# Quoted so that the shell never treats [cu12,dx] as a glob pattern.
+python -m pip install "nvmath-python[cu12,dx]==${NVMATH_PYTHON_VERSION}"
+
+rapids-logger "Remove Extraneous numba-cuda"
+python -m pip uninstall -y numba-cuda
+
+rapids-logger "Install wheel with test dependencies"
+package=$(realpath wheel/numba_cuda*.whl)
+echo "Package path: ${package}"
+python -m pip install \
+    "${package}" \
+    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
+    "cuda-core==0.3.*" \
+    "nvidia-nvjitlink-cu12" \
+    --group test
+
+
+rapids-logger "Clone nvmath-python repository"
+git clone https://github.com/NVIDIA/nvmath-python.git
+pushd nvmath-python
+git checkout "${NVMATH_PYTHON_SHA}"
+
+rapids-logger "Install nvmath-python test dependencies"
+python -m pip install -r requirements/pip/tests.txt
+python -m pip install nvidia-mathdx
+python -m pip install nvidia-cutlass
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+rapids-logger "Show Numba system info"
+python -m numba --sysinfo
+
+rapids-logger "Run nvmath-python device tests"
+pushd tests
+# Required for nvmath-python to locate the pip-installed MathDx. The
+# site-packages directory is queried from the interpreter so that the path
+# stays correct for whichever Python version the job runs.
+SYS_PREFIX=$(python -c "import sys; print(sys.prefix)")
+SITE_PACKAGES=$(python -c "import sysconfig; print(sysconfig.get_path('purelib'))")
+export SYS_PREFIX
+export MATHDX_HOME="${SITE_PACKAGES}/nvidia/mathdx"
+python -m pytest nvmath_tests/device --tb=native -x
+
+popd
+popd
diff --git a/numba_cuda/numba/cuda/cudaimpl.py b/numba_cuda/numba/cuda/cudaimpl.py
index e0133fe4c..62ed0be75 100644
--- a/numba_cuda/numba/cuda/cudaimpl.py
+++ b/numba_cuda/numba/cuda/cudaimpl.py
@@ -11,7 +11,7 @@
 
 from numba.cuda.core.imputils import Registry
 from numba.cuda.typing.npydecl import parse_dtype
-from numba.cuda.datamodel import models
+from numba.cuda.datamodel.models import StructModel
 from numba.cuda import types
 from numba.cuda import cgutils
 from numba.cuda.np import ufunc_db
@@ -21,6 +21,10 @@
 from numba.cuda import nvvmutils, stubs
 from numba.cuda.types.ext_types import dim3, CUDADispatcher
 
+if cuda.HAS_NUMBA:
+    from numba.core.datamodel.models import StructModel as CoreStructModel
+    from numba.core import types as core_types
+
 registry = Registry("cudaimpl")
 lower = registry.lower
 lower_attr = registry.lower_getattr
@@ -880,13 +884,19 @@ def _generic_array(
         raise ValueError("array length <= 0")
 
     # Check that we support the requested dtype
+    number_domain = types.number_domain
+    struct_model_types = (StructModel,)
+    if cuda.HAS_NUMBA:
+        number_domain |= core_types.number_domain
+        struct_model_types = (StructModel, CoreStructModel)
+
     data_model = context.data_model_manager[dtype]
     other_supported_type = (
         isinstance(dtype, (types.Record, types.Boolean))
-        or isinstance(data_model, models.StructModel)
+        or isinstance(data_model, struct_model_types)
         or dtype == types.float16
     )
-    if dtype not in types.number_domain and not other_supported_type:
+    if dtype not in number_domain and not other_supported_type:
         raise TypeError("unsupported type: %s" % dtype)
 
     lldtype = context.get_data_type(dtype)
diff --git a/numba_cuda/numba/cuda/types/__init__.py b/numba_cuda/numba/cuda/types/__init__.py
index 75b53c43f..b5f394e51 100644
--- a/numba_cuda/numba/cuda/types/__init__.py
+++ b/numba_cuda/numba/cuda/types/__init__.py
@@ -14,7 +14,7 @@
 from .npytypes import *
 from .scalars import *
 from .function_type import *
-from .ext_types import bfloat16, dim3, grid_group
+from .ext_types import bfloat16, dim3, grid_group, GridGroup, Dim3
 
 numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
 
@@ -222,6 +222,8 @@
     bfloat16
     dim3
     grid_group
+    Dim3
+    GridGroup
 """
 
 
diff --git a/numba_cuda/numba/cuda/typing/templates.py b/numba_cuda/numba/cuda/typing/templates.py
index de4c3b464..ac7ec1815 100644
--- a/numba_cuda/numba/cuda/typing/templates.py
+++ b/numba_cuda/numba/cuda/typing/templates.py
@@ -200,7 +200,14 @@ def generic(self):
 def signature(return_type, *args, **kws):
+    """Create a typing signature for ``return_type`` and positional ``args``.
+
+    The only keyword accepted is ``recvr``, which is forwarded to the
+    signature constructor; any other keyword trips the assertion below.
+    """
     recvr = kws.pop("recvr", None)
     assert not kws
-    return Signature(return_type, args, recvr=recvr)
+    # Use the CoreSignature class when HAS_NUMBA is set, else the local one.
+    signature_class = CoreSignature if HAS_NUMBA else Signature
+    return signature_class(return_type, args, recvr=recvr)
 
 
 def fold_arguments(