diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index 7a9c57d..41deff3 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -40,6 +40,38 @@ jobs:
CONFIG: linux_64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313
UPLOAD_PACKAGES: 'True'
DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.13.____cp313:
+ CONFIG: linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.13.____cp313
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.10.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.10.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.11.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.11.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.12.____cpython:
+ CONFIG: linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.12.____cpython
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
+ linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313:
+ CONFIG: linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
timeoutInMinutes: 360
variables: {}
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml
new file mode 100644
index 0000000..e9c5194
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.10.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml
new file mode 100644
index 0000000..a39f708
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.11.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml
new file mode 100644
index 0000000..0b2bd6b
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.12.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.13.____cp313.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.13.____cp313.yaml
new file mode 100644
index 0000000..137a8fd
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13python3.13.____cp313.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.6'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '13'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.13.* *_cp313
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.10.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.10.____cpython.yaml
new file mode 100644
index 0000000..66b9fb1
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.10.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '14'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.10.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.11.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.11.____cpython.yaml
new file mode 100644
index 0000000..cfa782d
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.11.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '14'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.11.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.12.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.12.____cpython.yaml
new file mode 100644
index 0000000..b4ea683
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.12.____cpython.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '14'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.12.* *_cpython
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313.yaml
new file mode 100644
index 0000000..9284335
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version12.9cxx_compiler_version14python3.13.____cp313.yaml
@@ -0,0 +1,34 @@
+c_stdlib:
+- sysroot
+c_stdlib_version:
+- '2.17'
+cdt_name:
+- conda
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- cuda-nvcc
+cuda_compiler_version:
+- '12.9'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '14'
+docker_image:
+- quay.io/condaforge/linux-anvil-x86_64:alma9
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.13.* *_cp313
+pytorch:
+- '2.7'
+target_platform:
+- linux-aarch64
+zip_keys:
+- - cxx_compiler_version
+ - c_stdlib_version
+ - cuda_compiler_version
diff --git a/README.md b/README.md
index ed63c42..81d1cf2 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,62 @@ Current build status
+