diff --git a/.devcontainer/cuda12.9-gcc14/devcontainer.json b/.devcontainer/cuda12.9-gcc14/devcontainer.json new file mode 100644 index 00000000000..74a4d8f852b --- /dev/null +++ b/.devcontainer/cuda12.9-gcc14/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-gcc14-cuda12.9", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9-gcc14" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.9-gcc14", + "CCCL_CUDA_VERSION": "12.9", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda12.9-gcc14", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda12.9-gcc14" +} diff --git a/.devcontainer/cuda12.9-nvhpc25.5/devcontainer.json b/.devcontainer/cuda12.9-nvhpc25.5/devcontainer.json new file mode 100644 index 00000000000..fa2a9aab0f6 --- /dev/null +++ b/.devcontainer/cuda12.9-nvhpc25.5/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-nvhpc25.5", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9-nvhpc25.5" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.9-nvhpc25.5", + "CCCL_CUDA_VERSION": "12.9", + "CCCL_HOST_COMPILER": "nvhpc", + "CCCL_HOST_COMPILER_VERSION": "25.5", + "CCCL_BUILD_INFIX": "cuda12.9-nvhpc25.5", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda12.9-nvhpc25.5" +} diff --git a/.devcontainer/cuda12.9ext-gcc13/devcontainer.json b/.devcontainer/cuda12.9ext-gcc14/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.9ext-gcc13/devcontainer.json rename to .devcontainer/cuda12.9ext-gcc14/devcontainer.json index 81cba343b58..847b035d735 100644 --- a/.devcontainer/cuda12.9ext-gcc13/devcontainer.json +++ b/.devcontainer/cuda12.9ext-gcc14/devcontainer.json @@ -1,10 +1,10 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.10-cpp-gcc13-cuda12.9ext", + "image": "rapidsai/devcontainers:25.10-cpp-gcc14-cuda12.9ext", "runArgs": [ "--init", "--name", - "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9ext-gcc13" + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9ext-gcc14" ], "hostRequirements": { "gpu": "optional" @@ -24,11 +24,11 @@ "SCCACHE_BUCKET": "rapids-sccache-devs", "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.9ext-gcc13", + "DEVCONTAINER_NAME": "cuda12.9ext-gcc14", "CCCL_CUDA_VERSION": "12.9", "CCCL_HOST_COMPILER": "gcc", - "CCCL_HOST_COMPILER_VERSION": "13", - "CCCL_BUILD_INFIX": "cuda12.9ext-gcc13", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda12.9ext-gcc14", "CCCL_CUDA_EXTENDED": "true", "HOST_WORKSPACE": 
"${localWorkspaceFolder}" }, @@ -62,5 +62,5 @@ } } }, - "name": "cuda12.9ext-gcc13" + "name": "cuda12.9ext-gcc14" } diff --git a/.devcontainer/cuda12.9ext-llvm19/devcontainer.json b/.devcontainer/cuda12.9ext-llvm19/devcontainer.json new file mode 100644 index 00000000000..b671c55e7ad --- /dev/null +++ b/.devcontainer/cuda12.9ext-llvm19/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-llvm19-cuda12.9ext", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9ext-llvm19" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.9ext-llvm19", + "CCCL_CUDA_VERSION": "12.9", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "19", + "CCCL_BUILD_INFIX": "cuda12.9ext-llvm19", + "CCCL_CUDA_EXTENDED": "true", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda12.9ext-llvm19" +} diff --git a/.devcontainer/cuda13.0-gcc11/devcontainer.json b/.devcontainer/cuda13.0-gcc11/devcontainer.json new file mode 100644 index 00000000000..ab1085badd1 --- /dev/null +++ b/.devcontainer/cuda13.0-gcc11/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-gcc11-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-gcc11" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-gcc11", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda13.0-gcc11", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-gcc11" +} diff --git a/.devcontainer/cuda13.0-gcc12/devcontainer.json b/.devcontainer/cuda13.0-gcc12/devcontainer.json new file mode 100644 index 00000000000..533843e2c09 --- /dev/null +++ b/.devcontainer/cuda13.0-gcc12/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-gcc12-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-gcc12" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-gcc12", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda13.0-gcc12", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-gcc12" +} diff --git a/.devcontainer/cuda13.0-gcc13/devcontainer.json b/.devcontainer/cuda13.0-gcc13/devcontainer.json new file mode 100644 index 00000000000..fdbe506273f --- /dev/null +++ b/.devcontainer/cuda13.0-gcc13/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-gcc13-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-gcc13" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-gcc13", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "13", + "CCCL_BUILD_INFIX": "cuda13.0-gcc13", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-gcc13" +} diff --git a/.devcontainer/cuda13.0-gcc14/devcontainer.json b/.devcontainer/cuda13.0-gcc14/devcontainer.json new file mode 100644 index 00000000000..c3b3730cd5b --- /dev/null +++ b/.devcontainer/cuda13.0-gcc14/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-gcc14-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-gcc14" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-gcc14", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda13.0-gcc14", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-gcc14" +} diff --git a/.devcontainer/cuda13.0-llvm15/devcontainer.json b/.devcontainer/cuda13.0-llvm15/devcontainer.json new file mode 100644 index 00000000000..67f1d8daf57 --- /dev/null +++ b/.devcontainer/cuda13.0-llvm15/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-llvm15-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-llvm15" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-llvm15", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "15", + "CCCL_BUILD_INFIX": "cuda13.0-llvm15", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-llvm15" +} diff --git a/.devcontainer/cuda13.0-llvm16/devcontainer.json b/.devcontainer/cuda13.0-llvm16/devcontainer.json new file mode 100644 index 00000000000..8570a7bbfb8 --- /dev/null +++ b/.devcontainer/cuda13.0-llvm16/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-llvm16-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-llvm16" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-llvm16", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "16", + "CCCL_BUILD_INFIX": "cuda13.0-llvm16", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-llvm16" +} diff --git a/.devcontainer/cuda13.0-llvm17/devcontainer.json b/.devcontainer/cuda13.0-llvm17/devcontainer.json new file mode 100644 index 00000000000..034ad5d3e91 --- /dev/null +++ b/.devcontainer/cuda13.0-llvm17/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-llvm17-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-llvm17" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-llvm17", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "17", + "CCCL_BUILD_INFIX": "cuda13.0-llvm17", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-llvm17" +} diff --git a/.devcontainer/cuda13.0-llvm18/devcontainer.json b/.devcontainer/cuda13.0-llvm18/devcontainer.json new file mode 100644 index 00000000000..c3e8c8779b6 --- /dev/null +++ b/.devcontainer/cuda13.0-llvm18/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-llvm18-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-llvm18" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-llvm18", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "18", + "CCCL_BUILD_INFIX": "cuda13.0-llvm18", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-llvm18" +} diff --git a/.devcontainer/cuda13.0-llvm19/devcontainer.json b/.devcontainer/cuda13.0-llvm19/devcontainer.json new file mode 100644 index 00000000000..9741e4091ee --- /dev/null +++ b/.devcontainer/cuda13.0-llvm19/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": 
"rapidsai/devcontainers:25.10-cpp-llvm19-cuda13.0", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-llvm19" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0-llvm19", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "19", + "CCCL_BUILD_INFIX": "cuda13.0-llvm19", + "CCCL_CUDA_EXTENDED": "false", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + 
"source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0-llvm19" +} diff --git a/.devcontainer/cuda13.0ext-gcc14/devcontainer.json b/.devcontainer/cuda13.0ext-gcc14/devcontainer.json new file mode 100644 index 00000000000..96d93260d12 --- /dev/null +++ b/.devcontainer/cuda13.0ext-gcc14/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:25.10-cpp-gcc14-cuda13.0ext", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0ext-gcc14" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . 
devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0ext-gcc14", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda13.0ext-gcc14", + "CCCL_CUDA_EXTENDED": "true", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0ext-gcc14" +} diff --git a/.devcontainer/cuda13.0ext-llvm19/devcontainer.json b/.devcontainer/cuda13.0ext-llvm19/devcontainer.json new file mode 100644 index 00000000000..9e8cb48f8f4 --- /dev/null +++ b/.devcontainer/cuda13.0ext-llvm19/devcontainer.json @@ -0,0 +1,66 @@ +{ + "shutdownAction": "stopContainer", + 
"image": "rapidsai/devcontainers:25.10-cpp-llvm19-cuda13.0ext", + "runArgs": [ + "--init", + "--name", + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0ext-llvm19" + ], + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/{build,wheelhouse}; if test -z ${localEnv:WSLENV}; then docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build >/dev/null; docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/wheelhouse --opt o=bind cccl-wheelhouse >/dev/null; else docker volume create cccl-build >/dev/null; docker volume create cccl-wheelhouse >/dev/null; fi;" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda13.0ext-llvm19", + "CCCL_CUDA_VERSION": "13.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "19", + "CCCL_BUILD_INFIX": "cuda13.0ext-llvm19", + "CCCL_CUDA_EXTENDED": "true", + "HOST_WORKSPACE": "${localWorkspaceFolder}" + }, + "workspaceFolder": "/home/coder/cccl", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cccl,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", + 
"source=cccl-build,target=/home/coder/cccl/build", + "source=cccl-wheelhouse,target=/home/coder/cccl/wheelhouse" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "seaube.clangformat", + "nvidia.nsight-vscode-edition", + "ms-vscode.cmake-tools" + ], + "settings": { + "editor.defaultFormatter": "seaube.clangformat", + "editor.formatOnSave": true, + "clang-format.executable": "/usr/bin/clang-format", + "clangd.arguments": [ + "--header-insertion=never", + "--compile-commands-dir=${workspaceFolder}" + ], + "files.eol": "\n", + "files.trimTrailingWhitespace": true + } + } + }, + "name": "cuda13.0ext-llvm19" +} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 459c8e21601..c3b3730cd5b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,10 +1,10 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.10-cpp-gcc13-cuda12.9", + "image": "rapidsai/devcontainers:25.10-cpp-gcc14-cuda13.0", "runArgs": [ "--init", "--name", - "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda12.9-gcc13" + "${localEnv:USER:anon}-${localWorkspaceFolderBasename}-cuda13.0-gcc14" ], "hostRequirements": { "gpu": "optional" @@ -24,11 +24,11 @@ "SCCACHE_BUCKET": "rapids-sccache-devs", "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.9-gcc13", - "CCCL_CUDA_VERSION": "12.9", + "DEVCONTAINER_NAME": "cuda13.0-gcc14", + "CCCL_CUDA_VERSION": "13.0", "CCCL_HOST_COMPILER": "gcc", - "CCCL_HOST_COMPILER_VERSION": "13", - "CCCL_BUILD_INFIX": "cuda12.9-gcc13", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda13.0-gcc14", "CCCL_CUDA_EXTENDED": "false", "HOST_WORKSPACE": "${localWorkspaceFolder}" }, @@ -62,5 +62,5 @@ } } }, - "name": "cuda12.9-gcc13" + "name": "cuda13.0-gcc14" } diff --git 
a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index 36a197067c8..afc553423a7 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -126,6 +126,10 @@ fi # Get the devcontainer image version and define image tag root readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') +# Internal image compiler versions: +readonly CUDA99_GCC_VERSION=$( echo "$matrix_json" | jq -r '.cuda99_gcc_version') +readonly CUDA99_LLVM_VERSION=$(echo "$matrix_json" | jq -r '.cuda99_clang_version') + # Get unique combinations of cuda version, compiler name/version, and Ubuntu version readonly combinations=$(echo "$matrix_json" | jq -c '.combinations[]') @@ -145,15 +149,8 @@ readonly DEFAULT_NAME=$(make_name "$DEFAULT_CUDA" "$DEFAULT_CUDA_EXT" "$DEFAULT_ update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_CUDA_EXT" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEVCONTAINER_VERSION" "false" mv "./temp_devcontainer.json" ${base_devcontainer_file} -# Always create an extended version of the default devcontainer: -readonly EXT_NAME=$(make_name "$DEFAULT_CUDA" true "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_VERSION") -update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$EXT_NAME" "$DEFAULT_CUDA" true "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEVCONTAINER_VERSION" "false" -mkdir -p "$EXT_NAME" -mv "./temp_devcontainer.json" "$EXT_NAME/devcontainer.json" - - # Create an array to keep track of valid subdirectory names -valid_subdirs=("$EXT_NAME") +valid_subdirs=() # The img folder should not be removed: valid_subdirs+=("img") @@ -164,10 +161,27 @@ for rapids_container in *rapids*; do done # Inject ctk version 99.9 -readonly cuda99_9_gcc=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -rsc '.[].cuda |= "99.9" | .[].internal |= true | .[-1]') -readonly 
cuda99_8_gcc=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -rsc '.[].cuda |= "99.8" | .[].internal |= true | .[-1]') -readonly cuda99_9_llvm=$(echo "$NEWEST_LLVM_CUDA_ENTRY" | jq -rsc '.[].cuda |= "99.9" | .[].internal |= true | .[-1]') -readonly cuda99_8_llvm=$(echo "$NEWEST_LLVM_CUDA_ENTRY" | jq -rsc '.[].cuda |= "99.8" | .[].internal |= true | .[-1]') +make_compiler_entry() { + local compiler_name="$1" + local compiler_version="$2" + local compiler_exe="$3" + local cuda_version="$4" + local cuda_ext="$5" + local internal="${6:-false}" + echo "{ + \"cuda\": \"$cuda_version\", + \"cuda_ext\": $cuda_ext, + \"compiler_name\": \"$compiler_name\", + \"compiler_exe\": \"$compiler_exe\", + \"compiler_version\": \"$compiler_version\", + \"internal\": $internal + }" | jq -c '.' +} + +readonly cuda99_8_gcc=$( make_compiler_entry "gcc" "$CUDA99_GCC_VERSION" "gcc" "99.8" "false" "true") +readonly cuda99_9_gcc=$( make_compiler_entry "gcc" "$CUDA99_GCC_VERSION" "gcc" "99.9" "false" "true") +readonly cuda99_8_llvm=$(make_compiler_entry "llvm" "$CUDA99_LLVM_VERSION" "clang" "99.8" "false" "true") +readonly cuda99_9_llvm=$(make_compiler_entry "llvm" "$CUDA99_LLVM_VERSION" "clang" "99.9" "false" "true") readonly all_comb="$combinations $cuda99_9_gcc $cuda99_8_gcc $cuda99_9_llvm $cuda99_8_llvm" # For each unique combination diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2401037a349..f2365bb69a6 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -14,6 +14,7 @@ ci/ @nvidia/cccl-infra-codeowners .clang-format @nvidia/cccl-infra-codeowners .clangd @nvidia/cccl-infra-codeowners c2h/ @nvidia/cccl-infra-codeowners +nvbench_helper/ @nvidia/cccl-infra-codeowners .vscode @nvidia/cccl-infra-codeowners # cmake @@ -27,5 +28,6 @@ benchmarks/ @nvidia/cccl-benchmark-codeowners **/benchmarks @nvidia/cccl-benchmark-codeowners # docs +README.md @nvidia/cccl-codeowners docs/ @nvidia/cccl-codeowners examples/ @nvidia/cccl-codeowners diff --git a/.github/actions/workflow-build/action.yml 
b/.github/actions/workflow-build/action.yml index 882bb8c8ae0..20755153429 100644 --- a/.github/actions/workflow-build/action.yml +++ b/.github/actions/workflow-build/action.yml @@ -88,10 +88,12 @@ runs: run: | echo "Parsing matrix file into a workflow..." + echo "::group::Generating GHA workflow from matrix.yaml" ${{ env.matrix_parser }} ${{ inputs.matrix_file }} \ --workflows ${{ inputs.workflows }} \ ${{ env.allow_override }} \ ${{ env.dirty_projects_flag }} ${{ env.dirty_projects }} + echo "::endgroup::" if [[ -f workflow/override.json ]]; then echo "::group::Override matrix" diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py index 4dc358d7426..61a75854426 100755 --- a/.github/actions/workflow-build/build-workflow.py +++ b/.github/actions/workflow-build/build-workflow.py @@ -119,10 +119,14 @@ def canonicalize_ctk_version(ctk_string): if ctk_string in matrix_yaml["ctk_versions"]: return ctk_string - # Check for aka's: + # Check for aliases: for ctk_key, ctk_value in matrix_yaml["ctk_versions"].items(): - if "aka" in ctk_value and ctk_string == ctk_value["aka"]: - return ctk_key + if "alias" in ctk_value: + # Allow a string or list of strings: + aliases = ctk_value["alias"] + aliases = [aliases] if isinstance(aliases, str) else aliases + if ctk_string in aliases: + return ctk_key raise Exception(f"Unknown CTK version '{ctk_string}'") @@ -136,7 +140,19 @@ def get_ctk(ctk_string): @memoize_result def parse_cxx_string(cxx_string): "Returns (id, version) tuple. Version may be None if not present." - return re.match(r"^([a-z]+)-?([\d\.]+)?$", cxx_string).groups() + # Captures three groups: + # 0: The compiler ID (e.g. 'nvhpc' in ['nvhpc', 'nvhpc25.7', 'nvhpc-25.7', 'nvhpc-prev']) + # 1: A maybe-hyphenated numeric version suffix (e.g. '10' in ['gcc10', 'gcc-10']) + # 2: A hyphenated string alias (e.g. 'prev' in 'nvhpc-prev') + # + # Either 1, 2, or both may be None. 
+ match = re.match(r"^([^\d-]+)(?:(-?[\d\.]+)|-(.+))?$", cxx_string).groups() + # Clean up to (id, version): + if match[2] is None: + return (match[0], match[1]) + else: + return (match[0], match[2]) + return match @memoize_result @@ -163,11 +179,15 @@ def canonicalize_host_compiler_name(cxx_string): hc_def["versions"].keys(), key=lambda x: tuple(map(int, x.split("."))) ) - # Check for aka's: + # Check for aliases: if version not in hc_def["versions"]: for version_key, version_data in hc_def["versions"].items(): - if "aka" in version_data and version == version_data["aka"]: - version = version_key + if "alias" in version_data: + # Allow a string or list of strings: + aliases = version_data["alias"] + aliases = [aliases] if isinstance(aliases, str) else aliases + if version in aliases: + version = version_key if version not in hc_def["versions"]: raise Exception(f"Unknown version '{version}' for host compiler '{id}'.") @@ -284,7 +304,11 @@ def get_job_type_info(job): result["gpu"] = False if "cuda_ext" not in result: result["cuda_ext"] = False - if "force_producer_ctk" not in result: + if "force_producer_ctk" in result: + result["force_producer_ctk"] = canonicalize_ctk_version( + result["force_producer_ctk"] + ) + else: result["force_producer_ctk"] = None if "needs" not in result: result["needs"] = None @@ -396,13 +420,8 @@ def generate_dispatch_group_name(matrix_job): def generate_dispatch_job_name(matrix_job, job_type): job_info = get_job_type_info(job_type) - ctk = matrix_job["ctk"] - std_str = ("C++" + str(matrix_job["std"]) + " ") if "std" in matrix_job else "" cpu_str = matrix_job["cpu"] gpu_str = (", " + matrix_job["gpu"].upper()) if job_info["gpu"] else "" - py_version = ( - (", py" + matrix_job["py_version"]) if "py_version" in matrix_job else "" - ) cuda_compile_arch = ( (" sm{" + str(matrix_job["sm"]) + "}") if "sm" in matrix_job else "" ) @@ -410,9 +429,16 @@ def generate_dispatch_job_name(matrix_job, job_type): (" " + matrix_job["cmake_options"]) if 
"cmake_options" in matrix_job else "" ) + ctk = matrix_job["ctk"] host_compiler = get_host_compiler(matrix_job["cxx"]) + std_str = (" C++" + str(matrix_job["std"])) if "std" in matrix_job else "" + py_str = ( + (" py" + str(matrix_job["py_version"])) if "py_version" in matrix_job else "" + ) - config_tag = f"CTK{ctk} {std_str}{host_compiler['name']}{host_compiler['version']}" + config_tag = ( + f"CTK{ctk} {host_compiler['name']}{host_compiler['version']}{std_str}{py_str}" + ) extra_info = ( f":{cuda_compile_arch}{cmake_options}" @@ -420,9 +446,7 @@ def generate_dispatch_job_name(matrix_job, job_type): else "" ) - return ( - f"[{config_tag}] {job_info['name']}({cpu_str}{gpu_str}{py_version}){extra_info}" - ) + return f"[{config_tag}] {job_info['name']}({cpu_str}{gpu_str}){extra_info}" def generate_dispatch_job_runner(matrix_job, job_type): @@ -513,23 +537,29 @@ def generate_dispatch_job_origin(matrix_job, job_type): job_info = get_job_type_info(job_type) + # Replace the unexploded 'jobs' tag with the current single job type: + origin_job["jobs"] = [job_info["id"]] + # The origin tags are used to build the execution summary for the CI PR comment. 
# Use the human readable job label for the execution summary: - origin_job["jobs"] = job_info["name"] + origin_job["job_name"] = job_info["name"] + + if not job_info["gpu"]: + del origin_job["gpu"] # Replace some of the clunkier tags with a summary-friendly version: if "cxx" in origin_job: host_compiler = get_host_compiler(matrix_job["cxx"]) del origin_job["cxx"] - origin_job["cxx"] = host_compiler["name"] + host_compiler["version"] + origin_job["cxx"] = host_compiler["id"] + host_compiler["version"] origin_job["cxx_family"] = host_compiler["name"] if "cudacxx" in origin_job: device_compiler = get_device_compiler(matrix_job) del origin_job["cudacxx"] - origin_job["cudacxx"] = device_compiler["name"] + device_compiler["version"] + origin_job["cudacxx"] = device_compiler["id"] + device_compiler["version"] origin_job["cudacxx_family"] = device_compiler["name"] origin["matrix_job"] = origin_job @@ -1243,6 +1273,9 @@ def print_gha_workflow(args): def print_devcontainer_info(args): devcontainer_version = matrix_yaml["devcontainer_version"] + cuda99_gcc_version = matrix_yaml["cuda99_gcc_version"] + cuda99_clang_version = matrix_yaml["cuda99_clang_version"] + matrix_jobs = [] # Remove the `exclude` and `override` entries: @@ -1253,14 +1286,19 @@ def print_devcontainer_info(args): for workflow_name in workflow_names: matrix_jobs.extend(parse_workflow_matrix_jobs(args, workflow_name)) + # Explode jobs to ensure that the cuda_ext tags are correctly handled: + exploded_jobs = [] + for matrix_job in matrix_jobs: + exploded_jobs.extend(explode_tags(matrix_job, "jobs")) + matrix_jobs = exploded_jobs + # Check if the extended cuda images are needed: for matrix_job in matrix_jobs: cuda_ext = False - for job in matrix_job["jobs"]: - job_info = get_job_type_info(job) - if job_info["cuda_ext"]: - cuda_ext = True - break + job = matrix_job["jobs"] + job_info = get_job_type_info(job) + if job_info["cuda_ext"]: + cuda_ext = True matrix_job["cuda_ext"] = cuda_ext # Remove all but the 
following keys from the matrix jobs: @@ -1285,6 +1323,8 @@ def print_devcontainer_info(args): devcontainer_json = { "devcontainer_version": devcontainer_version, + "cuda99_gcc_version": cuda99_gcc_version, + "cuda99_clang_version": cuda99_clang_version, "combinations": unique_combinations, } diff --git a/.github/actions/workflow-results/prepare-execution-summary.py b/.github/actions/workflow-results/prepare-execution-summary.py index aa932775ea0..f3304b17536 100755 --- a/.github/actions/workflow-results/prepare-execution-summary.py +++ b/.github/actions/workflow-results/prepare-execution-summary.py @@ -123,7 +123,10 @@ def build_summary(jobs, job_times=None): update_summary_entry(projects[project], job, job_times) for tag in matrix_job.keys(): - if tag == "project": + # These are excluded from the summary table: + # - Project is already the top-level grouping. + # - Human-readable 'job_name' is used in place of 'jobs'. + if tag in ["project", "jobs"]: continue if tag not in tags: diff --git a/.github/actions/workflow-run-job-linux/action.yml b/.github/actions/workflow-run-job-linux/action.yml index 543b1b00712..f9420efdfee 100644 --- a/.github/actions/workflow-run-job-linux/action.yml +++ b/.github/actions/workflow-run-job-linux/action.yml @@ -45,6 +45,7 @@ runs: echo "::endgroup::" - name: Add NVCC problem matcher + continue-on-error: true shell: bash --noprofile --norc -euo pipefail {0} run: | echo "::add-matcher::${{github.workspace}}/.github/problem-matchers/problem-matcher.json" @@ -54,6 +55,28 @@ runs: role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA aws-region: us-east-2 role-duration-seconds: 43200 # 12 hours + - name: Print CI override matrix job def + env: + GH_TOKEN: ${{ github.token }} + continue-on-error: true + shell: bash --noprofile --norc -euo pipefail {0} + run: | + # Get the origin matrix job definition embedded in the workflow artifact: + matrix_job=$(ci/util/workflow/get_job_def.sh | jq -c '.origin.matrix_job') + + # Delete the
cxx_family and cudacxx_family fields + matrix_job=$(echo "$matrix_job" | jq 'del(.cxx_family, .cudacxx_family, .job_name)') + + # Convert to a single line of YAML, with unquoted keys: + matrix_job=$( + echo "- $matrix_job" | + yq -o=yaml | + sed -E 's/"([[:alnum:]_]+)"([[:space:]]*):/\1\2:/g' | + tr '"' "'" + ) + + echo -e "\e[1;34mOverride matrix entry:\e[0m" + echo -e "\e[1;34m $matrix_job\e[0m" - name: Run command # Do not change this step's name, it is checked in parse-job-times.py id: run shell: bash --noprofile --norc -euo pipefail {0} diff --git a/.github/workflows/build-matx.yml b/.github/workflows/build-matx.yml index 2e8bd345521..1485e47c62b 100644 --- a/.github/workflows/build-matx.yml +++ b/.github/workflows/build-matx.yml @@ -92,7 +92,7 @@ jobs: .devcontainer/launch.sh \ --docker \ --cuda 12.9 \ - --host gcc13 \ + --host gcc14 \ --cuda-ext \ --env "CCCL_TAG=${CCCL_TAG}" \ --env "CCCL_VERSION=${CCCL_VERSION}" \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3cf3f78478e..0830d733b32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,9 +25,13 @@ if (NOT CCCL_TOPLEVEL_PROJECT) include(cmake/CCCLAddSubdir.cmake) endif() -# We require a higher cmake version for dev builds if (CCCL_TOPLEVEL_PROJECT) + # We require a higher cmake version for dev builds cmake_minimum_required(VERSION 3.21) + + # Handle special CCCL values for CMAKE_CUDA_ARCHITECTURES + include(cmake/CCCLCheckCudaArchitectures.cmake) + cccl_check_cuda_architectures() endif() option(CCCL_ENABLE_LIBCUDACXX "Enable the libcu++ developer build." 
OFF) diff --git a/CMakePresets.json b/CMakePresets.json index b39ab345fc9..537e7ebc88a 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -13,7 +13,7 @@ "binaryDir": "${sourceDir}/build/$env{CCCL_BUILD_INFIX}/${presetName}", "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", - "CMAKE_CUDA_ARCHITECTURES": "60;70;80", + "CMAKE_CUDA_ARCHITECTURES": "all-major-cccl", "CCCL_ENABLE_UNSTABLE": true, "CCCL_ENABLE_LIBCUDACXX": false, "CCCL_ENABLE_CUB": false, @@ -55,6 +55,7 @@ "name": "all-dev", "inherits": "base", "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "native", "CCCL_ENABLE_LIBCUDACXX": true, "CCCL_ENABLE_CUB": true, "CCCL_ENABLE_THRUST": true, @@ -95,6 +96,7 @@ "displayName": "all-dev debug", "inherits": "all-dev", "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "native", "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CUDA_FLAGS": "-G", "CCCL_ENABLE_BENCHMARKS": false, diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h index efd71918c0d..94e65f6b58b 100644 --- a/c2h/include/c2h/catch2_test_helper.h +++ b/c2h/include/c2h/catch2_test_helper.h @@ -36,6 +36,14 @@ # define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma("diag push") # define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma("diag pop") #endif +// The nv_diagnostic pragmas in Catch2 macros cause cicc to hang indefinitely in CTK 13.0. +// See NVBugs 5475335. 
+#if _CCCL_VERSION_COMPARE(_CCCL_CTK_, _CCCL_CTK, ==, 13, 0) +# undef CATCH_INTERNAL_START_WARNINGS_SUPPRESSION +# undef CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION +#endif // workaround for error // * MSVC14.39: #3185-D: no '#pragma diagnostic push' was found to match this 'diagnostic pop' // * MSVC14.29: internal error: assertion failed: alloc_copy_of_pending_pragma: copied pragma has source sequence entry diff --git a/ci/build_common.sh b/ci/build_common.sh index 0743a740162..a649b0170a9 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -96,13 +96,29 @@ if [ -z ${CCCL_BUILD_INFIX+x} ]; then CCCL_BUILD_INFIX="" fi -# Presets will be configured in this directory: -BUILD_DIR="../build/${CCCL_BUILD_INFIX}" +mkdir -p ../build +# Absolute path to cccl/build +BUILD_ROOT=$(cd "../build" && pwd) -# The most recent build will always be symlinked to cccl/build/latest +# Absolute path to per-devcontainer build directory +BUILD_DIR="$BUILD_ROOT/$CCCL_BUILD_INFIX" + +# The most recent devcontainer build dir will always be symlinked to cccl/build/latest mkdir -p $BUILD_DIR -rm -f ../build/latest -ln -sf $BUILD_DIR ../build/latest +rm -f $BUILD_ROOT/latest +ln -sf $BUILD_DIR $BUILD_ROOT/latest + +# The most recent preset build dir will always be symlinked to: +# cccl/build/latest/latest +# cccl/build/preset-latest +function symlink_latest_preset { + local PRESET=$1 + mkdir -p "$BUILD_DIR/$PRESET" + rm -f "$BUILD_ROOT/latest/latest" + ln -sf "$BUILD_DIR/$PRESET" "$BUILD_ROOT/latest/latest" + rm -f "$BUILD_ROOT/preset-latest" + ln -sf "$BUILD_DIR/$PRESET" "$BUILD_ROOT/preset-latest" +} # Now that BUILD_DIR exists, use readlink to canonicalize the path: BUILD_DIR=$(readlink -f "${BUILD_DIR}") @@ -192,6 +208,8 @@ function configure_preset() local CMAKE_OPTIONS=$3 local GROUP_NAME="🛠️ CMake Configure ${BUILD_NAME}" + symlink_latest_preset "$PRESET" + pushd ..
> /dev/null if [[ -n "${GITHUB_ACTIONS:-}" ]]; then # Retry 5 times with 30 seconds between attempts to try to WAR network issues during CPM fetch on CI runners: @@ -223,6 +241,8 @@ function build_preset() { local red="1;31" local GROUP_NAME="🏗️ Build ${BUILD_NAME}" + symlink_latest_preset "$PRESET" + if $CONFIGURE_ONLY; then return 0 fi @@ -270,6 +290,8 @@ function test_preset() local PRESET=$2 local GPU_REQUIRED=${3:-true} + symlink_latest_preset "$PRESET" + if $CONFIGURE_ONLY; then return 0 fi diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 3f003633eda..e3f31ec78c5 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -5,138 +5,242 @@ workflows: # # Example: # override: - # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'clang16']} + # - {jobs: ['test'], project: 'thrust', std: 17, ctk: '12.X', cxx: ['gcc12', 'clang16']} # override: pull_request: - # Old CTK/compiler - - {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7', 'gcc9', 'clang14', 'msvc2019']} - # Current CTK build-only - - {jobs: ['build'], std: 'max', cxx: ['gcc7', 'gcc8', 'gcc9']} - - {jobs: ['build'], std: 'all', cxx: ['gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18']} - - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} + # Old CTK: Oldest/newest supported host compilers: + - {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7', 'gcc12', 'clang14', 'msvc2019', 'msvc14.39']} + - {jobs: ['build'], std: 'minmax', ctk: '12.X', cxx: ['gcc7', 'gcc', 'clang14', 'clang', 'msvc2019', 'msvc' ]} + - {jobs: ['build'], std: 'minmax', ctk: '13.0', cxx: ['gcc11', 'gcc', 'clang15', 'clang', 'msvc2019', 'msvc' ]} + # Old CTK: cudax has a different support matrix: + - {jobs: ['build'], project: 'cudax', ctk: '12.0', std: 'minmax', cxx: ['gcc9', 'gcc12', 'clang14', 'msvc14.39']} + - {jobs: ['build'], project: 'cudax', ctk: '12.X', std: 
'minmax', cxx: ['gcc9', 'gcc', 'clang14', 'clang', 'msvc']} + - {jobs: ['build'], project: 'cudax', ctk: '13.0', std: 'minmax', cxx: ['gcc11', 'gcc', 'clang15', 'clang', 'msvc']} + # Current CTK build-only: + - {jobs: ['build'], std: 'minmax', cxx: ['gcc11', 'clang15', 'msvc2019'] } # Oldest + - {jobs: ['build'], std: 'max', cxx: ['gcc12', 'gcc13'] } + - {jobs: ['build'], std: 'max', cxx: ['clang16', 'clang17'] } + - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Latest + # Current CTK build-only: cudax has a different support matrix: + - {jobs: ['build'], project: 'cudax', std: 'minmax', cxx: ['gcc11', 'clang15']} # Oldest + - {jobs: ['build'], project: 'cudax', std: 'max', cxx: ['gcc12']} + - {jobs: ['build'], project: 'cudax', std: 'max', cxx: ['clang16', 'clang17', 'clang18']} + - {jobs: ['build'], project: 'cudax', std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Newest # Current CTK testing: - - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} - - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} - # Split up cub tests: - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'} - - {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'} - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'} - # Modded builds: - - {jobs: ['build'], std: 'all', ctk: '12.9', cxx: 'nvhpc'} - - {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - {jobs: ['build'], std: 'max', cxx: ['gcc', 'msvc'], sm: ['90;90a', '100;120']} - - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', gpu: 'h100', sm: 'gpu' } - - {jobs: ['test_gpu'], project: 'thrust', std: 'max', gpu: 'h100', sm: 'gpu' } - - {jobs: ['test'], project: ['libcudacxx', 'cudax'], std: 'max', gpu: 'h100', sm: 'gpu' } - # Test Thrust 32-bit-only 
dispatch here, since it's most likely to break. 64-bit-only is tested in nightly. - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - # default_projects: clang-cuda - - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - # Disabled; see discussion on #3633. Should be fixed in clang-20. -# - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'} -# - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'} - # nvrtc: + - {jobs: ['test'], project: 'thrust', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'libcudacxx', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cudax', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test_lid1', 'test_lid2'], project: 'cub', std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'} + # H100 coverage: + - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', gpu: 'h100' } + - {jobs: ['test_gpu'], project: 'thrust', std: 'max', gpu: 'h100' } + - {jobs: ['test'], project: ['libcudacxx', 'cudax'], std: 'max', gpu: 'h100' } + # Misc: + - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], std: 'max', cxx: ['gcc', 'clang']} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - # verify-codegen: - {jobs: ['verify_codegen'], project: 'libcudacxx'} - # cudax has different CTK reqs: - - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.39', 'gcc10', 'clang14']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], 
project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18']} - - {jobs: ['build'], project: 'cudax', ctk: ['12.9'], std: 'all', cxx: ['nvhpc']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc', 'msvc'], sm: ['90;90a', '100;120']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} - # Python and c/parallel jobs: - - {jobs: ['test'], project: ['cccl_c_parallel'], gpu: ['rtx2080', 'l4', 'h100']} - # TODO Just need this line once cccl.parallel tests pass on 12.5 and 12.6: - # - {jobs: ['test'], project: 'python', ctk: ['12.5', 'curr'], py_version: ['3.10', '3.13'], gpu: 'l4'} - # These two can be removed once the above is working: - - {jobs: ['test_py_headers', 'test_py_coop', 'test_py_examples'], ctk: ['12.5', 'curr'], project: 'python', py_version: ['3.10', '3.13'], gpu: 'l4'} - - {jobs: ['test_py_par'], ctk: ['12.8', 'curr'], project: 'python', py_version: ['3.10', '3.13'], gpu: 'l4'} - - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100'} - # packaging: - - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'} - - {jobs: ['test'], project: 'packaging', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + # c.parallel -- pinned to gcc13 to match python + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + # Python -- pinned to gcc13 for consistency across CTK images + - {jobs: ['test_py_headers', 'test_py_coop'], ctk: ['12.5', '13.X'], project: 'python', py_version: ['3.10', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test_py_par', 'test_py_examples'], 
ctk: ['12.8', '13.X'], project: 'python', py_version: ['3.10', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + # CCCL packaging: + - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} - {jobs: ['install'], project: 'packaging'} - # NVHPC stdpar smoke tests - - {jobs: ['build'], project: 'stdpar', std: 'all', ctk: '12.9', cxx: 'nvhpc', cpu: ['amd64', 'arm64']} + # NVHPC build + - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + # clang-cuda + # - Can't add sm90+ until clang20 (#3633) + - {jobs: ['build'], cudacxx: 'clang', ctk: '12.X', std: 'all', cxx: 'clang', sm: '75;80'} + # - CTK 13.X unsupported: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/11079#note_1692019 + # - {jobs: ['build'], cudacxx: 'clang', ctk: '13.X', std: 'all', cxx: 'clang', sm: '75;80'} nightly: - # Edge-case jobs + # CTK 12.0 full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['clang14']} + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['msvc2019', 'msvc14.39']} + # CTK 12.X full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['msvc2019', 'msvc2022']} + # CTK 13.0 full 
matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['msvc2019', 'msvc2022']} + # CTK '13.X' full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['msvc2019', 'msvc2022']} + # CTK 12.0 full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['clang14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['msvc14.39']} + # CTK 12.X full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['msvc2022']} + # CTK 13.0 full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['msvc2022']} + # CTK '13.X' full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 
'cudax', std: 'all', ctk: '13.X', cxx: ['msvc2022']} + # CTK 12.X testing: + - {jobs: ['test'], project: 'libcudacxx', ctk: '12.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cub', ctk: '12.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test'], project: 'thrust', ctk: '12.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'cudax', ctk: '12.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'max', gpu: 'h100' } + # CTK '13.X' testing: + - {jobs: ['test'], project: 'libcudacxx', ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cub', ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test'], project: 'thrust', ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'cudax', ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'max', gpu: 'h100' } + # Misc: + - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: ['12.X', '13.X'], std: 'all', cxx: ['gcc', 'clang']} + # Coming in a later PR after some logistical issues with `-arch all` and RAM usage are resolved: + # - {jobs: ['build'], sm: 'all-cccl', project: ['cub', 'thrust', 'libcudacxx', 'cudax'], ctk: ['12.X', '13.X'], std: 'all', cxx: ['gcc', 'msvc'] } + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'} - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: 
'-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'} - # Old CTK/compiler - - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} - - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} - # Current CTK build-only - - {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18']} - - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} - # Test current CTK - - {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} - - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} - # Modded builds: - - {jobs: ['build'], std: 'all', ctk: '12.9', cxx: 'nvhpc'} - - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - # Extended GPU tests: - - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', sm: '70;75;89;90', gpu: ['v100', 't4', 'l4', 'h100']} - - {jobs: ['test_gpu'], project: 'thrust', std: 'max', sm: '70;75;89;90', gpu: ['v100', 't4', 'l4', 'h100']} - - {jobs: ['test'], project: ['libcudacxx', 'cudax'], std: 'max', sm: '70;75;89;90', gpu: ['v100', 't4', 'l4', 'h100']} - # MSVC build coverage of extended GPU nightly tests: - - {jobs: ['build'], project: ['cub', 'thrust', 'libcudacxx', 'cudax'], std: 'max', sm: '70;75;89;90', cxx: 'msvc'} - # Build-only coverage of extended arches. `sm` split up to parallelize. 
- - {jobs: ['build'], project: ['cub', 'thrust', 'libcudacxx', 'cudax'], std: 'max', sm: ['86;90a', '100;103', '120'], cxx: ['msvc', 'gcc'] } - - # default_projects: clang-cuda - - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - # Disabled; see discussion on #3633. Should be fixed in clang-20. - # - {jobs: ['build'], project: 'libcudacxx', std: 'all', cudacxx: 'clang', cxx: 'clang', sm: '90'} - # - {jobs: ['build'], project: 'libcudacxx', std: 'all', cudacxx: 'clang', cxx: 'clang', sm: '90a'} - # cudax - - {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18']} - - {jobs: ['build'], project: 'cudax', ctk: [ '12.9'], std: 'all', cxx: ['nvhpc']} - - {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: '20', cxx: ['msvc14.39']} - - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: '20', cxx: ['msvc']} - - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: '20', cxx: ['gcc', 'msvc'], sm: ['90;90a', '100;120']} - - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'} - - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc'] , gpu: 'rtx2080'} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'} - - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang'], gpu: 'rtx2080'} - # Python and c/parallel jobs: - - {jobs: ['test'], project: ['cccl_c_parallel'], gpu: ['rtx2080', 'l4', 'h100']} - # TODO Just need this line once cccl.parallel tests pass on 12.5 and 12.6: - # - {jobs: ['test'], project: 'python', ctk: ['12.5', '12.6', '12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4'} - # These two can 
be removed once the above is working. - - {jobs: ['test_py_headers', 'test_py_coop', 'test_py_examples'], ctk: ['12.5', '12.6', '12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], project: 'python', gpu: 'l4'} - - {jobs: ['test_py_par'], ctk: [ '12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], project: 'python', gpu: 'l4'} - - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100'} - # packaging: - - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'} - - {jobs: ['test'], project: 'packaging', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + # NVRTC tests don't currently support 12.0: + - {jobs: ['nvrtc'], project: 'libcudacxx', ctk: [ '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['verify_codegen'], project: 'libcudacxx', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: 'gcc12'} + # c.parallel -- pinned to gcc13 to match python + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + # Python -- pinned to gcc13 for consistency across CTK images + - {jobs: ['test_py_headers', 'test_py_coop'], project: 'python', ctk: ['12.5', '12.6', '12.8', '12.9', '13.0'], py_version: ['3.10', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test_py_par', 'test_py_examples'], project: 'python', ctk: [ '12.8', '12.9', '13.0'], py_version: ['3.10', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + # CCCL packaging: + - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 
'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} - {jobs: ['install'], project: 'packaging'} - # NVHPC stdpar smoke tests - - {jobs: ['build'], project: 'stdpar', std: 'all', ctk: '12.9', cxx: 'nvhpc', cpu: ['amd64', 'arm64']} + # NVHPC build + - {jobs: ['build'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + # clang-cuda + # - Can't add sm90+ until clang20 (#3633) + - {jobs: ['build'], cudacxx: 'clang', ctk: '12.X', std: 'all', cxx: 'clang', sm: '75;80'} + # - CTK 13.X unsupported: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/11079#note_1692019 + # - {jobs: ['build'], cudacxx: 'clang', ctk: '13.X', std: 'all', cxx: 'clang', sm: '75;80'} weekly: + # CTK 12.0 full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['clang14']} + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['msvc2019', 'msvc14.39']} + # CTK 12.X full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['msvc2019', 'msvc2022']} + # CTK 13.0 full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - 
{jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['msvc2019', 'msvc2022']} + # CTK '13.X' full matrix build: default projects + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['msvc2019', 'msvc2022']} + # CTK 12.0 full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['clang14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['msvc14.39']} + # CTK 12.X full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['msvc2022']} + # CTK 13.0 full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['msvc2022']} + # CTK '13.X' full matrix build: cudax + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19']} + - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['msvc2022']} + # CTK 12.X testing: + - {jobs: ['test'], project: 'libcudacxx', ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cub', ctk: 
'12.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test'], project: 'thrust', ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'cudax', ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'minmax', gpu: 'h100' } + # CTK '13.X' testing: + - {jobs: ['test'], project: 'libcudacxx', ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cub', ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test'], project: 'thrust', ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'cudax', ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} + - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'minmax', gpu: 'h100' } + # Misc: + - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: ['12.X', '13.X'], std: 'all', cxx: ['gcc', 'clang']} + # Coming in a later PR after some logistical issues with `-arch all` and RAM usage are resolved: + # - {jobs: ['build'], sm: 'all-cccl', project: ['cub', 'thrust', 'libcudacxx', 'cudax'], ctk: ['12.X', '13.X'], std: 'all', cxx: ['gcc', 'msvc'] } + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'} + - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'} + # NVRTC tests don't currently support 12.0: + - {jobs: ['nvrtc'], project: 'libcudacxx', ctk: [ '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['verify_codegen'], project: 'libcudacxx', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: 'gcc12'} + # 
c.parallel -- pinned to gcc13 to match python + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + # Python -- pinned to gcc13 for consistency across CTK images + - {jobs: ['test_py_headers', 'test_py_coop'], project: 'python', ctk: ['12.5', '12.6', '12.8', '12.9', '13.0'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test_py_par', 'test_py_examples'], project: 'python', ctk: [ '12.8', '12.9', '13.0'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + # CCCL packaging: + - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} + - {jobs: ['install'], project: 'packaging'} + # NVHPC build + - {jobs: ['build'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + # clang-cuda + # - Can't add sm90+ until clang20 (#3633) + - {jobs: ['build'], cudacxx: 'clang', ctk: '12.X', std: 'all', cxx: 'clang', sm: '75;80'} + # - CTK 13.X unsupported: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/11079#note_1692019 + # - {jobs: ['build'], cudacxx: 'clang', ctk: '13.X', std: 'all', cxx: 'clang', sm: '75;80'} + # compute-sanitizer - {jobs: ['compute_sanitizer'], project: 'cub', std: 'max', gpu: 
'rtxa6000', sm: 'gpu', cmake_options: '-DCMAKE_CUDA_FLAGS=-lineinfo'} python-wheels: - # TODO Just need this line once cccl.parallel tests pass on 12.5 and 12.6: - # - {jobs: ['test'], project: 'python', ctk: ['12.5', '12.6', '12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4'} - # These two can be removed once the above is working. - - {jobs: ['test_py_headers', 'test_py_coop', 'test_py_examples'], ctk: ['12.5', '12.6', '12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], project: 'python', gpu: 'l4'} - - {jobs: ['test_py_par'], ctk: ['12.8', '12.9'], py_version: ['3.10', '3.11', '3.12', '3.13'], project: 'python', gpu: 'l4'} + - {jobs: ['test_py_headers', 'test_py_coop'], project: 'python', ctk: ['12.5', '12.6', '12.8', '12.9', '13.0'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test_py_par', 'test_py_examples'], project: 'python', ctk: [ '12.8', '12.9', '13.0'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'} + - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'} + + # This is just used to ensure that we generate devcontainers for all images we build. + # These do not map to any actual jobs. + devcontainers: + - {jobs: ['dc'], ctk: ['12.0', '12.9' ], cxx: ['clang14']} + - {jobs: ['dc'], ctk: ['12.0', '12.9' ], cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10']} + - {jobs: ['dc'], ctk: ['12.0', '12.9', '13.0'], cxx: ['gcc11', 'gcc12']} + - {jobs: ['dc'], ctk: [ '12.9', '13.0'], cxx: ['gcc13']} + - {jobs: ['dc'], ctk: [ '12.9', '13.0'], cxx: ['clang15', 'clang16', 'clang17', 'clang18']} + - {jobs: ['dc'], ctk: [ '12.9', '13.0'], cxx: ['gcc14', 'clang19']} + - {jobs: ['dc_ext'], ctk: [ '12.9', '13.0'], cxx: ['gcc14', 'clang19']} + # 12.X python images, pinned at gcc13 for consistency, as 12.5 / 12.6 don't support newer gcc. 
+ - {jobs: ['dc'], ctk: ['12.5', '12.6', '12.8', '12.9', '13.0'], cxx: 'gcc13'} + # NVHPC + - {jobs: ['dc'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev'} + - {jobs: ['dc'], cxx: 'nvhpc', ctk: 'nvhpc'} # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows. exclude: @@ -150,16 +254,27 @@ workflows: # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '25.10' +# Compiler versions used for the cuda99.X internal builds: +cuda99_gcc_version: 13 +cuda99_clang_version: 19 + # All supported C++ standards: all_stds: [17, 20] +# Aliases: +# - 12.X: Newest CTK 12.X version. +# - 13.X: Newest CTK 13.X version. +# - nvhpc: CTK shipped in newest NVHPC +# - nvhpc-prev: CTK shipped in previous NVHPC +# - pybuild: Selects image to use for python wheel builds' outer docker instance ctk_versions: 12.0: { stds: [17, 20] } 12.5: { stds: [17, 20] } 12.6: { stds: [17, 20] } # 12.7 not buildable by current devcontainer scripting. 12.8: { stds: [17, 20] } - 12.9: { stds: [17, 20], aka: 'curr' } + 12.9: { stds: [17, 20], alias: ['12.X', 'nvhpc', 'nvhpc-prev', 'pybuild'] } + 13.0: { stds: [17, 20], alias: ['13.X'] } device_compilers: nvcc: # Version / stds are taken from CTK @@ -182,6 +297,7 @@ host_compilers: 11: { stds: [17, 20] } 12: { stds: [17, 20] } 13: { stds: [17, 20] } + 14: { stds: [17, 20] } clang: name: 'Clang' container_tag: 'llvm' @@ -198,24 +314,27 @@ host_compilers: container_tag: 'cl' exe: cl versions: - 14.29: { stds: [ 17, ], aka: '2019' } - 14.39: { stds: [ 17, 20]} # CTK 12.0 doesn't recognize >14.39 as MSVC 2022. - 14.43: { stds: [ 17, 20], aka: '2022' } + 14.29: { stds: [17, ], alias: '2019' } + 14.39: { stds: [17, 20] } # CTK 12.0 doesn't recognize >14.39 as MSVC 2022. + 14.43: { stds: [17, 20], alias: '2022' } nvhpc: name: 'NVHPC' container_tag: 'nvhpc' exe: nvc++ versions: - 25.7: { stds: [17, 20 ] } + # !! 
Update the ctk_versions 'nvhpc*' aliases when updating NVHPC versions: + 25.5: { stds: [17, 20], alias: 'prev' } + 25.7: { stds: [17, 20] } # Jobs support the following properties: # -# - gpu: Whether the job requires a GPU runner. Default is false. # - name: The human-readable name of the job. Default is the capitalized job key. # - needs: # - A list of jobs that must be completed before this job can run. Default is an empty list. -# - These are automatically added if needed: +# - These jobs are automatically added if needed: # - Eg. "jobs: ['test']" in the workflow def will also create the required 'build' jobs. +# - gpu: Whether the job requires a GPU runner. Default is false. +# - cuda_ext: Whether the job requires a devcontainer with extra CUDA libraries. Default is false. # - invoke: # - Map the job type to the script invocation spec: # - prefix: The script invocation prefix. Default is the job name. @@ -230,6 +349,10 @@ host_compilers: # - E.g. "force_producer_ctk: '12.0'" on a test step will force the generated build step to use CTK 12.0. jobs: + # Only used for generating devcontainers. 
No scripts actually exist for these: + dc: { gpu: false } + dc_ext: { gpu: false, cuda_ext: true } + # General: build: { gpu: false } test: { gpu: true, needs: 'build' } @@ -272,10 +395,10 @@ jobs: # Python: build_py_wheel: { name: "Build cuda.cccl", gpu: false, invoke: { prefix: 'build_cuda_cccl'} } - test_py_headers: { name: "Test cuda.cccl.headers", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "12.9", invoke: { prefix: 'test_cuda_cccl_headers'} } - test_py_coop: { name: "Test cuda.cccl.cooperative", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "12.9", invoke: { prefix: 'test_cuda_cccl_cooperative'} } - test_py_par: { name: "Test cuda.cccl.parallel", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "12.9", invoke: { prefix: 'test_cuda_cccl_parallel'} } - test_py_examples: { name: "Test cuda.cccl.examples", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "12.9", invoke: { prefix: 'test_cuda_cccl_examples'} } + test_py_headers: { name: "Test cuda.cccl.headers", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_headers'} } + test_py_coop: { name: "Test cuda.cccl.cooperative", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_cooperative'} } + test_py_par: { name: "Test cuda.cccl.parallel", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_parallel'} } + test_py_examples: { name: "Test cuda.cccl.examples", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_examples'} } # Projects have the following properties: # @@ -350,7 +473,7 @@ tags: jobs: { required: true } # CUDA ToolKit version # See the `ctks` map. 
- ctk: { default: 'curr' } + ctk: { default: '13.X' } # CPU architecture cpu: { default: 'amd64' } # GPU model diff --git a/ci/matx/build_matx.sh b/ci/matx/build_matx.sh index f15f98bca3f..9057a89d132 100755 --- a/ci/matx/build_matx.sh +++ b/ci/matx/build_matx.sh @@ -95,7 +95,7 @@ rm -rf build mkdir build cd build cmake -G Ninja ../MatX \ - "-DCMAKE_CUDA_ARCHITECTURES=60;70;80" \ + "-DCMAKE_CUDA_ARCHITECTURES=75;80" \ "-DRAPIDS_CMAKE_CPM_OVERRIDE_VERSION_FILE=${version_override_file}" \ -DMATX_BUILD_TESTS=ON \ -DMATX_BUILD_EXAMPLES=ON \ diff --git a/ci/util/artifacts/download/fetch.sh b/ci/util/artifacts/download/fetch.sh index 9e1ce9740d8..2f1c682eaaa 100755 --- a/ci/util/artifacts/download/fetch.sh +++ b/ci/util/artifacts/download/fetch.sh @@ -29,6 +29,7 @@ mkdir -p "$2" readonly target_directory="$(cd "$2" && pwd)" echo "Downloading artifact '$artifact_name' to '$target_directory'" -gh run download ${GITHUB_RUN_ID} \ - --name "$artifact_name" \ - --dir "$target_directory" +"$ci_dir/util/retry.sh" 5 30 \ + gh run download ${GITHUB_RUN_ID} \ + --name "$artifact_name" \ + --dir "$target_directory" diff --git a/cmake/CCCLCheckCudaArchitectures.cmake b/cmake/CCCLCheckCudaArchitectures.cmake new file mode 100644 index 00000000000..8ceea67f9cb --- /dev/null +++ b/cmake/CCCLCheckCudaArchitectures.cmake @@ -0,0 +1,106 @@ +# This file provides utilities to handle special CMAKE_CUDA_ARCHITECTURES lists for CCCL. +# +# If CMAKE_CUDA_ARCHITECTURES is set to one of the following values, it will be replaced +# as described: +# +# 'all-cccl': All architectures known to the current NVCC at or above minimum_cccl_arch. +# +# 'all-major-cccl': All major architectures known to the current NVCC above minimum_cccl_arch, +# plus 'minimum_cccl_arch'. 
+# +# For example on 12.9: +# all: 50-real;52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;87-real;89-real;90-real;100-real;101-real;103-real;120-real;121-real;121-virtual +# all-cccl: 75-real;80-real;86-real;87-real;89-real;90-real;100-real;101-real;103-real;120-real;121-real;121-virtual +# all-major: 50-real;60-real;70-real;80-real;90-real;100-real;120-real;120-virtual +# all-major-cccl: 75-real;80-real;90-real;100-real;120-real;120-virtual + +# We don't support arches below what the latest CTK release supports: +set(minimum_cccl_arch 75) # 13.x dropped below Turing + +# Check CMAKE_CUDA_ARCHITECTURES for special CCCL values and update as described above. +function(cccl_check_cuda_architectures) + if (CMAKE_CUDA_ARCHITECTURES MATCHES "-cccl$") + message(STATUS "Detected special CCCL arch request: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") + + _cccl_detect_nvcc_arch_support(arches) + _cccl_filter_to_supported_arches(arches) + + if(CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major-cccl") + _cccl_filter_to_all_major_cccl(arches) + elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "all-cccl") + # No further filtering needed, just use the arches as is. + else() + message(FATAL_ERROR "Invalid CMAKE_CUDA_ARCHITECTURES value: ${CMAKE_CUDA_ARCHITECTURES}") + endif() + + _cccl_add_real_virtual_arch_tags(arches) + message(STATUS "Replacing with CMAKE_CUDA_ARCHITECTURES=${arches}") + set(CMAKE_CUDA_ARCHITECTURES "${arches}" CACHE STRING "CUDA architectures for CCCL" FORCE) + endif() +endfunction() + +# Query nvcc --help to determine which architectures are supported. 
+function(_cccl_detect_nvcc_arch_support arches_var) + find_package(CUDAToolkit) + if (NOT CUDAToolkit_FOUND) + message(FATAL_ERROR "CUDAToolkit not found, '${CMAKE_CUDA_ARCHITECTURES}' arch detection failed.") + endif() + + execute_process( + COMMAND "${CUDAToolkit_NVCC_EXECUTABLE}" --help + OUTPUT_VARIABLE nvcc_help_output + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + string(REGEX MATCHALL "compute_[0-9]+" supported_arches "${nvcc_help_output}") + string(REPLACE "compute_" "" supported_arches "${supported_arches}") + list(SORT supported_arches COMPARE NATURAL) + list(REMOVE_DUPLICATES supported_arches) + message(VERBOSE "NVCC supports: ${supported_arches}") + set(${arches_var} ${supported_arches} PARENT_SCOPE) +endfunction() + +# Remove all arches < minimum_cccl_arch +function(_cccl_filter_to_supported_arches arches_var) + set(cccl_arches "") + foreach(arch IN LISTS ${arches_var}) + if (arch GREATER_EQUAL minimum_cccl_arch) + list(APPEND cccl_arches ${arch}) + endif() + endforeach() + message(VERBOSE "CCCL supported arches: ${cccl_arches}") + set(${arches_var} ${cccl_arches} PARENT_SCOPE) +endfunction() + +# Convert all-cccl to all-major-cccl. 
+function(_cccl_filter_to_all_major_cccl arches_var)
+  # NOTE: `${arches_var}` is dereferenced by name in this scope, so the caller's
+  # variable must not be called `major_arches`, `major` or `arch`.
+  set(major_arches "")
+  foreach(arch IN LISTS ${arches_var})
+    # Round down to the decade (86 -> 80), but never below minimum_cccl_arch.
+    math(EXPR major "(${arch} / 10) * 10")
+    if (major LESS minimum_cccl_arch)
+      set(major "${minimum_cccl_arch}")
+    endif()
+    # Several arches share a decade; record each major arch once, in input order.
+    if (NOT major IN_LIST major_arches)
+      list(APPEND major_arches ${major})
+    endif()
+  endforeach()
+  message(VERBOSE "CCCL all-major arches: ${major_arches}")
+  set(${arches_var} ${major_arches} PARENT_SCOPE)
+endfunction()
+
+# Append `-real` to every arch and additionally emit the last arch as `-virtual`,
+# e.g. "75;80;90" -> "75-real;80-real;90-real;90-virtual".
+function(_cccl_add_real_virtual_arch_tags arches_var)
+  # An empty input has no last arch; tagging it would produce an invalid "-real;-virtual" list.
+  if ("${${arches_var}}" STREQUAL "")
+    message(FATAL_ERROR "No CUDA architectures left to tag for CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}.")
+  endif()
+
+  set(tagged_arches "")
+  foreach(arch IN LISTS ${arches_var})
+    list(APPEND tagged_arches "${arch}-real")
+  endforeach()
+
+  # Index -1 addresses the last list element.
+  list(GET ${arches_var} -1 last_arch)
+  list(APPEND tagged_arches "${last_arch}-virtual")
+
+  message(VERBOSE "CCCL tagged arches: ${tagged_arches}")
+  set(${arches_var} ${tagged_arches} PARENT_SCOPE)
+endfunction()
diff --git a/cub/test/ptx-json/CMakeLists.txt b/cub/test/ptx-json/CMakeLists.txt index f0fcea76697..cd7aaf0595b 100644 --- a/cub/test/ptx-json/CMakeLists.txt +++ b/cub/test/ptx-json/CMakeLists.txt @@ -64,7 +64,9 @@ function(cub_detail_ptx_json_add_test target_name_var source) ) set_target_properties(${target_name} PROPERTIES CUDA_PTX_COMPILATION ON - CUDA_ARCHITECTURES 90 + # Use compute_80 -- CTK 13.0 started running ptxas on 90+, even when just producing PTX. + # This breaks the ptx-json stuff, which produces intentionally invalid PTX. 
+ CUDA_ARCHITECTURES "80-virtual" ) add_test(NAME ${target_name} diff --git a/cudax/include/cuda/experimental/__graph/graph_node_ref.cuh b/cudax/include/cuda/experimental/__graph/graph_node_ref.cuh index 3fcfe3cd609..f854829f6f1 100644 --- a/cudax/include/cuda/experimental/__graph/graph_node_ref.cuh +++ b/cudax/include/cuda/experimental/__graph/graph_node_ref.cuh @@ -251,7 +251,7 @@ struct graph_node_ref __graph_, __deps.data(), // dependencies __src_arr.get(), // dependant nodes - __nullptr, // no edge data + nullptr, // no edge data __deps.size()); // number of dependencies #else _CCCL_TRY_CUDA_API( diff --git a/libcudacxx/test/libcudacxx/cuda/annotated_ptr/annotated_ptr_constexpr.pass.cpp b/libcudacxx/test/libcudacxx/cuda/annotated_ptr/annotated_ptr_constexpr.pass.cpp index dbb891a2c79..31e001c87f0 100644 --- a/libcudacxx/test/libcudacxx/cuda/annotated_ptr/annotated_ptr_constexpr.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/annotated_ptr/annotated_ptr_constexpr.pass.cpp @@ -10,7 +10,7 @@ // UNSUPPORTED: nvrtc // error: expression must have a constant value annotated_ptr.h: note #2701-D: attempt to access run-time storage -// UNSUPPORTED: clang-14, gcc-11, gcc-10, gcc-9, gcc-8, gcc-7, msvc-19.29 +// UNSUPPORTED: clang-14, gcc-12, gcc-11, gcc-10, gcc-9, gcc-8, gcc-7, msvc-19.29 // UNSUPPORTED: msvc && nvcc-12.0 #include diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/default_accessor/accessor.submdspan.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/default_accessor/accessor.submdspan.pass.cpp index b095bebecfa..ad94155c7b8 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/default_accessor/accessor.submdspan.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/default_accessor/accessor.submdspan.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // nvbug5272086 -// UNSUPPORTED: nvcc-12.9 && msvc +// UNSUPPORTED: msvc #include 
#include diff --git a/libcudacxx/test/libcudacxx/cuda/iterators/transform_input_output_iterator/ctor.value.pass.cpp b/libcudacxx/test/libcudacxx/cuda/iterators/transform_input_output_iterator/ctor.value.pass.cpp index 6cad936e0c9..440cd396dc1 100644 --- a/libcudacxx/test/libcudacxx/cuda/iterators/transform_input_output_iterator/ctor.value.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/iterators/transform_input_output_iterator/ctor.value.pass.cpp @@ -34,10 +34,10 @@ __host__ __device__ constexpr bool test() buffer[2] = 2; // The test iterators are not `is_nothrow_move_constructible` -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) static_assert( !noexcept(cuda::transform_input_output_iterator{random_access_iterator{buffer + 2}, input_func, output_func})); -#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) static_assert( cuda::std::is_same_v, InputFn, OutputFn>>); @@ -64,11 +64,11 @@ __host__ __device__ constexpr bool test() assert(buffer[2] == output_func(3)); buffer[2] = 2; -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) // The test iterators are not `is_nothrow_move_constructible` static_assert(!noexcept(cuda::transform_input_output_iterator, InputFn, OutputFn>{ random_access_iterator{buffer + 2}, input_func, output_func})); -#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) } { diff --git a/libcudacxx/test/libcudacxx/cuda/iterators/transform_output_iterator/ctor.value.pass.cpp b/libcudacxx/test/libcudacxx/cuda/iterators/transform_output_iterator/ctor.value.pass.cpp index 4c6c680d445..0eaab87cfb1 100644 --- a/libcudacxx/test/libcudacxx/cuda/iterators/transform_output_iterator/ctor.value.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/iterators/transform_output_iterator/ctor.value.pass.cpp 
@@ -30,10 +30,10 @@ __host__ __device__ constexpr bool test() *iter = 3; assert(buffer[2] == 3 + 1); buffer[2] = 2; -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) // The test iterators are not `is_nothrow_move_constructible` static_assert(!noexcept(cuda::transform_output_iterator{random_access_iterator{buffer + 2}, func})); -#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) static_assert( cuda::std::is_same_v, Fn>>); } @@ -54,11 +54,11 @@ __host__ __device__ constexpr bool test() *iter = 3; assert(buffer[2] == 3 + 1); buffer[2] = 2; -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) // The test iterators are not `is_nothrow_move_constructible` static_assert(!noexcept( cuda::transform_output_iterator, Fn>{random_access_iterator{buffer + 2}, func})); -#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) } { diff --git a/libcudacxx/test/libcudacxx/libcxx/macros/architecture.compile.pass.cpp b/libcudacxx/test/libcudacxx/libcxx/macros/architecture.compile.pass.cpp index 345fce365f1..4ef1da4884d 100644 --- a/libcudacxx/test/libcudacxx/libcxx/macros/architecture.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/libcxx/macros/architecture.compile.pass.cpp @@ -10,19 +10,21 @@ #include #include -#if !defined(__CUDACC_RTC__) +#if !_CCCL_COMPILER(NVRTC) # if _CCCL_ARCH(X86_64) # if _CCCL_COMPILER(MSVC) # include # elif _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) # include -# endif -# endif +# endif // _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) +# endif // _CCCL_ARCH(X86_64) -# if _CCCL_ARCH(ARM64) && defined(__ARM_ACLE) -# include -# endif -#endif +# if !_CCCL_COMPILER(NVHPC) // nvbug5395777 +# if _CCCL_ARCH(ARM64) && defined(__ARM_ACLE) +# include +# endif // _CCCL_ARCH(ARM64) && defined(__ARM_ACLE) 
+# endif // !_CCCL_COMPILER(NVHPC) +#endif // !_CCCL_COMPILER(NVRTC) int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan/deduction.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan/deduction.pass.cpp index 9c9dcf8c568..d87c43c4bb2 100644 --- a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan/deduction.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan/deduction.pass.cpp @@ -48,7 +48,7 @@ // typename MappingType::layout_type, AccessorType>; // nvbug5272086 -// UNSUPPORTED: nvcc-12.9 && msvc +// UNSUPPORTED: msvc #include #include diff --git a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_left.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_left.pass.cpp index 4b506b70ecf..8b77ef07838 100644 --- a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_left.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_left.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // nvbug5272086 -// UNSUPPORTED: nvcc-12.9 && msvc +// UNSUPPORTED: msvc // diff --git a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_right.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_right.pass.cpp index d1bf237a8ce..1e9c23c5096 100644 --- a/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_right.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/containers/views/mdspan/submdspan/layout_right.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // nvbug5272086 -// UNSUPPORTED: nvcc-12.9 && msvc +// UNSUPPORTED: msvc // diff --git a/libcudacxx/test/libcudacxx/std/ranges/range.adaptors/range.all/range.all/all.pass.cpp b/libcudacxx/test/libcudacxx/std/ranges/range.adaptors/range.all/range.all/all.pass.cpp 
index 541ab57bb42..4662f91fb7f 100644 --- a/libcudacxx/test/libcudacxx/std/ranges/range.adaptors/range.all/range.all/all.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/ranges/range.adaptors/range.all/range.all/all.pass.cpp @@ -161,9 +161,9 @@ __host__ __device__ constexpr bool test() static_assert(cuda::std::is_same_v())), View>); static_assert(noexcept(cuda::std::views::all(View()))); // old GCC seems to fall over the noexcept clauses here -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) static_assert(!noexcept(cuda::std::views::all(View()))); -#endif // no broken noexcept +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) auto viewCopy = cuda::std::views::all(View(2)); static_assert(cuda::std::is_same_v>); @@ -175,9 +175,9 @@ __host__ __device__ constexpr bool test() static_assert(cuda::std::is_same_v&>())), CopyableView>); static_assert(noexcept(cuda::std::views::all(CopyableView()))); -#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC2019) +#if !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) static_assert(!noexcept(cuda::std::views::all(CopyableView()))); -#endif // no broken noexcept +#endif // !TEST_COMPILER(GCC, <, 9) && !TEST_COMPILER(MSVC) CopyableView view(2); auto viewCopy = cuda::std::views::all(view); diff --git a/libcudacxx/test/utils/libcudacxx/test/config.py b/libcudacxx/test/utils/libcudacxx/test/config.py index 489fc033347..ee475e78790 100644 --- a/libcudacxx/test/utils/libcudacxx/test/config.py +++ b/libcudacxx/test/utils/libcudacxx/test/config.py @@ -12,6 +12,7 @@ import re import shlex import shutil +import subprocess import sys import libcudacxx.util @@ -194,6 +195,93 @@ def get_compute_capabilities(self): ) return deduced_comput_archs_str
+    def _get_nvcc_archs(self):
+        """Return the sorted compute capabilities (as ints) that nvcc supports.
+
+        The values are scraped from the ``compute_XX`` names printed by
+        ``nvcc --help``. Calls ``lit_config.fatal`` and returns ``[]`` when the
+        compiler is not nvcc, nvcc cannot be run, or no capability is found.
+        """
+        if self.cxx.type != "nvcc":
+            self.lit_config.fatal(
+                "Retrieving compute capabilities is only supported for nvcc compiler type"
+            )
+            return []
+
+        # Run nvcc directly instead of through a shell pipeline: this needs no
+        # grep/sed/sort on the host, tolerates spaces in the compiler path, and
+        # lets check=True observe nvcc's own exit status.
+        # NOTE(review): assumes self.cxx.path is a single executable path -- confirm.
+        try:
+            result = subprocess.run(
+                [self.cxx.path, "--help"],
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                universal_newlines=True,
+            )
+        except (OSError, subprocess.CalledProcessError) as err:
+            self.lit_config.fatal(
+                "Failed to retrieve compute capabilities from nvcc: %s" % err
+            )
+            return []
+
+        archs = {int(arch) for arch in re.findall(r"compute_([0-9]+)", result.stdout)}
+
+        if not archs:
+            self.lit_config.fatal(
+                "Failed to retrieve compute capabilities or no capabilities found."
+            )
+            return []
+
+        return sorted(archs)
+
+    def get_all_major_compute_capabilities(self):
+        """Return the arch list for "all-major" as a ';'-joined string.
+
+        Archs are rounded down to their decade (the oldest arch is kept as-is),
+        tagged ``-real``, and the newest one is also added as ``-virtual``.
+        Returns "" when no archs could be detected.
+        """
+        archs = self._get_nvcc_archs()
+        if not archs:
+            return ""
+
+        # Build the same list used by --arch=all-major:
+
+        # Handle special case where the first architecture is not a round decade (e.g., first arch is 75, not 70).
+        oldest = archs[0]
+        archs = sorted(set((arch // 10 * 10) for arch in archs))
+        archs[0] = oldest
+        last_arch = archs[-1]
+        archs = [f"{arch}-real" for arch in archs]
+        archs.append(f"{last_arch}-virtual")
+
+        archs = ";".join(archs)
+
+        self.lit_config.note("Deduced major compute capabilities are: %s" % archs)
+
+        return archs
+
+    def get_all_compute_capabilities(self):
+        """Return the arch list for "all" as a ';'-joined string.
+
+        Every detected arch is tagged ``-real`` and the newest one is also
+        added as ``-virtual``. Returns "" when no archs could be detected.
+        """
+        archs = self._get_nvcc_archs()
+        if not archs:
+            return ""
+        last_arch = archs[-1]
+        archs = [f"{arch}-real" for arch in archs]
+        archs.append(f"{last_arch}-virtual")
+
+        archs = ";".join(archs)
+
+        self.lit_config.note("Deduced compute capabilities are: %s" % archs)
+
+        return archs
+
 def get_modules_enabled(self): return self.get_lit_bool( "enable_modules", default=False, env_var="LIBCUDACXX_ENABLE_MODULES" @@ -752,8 +840,12 @@ def configure_compile_flags(self): self.lit_config.note("Compute Archs: %s" % compute_archs) if compute_archs == "native": compute_archs = self.get_compute_capabilities() + elif compute_archs == "all": + compute_archs = self.get_all_compute_capabilities() + elif compute_archs == "all-major": + compute_archs = self.get_all_major_compute_capabilities() - compute_archs = set(sorted(re.split("\\s|;|,", compute_archs))) + compute_archs = sorted(set(re.split("\\s|;|,", compute_archs))) for s in compute_archs: # Split arch and mode i.e. 80-virtual -> 80, virtual arch, *mode = re.split("-", s)