From b8e2221859757e0491a7fd5cb5f54064b7087d55 Mon Sep 17 00:00:00 2001 From: Dennis Wuitz Date: Thu, 18 Jul 2024 22:22:50 +0200 Subject: [PATCH 1/6] python312Packages.openai-triton: 2.1.0 -> 3.0.0, update links to repository --- .../python-modules/triton/default.nix | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index 98585b850e912..b8c2624981dc0 100644 --- a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -4,7 +4,6 @@ addDriverRunpath, buildPythonPackage, fetchFromGitHub, - fetchpatch, setuptools, cmake, ninja, @@ -30,24 +29,19 @@ let in buildPythonPackage rec { pname = "triton"; - version = "2.1.0"; + version = "3.0.0"; pyproject = true; src = fetchFromGitHub { - owner = "openai"; + owner = "triton-lang"; repo = pname; - rev = "v${version}"; - hash = "sha256-8UTUwLH+SriiJnpejdrzz9qIquP2zBp1/uwLdHmv0XQ="; + # latest branch commit from https://github.com/triton-lang/triton/commits/release/3.0.x/ + rev = "91f24d87e50cb748b121a6c24e65a01187699c22"; + hash = "sha256-L5KqiR+TgSyKjEBlkE0yOU1pemMHFk2PhEmxLdbbxUU="; }; patches = [ - # fix overflow error - (fetchpatch { - url = "https://github.com/openai/triton/commit/52c146f66b79b6079bcd28c55312fc6ea1852519.patch"; - hash = "sha256-098/TCQrzvrBAbQiaVGCMaF3o5Yc3yWDxzwSkzIuAtY="; - }) - # Upstream startded pinning CUDA version and falling back to downloading from Conda # in https://github.com/triton-lang/triton/pull/1574/files#diff-eb8b42d9346d0a5d371facf21a8bfa2d16fb49e213ae7c21f03863accebe0fcfR120-R123 ./0000-dont-download-ptxas.patch @@ -208,7 +202,7 @@ buildPythonPackage rec { }; pythonRemoveDeps = [ - # Circular dependency, cf. https://github.com/openai/triton/issues/1374 + # Circular dependency, cf. 
https://github.com/triton-lang/triton/issues/1374 "torch" # CLI tools without dist-info @@ -218,12 +212,13 @@ buildPythonPackage rec { meta = with lib; { description = "Language and compiler for writing highly efficient custom Deep-Learning primitives"; - homepage = "https://github.com/openai/triton"; + homepage = "https://github.com/triton-lang/triton"; platforms = platforms.linux; license = licenses.mit; maintainers = with maintainers; [ SomeoneSerge Madouura + derdennisop ]; }; } From 5ed0e25be8c2a0ad23fea55515b7ac01f419beaa Mon Sep 17 00:00:00 2001 From: Dennis Wuitz Date: Fri, 19 Jul 2024 16:54:19 +0200 Subject: [PATCH 2/6] python312Packages.triton*: repair package --- pkgs/by-name/tr/triton-llvm/package.nix | 14 +- .../triton/0000-dont-download-ptxas.patch | 74 ++++++++-- ...ble-version-key-for-non-cuda-targets.patch | 27 ---- .../development/python-modules/triton/bin.nix | 7 +- .../python-modules/triton/default.nix | 138 ++++++------------ .../python-modules/triton/prefetch.sh | 40 ----- 6 files changed, 120 insertions(+), 180 deletions(-) delete mode 100644 pkgs/development/python-modules/triton/0001-ptxas-disable-version-key-for-non-cuda-targets.patch delete mode 100755 pkgs/development/python-modules/triton/prefetch.sh diff --git a/pkgs/by-name/tr/triton-llvm/package.nix b/pkgs/by-name/tr/triton-llvm/package.nix index d45aa2fafe65f..f7e12ba32c11b 100644 --- a/pkgs/by-name/tr/triton-llvm/package.nix +++ b/pkgs/by-name/tr/triton-llvm/package.nix @@ -11,6 +11,7 @@ , libedit , libffi , libpfm +, lit , mpfr , zlib , ncurses @@ -45,7 +46,7 @@ let isNative = stdenv.hostPlatform == stdenv.buildPlatform; in stdenv.mkDerivation (finalAttrs: { pname = "triton-llvm"; - version = "17.0.0-c5dede880d17"; + version = "19.1.0-rc1"; # One of the tags at https://github.com/llvm/llvm-project/commit/10dc3a8e916d73291269e5e2b82dd22681489aa1 outputs = [ "out" @@ -60,8 +61,8 @@ in stdenv.mkDerivation (finalAttrs: { src = fetchFromGitHub { owner = "llvm"; repo = "llvm-project"; - 
rev = "c5dede880d175f7229c9b2923f4753e12702305d"; - hash = "sha256-v4r3+7XVFK+Dzxt/rErZNJ9REqFO3JmGN4X4vZ+77ew="; + rev = "10dc3a8e916d73291269e5e2b82dd22681489aa1"; + hash = "sha256-9DPvcFmhzw6MipQeCQnr35LktW0uxtEL8axMMPXIfWw="; }; nativeBuildInputs = [ @@ -74,6 +75,7 @@ in stdenv.mkDerivation (finalAttrs: { doxygen sphinx python3Packages.recommonmark + python3Packages.myst-parser ]; buildInputs = [ @@ -154,9 +156,11 @@ in stdenv.mkDerivation (finalAttrs: { rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s ''; - postInstall = lib.optionalString (!isNative) '' + postInstall = '' + cp ${lib.getExe lit} $out/bin/llvm-lit + '' + (lib.optionalString (!isNative) '' cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native - ''; + ''); doCheck = buildTests; diff --git a/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch b/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch index d31a4798af05c..265595e93de9f 100644 --- a/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch +++ b/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch @@ -1,15 +1,67 @@ +From 10f3d49aa6084d1b9b9624017cce7df106b9fb7e Fri Jul 19 00:00:00 2024 +From: derdennisop +Date: Fri, 19 jul 2024 00:00:00 +0100 +Subject: [PATCH] ptxas: disable version key for non-cuda targets + +--- + python/setup.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + diff --git a/python/setup.py b/python/setup.py -index 18764ec13..b3bb5b60a 100644 +index d55972b4b..bd875a701 100644 --- a/python/setup.py +++ b/python/setup.py -@@ -269,10 +269,6 @@ class CMakeBuild(build_ext): - subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=cmake_dir, env=env) - subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=cmake_dir) - +@@ -437,54 +117,5 @@ + with open(nvidia_version_path, "r") as nvidia_version_file: + NVIDIA_TOOLCHAIN_VERSION = nvidia_version_file.read().strip() + +-download_and_copy( +- name="ptxas", +- 
src_path="bin/ptxas", +- variable="TRITON_PTXAS_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cuobjdump", +- src_path="bin/cuobjdump", +- variable="TRITON_CUOBJDUMP_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="nvdisasm", +- src_path="bin/nvdisasm", +- variable="TRITON_NVDISASM_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cudacrt", +- src_path="include", +- variable="TRITON_CUDACRT_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cudart", +- src_path="include", +- variable="TRITON_CUDART_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cupti", +- src_path="include", +- variable="TRITON_CUPTI_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2", +-) - --download_and_copy_ptxas() -- -- - setup( - name="triton", - version="2.1.0", + backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()] diff --git a/pkgs/development/python-modules/triton/0001-ptxas-disable-version-key-for-non-cuda-targets.patch 
b/pkgs/development/python-modules/triton/0001-ptxas-disable-version-key-for-non-cuda-targets.patch deleted file mode 100644 index 3941d54b8b37f..0000000000000 --- a/pkgs/development/python-modules/triton/0001-ptxas-disable-version-key-for-non-cuda-targets.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 10f3d49aa6084d1b9b9624017cce7df106b9fb7e Mon Sep 17 00:00:00 2001 -From: Yaroslav Bolyukin -Date: Tue, 6 Feb 2024 13:51:28 +0100 -Subject: [PATCH] ptxas: disable version key for non-cuda targets - ---- - python/triton/runtime/jit.py | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/python/triton/runtime/jit.py b/python/triton/runtime/jit.py -index d55972b4b..bd875a701 100644 ---- a/python/triton/runtime/jit.py -+++ b/python/triton/runtime/jit.py -@@ -117,8 +117,8 @@ def version_key(): - with open(lib.module_finder.find_spec(lib.name).origin, "rb") as f: - contents += [hashlib.md5(f.read()).hexdigest()] - # ptxas version -- ptxas = path_to_ptxas()[0] -- ptxas_version = hashlib.md5(subprocess.check_output([ptxas, "--version"])).hexdigest() -+ # ptxas = path_to_ptxas()[0] -+ ptxas_version = "noptxas" - return '-'.join(TRITON_VERSION) + '-' + ptxas_version + '-' + '-'.join(contents) - - --- -2.43.0 - diff --git a/pkgs/development/python-modules/triton/bin.nix b/pkgs/development/python-modules/triton/bin.nix index 0189278bc0e9b..6bb67753a8bd7 100644 --- a/pkgs/development/python-modules/triton/bin.nix +++ b/pkgs/development/python-modules/triton/bin.nix @@ -5,11 +5,8 @@ cudaPackages, buildPythonPackage, fetchurl, - isPy38, - isPy39, - isPy310, - isPy311, python, + pythonOlder, autoPatchelfHook, filelock, lit, @@ -29,7 +26,7 @@ buildPythonPackage rec { in fetchurl srcs; - disabled = !(isPy38 || isPy39 || isPy310 || isPy311); + disabled = pythonOlder "3.8"; pythonRemoveDeps = [ "cmake" diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index b8c2624981dc0..9a6efae237b8a 100644 --- 
a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -1,100 +1,56 @@ { lib, - config, - addDriverRunpath, buildPythonPackage, - fetchFromGitHub, - setuptools, cmake, - ninja, - pybind11, + config, + cudaPackages, + fetchFromGitHub, + filelock, gtest, - zlib, - ncurses, libxml2, lit, llvm, - filelock, - torchWithRocm, + ncurses, + ninja, + pybind11, python, - runCommand, - - cudaPackages, + setuptools, + torchWithRocm, + zlib, cudaSupport ? config.cudaSupport, }: -let - ptxas = lib.getExe' cudaPackages.cuda_nvcc "ptxas"; # Make sure cudaPackages is the right version each update (See python/setup.py) -in -buildPythonPackage rec { +buildPythonPackage { pname = "triton"; version = "3.0.0"; pyproject = true; src = fetchFromGitHub { owner = "triton-lang"; - repo = pname; + repo = "triton"; # latest branch commit from https://github.com/triton-lang/triton/commits/release/3.0.x/ rev = "91f24d87e50cb748b121a6c24e65a01187699c22"; hash = "sha256-L5KqiR+TgSyKjEBlkE0yOU1pemMHFk2PhEmxLdbbxUU="; }; - patches = - [ - # Upstream startded pinning CUDA version and falling back to downloading from Conda - # in https://github.com/triton-lang/triton/pull/1574/files#diff-eb8b42d9346d0a5d371facf21a8bfa2d16fb49e213ae7c21f03863accebe0fcfR120-R123 - ./0000-dont-download-ptxas.patch - ] - ++ lib.optionals (!cudaSupport) [ - # triton wants to get ptxas version even if ptxas is not - # used, resulting in ptxas not found error. - ./0001-ptxas-disable-version-key-for-non-cuda-targets.patch - ]; + # triton wants to download every dependency, even if we are not using cuda. + patches = lib.optionals (!cudaSupport) [ ./0000-dont-download-ptxas.patch ]; postPatch = - let - quote = x: ''"${x}"''; - subs.ldFlags = - let - # Bash was getting weird without linting, - # but basically upstream contains [cc, ..., "-lcuda", ...] - # and we replace it with [..., "-lcuda", "-L/run/opengl-driver/lib", "-L$stubs", ...] 
- old = [ "-lcuda" ]; - new = [ - "-lcuda" - "-L${addDriverRunpath.driverLink}" - "-L${cudaPackages.cuda_cudart}/lib/stubs/" - ]; - in - { - oldStr = lib.concatMapStringsSep ", " quote old; - newStr = lib.concatMapStringsSep ", " quote new; - }; - in '' # Use our `cmakeFlags` instead and avoid downloading dependencies + # remove any downloads substituteInPlace python/setup.py \ - --replace "= get_thirdparty_packages(triton_cache_path)" "= os.environ[\"cmakeFlags\"].split()" - - # Already defined in llvm, when built with -DLLVM_INSTALL_UTILS - substituteInPlace bin/CMakeLists.txt \ - --replace "add_subdirectory(FileCheck)" "" + --replace-fail "get_json_package_info(), get_pybind11_package_info()" ""\ + --replace-fail "get_pybind11_package_info(), get_llvm_package_info()" ""\ + --replace-fail 'packages += ["triton/profiler"]' ""\ + --replace-fail "curr_version != version" "False" # Don't fetch googletest substituteInPlace unittest/CMakeLists.txt \ - --replace "include (\''${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)" ""\ - --replace "include(GoogleTest)" "find_package(GTest REQUIRED)" - - cat << \EOF >> python/triton/common/build.py - def libcuda_dirs(): - return [ "${addDriverRunpath.driverLink}/lib" ] - EOF - '' - + lib.optionalString cudaSupport '' - # Use our linker flags - substituteInPlace python/triton/common/build.py \ - --replace '${subs.ldFlags.oldStr}' '${subs.ldFlags.newStr}' + --replace-fail "include (\''${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)" ""\ + --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" ''; nativeBuildInputs = [ @@ -133,40 +89,38 @@ buildPythonPackage rec { ]; # Avoid GLIBCXX mismatch with other cuda-enabled python packages - preConfigure = - '' - # Ensure that the build process uses the requested number of cores - export MAX_JOBS="$NIX_BUILD_CORES" + preConfigure = '' + # Ensure that the build process uses the requested number of cores + export MAX_JOBS="$NIX_BUILD_CORES" - # Upstream's setup.py tries to write cache 
somewhere in ~/ - export HOME=$(mktemp -d) + # Upstream's setup.py tries to write cache somewhere in ~/ + export HOME=$(mktemp -d) - # Upstream's github actions patch setup.cfg to write base-dir. May be redundant - echo " - [build_ext] - base-dir=$PWD" >> python/setup.cfg + # Upstream's github actions patch setup.cfg to write base-dir. May be redundant + echo " + [build_ext] + base-dir=$PWD" >> python/setup.cfg - # The rest (including buildPhase) is relative to ./python/ - cd python - '' - + lib.optionalString cudaSupport '' - export CC=${cudaPackages.backendStdenv.cc}/bin/cc; - export CXX=${cudaPackages.backendStdenv.cc}/bin/c++; + # The rest (including buildPhase) is relative to ./python/ + cd python + ''; - # Work around download_and_copy_ptxas() - mkdir -p $PWD/triton/third_party/cuda/bin - ln -s ${ptxas} $PWD/triton/third_party/cuda/bin - ''; + env = { + TRITON_BUILD_PROTON = "OFF"; + } // lib.optionalAttrs cudaSupport { + CC = "${cudaPackages.backendStdenv.cc}/bin/cc"; + CXX = "${cudaPackages.backendStdenv.cc}/bin/c++"; + + TRITON_PTXAS_PATH = lib.getExe' cudaPackages.cuda_nvcc "ptxas"; # Make sure cudaPackages is the right version each update (See python/setup.py) + TRITON_CUOBJDUMP_PATH = cudaPackages.cuda_cuobjdump; + TRITON_NVDISASM_PATH = cudaPackages.cuda_nvdisasm; + TRITON_CUDACRT_PATH = cudaPackages.cuda_nvcc; + TRITON_CUDART_PATH = cudaPackages.cuda_cudart; + TRITON_CUPTI_PATH = cudaPackages.cuda_cupti; + }; # CMake is run by setup.py instead dontUseCmakeConfigure = true; - - # Setuptools (?) strips runpath and +x flags. 
Let's just restore the symlink - postFixup = lib.optionalString cudaSupport '' - rm -f $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas - ln -s ${ptxas} $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas - ''; - checkInputs = [ cmake ]; # ctest dontUseSetuptoolsCheck = true; diff --git a/pkgs/development/python-modules/triton/prefetch.sh b/pkgs/development/python-modules/triton/prefetch.sh deleted file mode 100755 index f218718a5cf30..0000000000000 --- a/pkgs/development/python-modules/triton/prefetch.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env nix-shell -#!nix-shell -i bash -p nix-prefetch-scripts - -set -eou pipefail - -version=$1 - -linux_bucket="https://download.pytorch.org/whl" - -url_and_key_list=( - "x86_64-linux-38 $linux_bucket/triton-${version}-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl triton-${version}-cp38-cp38-linux_x86_64.whl" - "x86_64-linux-39 $linux_bucket/triton-${version}-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl triton-${version}-cp39-cp39-linux_x86_64.whl" - "x86_64-linux-310 $linux_bucket/triton-${version}-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl triton-${version}-cp310-cp310-linux_x86_64.whl" - "x86_64-linux-311 $linux_bucket/triton-${version}-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl triton-${version}-cp311-cp311-linux_x86_64.whl" -) - -hashfile=binary-hashes-"$version".nix -echo " \"$version\" = {" >> $hashfile - -for url_and_key in "${url_and_key_list[@]}"; do - key=$(echo "$url_and_key" | cut -d' ' -f1) - url=$(echo "$url_and_key" | cut -d' ' -f2) - name=$(echo "$url_and_key" | cut -d' ' -f3) - - echo "prefetching ${url}..." - hash=$(nix hash to-sri --type sha256 `nix-prefetch-url "$url" --name "$name"`) - - cat << EOF >> $hashfile - $key = { - name = "$name"; - url = "$url"; - hash = "$hash"; - }; -EOF - - echo -done - -echo " };" >> $hashfile -echo "done." 
From e262792bf1b6fca90e8d2ce861ed9725d077468f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 29 Jul 2024 15:01:48 +0000 Subject: [PATCH 3/6] python312Packages.triton: use more generic patch to unvendor ptxas/cuda --- .../triton/0000-dont-download-ptxas.patch | 67 ------------------- ...up.py-introduce-TRITON_OFFLINE_BUILD.patch | 64 ++++++++++++++++++ .../python-modules/triton/default.nix | 6 +- 3 files changed, 68 insertions(+), 69 deletions(-) delete mode 100644 pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch create mode 100644 pkgs/development/python-modules/triton/0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch diff --git a/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch b/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch deleted file mode 100644 index 265595e93de9f..0000000000000 --- a/pkgs/development/python-modules/triton/0000-dont-download-ptxas.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 10f3d49aa6084d1b9b9624017cce7df106b9fb7e Fri Jul 19 00:00:00 2024 -From: derdennisop -Date: Fri, 19 jul 2024 00:00:00 +0100 -Subject: [PATCH] ptxas: disable version key for non-cuda targets - ---- - python/setup.py | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/python/setup.py b/python/setup.py -index d55972b4b..bd875a701 100644 ---- a/python/setup.py -+++ b/python/setup.py -@@ -437,54 +117,5 @@ - with open(nvidia_version_path, "r") as nvidia_version_file: - NVIDIA_TOOLCHAIN_VERSION = nvidia_version_file.read().strip() - --download_and_copy( -- name="ptxas", -- src_path="bin/ptxas", -- variable="TRITON_PTXAS_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", --) --download_and_copy( -- name="cuobjdump", -- src_path="bin/cuobjdump", -- variable="TRITON_CUOBJDUMP_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- 
f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", --) --download_and_copy( -- name="nvdisasm", -- src_path="bin/nvdisasm", -- variable="TRITON_NVDISASM_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", --) --download_and_copy( -- name="cudacrt", -- src_path="include", -- variable="TRITON_CUDACRT_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", --) --download_and_copy( -- name="cudart", -- src_path="include", -- variable="TRITON_CUDART_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2", --) --download_and_copy( -- name="cupti", -- src_path="include", -- variable="TRITON_CUPTI_PATH", -- version=NVIDIA_TOOLCHAIN_VERSION, -- url_func=lambda arch, version: -- f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2", --) -- - backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()] diff --git a/pkgs/development/python-modules/triton/0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch b/pkgs/development/python-modules/triton/0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch new file mode 100644 index 0000000000000..5b195fd7f8821 --- /dev/null +++ b/pkgs/development/python-modules/triton/0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch @@ -0,0 +1,64 @@ +From 587d1f3428eca63544238802f19e0be670d03244 Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Mon, 29 Jul 2024 14:31:11 +0000 +Subject: [PATCH] setup.py: introduce TRITON_OFFLINE_BUILD + +To prevent any vendoring whatsoever +--- + python/setup.py | 26 
++++++++++++++++++++++++-- + 1 file changed, 24 insertions(+), 2 deletions(-) + +diff --git a/python/setup.py b/python/setup.py +index 73800ec40..4e5b04de4 100644 +--- a/python/setup.py ++++ b/python/setup.py +@@ -112,6 +112,20 @@ def get_env_with_keys(key: list): + return os.environ[k] + return "" + ++def is_offline_build() -> bool: ++ """ ++ Downstream projects and distributions which bootstrap their own dependencies from scratch ++ and run builds in offline sandboxes ++ may set `TRITON_OFFLINE_BUILD` in the build environment to prevent any attempts at downloading ++ pinned dependencies from the internet or at using dependencies vendored in-tree. ++ ++ Dependencies must be defined using respective search paths (cf. `syspath_var_name` in `Package`). ++ Missing dependencies lead to an early abortion. ++ Dependencies' compatibility is not verified. ++ ++ Note that this flag isn't tested by the CI and does not provide any guarantees. ++ """ ++ return os.environ.get("TRITON_OFFLINE_BUILD", "") != "" + + # --- third party packages ----- + +@@ -220,8 +234,14 @@ def get_thirdparty_packages(packages: list): + if os.environ.get(p.syspath_var_name): + package_dir = os.environ[p.syspath_var_name] + version_file_path = os.path.join(package_dir, "version.txt") +- if p.syspath_var_name not in os.environ and\ +- (not os.path.exists(version_file_path) or Path(version_file_path).read_text() != p.url): ++ ++ input_defined = p.syspath_var_name not in os.environ ++ input_exists = input_defined and os.path.exists(version_file_path) ++ input_compatible = input_exists and Path(version_file_path).read_text() == p.url ++ ++ if is_offline_build() and not input_defined: ++ raise RuntimeError(f"Requested an offline build but {p.syspath_var_name} is not set") ++ if not is_offline_build() and not input_compatible: + with contextlib.suppress(Exception): + shutil.rmtree(package_root_dir) + os.makedirs(package_root_dir, exist_ok=True) +@@ -245,6 +265,8 @@ def get_thirdparty_packages(packages: 
list): + + + def download_and_copy(name, src_path, variable, version, url_func): ++ if is_offline_build(): ++ return + triton_cache_path = get_triton_cache_path() + if variable in os.environ: + return +-- +2.45.1 + diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index 9a6efae237b8a..1a4dd63c43140 100644 --- a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -34,8 +34,9 @@ buildPythonPackage { hash = "sha256-L5KqiR+TgSyKjEBlkE0yOU1pemMHFk2PhEmxLdbbxUU="; }; - # triton wants to download every dependency, even if we are not using cuda. - patches = lib.optionals (!cudaSupport) [ ./0000-dont-download-ptxas.patch ]; + patches = [ + ./0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch + ]; postPatch = '' @@ -107,6 +108,7 @@ buildPythonPackage { env = { TRITON_BUILD_PROTON = "OFF"; + TRITON_OFFLINE_BUILD = true; } // lib.optionalAttrs cudaSupport { CC = "${cudaPackages.backendStdenv.cc}/bin/cc"; CXX = "${cudaPackages.backendStdenv.cc}/bin/c++"; From ae560061d89acf618097a27a73d9b05ba8fadcab Mon Sep 17 00:00:00 2001 From: SomeoneSerge Date: Mon, 14 Oct 2024 17:27:10 +0000 Subject: [PATCH 4/6] python3Packages.triton: fix cuda (ptxas, cudart paths) --- .../0001-_build-allow-extra-cc-flags.patch | 35 +++++ ...driver-short-circuit-before-ldconfig.patch | 70 +++++++++ .../0003-nvidia-cudart-a-systempath.patch | 46 ++++++ .../0004-nvidia-allow-static-ptxas-path.patch | 26 ++++ .../python-modules/triton/default.nix | 134 +++++++++++------- 5 files changed, 260 insertions(+), 51 deletions(-) create mode 100644 pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch create mode 100644 pkgs/development/python-modules/triton/0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch create mode 100644 pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch create mode 100644 
pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch diff --git a/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch b/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch new file mode 100644 index 0000000000000..1e473dc59f46a --- /dev/null +++ b/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch @@ -0,0 +1,35 @@ +From 2751c5de5c61c90b56e3e392a41847f4c47258fd Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Sun, 13 Oct 2024 14:16:48 +0000 +Subject: [PATCH 1/3] _build: allow extra cc flags + +--- + python/triton/runtime/build.py | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py +index d7baeb286..d334dce77 100644 +--- a/python/triton/runtime/build.py ++++ b/python/triton/runtime/build.py +@@ -42,9 +42,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): + py_include_dir = sysconfig.get_paths(scheme=scheme)["include"] + include_dirs = include_dirs + [srcdir, py_include_dir] + cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-o", so] ++ ++ # Nixpkgs support branch ++ # Allows passing e.g. 
extra -Wl,-rpath ++ cc_cmd_extra_flags = "@ccCmdExtraFlags@" ++ if cc_cmd_extra_flags != ("@" + "ccCmdExtraFlags@"): # substituteAll hack ++ import shlex ++ cc_cmd.extend(shlex.split(cc_cmd_extra_flags)) ++ + cc_cmd += [f'-l{lib}' for lib in libraries] + cc_cmd += [f"-L{dir}" for dir in library_dirs] +- cc_cmd += [f"-I{dir}" for dir in include_dirs] ++ cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] + ret = subprocess.check_call(cc_cmd) + if ret == 0: + return so +-- +2.46.0 + diff --git a/pkgs/development/python-modules/triton/0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch b/pkgs/development/python-modules/triton/0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch new file mode 100644 index 0000000000000..aa65cad58ed81 --- /dev/null +++ b/pkgs/development/python-modules/triton/0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch @@ -0,0 +1,70 @@ +From 7407cb03eec82768e333909d87b7668b633bfe86 Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Sun, 13 Oct 2024 14:28:48 +0000 +Subject: [PATCH 2/3] {nvidia,amd}/driver: short-circuit before ldconfig + +--- + python/triton/runtime/build.py | 6 +++--- + third_party/amd/backend/driver.py | 7 +++++++ + third_party/nvidia/backend/driver.py | 3 +++ + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py +index d334dce77..a64e98da0 100644 +--- a/python/triton/runtime/build.py ++++ b/python/triton/runtime/build.py +@@ -42,6 +42,9 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): + py_include_dir = sysconfig.get_paths(scheme=scheme)["include"] + include_dirs = include_dirs + [srcdir, py_include_dir] + cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-o", so] ++ cc_cmd += [f'-l{lib}' for lib in libraries] ++ cc_cmd += [f"-L{dir}" for dir in library_dirs] ++ cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] + + # Nixpkgs support branch + # Allows passing e.g. 
extra -Wl,-rpath +@@ -50,9 +53,6 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): + import shlex + cc_cmd.extend(shlex.split(cc_cmd_extra_flags)) + +- cc_cmd += [f'-l{lib}' for lib in libraries] +- cc_cmd += [f"-L{dir}" for dir in library_dirs] +- cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] + ret = subprocess.check_call(cc_cmd) + if ret == 0: + return so +diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py +index 0a8cd7bed..aab8805f6 100644 +--- a/third_party/amd/backend/driver.py ++++ b/third_party/amd/backend/driver.py +@@ -24,6 +24,13 @@ def _get_path_to_hip_runtime_dylib(): + return env_libhip_path + raise RuntimeError(f"TRITON_LIBHIP_PATH '{env_libhip_path}' does not point to a valid {lib_name}") + ++ # ...on release/3.1.x: ++ # return mmapped_path ++ # raise RuntimeError(f"memory mapped '{mmapped_path}' in process does not point to a valid {lib_name}") ++ ++ if os.path.isdir("@libhipDir@"): ++ return ["@libhipDir@"] ++ + paths = [] + + import site +diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py +index 90f71138b..30fbadb2a 100644 +--- a/third_party/nvidia/backend/driver.py ++++ b/third_party/nvidia/backend/driver.py +@@ -21,6 +21,9 @@ def libcuda_dirs(): + if env_libcuda_path: + return [env_libcuda_path] + ++ if os.path.exists("@libcudaStubsDir@"): ++ return ["@libcudaStubsDir@"] ++ + libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode() + # each line looks like the following: + # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1 +-- +2.46.0 + diff --git a/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch b/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch new file mode 100644 index 0000000000000..144d84e151fe1 --- /dev/null +++ b/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch @@ -0,0 +1,46 @@ +From 
6f92d54e5a544bc34bb07f2808d554a71cc0e4c3 Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Sun, 13 Oct 2024 14:30:19 +0000 +Subject: [PATCH 3/3] nvidia: cudart a systempath + +--- + third_party/nvidia/backend/driver.c | 2 +- + third_party/nvidia/backend/driver.py | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c +index 44524da27..fbdf0d156 100644 +--- a/third_party/nvidia/backend/driver.c ++++ b/third_party/nvidia/backend/driver.c +@@ -1,4 +1,4 @@ +-#include "cuda.h" ++#include + #include + #include + #define PY_SSIZE_T_CLEAN +diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py +index 30fbadb2a..65c0562ed 100644 +--- a/third_party/nvidia/backend/driver.py ++++ b/third_party/nvidia/backend/driver.py +@@ -10,7 +10,8 @@ from triton.backends.compiler import GPUTarget + from triton.backends.driver import GPUDriver + + dirname = os.path.dirname(os.path.realpath(__file__)) +-include_dir = [os.path.join(dirname, "include")] ++import shlex ++include_dir = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")] + libdevice_dir = os.path.join(dirname, "lib") + libraries = ['cuda'] + +@@ -149,7 +150,7 @@ def make_launcher(constants, signature, ids): + # generate glue code + params = [i for i in signature.keys() if i not in constants] + src = f""" +-#include \"cuda.h\" ++#include + #include + #include + #include +-- +2.46.0 + diff --git a/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch b/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch new file mode 100644 index 0000000000000..eea1834d1750b --- /dev/null +++ b/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch @@ -0,0 +1,26 @@ +From e503e572b6d444cd27f1cdf124aaf553aa3a8665 Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Mon, 14 Oct 2024 00:12:05 +0000 +Subject: [PATCH 4/4] 
nvidia: allow static ptxas path + +--- + third_party/nvidia/backend/compiler.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py +index 6d7994923..6720e8f97 100644 +--- a/third_party/nvidia/backend/compiler.py ++++ b/third_party/nvidia/backend/compiler.py +@@ -20,6 +20,9 @@ def _path_to_binary(binary: str): + os.path.join(os.path.dirname(__file__), "bin", binary), + ] + ++ import shlex ++ paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@")) ++ + for bin in paths: + if os.path.exists(bin) and os.path.isfile(bin): + result = subprocess.check_output([bin, "--version"], stderr=subprocess.STDOUT) +-- +2.46.0 + diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index 1a4dd63c43140..5ee5971d77720 100644 --- a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -1,5 +1,6 @@ { lib, + addDriverRunpath, buildPythonPackage, cmake, config, @@ -15,10 +16,13 @@ pybind11, python, runCommand, + substituteAll, setuptools, torchWithRocm, zlib, cudaSupport ? config.cudaSupport, + rocmSupport ? 
config.rocmSupport, + rocmPackages, }: buildPythonPackage { @@ -34,29 +38,53 @@ buildPythonPackage { hash = "sha256-L5KqiR+TgSyKjEBlkE0yOU1pemMHFk2PhEmxLdbbxUU="; }; - patches = [ - ./0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch - ]; + patches = + [ + ./0001-setup.py-introduce-TRITON_OFFLINE_BUILD.patch + (substituteAll { + src = ./0001-_build-allow-extra-cc-flags.patch; + ccCmdExtraFlags = "-Wl,-rpath,${addDriverRunpath.driverLink}/lib"; + }) + (substituteAll ( + { + src = ./0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch; + } + // lib.optionalAttrs rocmSupport { libhipDir = "${lib.getLib rocmPackages.clr}/lib"; } + // lib.optionalAttrs cudaSupport { + libcudaStubsDir = "${lib.getLib cudaPackages.cuda_cudart}/lib/stubs"; + ccCmdExtraFlags = "-Wl,-rpath,${addDriverRunpath.driverLink}/lib"; + } + )) + ] + ++ lib.optionals cudaSupport [ + (substituteAll { + src = ./0003-nvidia-cudart-a-systempath.patch; + cudaToolkitIncludeDirs = "${lib.getInclude cudaPackages.cuda_cudart}/include"; + }) + (substituteAll { + src = ./0004-nvidia-allow-static-ptxas-path.patch; + nixpkgsExtraBinaryPaths = lib.escapeShellArgs [ (lib.getExe' cudaPackages.cuda_nvcc "ptxas") ]; + }) + ]; + + postPatch = '' + # Use our `cmakeFlags` instead and avoid downloading dependencies + # remove any downloads + substituteInPlace python/setup.py \ + --replace-fail "get_json_package_info(), get_pybind11_package_info()" ""\ + --replace-fail "get_pybind11_package_info(), get_llvm_package_info()" ""\ + --replace-fail 'packages += ["triton/profiler"]' ""\ + --replace-fail "curr_version != version" "False" + + # Don't fetch googletest + substituteInPlace unittest/CMakeLists.txt \ + --replace-fail "include (\''${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)" ""\ + --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" + ''; - postPatch = - '' - # Use our `cmakeFlags` instead and avoid downloading dependencies - # remove any downloads - substituteInPlace python/setup.py \ - 
--replace-fail "get_json_package_info(), get_pybind11_package_info()" ""\ - --replace-fail "get_pybind11_package_info(), get_llvm_package_info()" ""\ - --replace-fail 'packages += ["triton/profiler"]' ""\ - --replace-fail "curr_version != version" "False" - - # Don't fetch googletest - substituteInPlace unittest/CMakeLists.txt \ - --replace-fail "include (\''${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)" ""\ - --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" - ''; + build-system = [ setuptools ]; nativeBuildInputs = [ - setuptools - # pytestCheckHook # Requires torch (circular dependency) and probably needs GPUs: cmake ninja @@ -76,7 +104,7 @@ buildPythonPackage { zlib ]; - propagatedBuildInputs = [ + dependencies = [ filelock # triton uses setuptools at runtime: # https://github.com/NixOS/nixpkgs/pull/286763/#discussion_r1480392652 @@ -106,26 +134,40 @@ buildPythonPackage { cd python ''; - env = { - TRITON_BUILD_PROTON = "OFF"; - TRITON_OFFLINE_BUILD = true; - } // lib.optionalAttrs cudaSupport { - CC = "${cudaPackages.backendStdenv.cc}/bin/cc"; - CXX = "${cudaPackages.backendStdenv.cc}/bin/c++"; - - TRITON_PTXAS_PATH = lib.getExe' cudaPackages.cuda_nvcc "ptxas"; # Make sure cudaPackages is the right version each update (See python/setup.py) - TRITON_CUOBJDUMP_PATH = cudaPackages.cuda_cuobjdump; - TRITON_NVDISASM_PATH = cudaPackages.cuda_nvdisasm; - TRITON_CUDACRT_PATH = cudaPackages.cuda_nvcc; - TRITON_CUDART_PATH = cudaPackages.cuda_cudart; - TRITON_CUPTI_PATH = cudaPackages.cuda_cupti; - }; + env = + { + TRITON_BUILD_PROTON = "OFF"; + TRITON_OFFLINE_BUILD = true; + } + // lib.optionalAttrs cudaSupport { + CC = lib.getExe' cudaPackages.backendStdenv.cc "cc"; + CXX = lib.getExe' cudaPackages.backendStdenv.cc "c++"; + + # TODO: Unused because of how TRITON_OFFLINE_BUILD currently works (subject to change) + TRITON_PTXAS_PATH = lib.getExe' cudaPackages.cuda_nvcc "ptxas"; # Make sure cudaPackages is the right version each update (See 
python/setup.py) + TRITON_CUOBJDUMP_PATH = lib.getExe' cudaPackages.cuda_cuobjdump "cuobjdump"; + TRITON_NVDISASM_PATH = lib.getExe' cudaPackages.cuda_nvdisasm "nvdisasm"; + TRITON_CUDACRT_PATH = lib.getInclude cudaPackages.cuda_nvcc; + TRITON_CUDART_PATH = lib.getInclude cudaPackages.cuda_cudart; + TRITON_CUPTI_PATH = cudaPackages.cuda_cupti; + }; + + pythonRemoveDeps = [ + # Circular dependency, cf. https://github.com/triton-lang/triton/issues/1374 + "torch" + + # CLI tools without dist-info + "cmake" + "lit" + ]; # CMake is run by setup.py instead dontUseCmakeConfigure = true; - checkInputs = [ cmake ]; # ctest - dontUseSetuptoolsCheck = true; + nativeCheckInputs = [ + cmake + # Requires torch (circular dependency) and GPU access: pytestCheckHook + ]; preCheck = '' # build/temp* refers to build_ext.build_temp (looked up in the build logs) (cd ./build/temp* ; ctest) @@ -134,11 +176,10 @@ buildPythonPackage { cd test/unit ''; - # Circular dependency on torch - # pythonImportsCheck = [ - # "triton" - # "triton.language" - # ]; + pythonImportsCheck = [ + "triton" + "triton.language" + ]; # Ultimately, torch is our test suite: passthru.tests = { @@ -157,15 +198,6 @@ buildPythonPackage { ''; }; - pythonRemoveDeps = [ - # Circular dependency, cf. 
https://github.com/triton-lang/triton/issues/1374 - "torch" - - # CLI tools without dist-info - "cmake" - "lit" - ]; - meta = with lib; { description = "Language and compiler for writing highly efficient custom Deep-Learning primitives"; homepage = "https://github.com/triton-lang/triton"; From 2aa951facd53b1887d1885bbad15a0be67817321 Mon Sep 17 00:00:00 2001 From: SomeoneSerge Date: Mon, 14 Oct 2024 17:31:14 +0000 Subject: [PATCH 5/6] python3Packages.triton.tests.axpy-cuda: init --- ...ropagate-cmakeFlags-from-environment.patch | 29 +++++ .../python-modules/torch/default.nix | 56 ++++++++-- .../python-modules/triton/default.nix | 103 +++++++++++++++--- pkgs/top-level/python-packages.nix | 8 +- 4 files changed, 164 insertions(+), 32 deletions(-) create mode 100644 pkgs/development/python-modules/torch/0001-cmake.py-propagate-cmakeFlags-from-environment.patch diff --git a/pkgs/development/python-modules/torch/0001-cmake.py-propagate-cmakeFlags-from-environment.patch b/pkgs/development/python-modules/torch/0001-cmake.py-propagate-cmakeFlags-from-environment.patch new file mode 100644 index 0000000000000..e30f6449c7bc5 --- /dev/null +++ b/pkgs/development/python-modules/torch/0001-cmake.py-propagate-cmakeFlags-from-environment.patch @@ -0,0 +1,29 @@ +From c5d4087519eae6f41c80bbd8ffbcc9390db44c7f Mon Sep 17 00:00:00 2001 +From: SomeoneSerge +Date: Thu, 10 Oct 2024 19:19:18 +0000 +Subject: [PATCH] cmake.py: propagate cmakeFlags from environment + +--- + tools/setup_helpers/cmake.py | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py +index 4b605fe5975..ea1d6a1ef46 100644 +--- a/tools/setup_helpers/cmake.py ++++ b/tools/setup_helpers/cmake.py +@@ -332,6 +332,12 @@ class CMake: + file=sys.stderr, + ) + print(e, file=sys.stderr) ++ ++ # Nixpkgs compat: ++ if "cmakeFlags" in os.environ: ++ import shlex ++ args.extend(shlex.split(os.environ["cmakeFlags"])) ++ + # According to the CMake manual, we should pass 
the arguments first, + # and put the directory as the last element. Otherwise, these flags + # may not be passed correctly. +-- +2.46.0 + diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 7b5b8e9f6726c..00a2a66072677 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -35,10 +35,8 @@ removeReferencesTo, # Build inputs + darwin, numactl, - Accelerate, - CoreServices, - libobjc, # Propagated build inputs astunparse, @@ -56,6 +54,17 @@ tritonSupport ? (!stdenv.hostPlatform.isDarwin), triton, + # TODO: 1. callPackage needs to learn to distinguish between the task + # of "asking for an attribute from the parent scope" and + # the task of "exposing a formal parameter in .override". + # TODO: 2. We should probably abandon attributes such as `torchWithCuda` (etc.) + # as they routinely end up consuming the wrong arguments\ + # (dependencies without cuda support). + # Instead we should rely on overlays and nixpkgsFun. + # (@SomeoneSerge) + _tritonEffective ? if cudaSupport then triton-cuda else triton, + triton-cuda, + # Unit tests hypothesis, psutil, @@ -95,6 +104,8 @@ let ; inherit (cudaPackages) cudaFlags cudnn nccl; + triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence"; + rocmPackages = rocmPackages_5; setBool = v: if v then "1" else "0"; @@ -240,6 +251,7 @@ buildPythonPackage rec { # Allow setting PYTHON_LIB_REL_PATH with an environment variable. 
# https://github.com/pytorch/pytorch/pull/128419 ./passthrough-python-lib-rel-path.patch + ./0001-cmake.py-propagate-cmakeFlags-from-environment.patch ] ++ lib.optionals cudaSupport [ ./fix-cmake-cuda-toolkit.patch ] ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [ @@ -257,7 +269,18 @@ buildPythonPackage rec { ]; postPatch = - lib.optionalString rocmSupport '' + '' + substituteInPlace cmake/public/cuda.cmake \ + --replace-fail \ + 'message(FATAL_ERROR "Found two conflicting CUDA' \ + 'message(WARNING "Found two conflicting CUDA' \ + --replace-warn \ + "set(CUDAToolkit_ROOT" \ + "# Upstream: set(CUDAToolkit_ROOT" + substituteInPlace third_party/gloo/cmake/Cuda.cmake \ + --replace-warn "find_package(CUDAToolkit 7.0" "find_package(CUDAToolkit" + '' + + lib.optionalString rocmSupport '' # https://github.com/facebookincubator/gloo/pull/297 substituteInPlace third_party/gloo/cmake/Hipify.cmake \ --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}" @@ -351,6 +374,17 @@ buildPythonPackage rec { # NB technical debt: building without NNPACK as workaround for missing `six` USE_NNPACK = 0; + cmakeFlags = + [ + # (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true) + (lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaVersion) + ] + ++ lib.optionals cudaSupport [ + # Unbreaks version discovery in enable_language(CUDA) when wrapping nvcc with ccache + # Cf. 
https://gitlab.kitware.com/cmake/cmake/-/issues/26363 + (lib.cmakeFeature "CMAKE_CUDA_COMPILER_TOOLKIT_VERSION" cudaPackages.cudaVersion) + ]; + preBuild = '' export MAX_JOBS=$NIX_BUILD_CORES ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only @@ -495,11 +529,11 @@ buildPythonPackage rec { ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] ++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ - Accelerate - CoreServices - libobjc + darwin.apple_sdk.frameworks.Accelerate + darwin.apple_sdk.frameworks.CoreServices + darwin.libobjc ] - ++ lib.optionals tritonSupport [ triton ] + ++ lib.optionals tritonSupport [ _tritonEffective ] ++ lib.optionals MPISupport [ mpi ] ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; @@ -527,7 +561,7 @@ buildPythonPackage rec { # torch/csrc requires `pybind11` at runtime pybind11 - ] ++ lib.optionals tritonSupport [ triton ]; + ] ++ lib.optionals tritonSupport [ _tritonEffective ]; propagatedCxxBuildInputs = [ ] ++ lib.optionals MPISupport [ mpi ] ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; @@ -662,7 +696,9 @@ buildPythonPackage rec { thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds - platforms = with lib.platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin; + platforms = + lib.platforms.linux + ++ lib.optionals (!cudaSupport && !rocmSupport) lib.platforms.darwin; broken = builtins.any trivial.id (builtins.attrValues brokenConditions); }; } diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index 5ee5971d77720..6e4c66e4acea6 100644 --- a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -15,7 +15,8 @@ ninja, pybind11, python, - runCommand, + pytestCheckHook, + stdenv, substituteAll, setuptools, torchWithRocm, @@ -23,6 +24,7 @@ cudaSupport ? config.cudaSupport, rocmSupport ? 
config.rocmSupport, rocmPackages, + triton, }: buildPythonPackage { @@ -164,16 +166,10 @@ buildPythonPackage { # CMake is run by setup.py instead dontUseCmakeConfigure = true; - nativeCheckInputs = [ - cmake - # Requires torch (circular dependency) and GPU access: pytestCheckHook - ]; + nativeCheckInputs = [ cmake ]; preCheck = '' # build/temp* refers to build_ext.build_temp (looked up in the build logs) (cd ./build/temp* ; ctest) - - # For pytestCheckHook - cd test/unit ''; pythonImportsCheck = [ @@ -181,20 +177,91 @@ buildPythonPackage { "triton.language" ]; - # Ultimately, torch is our test suite: + passthru.gpuCheck = stdenv.mkDerivation { + pname = "triton-pytest"; + inherit (triton) version src; + + requiredSystemFeatures = [ "cuda" ]; + + nativeBuildInputs = [ + (python.withPackages (ps: [ + ps.scipy + ps.torchWithCuda + ps.triton-cuda + ])) + ]; + + dontBuild = true; + nativeCheckInputs = [ pytestCheckHook ]; + + doCheck = true; + + preCheck = '' + cd python/test/unit + export HOME=$TMPDIR + ''; + checkPhase = "pytestCheckPhase"; + + installPhase = "touch $out"; + }; + passthru.tests = { + # Ultimately, torch is our test suite: inherit torchWithRocm; - # Implemented as alternative to pythonImportsCheck, in case if circular dependency on torch occurs again, - # and pythonImportsCheck is commented back. 
- import-triton = - runCommand "import-triton" - { nativeBuildInputs = [ (python.withPackages (ps: [ ps.triton ])) ]; } + + # Test as `nix run -f "<nixpkgs>" python3Packages.triton.tests.axpy-cuda` + # or, using `programs.nix-required-mounts`, as `nix build -f "<nixpkgs>" python3Packages.triton.tests.axpy-cuda.gpuCheck` + axpy-cuda = + cudaPackages.writeGpuTestPython + { + libraries = ps: [ + ps.triton + ps.torch-no-triton + ]; + } '' - python << \EOF + # Adapted from Philippe Tillet https://triton-lang.org/main/getting-started/tutorials/01-vector-add.html + import triton - import triton.language - EOF - touch "$out" + import triton.language as tl + import torch + import os + + @triton.jit + def axpy_kernel(n, a: tl.constexpr, x_ptr, y_ptr, out, BLOCK_SIZE: tl.constexpr): + pid = tl.program_id(axis=0) + block_start = pid * BLOCK_SIZE + offsets = block_start + tl.arange(0, BLOCK_SIZE) + mask = offsets < n + x = tl.load(x_ptr + offsets, mask=mask) + y = tl.load(y_ptr + offsets, mask=mask) + output = a * x + y + tl.store(out + offsets, output, mask=mask) + + def axpy(a, x, y): + output = torch.empty_like(x) + assert x.is_cuda and y.is_cuda and output.is_cuda + n_elements = output.numel() + + def grid(meta): + return (triton.cdiv(n_elements, meta['BLOCK_SIZE']), ) + + axpy_kernel[grid](n_elements, a, x, y, output, BLOCK_SIZE=1024) + return output + + if __name__ == "__main__": + if os.environ.get("HOME", None) == "/homeless-shelter": + os.environ["HOME"] = os.environ.get("TMPDIR", "/tmp") + if "CC" not in os.environ: + os.environ["CC"] = "${lib.getExe' cudaPackages.backendStdenv.cc "cc"}" + torch.manual_seed(0) + size = 12345 + x = torch.rand(size, device='cuda') + y = torch.rand(size, device='cuda') + output_torch = 3.14 * x + y + output_triton = axpy(3.14, x, y) + assert output_torch.sub(output_triton).abs().max().item() < 1e-6 + print("Triton axpy: OK") ''; }; diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index c026d04370f7f..b259dff2b346d 
100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -15717,10 +15717,10 @@ self: super: with self; { toposort = callPackage ../development/python-modules/toposort { }; - torch = callPackage ../development/python-modules/torch { - inherit (pkgs.darwin.apple_sdk.frameworks) Accelerate CoreServices; - inherit (pkgs.darwin) libobjc; - }; + torch = callPackage ../development/python-modules/torch { }; + + # Required to test triton + torch-no-triton = self.torch.override { tritonSupport = false; }; torch-audiomentations = callPackage ../development/python-modules/torch-audiomentations { }; From 0daa24192cb4a4f4499d92f3585aa7845e30c3fc Mon Sep 17 00:00:00 2001 From: SomeoneSerge Date: Wed, 16 Oct 2024 14:05:08 +0000 Subject: [PATCH 6/6] triton-llvm: patch for glibc 2.40 support --- pkgs/by-name/tr/triton-llvm/package.nix | 26 ++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/pkgs/by-name/tr/triton-llvm/package.nix b/pkgs/by-name/tr/triton-llvm/package.nix index f7e12ba32c11b..829f0c912afd2 100644 --- a/pkgs/by-name/tr/triton-llvm/package.nix +++ b/pkgs/by-name/tr/triton-llvm/package.nix @@ -1,6 +1,7 @@ { lib , stdenv , fetchFromGitHub +, fetchpatch , pkgsBuildBuild , pkg-config , cmake @@ -64,6 +65,15 @@ in stdenv.mkDerivation (finalAttrs: { rev = "10dc3a8e916d73291269e5e2b82dd22681489aa1"; hash = "sha256-9DPvcFmhzw6MipQeCQnr35LktW0uxtEL8axMMPXIfWw="; }; + patches = [ + # glibc-2.40 support + # [llvm-exegesis] Use correct rseq struct size #100804 + # https://github.com/llvm/llvm-project/issues/100791 + (fetchpatch { + url = "https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch"; + hash = "sha256-QKa+kyXjjGXwTQTEpmKZx5yYjOyBX8A8NQoIYUaGcIw="; + }) + ]; nativeBuildInputs = [ pkg-config @@ -92,7 +102,9 @@ in stdenv.mkDerivation (finalAttrs: { ncurses ]; - sourceRoot = "${finalAttrs.src.name}/llvm"; + preConfigure = '' + cd llvm + ''; cmakeFlags = [ 
(lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild')) @@ -142,18 +154,18 @@ in stdenv.mkDerivation (finalAttrs: { postPatch = '' # `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied` - chmod +w -R ../mlir - patchShebangs ../mlir/test/mlir-reduce + chmod +w -R ./mlir + patchShebangs ./mlir/test/mlir-reduce # FileSystem permissions tests fail with various special bits - rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test - rm unittests/Support/Path.cpp + rm llvm/test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test + rm llvm/unittests/Support/Path.cpp - substituteInPlace unittests/Support/CMakeLists.txt \ + substituteInPlace llvm/unittests/Support/CMakeLists.txt \ --replace "Path.cpp" "" '' + lib.optionalString stdenv.hostPlatform.isAarch64 '' # Not sure why this fails - rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s + rm llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s ''; postInstall = ''