Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e3cd660
rocmPackages_6: 6.0.2 -> 6.3.1
LunNova Jan 4, 2025
c6bb729
ucx: fix unsupported argument parallel-jobs error by using rocm stdenv
LunNova Jan 4, 2025
e651d43
pkgsRocm: add package set with cuda config off, rocm config on
LunNova Jan 4, 2025
15a37c4
torch: fix rocm build
LunNova Jan 4, 2025
e684c1f
torchvision: fix rocm build
LunNova Jan 4, 2025
1602f3f
ollama: fix rocm build
LunNova Jan 17, 2025
e3a6615
rocm-6/llvm: apply patch suggested by @shuni64 for testing
LunNova Jan 20, 2025
ecec98d
rocm-6: reapply ISA compatibility and UB patches
LunNova Jan 21, 2025
f5a7ad8
hipblaslt: apply compression patch
LunNova Feb 2, 2025
a771a33
rocmPackages_6.rocblas: make hipBLASLt configurable
GZGavinZhao Jan 22, 2025
6ca8bdb
rocmPackages_6.hipblaslt: respect NIX_BUILD_CORES in tensilelite
GZGavinZhao Jan 22, 2025
9d955e8
rocmPackages_6.hipblas: propagate hipblas-common
GZGavinZhao Jan 22, 2025
1d51534
rocmPackages_6.clr: avoid confusion with hipClangPath
GZGavinZhao Jan 22, 2025
7f3e382
llama-cpp: fix ROCm build
GZGavinZhao Jan 22, 2025
2d2401e
rocmPackages_6.hipblas: correct hipblas-common library type
GZGavinZhao Feb 15, 2025
f68349d
rocmPackages_6: remove 2-stage composable_kernel unpacking
GZGavinZhao Feb 15, 2025
875d697
rocmPackages_6: don't zstd compress composable_kernel
GZGavinZhao Feb 16, 2025
37a1a86
rocmPackages_6.composable_kernel: always build gfx90a to enable devic…
GZGavinZhao Feb 16, 2025
37a2684
rocmPackages_6: refactor LLVM and use standalone HIP compiler
GZGavinZhao Feb 22, 2025
cf3f422
rocmPackages_6.rocblas: cleanup rocblas
GZGavinZhao Feb 22, 2025
9d19330
rocmPackages_6.rocprim: fix build with standalone HIP compiler
GZGavinZhao Feb 22, 2025
3ba40b0
rocmPackages_6.rocsparse: fix build with standalone HIP compiler
GZGavinZhao Feb 22, 2025
c97c634
rocmPackages_6.rocfft: fix build with new HIP compiler and respect NI…
GZGavinZhao Feb 23, 2025
adebc12
rocmPackages_6.rocsolver: fix build with new HIP compiler
GZGavinZhao Feb 23, 2025
66d8857
rocmPackages_6: cleanup old LLVM package names
GZGavinZhao Feb 25, 2025
08bf5f8
rocmPackages_6.hipblas: fix build with new HIP compiler
GZGavinZhao Feb 25, 2025
919f1b4
rocmPackages_6.composable_kernel: fix build with new HIP compiler
GZGavinZhao Feb 25, 2025
4449f63
llama-cpp: fix ROCm build with new HIP compiler
GZGavinZhao Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions pkgs/by-name/ll/llama-cpp/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
++ optionals vulkanSupport vulkanBuildInputs
++ [ curl ];

hardeningDisable = optionals rocmSupport [ "zerocallusedregs" "stackprotector" ];

cmakeFlags =
[
# -march=native is non-deterministic; override with platform-specific flags if needed
Expand All @@ -152,16 +154,11 @@ effectiveStdenv.mkDerivation (finalAttrs: {
++ optionals cudaSupport [
(cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
]
++ optionals rocmSupport [
(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
# Should likely use `rocmPackages.clr.gpuTargets`.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
]
++ optionals rocmSupport (with rocmPackages; [
(cmakeFeature "CMAKE_HIP_COMPILER" "${clr.hipClangPath}/clang++")
# TODO: this should become `clr.gpuTargets` in the future.
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocblas.amdgpu_targets)
])
++ optionals metalSupport [
(cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
Expand Down
25 changes: 19 additions & 6 deletions pkgs/by-name/ol/ollama/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ let

rocmLibs = [
rocmPackages.clr
rocmPackages.hipblas-common
rocmPackages.hipblas
rocmPackages.rocblas
rocmPackages.rocsolver
rocmPackages.rocsparse
rocmPackages.rocm-device-libs
rocmPackages.rocm-smi
];
rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
rocmPath = buildEnv {
name = "rocm-path";
paths = rocmLibs ++ [ rocmClang ];
paths = rocmLibs;
};

cudaLibs = [
Expand Down Expand Up @@ -149,6 +149,13 @@ goBuild {
ROCM_PATH = rocmPath;
CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
HIP_PATH = rocmPath;
CFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include";
CXXFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include";
}
// lib.optionalAttrs (enableRocm && (rocmPackages.clr.localGpuTargets or false) != false) {
# If rocm CLR is set to build for an exact set of targets reuse that target list,
# otherwise let ollama use its builtin defaults
HIP_ARCHS = lib.concatStringsSep ";" rocmPackages.clr.localGpuTargets;
}
// lib.optionalAttrs enableCuda { CUDA_PATH = cudaPath; };

Expand Down Expand Up @@ -183,10 +190,16 @@ goBuild {
];

# replace inaccurate version number with actual release version
postPatch = ''
substituteInPlace version/version.go \
--replace-fail 0.0.0 '${version}'
'';
postPatch =
''
substituteInPlace version/version.go \
--replace-fail 0.0.0 '${version}'
''
+ lib.optionalString enableRocm ''
substituteInPlace make/Makefile.rocm \
--replace-fail '-I./llama/' '-I./llama/ -I${rocmPath}/include' \
--replace-fail ' $(ROCBLAS_DIST_DEP_MANIFEST) ' ' '
'';

overrideModAttrs = (
finalAttrs: prevAttrs: {
Expand Down
4 changes: 3 additions & 1 deletion pkgs/by-name/uc/ucx/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ let
paths = rocmList;
};

# rocm build fails with gcc stdenv due to unrecognised arg parallel-jobs
stdenv' = if enableRocm then rocmPackages.stdenv else stdenv;
in
stdenv.mkDerivation rec {
stdenv'.mkDerivation rec {
pname = "ucx";
version = "1.18.0";

Expand Down
49 changes: 35 additions & 14 deletions pkgs/development/python-modules/torch/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
if cudaSupport then
magma-cuda-static
else if rocmSupport then
magma-hip
null
else
magma,
magma,
magma-hip,
magma-cuda-static,
# Use the system NCCL as long as we're targeting CUDA on a supported platform.
useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported || rocmSupport),
Expand All @@ -36,6 +35,7 @@
symlinkJoin,
which,
pybind11,
pkg-config,
removeReferencesTo,

# Build inputs
Expand All @@ -54,6 +54,7 @@
cffi,
click,
typing-extensions,
six,
# ROCm build and `torch.compile` requires `triton`
tritonSupport ? (!stdenv.hostPlatform.isDarwin),
triton,
Expand All @@ -66,7 +67,13 @@
# (dependencies without cuda support).
# Instead we should rely on overlays and nixpkgsFun.
# (@SomeoneSerge)
_tritonEffective ? if cudaSupport then triton-cuda else triton,
_tritonEffective ?
if cudaSupport then
triton-cuda
else if rocmSupport then
rocmPackages.triton
else
triton,
triton-cuda,

# Unit tests
Expand All @@ -86,13 +93,13 @@

# dependencies for torch.utils.tensorboard
pillow,
six,
future,
tensorboard,
protobuf,

# ROCm dependencies
rocmSupport ? config.rocmSupport,
rocmPackages_5,
rocmPackages,
gpuTargets ? [ ],

vulkanSupport ? false,
Expand All @@ -112,8 +119,6 @@ let

triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence";

rocmPackages = rocmPackages_5;

setBool = v: if v then "1" else "0";

# https://github.com/pytorch/pytorch/blob/v2.4.0/torch/utils/cpp_extension.py#L1953
Expand Down Expand Up @@ -181,7 +186,7 @@ let
clr
rccl
miopen
miopengemm
aotriton
rocrand
rocblas
rocsparse
Expand All @@ -193,10 +198,12 @@ let
rocfft
rocsolver
hipfft
hiprand
hipsolver
hipblas-common
hipblas
hipblaslt
rocminfo
rocm-thunk
rocm-comgr
rocm-device-libs
rocm-runtime
Expand All @@ -213,6 +220,7 @@ let
brokenConditions = attrsets.filterAttrs (_: cond: cond) {
"CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport;
"CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux;
"ROCm 6 is currently not compatible with magma" = rocmSupport && effectiveMagma != null;
"Unsupported CUDA version" =
cudaSupport
&& !(builtins.elem cudaPackages.cudaMajorVersion [
Expand All @@ -226,8 +234,6 @@ let
# In particular, this triggered warnings from cuda's `aliases.nix`
"Magma cudaPackages does not match cudaPackages" =
cudaSupport && (effectiveMagma.cudaPackages.cudaVersion != cudaPackages.cudaVersion);
"Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. (2024-06-09)" =
rocmSupport;
};

unroll-src = writeShellScript "unroll-src" ''
Expand Down Expand Up @@ -294,6 +300,10 @@ buildPythonPackage rec {

# annotations (3.7), print_function (3.0), with_statement (2.6) are all supported
sed -i -e "/from __future__ import/d" **.py
substituteInPlace third_party/NNPACK/CMakeLists.txt --replace "PYTHONPATH=" 'PYTHONPATH=$ENV{PYTHONPATH}:'
# Passing PYTHON_SIX_SOURCE_DIR via cmakeFlags doesn't work (not clear why);
# setting it at the top of NNPACK's own CMakeLists.txt does.
sed -i '2s;^;set(PYTHON_SIX_SOURCE_DIR ${six.src})\n;' third_party/NNPACK/CMakeLists.txt
''
+ lib.optionalString rocmSupport ''
# https://github.com/facebookincubator/gloo/pull/297
Expand Down Expand Up @@ -366,6 +376,10 @@ buildPythonPackage rec {
# We only do an imports check, so do not build tests either.
BUILD_TEST = setBool false;

# ninja hook doesn't automatically turn on ninja
# because pytorch setup.py is responsible for this
CMAKE_GENERATOR = "Ninja";

# Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
# it by default. PyTorch currently uses its own vendored version
# of oneDNN through Intel iDeep.
Expand All @@ -376,14 +390,15 @@ buildPythonPackage rec {
# Also avoids pytorch exporting the headers of pybind11
USE_SYSTEM_PYBIND11 = true;

# NB technical debt: building without NNPACK as workaround for missing `six`
USE_NNPACK = 0;
# Multicore CPU convnet support
USE_NNPACK = 1;

# Explicitly enable MPS for Darwin
USE_MPS = setBool stdenv.hostPlatform.isDarwin;

cmakeFlags =
[
(lib.cmakeFeature "PYTHON_SIX_SOURCE_DIR" "${six.src}")
# (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true)
(lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaVersion)
]
Expand Down Expand Up @@ -432,6 +447,8 @@ buildPythonPackage rec {

env =
{
# Builds faster without response files, and we don't have enough inputs for command length to be an issue
NIX_CC_USE_RESPONSE_FILE = 0;
# disable warnings as errors as they break the build on every compiler
# bump, among other things.
# Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
Expand All @@ -441,6 +458,9 @@ buildPythonPackage rec {
}
// lib.optionalAttrs vulkanSupport {
VULKAN_SDK = shaderc.bin;
}
// lib.optionalAttrs rocmSupport {
AOTRITON_INSTALLED_PREFIX = "${rocmPackages.aotriton}";
};

nativeBuildInputs =
Expand All @@ -449,6 +469,7 @@ buildPythonPackage rec {
which
ninja
pybind11
pkg-config
removeReferencesTo
]
++ lib.optionals cudaSupport (
Expand Down Expand Up @@ -495,7 +516,7 @@ buildPythonPackage rec {
]
)
++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ]
++ lib.optionals (effectiveMagma != null && (cudaSupport || rocmSupport)) [ effectiveMagma ]
++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ]
++ lib.optionals stdenv.hostPlatform.isDarwin [
apple-sdk_13
Expand Down
1 change: 0 additions & 1 deletion pkgs/development/python-modules/torchaudio/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ let
hipsolver
hipblas
rocminfo
rocm-thunk
rocm-comgr
rocm-device-libs
rocm-runtime
Expand Down
Loading
Loading