|  | 
# Nix package expression for llama.cpp.
#
# A function from its dependencies and feature flags to a derivation built with
# `effectiveStdenv.mkDerivation`. The `use*` flags decide which acceleration
# backend is compiled in; they drive the pname/description suffixes, the build
# inputs and the CMake flags below.
{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  darwin,
  rocmPackages,
  clblast,
  # Plain BLAS is the fallback: enabled only when no other backend is selected.
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    ;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # Shadowing `stdenv` with a `throw` turns any accidental use inside this
  # `let` body into an evaluation error; the real one stays reachable as
  # `inputs.stdenv`.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Human-readable names of the enabled features, in a fixed order.
  suffices =
    optionals useBlas [ "BLAS" ]
    ++ optionals useCuda [ "CUDA" ]
    ++ optionals useMetalKit [ "MetalKit" ]
    ++ optionals useMpi [ "MPI" ]
    ++ optionals useOpenCL [ "OpenCL" ]
    ++ optionals useRocm [ "ROCm" ];

  # e.g. "-blas" or "-cuda-mpi"; empty when no feature is enabled.
  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  # e.g. ", accelerated with CUDA, MPI"; empty when no feature is enabled.
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          # The filter receives the full path of each entry, so checks that
          # are about the file name itself must look at the last component.
          baseName = baseNameOf name;
        in
        !(builtins.any (x: x) [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (baseName == "README.md") # Ignore README.md changes when computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
        ]);
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools' entrypoints
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      # LLAMA_BLAS is configured with the OpenBLAS vendor below, so the
      # library has to be available to the build.
      ++ optionals useBlas [ openblas ]
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs;

    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" true)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" true)
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
      ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main $out/bin/llama
      mv $out/bin/server $out/bin/llama-server
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    passthru = {
      # Expose the feature flags this package was built with.
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        ;

      # Development shells. Each one takes the package's own build inputs via
      # `inputsFrom` and adds a Python environment on top. The package is
      # referenced through `finalAttrs.finalPackage` rather than a let-bound name.
      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
0 commit comments