feat(ollama): init

shivaraj-bh · shivaraj-bh · commit d84efa4788d2 · 2024-06-12T16:09:12.000+05:30
ported from https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix Also see the nixos module: https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/ollama.nix
diff --git a/doc/ollama.md b/doc/ollama.md
@@ -0,0 +1,40 @@
+# Ollama
+
+[Ollama](https://github.com/ollama/ollama) enables you to get up and running with Llama 3, Mistral, Gemma, and other large language models.
+
+## Getting Started
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1".enable = true;
+}
+```
+
+## Acceleration
+
+By default Ollama uses the CPU for inference. To enable GPU acceleration:
+
+### Cuda
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1" = {
+    enable = true;
+    acceleration = "cuda";
+  };
+}
+```
+
+### ROCm
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1" = {
+    enable = true;
+    acceleration = "rocm";
+  };
+}
+```
diff --git a/nix/default.nix b/nix/default.nix
@@ -9,6 +9,7 @@ in
     ./elasticsearch.nix
     ./mysql
     ./nginx
+    ./ollama.nix
     ./postgres
     ./redis-cluster.nix
     ./redis.nix
diff --git a/nix/ollama.nix b/nix/ollama.nix
@@ -0,0 +1,150 @@
+# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
+{ pkgs, lib, name, config, ... }:
+let
+  inherit (lib) types;
+  ollamaPackage = pkgs.ollama.override {
+    inherit (config) acceleration;
+    linuxPackages = config.boot.kernelPackages // {
+      nvidia_x11 = config.hardware.nvidia.package;
+    };
+  };
+in
+{
+  options = {
+    enable = lib.mkEnableOption "Enable the Ollama service";
+    package = lib.mkOption {
+      type = types.package;
+      default = ollamaPackage;
+      description = "The Ollama package to use";
+    };
+    port = lib.mkOption {
+      type = types.port;
+      default = 11434;
+      description = "The port on which the Ollama service's REST API will listen";
+    };
+    host = lib.mkOption {
+      type = types.str;
+      default = "127.0.0.1";
+      example = "0.0.0.0";
+      description = "The host on which the Ollama service's REST API will listen";
+    };
+    dataDir = lib.mkOption {
+      type = types.str;
+      default = "./data/ollama";
+      description = ''
+        The directory containing the Ollama models.
+        Sets the `OLLAMA_MODELS` environment variable.
+      '';
+    };
+    keepAlive = lib.mkOption {
+      type = types.str;
+      default = "5m";
+      description = ''
+        The duration that models stay loaded in memory.
+        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
+
+        Note: Use a duration string like "5m" for 5 minutes. Or "70" for 70 seconds.
+      '';
+      example = "70";
+    };
+    models = lib.mkOption {
+      type = types.listOf types.str;
+      default = [ ];
+      description = ''
+        The models to load post start.
+        Search for models of your choice from: https://ollama.com/library
+      '';
+    };
+    acceleration = lib.mkOption {
+      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
+      default = null;
+      example = "rocm";
+      description = ''
+        What interface to use for hardware acceleration.
+
+        - `null`: default behavior
+          - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
+          - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
+          - otherwise defaults to `false`
+        - `false`: disable GPU, only use CPU
+        - `"rocm"`: supported by most modern AMD GPUs
+        - `"cuda"`: supported by most modern NVIDIA GPUs
+      '';
+    };
+    environment = lib.mkOption {
+      type = types.attrsOf types.str;
+      default = { };
+      example = {
+        OLLAMA_DEBUG = "1";
+      };
+      description = ''
+        Extra environment variables passed to the `ollama-server` process.
+      '';
+    };
+
+    outputs.settings = lib.mkOption {
+      type = types.deferredModule;
+      internal = true;
+      readOnly = true;
+      default = {
+        processes = {
+          "${name}" =
+            let
+              startScript = pkgs.writeShellApplication {
+                name = "ollama-server";
+                text = ''
+                  if [ ! -d ${config.dataDir} ]; then
+                    echo "Creating directory ${config.dataDir}"
+                    mkdir -p ${config.dataDir}
+                  fi
+
+                  ${lib.getExe config.package} serve
+                '';
+              };
+            in
+            {
+              command = startScript;
+
+              environment = {
+                OLLAMA_MODELS = config.dataDir;
+                OLLAMA_HOST = "${config.host}:${toString config.port}";
+                OLLAMA_KEEP_ALIVE = config.keepAlive;
+              } // config.environment;
+
+              readiness_probe = {
+                http_get = {
+                  host = config.host;
+                  port = config.port;
+                };
+                initial_delay_seconds = 2;
+                period_seconds = 10;
+                timeout_seconds = 4;
+                success_threshold = 1;
+                failure_threshold = 5;
+              };
+              namespace = name;
+              availability.restart = "on_failure";
+            };
+
+          "${name}-models" = {
+            command = pkgs.writeShellApplication {
+              name = "ollama-models";
+              text = ''
+                set -x
+                OLLAMA_HOST=${config.host}:${toString config.port}
+                export OLLAMA_HOST
+                models="${lib.concatStringsSep " " config.models}"
+                for model in $models
+                do
+                  ${lib.getExe config.package} pull "$model"
+                done
+              '';
+            };
+            namespace = name;
+            depends_on."${name}".condition = "process_healthy";
+          };
+        };
+      };
+    };
+  };
+}
diff --git a/nix/ollama_test.nix b/nix/ollama_test.nix
@@ -0,0 +1,16 @@
+{ pkgs, ... }: {
+  services.ollama."ollama1".enable = true;
+
+  # Cannot test auto-loading models yet because that requires internet connection.
+  settings.processes.test =
+    {
+      command = pkgs.writeShellApplication {
+        runtimeInputs = [ pkgs.curl ];
+        text = ''
+          curl http://127.0.0.1:11434
+        '';
+        name = "ollama-test";
+      };
+      depends_on."ollama1".condition = "process_healthy";
+    };
+}
diff --git a/test/flake.nix b/test/flake.nix
@@ -38,6 +38,7 @@
             "${inputs.services-flake}/nix/elasticsearch_test.nix"
             "${inputs.services-flake}/nix/mysql/mysql_test.nix"
             "${inputs.services-flake}/nix/nginx/nginx_test.nix"
+            "${inputs.services-flake}/nix/ollama_test.nix"
             "${inputs.services-flake}/nix/postgres/postgres_test.nix"
             "${inputs.services-flake}/nix/redis_test.nix"
             "${inputs.services-flake}/nix/redis-cluster_test.nix"