diff --git a/README.md b/README.md
index 31d0e3cc333..cb61b03bcc2 100644
--- a/README.md
+++ b/README.md
@@ -104,6 +104,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
- [Moonshot AI](https://www.moonshot.ai/)
- [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local)
- [CometAPI (chat models)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
**Embedder models:**
diff --git a/docker/.env.example b/docker/.env.example
index 27fa1c013c6..a3d81f3e3f0 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -149,6 +149,11 @@ GID='1000'
# FOUNDRY_MODEL_PREF='phi-3.5-mini'
# FOUNDRY_MODEL_TOKEN_LIMIT=4096
+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT=4096
+
###########################################
######## Embedding API SElECTION ##########
###########################################
diff --git a/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx
new file mode 100644
index 00000000000..3ec0854b2b6
--- /dev/null
+++ b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx
@@ -0,0 +1,152 @@
+import { useState, useEffect } from "react";
+import System from "@/models/system";
+import PreLoader from "@/components/Preloader";
+import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";
+import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
+import { upperCaseFirst } from "text-case";
+
+export default function DockerModelRunnerOptions({ settings }) {
+ const {
+ autoDetecting: loading,
+ basePath,
+ basePathValue,
+ handleAutoDetectClick,
+ } = useProviderEndpointAutoDiscovery({
+ provider: "docker-model-runner",
+ initialBasePath: settings?.DockerModelRunnerBasePath,
+ ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS,
+ });
+
+  const [maxTokens, setMaxTokens] = useState(
+    settings?.DockerModelRunnerModelTokenLimit || 4096
+  );
+
+  return (
+    <div className="w-full flex flex-col gap-y-7">
+      <div className="w-full flex items-start gap-[36px] mt-1.5">
+        <div className="flex flex-col w-60">
+          <div className="flex justify-between items-center mb-2">
+            <label className="text-white text-sm font-semibold">
+              Docker Model Runner Base URL
+            </label>
+            {loading ? (
+              <PreLoader size="6" />
+            ) : (
+              <>
+                {!basePathValue.value && (
+                  <button
+                    onClick={handleAutoDetectClick}
+                    className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white"
+                  >
+                    Auto-Detect
+                  </button>
+                )}
+              </>
+            )}
+          </div>
+          <input
+            type="url"
+            name="DockerModelRunnerBasePath"
+            className="border-none bg-theme-settings-input-bg text-white text-sm rounded-lg focus:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="http://127.0.0.1:12434"
+            value={basePathValue.value}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+            onChange={basePath.onChange}
+            onBlur={basePath.onBlur}
+          />
+        </div>
+        <DockerModelRunnerModelSelection
+          settings={settings}
+          basePath={basePath.value}
+        />
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-2">
+            Token context window
+          </label>
+          <input
+            type="number"
+            name="DockerModelRunnerModelTokenLimit"
+            className="border-none bg-theme-settings-input-bg text-white text-sm rounded-lg focus:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="4096"
+            min={1}
+            value={maxTokens}
+            onChange={(e) => setMaxTokens(Number(e.target.value))}
+            onScroll={(e) => e.target.blur()}
+            required={true}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
+
+function DockerModelRunnerModelSelection({ settings, basePath = null }) {
+  const [customModels, setCustomModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      if (!basePath) {
+        setCustomModels([]);
+        setLoading(false);
+        return;
+      }
+
+      setLoading(true);
+      const { models } = await System.customModels(
+        "docker-model-runner",
+        null,
+        basePath
+      );
+      setCustomModels(models || []);
+      setLoading(false);
+    }
+    findCustomModels();
+  }, [basePath]);
+
+  if (loading) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-2">
+          Docker Model Runner Model
+        </label>
+        <select
+          name="DockerModelRunnerModelPref"
+          disabled={true}
+          className="border-none bg-theme-settings-input-bg text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  const groupedModels = customModels.reduce((result, model) => {
+    if (!result[model.organization]) result[model.organization] = [];
+    result[model.organization].push(model);
+    return result;
+  }, {});
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-2">
+        Docker Model Runner Model
+      </label>
+      <select
+        name="DockerModelRunnerModelPref"
+        required={true}
+        className="border-none bg-theme-settings-input-bg text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {Object.entries(groupedModels).map(([organization, models]) => (
+          <optgroup key={organization} label={upperCaseFirst(organization)}>
+            {models.map((model) => (
+              <option
+                key={model.id}
+                value={model.id}
+                selected={settings?.DockerModelRunnerModelPref === model.id}
+              >
+                {model.name}
+              </option>
+            ))}
+          </optgroup>
+        ))}
+      </select>
+    </div>
+  );
+}
diff --git a/frontend/src/media/llmprovider/docker-model-runner.png b/frontend/src/media/llmprovider/docker-model-runner.png
new file mode 100644
index 00000000000..961d921cb22
Binary files /dev/null and b/frontend/src/media/llmprovider/docker-model-runner.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 671f7e867da..edd70526e35 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -35,6 +35,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
import CometApiLogo from "@/media/llmprovider/cometapi.png";
import FoundryLogo from "@/media/llmprovider/foundry-local.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -67,6 +68,7 @@ import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions";
import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
import FoundryOptions from "@/components/LLMSelection/FoundryOptions";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -327,6 +329,18 @@ export const AVAILABLE_LLM_PROVIDERS = [
"FoundryModelTokenLimit",
],
},
+ {
+ name: "Docker Model Runner",
+ value: "docker-model-runner",
+ logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+ description: "Run models locally using Docker Model Runner.",
+ requiredConfig: [
+ "DockerModelRunnerBasePath",
+ "DockerModelRunnerModelPref",
+ "DockerModelRunnerModelTokenLimit",
+ ],
+ },
{
name: "xAI",
value: "xai",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index b12979a889d..38fbcda7c00 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -41,6 +41,7 @@ import DPAISLogo from "@/media/llmprovider/dpais.png";
import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
import CometApiLogo from "@/media/llmprovider/cometapi.png";
import FoundryLogo from "@/media/llmprovider/foundry-local.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
import React, { useState, useEffect } from "react";
import paths from "@/utils/paths";
@@ -269,6 +270,13 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: FoundryLogo,
},
+ "docker-model-runner": {
+ name: "Docker Model Runner",
+ description: [
+ "Your model and chats are only accessible on the machine running Docker Model Runner",
+ ],
+ logo: DockerModelRunnerLogo,
+ },
};
export const VECTOR_DB_PRIVACY = {
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index 7a16985fe11..1938078a41e 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@@ -29,6 +29,7 @@ import PPIOLogo from "@/media/llmprovider/ppio.png";
import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
import CometApiLogo from "@/media/llmprovider/cometapi.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -59,6 +60,7 @@ import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions";
import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system";
@@ -281,6 +283,13 @@ const LLMS = [
    options: (settings) => <CometApiLLMOptions settings={settings} />,
description: "500+ AI Models all in one API.",
},
+ {
+ name: "Docker Model Runner",
+ value: "docker-model-runner",
+ logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+ description: "Run models locally using Docker Model Runner.",
+ },
];
export default function LLMPreference({
diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js
index a6efc519c20..50eac997a2a 100644
--- a/frontend/src/utils/constants.js
+++ b/frontend/src/utils/constants.js
@@ -51,6 +51,14 @@ export const NVIDIA_NIM_COMMON_URLS = [
"http://172.17.0.1:8000/v1/version",
];
+export const DOCKER_MODEL_RUNNER_COMMON_URLS = [
+ "http://localhost:12434/engines/llama.cpp/v1",
+ "http://127.0.0.1:12434/engines/llama.cpp/v1",
+ "http://model-runner.docker.internal/engines/llama.cpp/v1",
+ "http://host.docker.internal:12434/engines/llama.cpp/v1",
+ "http://172.17.0.1:12434/engines/llama.cpp/v1",
+];
+
export function fullApiUrl() {
if (API_BASE !== "/api") return API_BASE;
return `${window.location.origin}/api`;
diff --git a/server/.env.example b/server/.env.example
index 3dc0bd596c4..0ec7d9f2ae1 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -148,6 +148,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# FOUNDRY_MODEL_PREF='phi-3.5-mini'
# FOUNDRY_MODEL_TOKEN_LIMIT=4096
+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT=4096
+
###########################################
######## Embedding API SElECTION ##########
###########################################
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 3a7a4b21554..d8931a41ea4 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -574,6 +574,12 @@ const SystemSettings = {
FoundryModelPref: process.env.FOUNDRY_MODEL_PREF,
FoundryModelTokenLimit: process.env.FOUNDRY_MODEL_TOKEN_LIMIT,
+ // Docker Model Runner Keys
+ DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+ DockerModelRunnerModelPref: process.env.DOCKER_MODEL_RUNNER_MODEL_PREF,
+ DockerModelRunnerModelTokenLimit:
+ process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT,
+
AwsBedrockLLMConnectionMethod:
process.env.AWS_BEDROCK_LLM_CONNECTION_METHOD || "iam",
AwsBedrockLLMAccessKeyId: !!process.env.AWS_BEDROCK_LLM_ACCESS_KEY_ID,
diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js
new file mode 100644
index 00000000000..dd5991924db
--- /dev/null
+++ b/server/utils/AiProviders/dockerModelRunner/index.js
@@ -0,0 +1,220 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+ LLMPerformanceMonitor,
+} = require("../../helpers/chat/LLMPerformanceMonitor");
+const {
+ handleDefaultStreamResponseV2,
+ formatChatHistory,
+} = require("../../helpers/chat/responses");
+
+class DockerModelRunnerLLM {
+ constructor(embedder = null, modelPreference = null) {
+ if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+ throw new Error("No Docker Model Runner Base Path was set.");
+
+ const { OpenAI: OpenAIApi } = require("openai");
+ const basePath = parseDMREndpoint(
+ process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+ "openai"
+ );
+ this.openai = new OpenAIApi({
+ baseURL: basePath,
+ apiKey: null,
+ });
+
+ this.model =
+ modelPreference || process.env.DOCKER_MODEL_RUNNER_MODEL_PREF || null;
+ if (!this.model) throw new Error("No Docker Model Runner model was set.");
+
+ this.limits = {
+ history: this.promptWindowLimit() * 0.15,
+ system: this.promptWindowLimit() * 0.15,
+ user: this.promptWindowLimit() * 0.7,
+ };
+
+ this.embedder = embedder ?? new NativeEmbedder();
+ this.defaultTemp = 0.7;
+ this.log(
+ `Initialized with model: ${this.model} and base path: ${basePath}`
+ );
+ }
+
+ log(text, ...args) {
+ console.log(`\x1b[36m[DockerModelRunnerLLM]\x1b[0m ${text}`, ...args);
+ }
+
+ #appendContext(contextTexts = []) {
+ if (!contextTexts || !contextTexts.length) return "";
+ return (
+ "\nContext:\n" +
+ contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")
+ );
+ }
+
+ streamingEnabled() {
+ return "streamGetChatCompletion" in this;
+ }
+
+ static promptWindowLimit(_modelName) {
+ const limit = process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT || 4096;
+ if (!limit || isNaN(Number(limit)))
+ throw new Error("No Docker Model Runner token context limit was set.");
+ return Number(limit);
+ }
+
+ promptWindowLimit() {
+ const limit = process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT || 4096;
+ if (!limit || isNaN(Number(limit)))
+ throw new Error("No Docker Model Runner token context limit was set.");
+ return Number(limit);
+ }
+
+ async isValidChatCompletionModel() {
+ return true;
+ }
+
+ /**
+ * Generates appropriate content array for a message + attachments.
+ * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
+ * @returns {string|object[]}
+ */
+ #generateContent({ userPrompt, attachments = [] }) {
+ if (!attachments.length) {
+ return userPrompt;
+ }
+
+ const content = [{ type: "text", text: userPrompt }];
+ for (let attachment of attachments) {
+ content.push({
+ type: "image_url",
+ image_url: {
+ url: attachment.contentString,
+ detail: "auto",
+ },
+ });
+ }
+ return content.flat();
+ }
+
+ /**
+ * Construct the user prompt for this model.
+ * @param {{attachments: import("../../helpers").Attachment[]}} param0
+ * @returns
+ */
+ constructPrompt({
+ systemPrompt = "",
+ contextTexts = [],
+ chatHistory = [],
+ userPrompt = "",
+ attachments = [],
+ }) {
+ const prompt = {
+ role: "system",
+ content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+ };
+ return [
+ prompt,
+ ...formatChatHistory(chatHistory, this.#generateContent),
+ {
+ role: "user",
+ content: this.#generateContent({ userPrompt, attachments }),
+ },
+ ];
+ }
+
+ async getChatCompletion(messages = null, { temperature = 0.7 }) {
+ if (!this.model)
+ throw new Error(
+ `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
+ );
+
+ const result = await LLMPerformanceMonitor.measureAsyncFunction(
+ this.openai.chat.completions.create({
+ model: this.model,
+ messages,
+ temperature,
+ })
+ );
+
+ if (
+ !result.output.hasOwnProperty("choices") ||
+ result.output.choices.length === 0
+ )
+ return null;
+
+ return {
+ textResponse: result.output.choices[0].message.content,
+ metrics: {
+ prompt_tokens: result.output.usage?.prompt_tokens || 0,
+ completion_tokens: result.output.usage?.completion_tokens || 0,
+ total_tokens: result.output.usage?.total_tokens || 0,
+ outputTps:
+ (result.output.usage?.completion_tokens || 0) / result.duration,
+ duration: result.duration,
+ },
+ };
+ }
+
+ async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+ if (!this.model)
+ throw new Error(
+ `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
+ );
+
+ const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
+ this.openai.chat.completions.create({
+ model: this.model,
+ stream: true,
+ messages,
+ temperature,
+ }),
+ messages
+ );
+ return measuredStreamRequest;
+ }
+
+ handleStream(response, stream, responseProps) {
+ return handleDefaultStreamResponseV2(response, stream, responseProps);
+ }
+
+ // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+ async embedTextInput(textInput) {
+ return await this.embedder.embedTextInput(textInput);
+ }
+ async embedChunks(textChunks = []) {
+ return await this.embedder.embedChunks(textChunks);
+ }
+
+ async compressMessages(promptArgs = {}, rawHistory = []) {
+ const { messageArrayCompressor } = require("../../helpers/chat");
+ const messageArray = this.constructPrompt(promptArgs);
+ return await messageArrayCompressor(this, messageArray, rawHistory);
+ }
+}
+
+/**
+ * Parse the base path of the Docker Model Runner endpoint and return the host and port.
+ * @param {string} basePath - The base path of the Docker Model Runner endpoint.
+ * @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible)
+ * @returns {string | null}
+ */
+function parseDMREndpoint(basePath = null, to = "openai") {
+ if (!basePath) return null;
+ try {
+ const url = new URL(basePath);
+ if (to === "openai") url.pathname = "engines/v1";
+ else if (to === "dmr") url.pathname = "";
+ return url.toString();
+ } catch (e) {
+ return basePath;
+ }
+}
+
+module.exports = {
+ DockerModelRunnerLLM,
+ parseDMREndpoint,
+};
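
For reference, a minimal usage sketch of `parseDMREndpoint` as defined above. The outputs follow from standard WHATWG `URL` behavior; the require path is a placeholder for wherever the module sits in your checkout.

```js
// Sketch only: how parseDMREndpoint normalizes a configured base path.
const { parseDMREndpoint } = require("./server/utils/AiProviders/dockerModelRunner");

// Default target ("openai") rewrites the path to the OpenAI-compatible engines endpoint.
parseDMREndpoint("http://127.0.0.1:12434");
// -> "http://127.0.0.1:12434/engines/v1"

// An already-qualified path is normalized to the same endpoint.
parseDMREndpoint("http://localhost:12434/engines/llama.cpp/v1");
// -> "http://localhost:12434/engines/v1"

// "dmr" strips the path back to the host root; unparseable input is returned unchanged.
parseDMREndpoint("not-a-url");
// -> "not-a-url"
```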
diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js
index 65b5a146dda..73f1b0d4ae3 100644
--- a/server/utils/agents/aibitat/index.js
+++ b/server/utils/agents/aibitat/index.js
@@ -974,6 +974,8 @@ ${this.getHistory({ to: route.to })
return new Providers.CometApiProvider({ model: config.model });
case "foundry":
return new Providers.FoundryProvider({ model: config.model });
+ case "docker-model-runner":
+ return new Providers.DockerModelRunnerProvider({ model: config.model });
default:
throw new Error(
`Unknown provider: ${config.provider}. Please use a valid provider.`
diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js
index 507015cb0cb..7a2bc70bdf9 100644
--- a/server/utils/agents/aibitat/providers/ai-provider.js
+++ b/server/utils/agents/aibitat/providers/ai-provider.js
@@ -270,6 +270,14 @@ class Provider {
...config,
});
}
+ case "docker-model-runner":
+ return new ChatOpenAI({
+ configuration: {
+ baseURL: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+ },
+ apiKey: null,
+ ...config,
+ });
default:
throw new Error(`Unsupported provider ${provider} for this task.`);
diff --git a/server/utils/agents/aibitat/providers/dockerModelRunner.js b/server/utils/agents/aibitat/providers/dockerModelRunner.js
new file mode 100644
index 00000000000..7127e38611a
--- /dev/null
+++ b/server/utils/agents/aibitat/providers/dockerModelRunner.js
@@ -0,0 +1,91 @@
+const OpenAI = require("openai");
+const Provider = require("./ai-provider.js");
+const InheritMultiple = require("./helpers/classes.js");
+const UnTooled = require("./helpers/untooled.js");
+const {
+ parseDMREndpoint,
+} = require("../../../../utils/AiProviders/dockerModelRunner");
+
+/**
+ * The agent provider for the Docker Model Runner provider.
+ */
+class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
+ model;
+
+ constructor(config = {}) {
+ const { model = process.env.DOCKER_MODEL_RUNNER_MODEL_PREF } = config;
+ super();
+ const client = new OpenAI({
+ baseURL: parseDMREndpoint(process.env.DOCKER_MODEL_RUNNER_BASE_PATH),
+ apiKey: null,
+ });
+ this._client = client;
+ this.model = model;
+ this.verbose = true;
+ }
+
+ get client() {
+ return this._client;
+ }
+
+ get supportsAgentStreaming() {
+ return true;
+ }
+
+ async #handleFunctionCallChat({ messages = [] }) {
+ return await this.client.chat.completions
+ .create({
+ model: this.model,
+ messages,
+ })
+ .then((result) => {
+ if (!result.hasOwnProperty("choices"))
+ throw new Error("Docker Model Runner chat: No results!");
+ if (result.choices.length === 0)
+ throw new Error("Docker Model Runner chat: No results length!");
+ return result.choices[0].message.content;
+ })
+ .catch(() => {
+ return null;
+ });
+ }
+
+ async #handleFunctionCallStream({ messages = [] }) {
+ return await this.client.chat.completions.create({
+ model: this.model,
+ stream: true,
+ messages,
+ });
+ }
+
+ async stream(messages, functions = [], eventHandler = null) {
+ return await UnTooled.prototype.stream.call(
+ this,
+ messages,
+ functions,
+ this.#handleFunctionCallStream.bind(this),
+ eventHandler
+ );
+ }
+
+ async complete(messages, functions = []) {
+ return await UnTooled.prototype.complete.call(
+ this,
+ messages,
+ functions,
+ this.#handleFunctionCallChat.bind(this)
+ );
+ }
+
+ /**
+ * Get the cost of the completion.
+ *
+ * @returns The cost of the completion.
+ * Stubbed since Docker Model Runner has no cost basis.
+ */
+ getCost(_usage) {
+ return 0;
+ }
+}
+
+module.exports = DockerModelRunnerProvider;
diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js
index 8cf2e7422b3..1e09fbdf977 100644
--- a/server/utils/agents/aibitat/providers/index.js
+++ b/server/utils/agents/aibitat/providers/index.js
@@ -26,6 +26,7 @@ const DellProAiStudioProvider = require("./dellProAiStudio.js");
const MoonshotAiProvider = require("./moonshotAi.js");
const CometApiProvider = require("./cometapi.js");
const FoundryProvider = require("./foundry.js");
+const DockerModelRunnerProvider = require("./dockerModelRunner.js");
module.exports = {
OpenAIProvider,
@@ -56,4 +57,5 @@ module.exports = {
DellProAiStudioProvider,
MoonshotAiProvider,
FoundryProvider,
+ DockerModelRunnerProvider,
};
diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js
index 98d3d774a09..db21cdb0e65 100644
--- a/server/utils/agents/index.js
+++ b/server/utils/agents/index.js
@@ -213,6 +213,12 @@ class AgentHandler {
if (!process.env.FOUNDRY_BASE_PATH)
throw new Error("Foundry base path must be provided to use agents.");
break;
+ case "docker-model-runner":
+ if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+ throw new Error(
+ "Docker Model Runner base path must be provided to use agents."
+ );
+ break;
default:
throw new Error(
@@ -288,6 +294,8 @@ class AgentHandler {
return process.env.COMETAPI_LLM_MODEL_PREF ?? "gpt-5-mini";
case "foundry":
return process.env.FOUNDRY_MODEL_PREF ?? null;
+ case "docker-model-runner":
+ return process.env.DOCKER_MODEL_RUNNER_MODEL_PREF ?? null;
default:
return null;
}
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index 0a9012c428f..212f22ae363 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -10,6 +10,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
const { GeminiLLM } = require("../AiProviders/gemini");
const { fetchCometApiModels } = require("../AiProviders/cometapi");
const { parseFoundryBasePath } = require("../AiProviders/foundry");
+const { parseDMREndpoint } = require("../AiProviders/dockerModelRunner");
const SUPPORT_CUSTOM_MODELS = [
"openai",
@@ -37,6 +38,7 @@ const SUPPORT_CUSTOM_MODELS = [
"dpais",
"moonshotai",
"foundry",
+ "docker-model-runner",
// Embedding Engines
"native-embedder",
];
@@ -96,6 +98,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getMoonshotAiModels(apiKey);
case "foundry":
return await getFoundryModels(basePath);
+ case "docker-model-runner":
+ return await getDockerModelRunnerModels(basePath);
case "native-embedder":
return await getNativeEmbedderModels();
default:
@@ -759,6 +763,49 @@ async function getFoundryModels(basePath = null) {
}
}
+async function getDockerModelRunnerModels(basePath = null) {
+ try {
+ const { OpenAI: OpenAIApi } = require("openai");
+ const openai = new OpenAIApi({
+ baseURL: parseDMREndpoint(
+ basePath || process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+ ),
+ apiKey: null,
+ });
+
+ // eg: ai/llama3.2:latest -> llama3.2
+ const parseDMRModelName = (modelId = null) => {
+ if (!modelId) return modelId;
+ const match = modelId.match(/^[^/]+\/(.*?):.*$/);
+ if (!match) return modelId;
+ return match?.[1]?.trim() || modelId;
+ };
+
+ const models = await openai.models
+ .list()
+ .then((results) => results.data)
+ .then((models) =>
+ models.map((model) => ({
+ id: model.id,
+ name: parseDMRModelName(model.id),
+ organization: model.owned_by,
+ }))
+ )
+ .catch((e) => {
+ console.error(`DockerModelRunner:listModels`, e.message);
+ return [];
+ });
+
+ return { models, error: null };
+ } catch (e) {
+ console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
+ return {
+ models: [],
+ error: "Could not fetch Docker Model Runner Models",
+ };
+ }
+}
+
module.exports = {
getCustomModels,
SUPPORT_CUSTOM_MODELS,
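
As a sanity check on the model-name parsing inside `getDockerModelRunnerModels` above: the helper strips the registry namespace and tag from Docker-style model IDs. The IDs below are illustrative examples, not values returned by any particular installation.

```js
// Mirrors parseDMRModelName from getDockerModelRunnerModels above; example IDs are illustrative.
const parseDMRModelName = (modelId = null) => {
  if (!modelId) return modelId;
  const match = modelId.match(/^[^/]+\/(.*?):.*$/);
  if (!match) return modelId;
  return match?.[1]?.trim() || modelId;
};

parseDMRModelName("ai/llama3.2:latest"); // -> "llama3.2"
parseDMRModelName("ai/smollm2:360M-Q4_K_M"); // -> "smollm2"
parseDMRModelName("llama3.2"); // no namespace/tag -> returned as-is
```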
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 819a464c6d0..a331cb90f97 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -218,6 +218,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "foundry":
const { FoundryLLM } = require("../AiProviders/foundry");
return new FoundryLLM(embedder, model);
+ case "docker-model-runner":
+ const {
+ DockerModelRunnerLLM,
+ } = require("../AiProviders/dockerModelRunner");
+ return new DockerModelRunnerLLM(embedder, model);
default:
throw new Error(
`ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -374,6 +379,11 @@ function getLLMProviderClass({ provider = null } = {}) {
case "foundry":
const { FoundryLLM } = require("../AiProviders/foundry");
return FoundryLLM;
+ case "docker-model-runner":
+ const {
+ DockerModelRunnerLLM,
+ } = require("../AiProviders/dockerModelRunner");
+ return DockerModelRunnerLLM;
default:
return null;
}
@@ -446,6 +456,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
return process.env.COMETAPI_LLM_MODEL_PREF;
case "foundry":
return process.env.FOUNDRY_MODEL_PREF;
+ case "docker-model-runner":
+ return process.env.DOCKER_MODEL_RUNNER_MODEL_PREF;
default:
return null;
}
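
A rough end-to-end sketch of how the new `docker-model-runner` case is reached at runtime, assuming the environment variables from the `.env.example` hunks above; the model name and require path are placeholders, not defaults.

```js
// Sketch only: selecting the new provider through getLLMProvider.
process.env.LLM_PROVIDER = "docker-model-runner";
process.env.DOCKER_MODEL_RUNNER_BASE_PATH = "http://127.0.0.1:12434";
process.env.DOCKER_MODEL_RUNNER_MODEL_PREF = "ai/llama3.2:latest"; // placeholder model
process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT = "4096";

const { getLLMProvider } = require("./server/utils/helpers");
const llm = getLLMProvider({ provider: "docker-model-runner" });
// -> DockerModelRunnerLLM instance; chat requests go to
//    http://127.0.0.1:12434/engines/v1 via the OpenAI-compatible client.
```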
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index c8109efb193..7759e867050 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -727,6 +727,20 @@ const KEY_MAPPING = {
checks: [],
},
+ // Docker Model Runner Options
+ DockerModelRunnerBasePath: {
+ envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
+ checks: [isNotEmpty],
+ },
+ DockerModelRunnerModelPref: {
+ envKey: "DOCKER_MODEL_RUNNER_MODEL_PREF",
+ checks: [isNotEmpty],
+ },
+ DockerModelRunnerModelTokenLimit: {
+ envKey: "DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT",
+ checks: [nonZero],
+ },
+
// CometAPI Options
CometApiLLMApiKey: {
envKey: "COMETAPI_LLM_API_KEY",
@@ -851,6 +865,7 @@ function supportedLLM(input = "") {
"moonshotai",
"cometapi",
"foundry",
+ "docker-model-runner",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}