diff --git a/README.md b/README.md index 31d0e3cc333..cb61b03bcc2 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [Moonshot AI](https://www.moonshot.ai/) - [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local) - [CometAPI (chat models)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **Embedder models:** diff --git a/docker/.env.example b/docker/.env.example index 27fa1c013c6..a3d81f3e3f0 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -149,6 +149,11 @@ GID='1000' # FOUNDRY_MODEL_PREF='phi-3.5-mini' # FOUNDRY_MODEL_TOKEN_LIMIT=4096 +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx new file mode 100644 index 00000000000..3ec0854b2b6 --- /dev/null +++ b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx @@ -0,0 +1,152 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; +import PreLoader from "@/components/Preloader"; +import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants"; +import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery"; +import { upperCaseFirst } from "text-case"; + +export default function DockerModelRunnerOptions({ settings }) { + const { + autoDetecting: loading, + basePath, + basePathValue, + handleAutoDetectClick, + } = useProviderEndpointAutoDiscovery({ + provider: "docker-model-runner", + initialBasePath: settings?.DockerModelRunnerBasePath, + ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS, + }); + + const [maxTokens, setMaxTokens] = useState( + settings?.DockerModelRunnerModelTokenLimit || 4096 + ); + + return ( +
+    <div className="w-full flex flex-col gap-y-7">
+      <div className="w-full flex items-start gap-[36px] mt-1.5">
+        {/* Base path with endpoint auto-discovery */}
+        <div className="flex flex-col w-60">
+          <div className="flex justify-between items-center mb-2">
+            <label className="text-white text-sm font-semibold">
+              Docker Model Runner Base URL
+            </label>
+            {loading ? (
+              <PreLoader size="6" />
+            ) : (
+              <>
+                {!basePathValue.value && (
+                  <button
+                    onClick={handleAutoDetectClick}
+                    className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
+                  >
+                    Auto-Detect
+                  </button>
+                )}
+              </>
+            )}
+          </div>
+          <input
+            type="url"
+            name="DockerModelRunnerBasePath"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="http://localhost:12434"
+            value={basePathValue.value}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+            onChange={basePath.onChange}
+            onBlur={basePath.onBlur}
+          />
+        </div>
+
+        {/* Model picker populated from the running engine */}
+        <DockerModelRunnerModelSelection
+          settings={settings}
+          basePath={basePath.value}
+        />
+
+        {/* Token context window */}
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-2">
+            Token context window
+          </label>
+          <input
+            type="number"
+            name="DockerModelRunnerModelTokenLimit"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="4096"
+            min={1}
+            value={maxTokens}
+            onChange={(e) => setMaxTokens(Number(e.target.value))}
+            onScroll={(e) => e.target.blur()}
+            required={true}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
+
+function DockerModelRunnerModelSelection({ settings, basePath = null }) {
+  const [customModels, setCustomModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      if (!basePath) {
+        setCustomModels([]);
+        setLoading(false);
+        return;
+      }
+
+      setLoading(true);
+      const { models } = await System.customModels(
+        "docker-model-runner",
+        null,
+        basePath
+      );
+      setCustomModels(models || []);
+      setLoading(false);
+    }
+    findCustomModels();
+  }, [basePath]);
+
+  if (loading) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-2">
+          Docker Model Runner Model
+        </label>
+        <select
+          name="DockerModelRunnerModelPref"
+          disabled={true}
+          className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            --loading available models--
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-2">
+        Docker Model Runner Model
+      </label>
+      <select
+        name="DockerModelRunnerModelPref"
+        required={true}
+        className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {customModels.length > 0 && (
+          <optgroup label="Discovered models">
+            {customModels.map((model) => (
+              <option
+                key={model.id}
+                value={model.id}
+                selected={settings?.DockerModelRunnerModelPref === model.id}
+              >
+                {upperCaseFirst(model.name)}
+              </option>
+            ))}
+          </optgroup>
+        )}
+      </select>
+    </div>
+ ); +} diff --git a/frontend/src/media/llmprovider/docker-model-runner.png b/frontend/src/media/llmprovider/docker-model-runner.png new file mode 100644 index 00000000000..961d921cb22 Binary files /dev/null and b/frontend/src/media/llmprovider/docker-model-runner.png differ diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index 671f7e867da..edd70526e35 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -35,6 +35,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; import CometApiLogo from "@/media/llmprovider/cometapi.png"; import FoundryLogo from "@/media/llmprovider/foundry-local.png"; +import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; @@ -67,6 +68,7 @@ import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions"; import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions"; import FoundryOptions from "@/components/LLMSelection/FoundryOptions"; +import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -327,6 +329,18 @@ export const AVAILABLE_LLM_PROVIDERS = [ "FoundryModelTokenLimit", ], }, + { + name: "Docker Model Runner", + value: "docker-model-runner", + logo: DockerModelRunnerLogo, + options: (settings) => , + description: "Run models locally using Docker Model Runner.", + requiredConfig: [ + "DockerModelRunnerBasePath", + "DockerModelRunnerModelPref", + "DockerModelRunnerModelTokenLimit", + ], + }, { name: "xAI", value: "xai", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index b12979a889d..38fbcda7c00 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -41,6 +41,7 @@ import DPAISLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; import CometApiLogo from "@/media/llmprovider/cometapi.png"; import FoundryLogo from "@/media/llmprovider/foundry-local.png"; +import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png"; import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; @@ -269,6 +270,13 @@ export const LLM_SELECTION_PRIVACY = { ], logo: FoundryLogo, }, + "docker-model-runner": { + name: "Docker Model Runner", + description: [ + "Your model and chats are only accessible on the machine running Docker Model Runner", + ], + logo: DockerModelRunnerLogo, + }, }; export const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 7a16985fe11..1938078a41e 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -29,6 +29,7 @@ import PPIOLogo from "@/media/llmprovider/ppio.png"; import DellProAiStudioLogo from 
"@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; import CometApiLogo from "@/media/llmprovider/cometapi.png"; +import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -59,6 +60,7 @@ import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions"; import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions"; import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions"; +import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -281,6 +283,13 @@ const LLMS = [ options: (settings) => , description: "500+ AI Models all in one API.", }, + { + name: "Docker Model Runner", + value: "docker-model-runner", + logo: DockerModelRunnerLogo, + options: (settings) => , + description: "Run models locally using Docker Model Runner.", + }, ]; export default function LLMPreference({ diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index a6efc519c20..50eac997a2a 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -51,6 +51,14 @@ export const NVIDIA_NIM_COMMON_URLS = [ "http://172.17.0.1:8000/v1/version", ]; +export const DOCKER_MODEL_RUNNER_COMMON_URLS = [ + "http://localhost:12434/engines/llama.cpp/v1", + "http://127.0.0.1:12434/engines/llama.cpp/v1", + "http://model-runner.docker.internal/engines/llama.cpp/v1", + "http://host.docker.internal:12434/engines/llama.cpp/v1", + "http://172.17.0.1:12434/engines/llama.cpp/v1", +]; + export function fullApiUrl() { if (API_BASE !== "/api") return API_BASE; return `${window.location.origin}/api`; diff --git a/server/.env.example b/server/.env.example index 3dc0bd596c4..0ec7d9f2ae1 100644 --- a/server/.env.example +++ b/server/.env.example @@ -148,6 +148,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. 
# FOUNDRY_MODEL_PREF='phi-3.5-mini' # FOUNDRY_MODEL_TOKEN_LIMIT=4096 +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 3a7a4b21554..d8931a41ea4 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -574,6 +574,12 @@ const SystemSettings = { FoundryModelPref: process.env.FOUNDRY_MODEL_PREF, FoundryModelTokenLimit: process.env.FOUNDRY_MODEL_TOKEN_LIMIT, + // Docker Model Runner Keys + DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + DockerModelRunnerModelPref: process.env.DOCKER_MODEL_RUNNER_MODEL_PREF, + DockerModelRunnerModelTokenLimit: + process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT, + AwsBedrockLLMConnectionMethod: process.env.AWS_BEDROCK_LLM_CONNECTION_METHOD || "iam", AwsBedrockLLMAccessKeyId: !!process.env.AWS_BEDROCK_LLM_ACCESS_KEY_ID, diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js new file mode 100644 index 00000000000..dd5991924db --- /dev/null +++ b/server/utils/AiProviders/dockerModelRunner/index.js @@ -0,0 +1,220 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + LLMPerformanceMonitor, +} = require("../../helpers/chat/LLMPerformanceMonitor"); +const { + handleDefaultStreamResponseV2, + formatChatHistory, +} = require("../../helpers/chat/responses"); + +class DockerModelRunnerLLM { + constructor(embedder = null, modelPreference = null) { + if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH) + throw new Error("No Docker Model Runner Base Path was set."); + + const { OpenAI: OpenAIApi } = require("openai"); + const basePath = parseDMREndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + "openai" + ); + this.openai = new OpenAIApi({ + baseURL: basePath, + apiKey: null, + }); + + this.model = + modelPreference || process.env.DOCKER_MODEL_RUNNER_MODEL_PREF || null; + if (!this.model) throw new Error("No Docker Model Runner model was set."); + + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.embedder = embedder ?? 
new NativeEmbedder(); + this.defaultTemp = 0.7; + this.log( + `Initialized with model: ${this.model} and base path: ${basePath}` + ); + } + + log(text, ...args) { + console.log(`\x1b[36m[DockerModelRunnerLLM]\x1b[0m ${text}`, ...args); + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + static promptWindowLimit(_modelName) { + const limit = process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT || 4096; + if (!limit || isNaN(Number(limit))) + throw new Error("No Docker Model Runner token context limit was set."); + return Number(limit); + } + + promptWindowLimit() { + const limit = process.env.DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT || 4096; + if (!limit || isNaN(Number(limit))) + throw new Error("No Docker Model Runner token context limit was set."); + return Number(limit); + } + + async isValidChatCompletionModel() { + return true; + } + + /** + * Generates appropriate content array for a message + attachments. + * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}} + * @returns {string|object[]} + */ + #generateContent({ userPrompt, attachments = [] }) { + if (!attachments.length) { + return userPrompt; + } + + const content = [{ type: "text", text: userPrompt }]; + for (let attachment of attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + detail: "auto", + }, + }); + } + return content.flat(); + } + + /** + * Construct the user prompt for this model. + * @param {{attachments: import("../../helpers").Attachment[]}} param0 + * @returns + */ + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...formatChatHistory(chatHistory, this.#generateContent), + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const result = await LLMPerformanceMonitor.measureAsyncFunction( + this.openai.chat.completions.create({ + model: this.model, + messages, + temperature, + }) + ); + + if ( + !result.output.hasOwnProperty("choices") || + result.output.choices.length === 0 + ) + return null; + + return { + textResponse: result.output.choices[0].message.content, + metrics: { + prompt_tokens: result.output.usage?.prompt_tokens || 0, + completion_tokens: result.output.usage?.completion_tokens || 0, + total_tokens: result.output.usage?.total_tokens || 0, + outputTps: + (result.output.usage?.completion_tokens || 0) / result.duration, + duration: result.duration, + }, + }; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( + this.openai.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }), + messages + ); + return measuredStreamRequest; 
+ } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +/** + * Parse the base path of the Docker Model Runner endpoint and return the host and port. + * @param {string} basePath - The base path of the Docker Model Runner endpoint. + * @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible) + * @returns {string | null} + */ +function parseDMREndpoint(basePath = null, to = "openai") { + if (!basePath) return null; + try { + const url = new URL(basePath); + if (to === "openai") url.pathname = "engines/v1"; + else if (to === "dmr") url.pathname = ""; + return url.toString(); + } catch (e) { + return basePath; + } +} + +module.exports = { + DockerModelRunnerLLM, + parseDMREndpoint, +}; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 65b5a146dda..73f1b0d4ae3 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -974,6 +974,8 @@ ${this.getHistory({ to: route.to }) return new Providers.CometApiProvider({ model: config.model }); case "foundry": return new Providers.FoundryProvider({ model: config.model }); + case "docker-model-runner": + return new Providers.DockerModelRunnerProvider({ model: config.model }); default: throw new Error( `Unknown provider: ${config.provider}. Please use a valid provider.` diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 507015cb0cb..7a2bc70bdf9 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -270,6 +270,14 @@ class Provider { ...config, }); } + case "docker-model-runner": + return new ChatOpenAI({ + configuration: { + baseURL: process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + }, + apiKey: null, + ...config, + }); default: throw new Error(`Unsupported provider ${provider} for this task.`); diff --git a/server/utils/agents/aibitat/providers/dockerModelRunner.js b/server/utils/agents/aibitat/providers/dockerModelRunner.js new file mode 100644 index 00000000000..7127e38611a --- /dev/null +++ b/server/utils/agents/aibitat/providers/dockerModelRunner.js @@ -0,0 +1,91 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); +const { + parseDMREndpoint, +} = require("../../../../utils/AiProviders/dockerModelRunner"); + +/** + * The agent provider for the Docker Model Runner provider. 
+ */ +class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) { + model; + + constructor(config = {}) { + const { model = process.env.DOCKER_MODEL_RUNNER_MODEL_PREF } = config; + super(); + const client = new OpenAI({ + baseURL: parseDMREndpoint(process.env.DOCKER_MODEL_RUNNER_BASE_PATH), + apiKey: null, + }); + this._client = client; + this.model = model; + this.verbose = true; + } + + get client() { + return this._client; + } + + get supportsAgentStreaming() { + return true; + } + + async #handleFunctionCallChat({ messages = [] }) { + return await this.client.chat.completions + .create({ + model: this.model, + messages, + }) + .then((result) => { + if (!result.hasOwnProperty("choices")) + throw new Error("Docker Model Runner chat: No results!"); + if (result.choices.length === 0) + throw new Error("Docker Model Runner chat: No results length!"); + return result.choices[0].message.content; + }) + .catch(() => { + return null; + }); + } + + async #handleFunctionCallStream({ messages = [] }) { + return await this.client.chat.completions.create({ + model: this.model, + stream: true, + messages, + }); + } + + async stream(messages, functions = [], eventHandler = null) { + return await UnTooled.prototype.stream.call( + this, + messages, + functions, + this.#handleFunctionCallStream.bind(this), + eventHandler + ); + } + + async complete(messages, functions = []) { + return await UnTooled.prototype.complete.call( + this, + messages, + functions, + this.#handleFunctionCallChat.bind(this) + ); + } + + /** + * Get the cost of the completion. + * + * @returns The cost of the completion. + * Stubbed since Docker Model Runner has no cost basis. + */ + getCost(_usage) { + return 0; + } +} + +module.exports = DockerModelRunnerProvider; diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js index 8cf2e7422b3..1e09fbdf977 100644 --- a/server/utils/agents/aibitat/providers/index.js +++ b/server/utils/agents/aibitat/providers/index.js @@ -26,6 +26,7 @@ const DellProAiStudioProvider = require("./dellProAiStudio.js"); const MoonshotAiProvider = require("./moonshotAi.js"); const CometApiProvider = require("./cometapi.js"); const FoundryProvider = require("./foundry.js"); +const DockerModelRunnerProvider = require("./dockerModelRunner.js"); module.exports = { OpenAIProvider, @@ -56,4 +57,5 @@ module.exports = { DellProAiStudioProvider, MoonshotAiProvider, FoundryProvider, + DockerModelRunnerProvider, }; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index 98d3d774a09..db21cdb0e65 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -213,6 +213,12 @@ class AgentHandler { if (!process.env.FOUNDRY_BASE_PATH) throw new Error("Foundry base path must be provided to use agents."); break; + case "docker-model-runner": + if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH) + throw new Error( + "Docker Model Runner base path must be provided to use agents." + ); + break; default: throw new Error( @@ -288,6 +294,8 @@ class AgentHandler { return process.env.COMETAPI_LLM_MODEL_PREF ?? "gpt-5-mini"; case "foundry": return process.env.FOUNDRY_MODEL_PREF ?? null; + case "docker-model-runner": + return process.env.DOCKER_MODEL_RUNNER_MODEL_PREF ?? 
null; default: return null; } diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index 0a9012c428f..212f22ae363 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -10,6 +10,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio"); const { GeminiLLM } = require("../AiProviders/gemini"); const { fetchCometApiModels } = require("../AiProviders/cometapi"); const { parseFoundryBasePath } = require("../AiProviders/foundry"); +const { parseDMREndpoint } = require("../AiProviders/dockerModelRunner"); const SUPPORT_CUSTOM_MODELS = [ "openai", @@ -37,6 +38,7 @@ const SUPPORT_CUSTOM_MODELS = [ "dpais", "moonshotai", "foundry", + "docker-model-runner", // Embedding Engines "native-embedder", ]; @@ -96,6 +98,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getMoonshotAiModels(apiKey); case "foundry": return await getFoundryModels(basePath); + case "docker-model-runner": + return await getDockerModelRunnerModels(basePath); case "native-embedder": return await getNativeEmbedderModels(); default: @@ -759,6 +763,49 @@ async function getFoundryModels(basePath = null) { } } +async function getDockerModelRunnerModels(basePath = null) { + try { + const { OpenAI: OpenAIApi } = require("openai"); + const openai = new OpenAIApi({ + baseURL: parseDMREndpoint( + basePath || process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + apiKey: null, + }); + + // eg: ai/llama3.2:latest -> llama3.2 + const parseDMRModelName = (modelId = null) => { + if (!modelId) return modelId; + const match = modelId.match(/^[^/]+\/(.*?):.*$/); + if (!match) return modelId; + return match?.[1]?.trim() || modelId; + }; + + const models = await openai.models + .list() + .then((results) => results.data) + .then((models) => + models.map((model) => ({ + id: model.id, + name: parseDMRModelName(model.id), + organization: model.owned_by, + })) + ) + .catch((e) => { + console.error(`DockerModelRunner:listModels`, e.message); + return []; + }); + + return { models, error: null }; + } catch (e) { + console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message); + return { + models: [], + error: "Could not fetch Docker Model Runner Models", + }; + } +} + module.exports = { getCustomModels, SUPPORT_CUSTOM_MODELS, diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 819a464c6d0..a331cb90f97 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -218,6 +218,11 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "foundry": const { FoundryLLM } = require("../AiProviders/foundry"); return new FoundryLLM(embedder, model); + case "docker-model-runner": + const { + DockerModelRunnerLLM, + } = require("../AiProviders/dockerModelRunner"); + return new DockerModelRunnerLLM(embedder, model); default: throw new Error( `ENV: No valid LLM_PROVIDER value found in environment! 
Using ${process.env.LLM_PROVIDER}` @@ -374,6 +379,11 @@ function getLLMProviderClass({ provider = null } = {}) { case "foundry": const { FoundryLLM } = require("../AiProviders/foundry"); return FoundryLLM; + case "docker-model-runner": + const { + DockerModelRunnerLLM, + } = require("../AiProviders/dockerModelRunner"); + return DockerModelRunnerLLM; default: return null; } @@ -446,6 +456,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) { return process.env.COMETAPI_LLM_MODEL_PREF; case "foundry": return process.env.FOUNDRY_MODEL_PREF; + case "docker-model-runner": + return process.env.DOCKER_MODEL_RUNNER_MODEL_PREF; default: return null; } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index c8109efb193..7759e867050 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -727,6 +727,20 @@ const KEY_MAPPING = { checks: [], }, + // Docker Model Runner Options + DockerModelRunnerBasePath: { + envKey: "DOCKER_MODEL_RUNNER_BASE_PATH", + checks: [isNotEmpty], + }, + DockerModelRunnerModelPref: { + envKey: "DOCKER_MODEL_RUNNER_MODEL_PREF", + checks: [isNotEmpty], + }, + DockerModelRunnerModelTokenLimit: { + envKey: "DOCKER_MODEL_RUNNER_MODEL_TOKEN_LIMIT", + checks: [nonZero], + }, + // CometAPI Options CometApiLLMApiKey: { envKey: "COMETAPI_LLM_API_KEY", @@ -851,6 +865,7 @@ function supportedLLM(input = "") { "moonshotai", "cometapi", "foundry", + "docker-model-runner", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; }
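
For reference, the base-path and model-name normalization added above boils down to the following. This is a minimal sketch that mirrors parseDMREndpoint and the inline parseDMRModelName helper in customModels.js, using only Node's built-in URL API:

// Sketch of the server-side normalization (see parseDMREndpoint above).
// DOCKER_MODEL_RUNNER_BASE_PATH only needs host and port; the
// OpenAI-compatible route is appended before requests are made.
const url = new URL("http://127.0.0.1:12434");
url.pathname = "engines/v1"; // URL adds the leading slash
console.log(url.toString()); // http://127.0.0.1:12434/engines/v1

// Display names strip the namespace and tag from Docker-style model IDs.
const parseDMRModelName = (modelId) => {
  const match = modelId.match(/^[^/]+\/(.*?):.*$/);
  return match?.[1]?.trim() || modelId;
};
console.log(parseDMRModelName("ai/llama3.2:latest")); // "llama3.2"

Note that any path already present on DOCKER_MODEL_RUNNER_BASE_PATH is overwritten by this rewrite, so pointing the env var at http://localhost:12434 and letting the server append the route is the intended setup.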
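
To sanity-check a local Docker Model Runner install against the same OpenAI-compatible endpoint the provider uses, the following standalone sketch can be run with the `openai` package. It assumes DMR is listening on the default port 12434 and that some model has already been pulled; the ai/llama3.2 tag is illustrative only:

// Smoke test against the endpoint parseDMREndpoint produces.
const OpenAI = require("openai");

(async () => {
  const client = new OpenAI({
    baseURL: "http://127.0.0.1:12434/engines/v1", // same as the provider's resolved base path
    apiKey: null, // Docker Model Runner does not require an API key
  });

  // List the models the engine currently serves.
  const { data: models } = await client.models.list();
  console.log(models.map((m) => m.id)); // e.g. ["ai/llama3.2:latest", ...]

  // Run a single chat completion against one of them.
  const completion = await client.chat.completions.create({
    model: "ai/llama3.2:latest", // assumption: substitute any model you have pulled
    messages: [{ role: "user", content: "Say hello in five words." }],
  });
  console.log(completion.choices[0].message.content);
})();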