From b90397696007ef7c0ab16a5327cb9e890c5c1d74 Mon Sep 17 00:00:00 2001
From: Tanner
Date: Sat, 20 Apr 2024 14:44:15 -0500
Subject: [PATCH 1/3] Added Hugging Face inference endpoints support

---
 src/components/chat-header.vue      |   4 +
 src/components/settings-dialog.vue  |  28 ++++-
 src/libs/hugging-face-api-access.js | 153 ++++++++++++++++++++++++++++
 src/libs/utils.js                   |   2 +-
 src/views/ChatLayout.vue            |  82 ++++++++++++++-
 5 files changed, 263 insertions(+), 6 deletions(-)
 create mode 100644 src/libs/hugging-face-api-access.js

diff --git a/src/components/chat-header.vue b/src/components/chat-header.vue
index 705cdcaf..006417cb 100644
--- a/src/components/chat-header.vue
+++ b/src/components/chat-header.vue
@@ -55,6 +55,10 @@ function onShowConversationsClick() {
             href="https://github.com/fingerthief/minimal-gpt#try-minimalgpt" target="_blank"
             class="no-style-link">
             MinimalLocal
         </a>
+        <a href="https://github.com/fingerthief/minimal-gpt#try-minimalgpt" target="_blank"
+            class="no-style-link">
+            MinimalHugging
+        </a>

diff --git a/src/components/settings-dialog.vue b/src/components/settings-dialog.vue
index f4c0d2dd..8bc9b4a0 100644
--- a/src/components/settings-dialog.vue
+++ b/src/components/settings-dialog.vue
@@ -7,11 +7,14 @@ const props = defineProps({
     selectedModel: String,
     localModelName: String,
     localModelEndpoint: String,
+    huggingFaceEndpoint: String,
     localSliderValue: Number,
     gptKey: String,
+    hfKey: String,
     sliderValue: Number,
     claudeKey: String,
     claudeSliderValue: Number,
+    hfSliderValue: Number,
     selectedDallEImageCount: Number,
     selectedDallEImageResolution: String,
     selectedAutoSaveOption: String
@@ -22,10 +25,13 @@ const emit = defineEmits([
     'update:localModelName',
     'update:localModelEndpoint',
     'update:localSliderValue',
+    'update:huggingFaceEndpoint',
     'update:gptKey',
+    'update:hfKey',
     'update:sliderValue',
     'update:claudeKey',
     'update:claudeSliderValue',
+    'update:hfSliderValue',
     'update:selectedDallEImageCount',
     'update:selectedDallEImageResolution',
     'update:selectedAutoSaveOption',
@@ -52,7 +58,7 @@ function toggleSidebar() {
-            Settings | V5.0.2
+            Settings | V5.0.3
@@ -111,6 +118,25 @@ function toggleSidebar() {
                     @blur="update('claudeSliderValue', $event.target.value)">
                 Creative
             </div>
+            <div class="control select-dropdown">
+                <label for="hugging-face-endpoint">Hugging Face Endpoint:</label>
+                <input id="hugging-face-endpoint" :value="huggingFaceEndpoint"
+                    @blur="update('huggingFaceEndpoint', $event.target.value)">
+            </div>
+            <div class="control select-dropdown">
+                <label for="hf-key">Hugging Face API Key:</label>
+                <input id="hf-key" :value="hfKey"
+                    @blur="update('hfKey', $event.target.value)">
+            </div>
+            <div class="control select-dropdown">
+                <label for="hf-attitude">Hugging Face Temperature:</label>
+                Serious
+                <input id="hf-attitude" type="range" min="0" max="100" :value="hfSliderValue"
+                    @blur="update('hfSliderValue', $event.target.value)">
+                Creative
+            </div>
                 DALL-E Image Count:

diff --git a/src/libs/hugging-face-api-access.js b/src/libs/hugging-face-api-access.js
new file mode 100644
index 00000000..489450bd
--- /dev/null
+++ b/src/libs/hugging-face-api-access.js
@@ -0,0 +1,153 @@
+/* eslint-disable no-unused-vars */
+import { showToast, sleep } from "./utils";
+
+let hfStreamRetryCount = 0;
+export async function fetchHuggingFaceModelResponseStream(conversation, attitude, model, huggingFaceEndpoint, updateUiFunction, apiKey) {
+    const gptMessagesOnly = filterMessages(conversation);
+
+    const requestOptions = {
+        method: "POST",
+        headers: {
+            "Content-Type": "application/json",
+            "Authorization": `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify({
+            model: model,
+            stream: true,
+            messages: gptMessagesOnly,
+            temperature: attitude * 0.01
+        }),
+    };
+
+    try {
+        const response = await fetch(`https://corsproxy.io/?${huggingFaceEndpoint + `/v1/chat/completions`}`, requestOptions);
+
+        const result = await readResponseStream(response, updateUiFunction);
+
+        hfStreamRetryCount = 0;
+        return result;
+    } catch (error) {
+        console.error("Error fetching Hugging Face Model response:", error);
+        hfStreamRetryCount++;
+
+        if (hfStreamRetryCount < 3) {
+            await sleep(1500);
+            return fetchHuggingFaceModelResponseStream(conversation, attitude, model, huggingFaceEndpoint, updateUiFunction, apiKey);
+        }
+
+        return "Error fetching response from Hugging Face Model";
+    }
+}
+
+let retryCount = 0;
+export async function getConversationTitleFromHuggingFaceModel(messages, model, sliderValue, HuggingFaceModelEndpoint) {
+    try {
+        const apiKey = document.getElementById('api-key');
+        apiKey.value = localStorage.getItem("hfKey");
+
+        let tempMessages = messages.slice(0);
+        tempMessages.push({ role: 'user', content: "Summarize my initial request or greeting in 5 words or less." });
+
+        const requestOptions = {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+                "Authorization": `Bearer ${apiKey}`,
+            },
+            body: JSON.stringify({
+                model: model,
+                stream: true,
+                messages: tempMessages,
+                temperature: sliderValue * 0.01
+            }),
+        };
+
+        const response = await fetch(`https://corsproxy.io/?${HuggingFaceModelEndpoint + `/v1/chat/completions`}`, requestOptions);
+
+        const result = await readResponseStream(response);
+
+        hfStreamRetryCount = 0;
+        return result;
+    } catch (error) {
+        if (retryCount < 5) {
+            retryCount++;
+            return getConversationTitleFromHuggingFaceModel(messages, model, sliderValue, HuggingFaceModelEndpoint);
+        }
+
+        console.error("Error fetching Hugging Face Model response:", error);
+        return "An error occurred while generating conversation title.";
+    }
+}
+
+async function readResponseStream(response, updateUiFunction) {
+    let decodedResult = "";
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder("utf-8");
+    while (true) {
+        const { done, value } = await reader.read();
+        if (done) {
+            return decodedResult;
+        }
+        const chunk = decoder.decode(value);
+        const parsedLines = parseHuggingFaceResponseChunk(chunk);
+        for (const parsedLine of parsedLines) {
+            const { choices } = parsedLine;
+            const { delta } = choices[0];
+            const { content } = delta;
+            if (content) {
+                decodedResult += content;
+
+                if (updateUiFunction) {
+                    updateUiFunction(content);
+                }
+            }
+        }
+    }
+}
+
+let buffer = ""; // Buffer to hold incomplete JSON data across chunks
+function parseHuggingFaceResponseChunk(chunk) {
+    buffer += chunk; // Append new chunk to buffer
+    const lines = buffer.split("\n");
+
+    const completeLines = lines.slice(0, -1); // All lines except the last one
+    buffer = lines[lines.length - 1]; // Last line might be incomplete, keep it in buffer
+
+    const results = [];
+    for (const line of completeLines) {
+        let cleanedLine = line.trim();
+
+        // Check if the line contains the control message [DONE] and remove it
+        if (cleanedLine.includes("[DONE]")) {
+            cleanedLine = cleanedLine.replace("[DONE]", "").trim();
+        }
+
+        // Remove any "data: " prefix that might be present after cleaning
+        // Using regex to handle any case variations and extra spaces
+        cleanedLine = cleanedLine.replace(/^data:\s*/i, "").trim();
+
+        if (cleanedLine !== "") {
+            try {
+                const parsed = JSON.parse(cleanedLine);
+                results.push(parsed);
+            } catch (error) {
+                console.error("Failed to parse JSON:", cleanedLine, error);
+            }
+        }
+    }
+    return results;
+}
+
+function filterMessages(conversation) {
+    let lastMessageContent = "";
+    return conversation.filter(message => {
+        const isGPT = !message.content.trim().toLowerCase().startsWith("image::") &&
+            !lastMessageContent.startsWith("image::");
+        lastMessageContent = message.content.trim().toLowerCase();
+        return isGPT;
+    });
+}
\ No newline at end of file
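A note on the chunk parser above: a network chunk can end mid-JSON, so `readResponseStream` never parses raw chunks directly; `parseHuggingFaceResponseChunk` holds the incomplete tail in a module-level buffer across calls. A small illustration of that behavior (the event strings are fabricated, and the helper is called directly here even though the module keeps it private):

```js
// First chunk ends mid-JSON: nothing is complete yet, so the parser
// returns [] and keeps the partial line in its module-level buffer.
const chunk1 = 'data: {"choices":[{"delta":{"content":"Hel';
console.log(parseHuggingFaceResponseChunk(chunk1)); // []

// Second chunk completes the line and appends the [DONE] sentinel.
// The buffered prefix is stitched back together and parsed as one event;
// the [DONE] line is stripped and ignored.
const chunk2 = 'lo"}}]}\ndata: [DONE]\n';
console.log(parseHuggingFaceResponseChunk(chunk2));
// [{ choices: [{ delta: { content: "Hello" } }] }]
```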
diff --git a/src/libs/utils.js b/src/libs/utils.js
index 930e9239..aa956321 100644
--- a/src/libs/utils.js
+++ b/src/libs/utils.js
@@ -52,7 +52,7 @@ export async function getConversationTitleFromGPT(messages, model, sliderValue)
 
         if (retryCount < 5) {
             retryCount++;
-            self.getConversationTitleFromGPT(messages, model, sliderValue);
+            getConversationTitleFromGPT(messages, model, sliderValue);
         }
 
         console.error("Error fetching GPT response:", error);

diff --git a/src/views/ChatLayout.vue b/src/views/ChatLayout.vue
index cf8c3662..bf876684 100644
--- a/src/views/ChatLayout.vue
+++ b/src/views/ChatLayout.vue
@@ -8,6 +8,7 @@ import { fetchClaudeConversationTitle, streamClaudeResponse } from '@/libs/claude-api-access';
 import { getConversationTitleFromGPT, showToast } from '@/libs/utils';
 import { analyzeImage } from '@/libs/image-analysis';
 import { fetchLocalModelResponseStream } from '@/libs/local-model-access';
+import { fetchHuggingFaceModelResponseStream, getConversationTitleFromHuggingFaceModel } from '@/libs/hugging-face-api-access';
 import messageItem from '@/components/message-item.vue';
 import chatInput from '@/components/chat-input.vue';
@@ -42,6 +43,11 @@ const selectedDallEImageCount = ref(parseInt(localStorage.getItem("selectedDallEImageCount")) || 1);
 const selectedDallEImageResolution = ref(localStorage.getItem("selectedDallEImageResolution") || '256x256');
 const selectedAutoSaveOption = ref(localStorage.getItem("selectedAutoSaveOption") || true);
 
+const hfKey = ref(localStorage.getItem("hfKey") || '');
+const hfSliderValue = ref(parseInt(localStorage.getItem("hf-attitude")) || 50);
+const huggingFaceEndpoint = ref(localStorage.getItem("huggingFaceEndpoint") || '');
+const isUsingHuggingFaceModel = ref(false);
+
 const conversations = ref(loadConversationTitles());
 const conversationTitles = ref(loadConversationTitles());
 const storedConversations = ref(loadStoredConversations());
@@ -58,14 +64,16 @@ watch(selectedModel, (newValue) => {
     const MODEL_TYPES = {
         LMSTUDIO: 'lmstudio',
         CLAUDE: 'claude',
-        BISON: 'bison'
+        HUGGING_FACE: 'tgi'
     };
 
     // Default settings
     let useLocalModel = false;
+
     const flags = {
         isUsingLocalModel: false,
-        isClaudeEnabled: false
+        isClaudeEnabled: false,
+        isUsingHuggingFaceModel: false
     };
 
     // Determine settings based on model type
@@ -73,8 +81,12 @@ watch(selectedModel, (newValue) => {
         useLocalModel = true;
         flags.isUsingLocalModel = true;
     }
+    else if (newValue.includes(MODEL_TYPES.HUGGING_FACE)) {
+        useLocalModel = false;
+        flags.isUsingHuggingFaceModel = true;
+    }
     else if (newValue.includes(MODEL_TYPES.CLAUDE)) {
-        useLocalModel = true;
+        useLocalModel = false;
         flags.isClaudeEnabled = true;
     }
 
@@ -85,12 +97,25 @@ watch(selectedModel, (newValue) => {
         localStorage.setItem('selectedModel', newValue);
         isUsingLocalModel.value = flags.isUsingLocalModel;
         isClaudeEnabled.value = flags.isClaudeEnabled;
+        isUsingHuggingFaceModel.value = flags.isUsingHuggingFaceModel;
     } catch (error) {
         console.error('Error updating settings:', error);
     }
 });
 
+watch(huggingFaceEndpoint, (newValue) => {
+    localStorage.setItem('huggingFaceEndpoint', newValue);
+});
+
+watch(hfSliderValue, (newValue) => {
+    localStorage.setItem('hf-attitude', newValue);
+});
+
+watch(hfKey, (newValue) => {
+    localStorage.setItem('hfKey', newValue);
+});
+
 watch(localModelName, (newValue) => {
     localStorage.setItem('localModelName', newValue);
 });
@@ -347,6 +372,7 @@ async function createNewConversationWithTitle() {
     if (isClaudeEnabled.value) {
         newConversationWithTitle.title = await fetchClaudeConversationTitle(messages.value.slice(0));
     }
+
     if (isUsingLocalModel.value) {
         //Local Models are weird with trying to title conversations...
@@ -355,6 +381,16 @@ async function createNewConversationWithTitle() {
 
         newConversationWithTitle.title = firstMessage.substring(0, Math.min(firstMessage.length, titleLength));
     }
+
+    if (isUsingHuggingFaceModel.value) {
+        //newConversationWithTitle.title = await getConversationTitleFromHuggingFaceModel(messages.value.slice(0), selectedModel.value, hfSliderValue.value, huggingFaceEndpoint.value);
+
+        //HF Models are weird with trying to title conversations...
+        const firstMessage = messages.value[0].content;
+        const titleLength = 100;
+
+        newConversationWithTitle.title = firstMessage.substring(0, Math.min(firstMessage.length, titleLength));
+    }
     else {
         newConversationWithTitle.title = await getConversationTitleFromGPT(messages.value.slice(0), selectedModel.value, sliderValue.value);
     }
@@ -461,6 +497,11 @@ async function sendMessage(event) {
         return;
     }
 
+    if (selectedModel.value.indexOf("tgi") !== -1) {
+        await sendHuggingFaceMessage(messageText);
+        return;
+    }
+
     isClaudeEnabled.value = false;
 
     addMessage("user", messageText);
@@ -525,6 +566,32 @@ async function sendGPTMessage(message) {
     }
 }
 
+async function sendHuggingFaceMessage(message) {
+    addMessage("user", message);
+
+    scrollToBottom();
+
+    userText.value = "";
+
+    streamedMessageText.value = "";
+    isLoading.value = true;
+
+    try {
+        let response = await fetchHuggingFaceModelResponseStream(messages.value, hfSliderValue.value, selectedModel.value, huggingFaceEndpoint.value, updateUI, hfKey.value);
+
+        isLoading.value = false;
+
+        addMessage('assistant', response);
+
+        await saveMessages();
+
+        scrollToBottom();
+    }
+    catch (error) {
+        console.error("Error sending message:", error);
+    }
+}
+
 async function sendClaudeMessage(messageText) {
     if (messageText.toLowerCase().startsWith("vision::")) {
         addMessage("user", messageText);
@@ -712,7 +779,10 @@ const refs = {
     claudeSliderValue,
     selectedDallEImageCount,
     selectedDallEImageResolution,
-    selectedAutoSaveOption
+    selectedAutoSaveOption,
+    hfKey,
+    hfSliderValue,
+    huggingFaceEndpoint
 };
 // Event handlers for updating the parent's state when the child emits an update
 const updateSetting = (field, value) => {
@@ -766,10 +836,14 @@ onMounted(() => {
 [template markup lost in extraction: passes the new hfKey, hfSliderValue, and huggingFaceEndpoint settings through to the settings dialog component]

From: Tanner
Date: Sat, 20 Apr 2024 15:25:05 -0500
Subject: [PATCH 2/3] Added new max_tokens setting for Hugging Face models, as
 context windows vary greatly between models and Hugging Face defaults to a
 500 token response window. Added an overlay that dims the screen when the
 settings or conversations panels are open. Other finishing touches for
 Hugging Face support.

---
 README.md                           |  4 +--
 src/components/settings-dialog.vue  | 11 ++++++--
 src/libs/hugging-face-api-access.js | 14 +++++----
 src/views/ChatLayout.vue            | 44 +++++++++++++++++++++--------
 4 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 89dfc483..00c8f15a 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ## [Try MinimalGPT/MinimalClaude/MinimalLocal (Public Site)](https://minimalgpt.app/)
 
 ![Build Status](https://img.shields.io/badge/build-passing-brightgreen)
-![Version](https://img.shields.io/badge/version-5.0.2-blue)
+![Version](https://img.shields.io/badge/version-5.0.3-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
 
 **MinimalChat** is an open-source LLM chat web app designed to be as self-contained as possible. All conversations are stored locally on the client's device, with the only information being sent to the server being API calls to GPT or Claude (uses a CORS proxy) chat when the user sends a message and when a user saves a conversation to generate a conversation title.
@@ -165,5 +165,3 @@ Also `npm run build` will output a dist folder with minified files etc...`npm ru
 
 ### Building/Bundling (WIP)
 
 - Running `npm run build` will perform a dist build process that includes minification and cache busting (sort of) and output to the `dist` folder.
-
-

diff --git a/src/components/settings-dialog.vue b/src/components/settings-dialog.vue
index 8bc9b4a0..8b4fa47b 100644
--- a/src/components/settings-dialog.vue
+++ b/src/components/settings-dialog.vue
@@ -17,10 +17,12 @@ const props = defineProps({
     hfSliderValue: Number,
     selectedDallEImageCount: Number,
     selectedDallEImageResolution: String,
-    selectedAutoSaveOption: String
+    selectedAutoSaveOption: String,
+    maxTokens: Number
 });
 
 const emit = defineEmits([
+    'update:maxTokens',
     'update:model',
     'update:localModelName',
     'update:localModelEndpoint',
@@ -72,8 +74,8 @@ function toggleSidebar() {
 [one-line template change in the settings dialog markup; content lost in extraction]
@@ -129,6 +131,11 @@ function toggleSidebar() {
+            <div class="control select-dropdown">
+                <label for="max-tokens">Max Tokens:</label>
+                <input id="max-tokens" type="number" :value="maxTokens"
+                    @blur="update('maxTokens', $event.target.value)">
+            </div>
                 Serious

diff --git a/src/libs/hugging-face-api-access.js b/src/libs/hugging-face-api-access.js
index 489450bd..2b66d1ab 100644
--- a/src/libs/hugging-face-api-access.js
+++ b/src/libs/hugging-face-api-access.js
@@ -2,7 +2,7 @@
 import { showToast, sleep } from "./utils";
 
 let hfStreamRetryCount = 0;
-export async function fetchHuggingFaceModelResponseStream(conversation, attitude, model, huggingFaceEndpoint, updateUiFunction, apiKey) {
+export async function fetchHuggingFaceModelResponseStream(conversation, attitude, model, huggingFaceEndpoint, updateUiFunction, apiKey, maxTokens) {
     const gptMessagesOnly = filterMessages(conversation);
 
     const requestOptions = {
@@ -15,12 +15,13 @@ export async function fetchHuggingFaceModelResponseStream(conversation, attitude
             model: model,
             stream: true,
             messages: gptMessagesOnly,
-            temperature: attitude * 0.01
+            temperature: attitude * 0.01,
+            max_tokens: parseInt(maxTokens)
         }),
     };
 
     try {
-        const response = await fetch(`https://corsproxy.io/?${huggingFaceEndpoint + `/v1/chat/completions`}`, requestOptions);
+        const response = await fetch(`${huggingFaceEndpoint}/v1/chat/completions`, requestOptions);
 
         const result = await readResponseStream(response, updateUiFunction);
 
@@ -54,17 +55,18 @@ export async function getConversationTitleFromHuggingFaceModel(messages, model,
             method: "POST",
             headers: {
                 "Content-Type": "application/json",
-                "Authorization": `Bearer ${apiKey}`,
+                "Authorization": `Bearer ${apiKey.value}`,
             },
             body: JSON.stringify({
                 model: model,
                 stream: true,
                 messages: tempMessages,
-                temperature: sliderValue * 0.01
+                temperature: sliderValue * 0.01,
+                max_tokens: 500
             }),
         };
 
-        const response = await fetch(`https://corsproxy.io/?${HuggingFaceModelEndpoint + `/v1/chat/completions`}`, requestOptions);
+        const response = await fetch(`${HuggingFaceModelEndpoint}/v1/chat/completions`, requestOptions);
 
         const result = await readResponseStream(response);
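With the CORS proxy dropped and `max_tokens` threaded through, the streaming call now amounts to a direct request against the endpoint's OpenAI-compatible route. A sketch of the equivalent bare `fetch` (the endpoint URL and key are placeholders, not values from this patch):

```js
// Equivalent to the request fetchHuggingFaceModelResponseStream builds
// after this patch. The explicit max_tokens matters because Hugging Face
// otherwise caps responses at a small (~500 token) window.
const response = await fetch(
    "https://my-endpoint.endpoints.huggingface.cloud/v1/chat/completions", // placeholder
    {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "Authorization": "Bearer hf_xxx", // placeholder key
        },
        body: JSON.stringify({
            model: "tgi",
            stream: true,
            messages: [{ role: "user", content: "Hello!" }],
            temperature: 0.5,  // slider value 50 scaled by 0.01
            max_tokens: 3000,  // new setting; ChatLayout.vue defaults to 3000
        }),
    }
);
```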
diff --git a/src/views/ChatLayout.vue b/src/views/ChatLayout.vue
index bf876684..f6555394 100644
--- a/src/views/ChatLayout.vue
+++ b/src/views/ChatLayout.vue
@@ -47,6 +47,7 @@ const hfKey = ref(localStorage.getItem("hfKey") || '');
 const hfSliderValue = ref(parseInt(localStorage.getItem("hf-attitude")) || 50);
 const huggingFaceEndpoint = ref(localStorage.getItem("huggingFaceEndpoint") || '');
 const isUsingHuggingFaceModel = ref(false);
+const maxTokens = ref(parseInt(localStorage.getItem("hf-max-tokens")) || 3000);
 
 const conversations = ref(loadConversationTitles());
 const conversationTitles = ref(loadConversationTitles());
@@ -104,6 +105,10 @@ watch(selectedModel, (newValue) => {
     }
 });
 
+watch(maxTokens, (newValue) => {
+    localStorage.setItem('hf-max-tokens', newValue);
+});
+
 watch(huggingFaceEndpoint, (newValue) => {
     localStorage.setItem('huggingFaceEndpoint', newValue);
 });
@@ -383,13 +388,7 @@ async function createNewConversationWithTitle() {
     }
 
     if (isUsingHuggingFaceModel.value) {
-        //newConversationWithTitle.title = await getConversationTitleFromHuggingFaceModel(messages.value.slice(0), selectedModel.value, hfSliderValue.value, huggingFaceEndpoint.value);
-
-        //HF Models are weird with trying to title conversations...
-        const firstMessage = messages.value[0].content;
-        const titleLength = 100;
-
-        newConversationWithTitle.title = firstMessage.substring(0, Math.min(firstMessage.length, titleLength));
+        newConversationWithTitle.title = await getConversationTitleFromHuggingFaceModel(messages.value.slice(0), selectedModel.value, hfSliderValue.value, huggingFaceEndpoint.value);
     }
     else {
         newConversationWithTitle.title = await getConversationTitleFromGPT(messages.value.slice(0), selectedModel.value, sliderValue.value);
@@ -577,7 +576,7 @@ async function sendHuggingFaceMessage(message) {
     isLoading.value = true;
 
     try {
-        let response = await fetchHuggingFaceModelResponseStream(messages.value, hfSliderValue.value, selectedModel.value, huggingFaceEndpoint.value, updateUI, hfKey.value);
+        let response = await fetchHuggingFaceModelResponseStream(messages.value, hfSliderValue.value, selectedModel.value, huggingFaceEndpoint.value, updateUI, hfKey.value, maxTokens.value);
 
         isLoading.value = false;
 
@@ -782,7 +781,8 @@ const refs = {
     selectedAutoSaveOption,
     hfKey,
     hfSliderValue,
-    huggingFaceEndpoint
+    huggingFaceEndpoint,
+    maxTokens
 };
 // Event handlers for updating the parent's state when the child emits an update
 const updateSetting = (field, value) => {
@@ -831,6 +831,9 @@ onMounted(() => {
 [template markup lost in extraction: binds the new maxTokens setting into the settings dialog and adds the overlay element that dims the screen while the settings or conversations panels are open]
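After both patches, the exported streaming helper has the calling convention below. A minimal sketch of driving it outside the app (the endpoint, key, and output sink are assumptions for illustration, not part of this PR):

```js
import { fetchHuggingFaceModelResponseStream } from '@/libs/hugging-face-api-access';

// Placeholder values -- any TGI-style endpoint exposing
// /v1/chat/completions should fit this shape.
const endpoint = "https://my-endpoint.endpoints.huggingface.cloud";
const conversation = [{ role: "user", content: "Explain SSE in one sentence." }];

const reply = await fetchHuggingFaceModelResponseStream(
    conversation,
    50,                                     // attitude slider (0-100, scaled to temperature 0.5)
    "tgi",                                  // model name; "tgi" is also what routes sendMessage here
    endpoint,
    (token) => process.stdout.write(token), // streaming callback (Node-style sink for this sketch)
    "hf_xxx",                               // API key (placeholder)
    "3000"                                  // maxTokens; normalized with parseInt inside the module
);
console.log("\nFull reply:", reply);
```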