1 change: 1 addition & 0 deletions README.md
@@ -285,6 +285,7 @@ List of command-line flags
 | `--no_use_cuda_fp16` | This can make models faster on some systems. |
 | `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. |
 | `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. |
+| `--disable_exllamav2` | Disable ExLlamav2 kernel. |

 #### GPTQ-for-LLaMa

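The flag's argparse definition (in modules/shared.py) is not part of this excerpt. Below is a minimal sketch of how the new flag is presumably registered, mirroring the existing `--disable_exllama` switch; the action and help text are assumptions, not lines from the PR:

```python
# Hypothetical sketch of the flag definition; the real one lives in
# modules/shared.py, which this diff does not show.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--disable_exllama', action='store_true', help='Disable ExLlama kernel.')
parser.add_argument('--disable_exllamav2', action='store_true', help='Disable ExLlamav2 kernel.')

args = parser.parse_args(['--disable_exllamav2'])
assert args.disable_exllamav2 and not args.disable_exllama
```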
1 change: 0 additions & 1 deletion docs/07 - Extensions.md
@@ -18,7 +18,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an
 |[multimodal](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal) | Adds multimodality support (text+images). For a detailed description see [README.md](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal/README.md) in the extension directory. |
 |[google_translate](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/google_translate)| Automatically translates inputs and outputs using Google Translate.|
 |[silero_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/silero_tts)| Text-to-speech extension using [Silero](https://github.com/snakers4/silero-models). When used in chat mode, responses are replaced with an audio widget. |
-|[elevenlabs_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/elevenlabs_tts)| Text-to-speech extension using the [ElevenLabs](https://beta.elevenlabs.io/) API. You need an API key to use it. |
 |[whisper_stt](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/whisper_stt)| Allows you to enter your inputs in chat mode using your microphone. |
 |[sd_api_pictures](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures)| Allows you to request pictures from the bot in chat mode, which will be generated using the AUTOMATIC1111 Stable Diffusion API. See examples [here](https://github.com/oobabooga/text-generation-webui/pull/309). |
 |[character_bias](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/character_bias)| Just a very simple example that adds a hidden string at the beginning of the bot's reply in chat mode. |
1 change: 0 additions & 1 deletion extensions/elevenlabs_tts/requirements.txt

This file was deleted.

197 changes: 0 additions & 197 deletions extensions/elevenlabs_tts/script.py

This file was deleted.

2 changes: 1 addition & 1 deletion extensions/openai/logits.py
@@ -8,4 +8,4 @@ def _get_next_logits(body):
     state = process_parameters(body) if use_samplers else {}
     state['stream'] = True

-    return get_next_logits(body['prompt'], state, use_samplers, "", return_dict=True)
+    return get_next_logits(body['prompt'], state, use_samplers, "", top_logits=body['top_logits'], return_dict=True)
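For context, the new `top_logits` field controls how many token/score pairs the logits endpoint returns. A hedged usage sketch, assuming the server is running with the API enabled and that the route is POST /v1/internal/logits (the path and port are assumptions, not shown in this diff):

```python
# Hedged sketch: query the logits endpoint with the new top_logits field.
import json
import urllib.request

payload = {
    "prompt": "The capital of France is",
    "use_samplers": False,
    "top_logits": 10,  # new in this PR; LogitsRequestParams defaults to 50
}
req = urllib.request.Request(
    "http://127.0.0.1:5000/v1/internal/logits",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    logits = json.load(resp)["logits"]  # Dict[str, float], per LogitsResponse

for token, score in logits.items():  # ten entries instead of the default 50
    print(f"{token!r}: {score:.4f}")
```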
7 changes: 4 additions & 3 deletions extensions/openai/typing.py
@@ -1,6 +1,6 @@
 import json
 import time
-from typing import List
+from typing import Dict, List

 from pydantic import BaseModel, Field

@@ -120,7 +120,7 @@ class ChatCompletionResponse(BaseModel):


 class EmbeddingsRequest(BaseModel):
-    input: str | List[str]
+    input: str | List[str] | List[int] | List[List[int]]
     model: str | None = Field(default=None, description="Unused parameter. To change the model, set the OPENEDAI_EMBEDDING_MODEL and OPENEDAI_EMBEDDING_DEVICE environment variables before starting the server.")
     encoding_format: str = Field(default="float", description="Can be float or base64.")
     user: str | None = Field(default=None, description="Unused parameter.")

@@ -156,6 +156,7 @@ class TokenCountResponse(BaseModel):
 class LogitsRequestParams(BaseModel):
     prompt: str
     use_samplers: bool = False
+    top_logits: int | None = 50
     frequency_penalty: float | None = 0
     max_tokens: int | None = 16
     presence_penalty: float | None = 0

@@ -168,7 +169,7 @@ class LogitsRequest(GenerationOptions, LogitsRequestParams):


 class LogitsResponse(BaseModel):
-    logits: dict
+    logits: Dict[str, float]


 class ModelInfoResponse(BaseModel):
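The widened `input` union on EmbeddingsRequest is worth a quick illustration: it now also accepts pre-tokenized input (a list of token IDs, or a batch of such lists), mirroring what the OpenAI embeddings API allows. A self-contained pydantic sketch of just that field; the token IDs are arbitrary illustrative values:

```python
# Minimal reproduction of the widened union; not the full request model.
from typing import List
from pydantic import BaseModel

class EmbeddingsRequest(BaseModel):
    input: str | List[str] | List[int] | List[List[int]]

# All four shapes now validate:
EmbeddingsRequest(input="hello world")         # single string
EmbeddingsRequest(input=["hello", "world"])    # batch of strings
EmbeddingsRequest(input=[3834, 995])           # one pre-tokenized prompt
EmbeddingsRequest(input=[[3834], [995, 13]])   # batch of pre-tokenized prompts
```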
2 changes: 1 addition & 1 deletion extensions/whisper_stt/requirements.txt
@@ -1,4 +1,4 @@
 SpeechRecognition==3.10.0
-git+https://github.com/oobabooga/whisper.git
+openai-whisper
 soundfile
 ffmpeg
11 changes: 1 addition & 10 deletions instruction-templates/Mistral.yaml
@@ -1,16 +1,7 @@
 instruction_template: |-
-  {%- set found_item = false -%}
-  {%- for message in messages -%}
-    {%- if message['role'] == 'system' -%}
-      {%- set found_item = true -%}
-    {%- endif -%}
-  {%- endfor -%}
-  {%- if not found_item -%}
-    {{- '' + '' + '' -}}
-  {%- endif %}
   {%- for message in messages %}
     {%- if message['role'] == 'system' -%}
-      {{- '' + message['content'] + '' -}}
+      {{- message['content'] -}}
     {%- else -%}
       {%- if message['role'] == 'user' -%}
         {{-'[INST] ' + message['content'] + ' [/INST]'-}}
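Rendering the simplified template makes the change concrete: the system message is now inserted verbatim instead of being concatenated with empty strings, and the dead `found_item` scan is gone. A small jinja2 sketch; the closing endif/endfor tags are supplied for runnability, since the diff truncates before the end of the file:

```python
from jinja2 import Template

# Template body mirrors the diff above; closing tags are assumed from context.
template = Template(
    "{%- for message in messages %}"
    "{%- if message['role'] == 'system' -%}"
    "{{- message['content'] -}}"
    "{%- else -%}"
    "{%- if message['role'] == 'user' -%}"
    "{{-'[INST] ' + message['content'] + ' [/INST]'-}}"
    "{%- endif -%}"
    "{%- endif -%}"
    "{%- endfor -%}"
)

messages = [
    {"role": "system", "content": "You are concise. "},
    {"role": "user", "content": "Hello"},
]
print(template.render(messages=messages))
# You are concise. [INST] Hello [/INST]
```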
2 changes: 1 addition & 1 deletion models/config.yaml
@@ -174,7 +174,7 @@
   instruction_template: 'OpenChat'
 .*codellama.*instruct:
   instruction_template: 'Llama-v2'
-.*mistral.*instruct:
+.*(mistral|mixtral).*instruct:
   instruction_template: 'Mistral'
 .*mistral.*openorca:
   instruction_template: 'ChatML'
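A quick check that the broadened pattern picks up Mixtral instruct models while leaving others alone (the webui matches these keys against the lowercased model name, an assumption here):

```python
import re

pattern = r".*(mistral|mixtral).*instruct"
for name in ("Mistral-7B-Instruct-v0.1",
             "Mixtral-8x7B-Instruct-v0.1",
             "CodeLlama-7B-Instruct"):
    print(name, bool(re.match(pattern, name.lower())))
# Mistral-7B-Instruct-v0.1 True
# Mixtral-8x7B-Instruct-v0.1 True
# CodeLlama-7B-Instruct False
```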
1 change: 1 addition & 0 deletions modules/AutoGPTQ_loader.py
@@ -52,6 +52,7 @@ def load_quantized(model_name):
         'quantize_config': quantize_config,
         'use_cuda_fp16': not shared.args.no_use_cuda_fp16,
         'disable_exllama': shared.args.disable_exllama,
+        'disable_exllamav2': shared.args.disable_exllamav2,
     }

     logger.info(f"The AutoGPTQ params are: {params}")
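For orientation, this params dict is ultimately unpacked into AutoGPTQ's `from_quantized` loader. A hedged sketch: kwarg support is version-dependent (`disable_exllamav2` needs a recent auto-gptq release, 0.5+ to the best of my knowledge), so treat this as an assumption rather than a pinned API:

```python
# Hedged sketch: how a params dict like the one above is typically consumed.
# Requires an auto-gptq release that understands the disable_exllamav2 kwarg.
from pathlib import Path
from auto_gptq import AutoGPTQForCausalLM

params = {
    'model_basename': 'model',
    'device': 'cuda:0',
    'use_triton': False,
    'disable_exllama': False,
    'disable_exllamav2': True,  # the flag plumbed through by this PR
}
model = AutoGPTQForCausalLM.from_quantized(Path('models/some-gptq-model'), **params)
```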
33 changes: 17 additions & 16 deletions modules/chat.py
@@ -215,40 +215,47 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
         yield output
         return

-    just_started = True
-    visible_text = None
     stopping_strings = get_stopping_strings(state)
     is_stream = state['stream']

     # Prepare the input
-    if not any((regenerate, _continue)):
+    if not (regenerate or _continue):
         visible_text = html.escape(text)

         # Apply extensions
         text, visible_text = apply_extensions('chat_input', text, visible_text, state)
         text = apply_extensions('input', text, state, is_chat=True)

+        output['internal'].append([text, ''])
+        output['visible'].append([visible_text, ''])
+
         # *Is typing...*
         if loading_message:
-            yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+            yield {
+                'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]],
+                'internal': output['internal']
+            }
     else:
         text, visible_text = output['internal'][-1][0], output['visible'][-1][0]
         if regenerate:
-            output['visible'].pop()
-            output['internal'].pop()
-
             # *Is typing...*
             if loading_message:
-                yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+                yield {
+                    'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]],
+                    'internal': output['internal'][:-1] + [[text, '']]
+                }
         elif _continue:
             last_reply = [output['internal'][-1][1], output['visible'][-1][1]]
             if loading_message:
-                yield {'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']], 'internal': output['internal']}
+                yield {
+                    'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']],
+                    'internal': output['internal']
+                }

     # Generate the prompt
     kwargs = {
         '_continue': _continue,
-        'history': output,
+        'history': output if _continue else {k: v[:-1] for k, v in output.items()}
     }
     prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs)
     if prompt is None:

@@ -270,12 +277,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
             yield output
             return

-    if just_started:
-        just_started = False
-        if not _continue:
-            output['internal'].append(['', ''])
-            output['visible'].append(['', ''])
-
     if _continue:
         output['internal'][-1] = [text, last_reply[0] + reply]
         output['visible'][-1] = [visible_text, last_reply[1] + visible_reply]
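The net effect of the chat.py refactor: the new user row is appended to the history up front, so the *Is typing...* yields can slice with `[:-1]` instead of concatenating, and the prompt builder must receive a history without that still-empty row, except when continuing, where the last row is exactly what generation extends. A standalone illustration of that slicing (not repo code):

```python
# Illustrative only: mimic the new 'history' kwarg construction.
_continue = False
output = {
    'internal': [['hi', 'hello!'], ['how are you?', '']],  # new row already appended
    'visible':  [['hi', 'hello!'], ['how are you?', '']],
}

history_for_prompt = output if _continue else {k: v[:-1] for k, v in output.items()}
print(history_for_prompt['internal'])  # [['hi', 'hello!']], in-progress row excluded
```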
2 changes: 2 additions & 0 deletions modules/loaders.py
@@ -25,6 +25,7 @@
         'rope_freq_base',
         'compress_pos_emb',
         'disable_exllama',
+        'disable_exllamav2',
         'transformers_info'
     ],
     'llama.cpp': [

@@ -94,6 +95,7 @@
         'groupsize',
         'desc_act',
         'disable_exllama',
+        'disable_exllamav2',
         'gpu_memory',
         'cpu_memory',
         'cpu',