Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions mistralrs-core/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,18 @@ pub enum ModelCategory {
Speech,
}

impl std::fmt::Debug for ModelCategory {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ModelCategory::Text => write!(f, "ModelCategory::Text"),
ModelCategory::Vision { .. } => write!(f, "ModelCategory::Vision {{ prefixer: .. }}"),
ModelCategory::Diffusion => write!(f, "ModelCategory::Diffusion"),
ModelCategory::Audio => write!(f, "ModelCategory::Audio"),
ModelCategory::Speech => write!(f, "ModelCategory::Speech"),
}
}
}

impl PartialEq for ModelCategory {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
Expand Down
29 changes: 11 additions & 18 deletions mistralrs-quant/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{
borrow::Cow,
fmt::Debug,
num::NonZeroUsize,
sync::{atomic::AtomicUsize, Arc, Mutex, MutexGuard, OnceLock},
sync::{atomic::AtomicUsize, Arc, Mutex, MutexGuard},
};

use blockwise_fp8::blockwise_fp8_linear_b;
Expand Down Expand Up @@ -63,33 +63,26 @@ pub use utils::{log, BitWiseOp, CumSumOp, LeftshiftOp, NonZeroOp, SortOp, UQFF_Q
use candle_nn::{Linear, Module};
use serde::{Deserialize, Deserializer, Serialize};

#[derive(Clone)]
#[derive(Clone, Debug)]
/// Parameters for immediate ISQ, as stored in the `IMMEDIATE_ISQ` static and
/// handed back (cloned) by `get_immediate_isq`.
pub struct ImmediateIsqParams {
/// Guard created with `QuantizeOntoGuard::new()` when the parameters are stored.
pub guard: QuantizeOntoGuard,
/// ISQ type passed to `set_immediate_isq`; `None` when no type was given.
pub ty: Option<IsqType>,
/// Regexes passed to `set_immediate_isq`.
// NOTE(review): presumably matched against tensor/layer names to decide what is
// quantized — confirm against `should_apply_immediate_isq`.
pub predicates: Vec<Regex>,
}

static IMMEDIATE_ISQ: OnceLock<Mutex<ImmediateIsqParams>> = OnceLock::new();
static IMMEDIATE_ISQ: Mutex<Option<ImmediateIsqParams>> = Mutex::new(None);

/// Stores the immediate-ISQ settings in the `IMMEDIATE_ISQ` static.
///
/// NOTE(review): this listing is a diff view, so two bodies are interleaved below.
/// The `get_or_init` chain installs `predicates` only on the first call and afterwards
/// just overwrites `ty`. The `lock()`-based body replaces the whole stored value
/// (fresh guard, `ty`, `predicates`) on every call and panics with
/// "IMMEDIATE_ISQ mutex poisoned" if the mutex is poisoned.
pub fn set_immediate_isq(isq: Option<IsqType>, predicates: Vec<Regex>) {
IMMEDIATE_ISQ
.get_or_init(|| {
Mutex::new(ImmediateIsqParams {
guard: QuantizeOntoGuard::new(),
ty: None,
predicates,
})
})
.lock()
.unwrap()
.ty = isq;
let mut guard = IMMEDIATE_ISQ.lock().expect("IMMEDIATE_ISQ mutex poisoned");
*guard = Some(ImmediateIsqParams {
guard: QuantizeOntoGuard::new(),
ty: isq,
predicates,
});
}

/// Returns a clone of the stored immediate-ISQ parameters, if any.
///
/// NOTE(review): this listing is a diff view, so two bodies are interleaved below.
/// The `.get()` form yields `None` until the static has been initialised and unwraps
/// the lock. The `.lock().ok()` form yields `None` both when nothing has been stored
/// and when the mutex is poisoned.
pub fn get_immediate_isq() -> Option<ImmediateIsqParams> {
IMMEDIATE_ISQ
.get()
.map(|guard| guard.lock().unwrap().clone())
IMMEDIATE_ISQ.lock().ok().and_then(|guard| guard.clone())
}

pub fn should_apply_immediate_isq(vb: &ShardedVarBuilder) -> bool {
Expand Down Expand Up @@ -545,7 +538,7 @@ pub trait QuantizedSerde {
}

/// Used to gate access to quantizing onto the host device
#[derive(Clone)]
#[derive(Clone, Debug)]
#[allow(unused)]
pub struct QuantizeOntoGuard(Arc<Mutex<()>>);

Expand Down
20 changes: 14 additions & 6 deletions mistralrs-web-chat/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<h2>Control panel</h2>
<select id="modelSelect"></select>
<button id="newChatBtn">➕ New chat</button>
<button id="clearBtn">🗑️ Clear chat</button>
<button id="clearBtn">🧹 Clear chat</button>
<button id="renameBtn">✏️ Rename chat</button>
<button id="deleteBtn">🗑️ Delete chat</button>
<ul id="chatList"></ul>
Expand All @@ -22,11 +22,19 @@ <h1 style="margin-top:0;">Mistral.rs Chat</h1>

<div id="log"></div>

<form id="form" autocomplete="off">
<textarea id="input" placeholder="Type your message… (Press Ctrl+Enter to send)" rows="1"></textarea>
<label id="imageLabel" for="imageInput">📎 Image</label>
<input id="imageInput" type="file" accept="image/*" />
<button type="submit">Send</button>
<form id="form">
<textarea
id="input"
placeholder="Type your message… (Press Ctrl+Enter to send)"
></textarea>

<!-- hidden file input -->
<input type="file" id="imageInput" accept="image/*" hidden />

<!-- image label and send button share the same class -->
<label for="imageInput" class="btn" id="imageLabel">📎 Image</label>
<button type="submit" class="btn">Send</button>
<div id="spinner" class="spinner hidden" aria-hidden="true"></div>
</form>

<div id="image-container"></div>
Expand Down
9 changes: 9 additions & 0 deletions mistralrs-web-chat/static/js/websocket.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ function handleWebSocketMessage(ev) {
}

if (!assistantDiv) {
// remove inline spinner when first assistant data arrives
const spinner = document.getElementById('spinner');
if (spinner) spinner.remove();
assistantDiv = append('', 'assistant');
}

Expand All @@ -58,6 +61,12 @@ function sendMessage() {
assistantBuf = '';
assistantDiv = null;
ws.send(msg);
// dynamically add spinner in log area
const log = document.getElementById('log');
const spinnerEl = document.createElement('div');
spinnerEl.classList.add('spinner');
spinnerEl.id = 'spinner';
log.appendChild(spinnerEl);
input.value = '';

// Trigger textarea resize
Expand Down
43 changes: 29 additions & 14 deletions mistralrs-web-chat/static/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,23 @@ body {
font-family: inherit;
}

#form button[type="submit"] {
.btn {
/* Shared button look: index.html puts this class on both the image <label> and the Send button. */
/* inline-flex + centering keeps the label's text aligned like a real <button>'s. */
display: inline-flex;
align-items: center;
justify-content: center;
background: var(--primary);
color: #fff;
border: none;
padding: 0.6rem 1.2rem;
border-radius: var(--radius);
cursor: pointer;
/* Animates the background swap done by the :hover rule below. */
transition: background 0.2s;
font-family: inherit;
font-size: 1rem;
}

/* Hover state: only the background colour changes. */
.btn:hover {
background: var(--primary-hov);
}

#form button[type="submit"]:hover {
Expand All @@ -209,19 +219,6 @@ body {
display:none;
}

#imageLabel {
padding:0.6rem 1.2rem;
border-radius:var(--radius);
background:var(--accent);
color:#fff;
cursor:pointer;
transition:background 0.2s;
}

#imageLabel:hover {
background:var(--accent-hov);
}

img.chat-preview {
max-width: 150px;
max-height: 150px;
Expand Down Expand Up @@ -305,3 +302,21 @@ pre {
pre:hover .copy-btn {
opacity: 1;
}

/* Spinner */
.spinner {
width: 1.5rem;
height: 1.5rem;
/* A uniform ring whose top edge is overridden with the primary colour. */
border: 3px solid var(--border-color);
border-top: 3px solid var(--primary);
border-radius: 50%;
/* One full turn per second, forever (see the `spin` keyframes below). */
animation: spin 1s linear infinite;
}

/* Takes the spinner out of the layout while the `hidden` class is present. */
.spinner.hidden {
display: none;
}

/* Only the end state is declared; the animation runs from the element's own transform to a full turn. */
@keyframes spin {
to { transform: rotate(360deg); }
}
11 changes: 11 additions & 0 deletions mistralrs/src/messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use serde_json::{json, Value};
/// A type which can be used as a chat request.
pub trait RequestLike {
fn messages_ref(&self) -> &[IndexMap<String, MessageContent>];
fn images_ref(&self) -> &[DynamicImage];
fn take_messages(&mut self) -> RequestMessage;
fn take_logits_processors(&mut self) -> Option<Vec<Arc<dyn CustomLogitsProcessor>>>;
fn take_adapters(&mut self) -> Option<Vec<String>>;
Expand Down Expand Up @@ -85,6 +86,9 @@ impl RequestLike for TextMessages {
fn messages_ref(&self) -> &[IndexMap<String, MessageContent>] {
&self.0
}
/// This implementation has no images to expose, so it always returns an empty slice.
fn images_ref(&self) -> &[DynamicImage] {
&[]
}
fn take_messages(&mut self) -> RequestMessage {
let mut other = Vec::new();
std::mem::swap(&mut other, &mut self.0);
Expand Down Expand Up @@ -205,6 +209,9 @@ impl RequestLike for VisionMessages {
fn messages_ref(&self) -> &[IndexMap<String, MessageContent>] {
&self.messages
}
/// Borrows the images held in `self.images` without taking them out of the request.
fn images_ref(&self) -> &[DynamicImage] {
&self.images
}
fn take_messages(&mut self) -> RequestMessage {
let mut other_messages = Vec::new();
std::mem::swap(&mut other_messages, &mut self.messages);
Expand Down Expand Up @@ -523,6 +530,10 @@ impl RequestLike for RequestBuilder {
&self.messages
}

/// Borrows the images held in `self.images`; the slice is empty when none were added.
fn images_ref(&self) -> &[DynamicImage] {
&self.images
}

fn take_messages(&mut self) -> RequestMessage {
if self.images.is_empty() {
let mut other = Vec::new();
Expand Down
Loading