Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions examples/main/src/components/GuideScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ export default function GuideScreen() {
llama.cpp
</a>
. It enables running LLM inference directly on browser by leveraging
the power of <b>WebAssembly</b>. It accepts GGUF as model format.
the power of <b>WebAssembly</b> and <b>WebGPU</b>. It accepts GGUF as model format.
</div>

<div className="mb-3">
Please note that:
<ul>
<li>
Due to WebAssembly overhead, performance will not be as good as
Due to WebAssembly and WebGPU overhead, performance will not be as good as
running llama.cpp in native. Performance degradation can range
from 25% to 50%.
</li>
Expand All @@ -48,8 +48,7 @@ export default function GuideScreen() {
</li>
<li>Running on smartphone maybe buggy.</li>
<li>
<b>WebGPU is not supported</b>. We're still working hard to add
support for WebGPU.
<b>WebGPU support is experimental</b>. We're still working hard to stabilize and optimize the WebGPU backend, and it may not work well on some devices.
</li>
</ul>
</div>
Expand Down
100 changes: 93 additions & 7 deletions examples/main/src/components/ModelScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,21 @@ import {
faCheck,
} from '@fortawesome/free-solid-svg-icons';
import { DEFAULT_INFERENCE_PARAMS, MAX_GGUF_SIZE } from '../config';
import { toHumanReadableSize, useDebounce } from '../utils/utils';
import {
getWebGPUMemoryBudget,
toHumanReadableSize,
useDebounce,
} from '../utils/utils';
import { useEffect, useState } from 'react';
import ScreenWrapper from './ScreenWrapper';
import { DisplayedModel } from '../utils/displayed-model';
import { isValidGgufFile } from '@wllama/wllama';

export default function ModelScreen() {
const [showAddCustom, setShowAddCustom] = useState(false);
const [webgpuMemoryBudget, setWebgpuMemoryBudget] = useState<
number | undefined
>();
const {
models,
removeCachedModel,
Expand All @@ -27,8 +34,33 @@ export default function ModelScreen() {
} = useWllama();

const blockModelBtn = !!(loadedModel || isDownloading || isLoadingModel);
const effectiveWebGPUMemoryBudget = webgpuMemoryBudget
? Math.floor(webgpuMemoryBudget * 0.8)
: undefined;

useEffect(() => {
let cancelled = false;

getWebGPUMemoryBudget()
.then((budget) => {
if (!cancelled) {
setWebgpuMemoryBudget(budget);
}
})
.catch(() => {
if (!cancelled) {
setWebgpuMemoryBudget(undefined);
}
});

const onChange = (key: keyof typeof currParams) => (e: any) => {
return () => {
cancelled = true;
};
}, []);

const onChange = (
key: 'nThreads' | 'nContext' | 'nPredict' | 'temperature'
) => (e: any) => {
setParams({ ...currParams, [key]: parseFloat(e.target.value || -1) });
};

Expand Down Expand Up @@ -88,6 +120,25 @@ export default function ModelScreen() {
/>
</label>

<label className="label cursor-pointer justify-start gap-3 mb-2">
<input
type="checkbox"
className="toggle toggle-primary"
checked={currParams.preferWebGPU}
onChange={(e) =>
setParams({ ...currParams, preferWebGPU: e.target.checked })
}
disabled={blockModelBtn}
/>
<span className="label-text">Prefer WebGPU</span>
</label>

{currParams.preferWebGPU && effectiveWebGPUMemoryBudget && (
<div className="text-sm opacity-80 mb-2">
Usable WebGPU Budget: {toHumanReadableSize(effectiveWebGPUMemoryBudget)}
</div>
)}

<button
className="btn btn-sm mr-2"
onClick={() => setParams(DEFAULT_INFERENCE_PARAMS)}
Expand Down Expand Up @@ -127,7 +178,13 @@ export default function ModelScreen() {
{models
.filter((m) => m.isUserAdded)
.map((m) => (
<ModelCard key={m.url} model={m} blockModelBtn={blockModelBtn} />
<ModelCard
key={m.url}
model={m}
blockModelBtn={blockModelBtn}
preferWebGPU={currParams.preferWebGPU}
webgpuMemoryBudget={effectiveWebGPUMemoryBudget}
/>
))}
</div>

Expand All @@ -137,7 +194,13 @@ export default function ModelScreen() {
{models
.filter((m) => !m.isUserAdded)
.map((m) => (
<ModelCard key={m.url} model={m} blockModelBtn={blockModelBtn} />
<ModelCard
key={m.url}
model={m}
blockModelBtn={blockModelBtn}
preferWebGPU={currParams.preferWebGPU}
webgpuMemoryBudget={effectiveWebGPUMemoryBudget}
/>
))}
</div>

Expand Down Expand Up @@ -277,9 +340,13 @@ function AddCustomModelDialog({ onClose }: { onClose(): void }) {
function ModelCard({
model,
blockModelBtn,
preferWebGPU,
webgpuMemoryBudget,
}: {
model: DisplayedModel;
blockModelBtn: boolean;
preferWebGPU: boolean;
webgpuMemoryBudget?: number;
}) {
const {
downloadModel,
Expand All @@ -293,9 +360,19 @@ function ModelCard({

const m = model;
const percent = parseInt(Math.round(m.downloadPercent * 100).toString());
const blockedByWebGPU = !!(
preferWebGPU &&
webgpuMemoryBudget &&
m.size > webgpuMemoryBudget
);
const blockedActionLabel = blockedByWebGPU
? `Too large for current WebGPU budget (${toHumanReadableSize(webgpuMemoryBudget!)})`
: undefined;
return (
<div
className={`card bg-base-100 w-full mb-2 ${m.state === ModelState.LOADED ? 'border-2 border-primary' : ''}`}
className={`card bg-base-100 w-full mb-2 ${
m.state === ModelState.LOADED ? 'border-2 border-primary' : ''
} ${blockedByWebGPU ? 'opacity-50 saturate-0' : ''}`}
key={m.url}
>
<div className="card-body p-4 flex flex-row">
Expand All @@ -321,6 +398,13 @@ function ModelCard({
: ''}
</small>

{blockedByWebGPU && (
<div className="text-sm text-warning mt-1">
<FontAwesomeIcon icon={faWarning} className="mr-2" />
Model size exceeds the current WebGPU budget.
</div>
)}

{m.state === ModelState.LOADED && currRuntimeInfo && (
<>
<br />
Expand Down Expand Up @@ -362,7 +446,8 @@ function ModelCard({
<button
className="btn btn-primary btn-sm mr-2"
onClick={() => downloadModel(m)}
disabled={blockModelBtn}
disabled={blockModelBtn || blockedByWebGPU}
title={blockedActionLabel}
>
Download
</button>
Expand All @@ -372,7 +457,8 @@ function ModelCard({
<button
className="btn btn-primary btn-sm mr-2"
onClick={() => loadModel(m)}
disabled={blockModelBtn}
disabled={blockModelBtn || blockedByWebGPU}
title={blockedActionLabel}
>
Load model
</button>
Expand Down
82 changes: 40 additions & 42 deletions examples/main/src/config.ts
Original file line number Diff line number Diff line change
@@ -1,86 +1,83 @@
// See: https://vitejs.dev/guide/assets#explicit-url-imports
import wllamaSingle from '@wllama/wllama/src/single-thread/wllama.wasm?url';
import wllamaMulti from '@wllama/wllama/src/multi-thread/wllama.wasm?url';
import wllamaJspiSingle from '@wllama/wllama/src/jspi-single-thread/wllama.wasm?url';
import wllamaJspiMulti from '@wllama/wllama/src/jspi-multi-thread/wllama.wasm?url';
import wllamaAsyncifySingle from '@wllama/wllama/src/asyncify-single-thread/wllama.wasm?url';
import wllamaAsyncifyMulti from '@wllama/wllama/src/asyncify-multi-thread/wllama.wasm?url';
import wllamaPackageJson from '@wllama/wllama/package.json';
import { InferenceParams } from './utils/types';

export const WLLAMA_VERSION = wllamaPackageJson.version;

export const WLLAMA_CONFIG_PATHS = {
'single-thread/wllama.wasm': wllamaSingle,
'multi-thread/wllama.wasm': wllamaMulti,
'jspi/single-thread/wllama.wasm': wllamaJspiSingle,
'jspi/multi-thread/wllama.wasm': wllamaJspiMulti,
'asyncify/single-thread/wllama.wasm': wllamaAsyncifySingle,
'asyncify/multi-thread/wllama.wasm': wllamaAsyncifyMulti,

};

export const MAX_GGUF_SIZE = 2 * 1024 * 1024 * 1024; // 2GB

export const LIST_MODELS = [
{
url: 'https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf',
size: 229309376,
},
{
url: 'https://huggingface.co/LiquidAI/LFM2-700M-GGUF/resolve/main/LFM2-700M-Q4_K_M.gguf',
size: 468624320,
url: 'https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf',
size: 773025824,
},
{
url: 'https://huggingface.co/LiquidAI/LFM2-1.2B-GGUF/resolve/main/LFM2-1.2B-Q4_K_M.gguf',
size: 730893248,
url: 'https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf',
size: 1321082528,
},
{
url: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf',
size: 639447744,
url: 'https://huggingface.co/unsloth/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_0.gguf',
size: 1921909184,
},
{
url: 'https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf',
size: 1107409472,
url: 'https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-Q4_0.gguf',
size: 241574944,
},
{
url: 'https://huggingface.co/ggml-org/SmolLM3-3B-GGUF/resolve/main/SmolLM3-Q4_K_M.gguf',
size: 1915305312,
url: 'https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-F16.gguf',
size: 542835488,
},
{
url: 'https://huggingface.co/ngxson/SmolLM2-360M-Instruct-Q8_0-GGUF/resolve/main/smollm2-360m-instruct-q8_0.gguf',
size: 386404992,
url: 'https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q4_0.gguf',
size: 721918496,
},
{
url: 'https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q8_0.gguf',
size: 675710816,
url: 'https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q8_0.gguf',
size: 1069306400,
},
{
url: 'https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.2-1b-instruct-q4_k_m.gguf',
size: 807690656,
},
{
url: 'https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf',
size: 924456032,
url: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf',
size: 639447744,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/qwen2-1_5b-instruct-q4_k_m-00001-of-00004.gguf',
size: 986046272,
url: 'https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_0.gguf',
size: 1056782912,
},
{
url: 'https://huggingface.co/ngxson/SmolLM2-1.7B-Instruct-Q4_K_M-GGUF/resolve/main/smollm2-1.7b-instruct-q4_k_m.gguf',
size: 1055609536,
url: 'https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_0.gguf',
size: 1214873856,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/gemma-2-2b-it-abliterated-Q4_K_M-00001-of-00004.gguf',
size: 1708583264,
url: 'https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_0.gguf',
size: 695751488,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/neuralreyna-mini-1.8b-v0.3.q4_k_m-00001-of-00005.gguf',
size: 1217753472,
url: 'https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q8_0.gguf',
size: 1246253888,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/Phi-3.1-mini-128k-instruct-Q3_K_M-00001-of-00008.gguf',
size: 1955478176,
url: 'https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/resolve/main/SmolLM3-3B-128K-Q4_0.gguf',
size: 1811456608,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/meta-llama-3.1-8b-instruct-abliterated.Q2_K-00001-of-00014.gguf',
size: 3179133600,
url: 'https://huggingface.co/unsloth/Ministral-3-3B-Instruct-2512-GGUF/resolve/main/Ministral-3-3B-Instruct-2512-Q3_K_M.gguf',
size: 1795552544,
},
{
url: 'https://huggingface.co/ngxson/wllama-split-models/resolve/main/Meta-Llama-3.1-8B-Instruct-Q2_K-00001-of-00014.gguf',
size: 3179138048,
url: 'https://huggingface.co/unsloth/Phi-4-mini-instruct-GGUF/resolve/main/Phi-4-mini-instruct-Q2_K.gguf',
size: 1682635744,
},
];

Expand All @@ -90,6 +87,7 @@ export const DEFAULT_INFERENCE_PARAMS: InferenceParams = {
nPredict: 4096,
nBatch: 128,
temperature: 0.2,
preferWebGPU: true,
};

export const DEFAULT_CHAT_TEMPLATE =
Expand Down
1 change: 1 addition & 0 deletions examples/main/src/utils/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export interface InferenceParams {
nBatch: number;
temperature: number;
nPredict: number;
preferWebGPU: boolean;
}

export interface Message {
Expand Down
5 changes: 5 additions & 0 deletions examples/main/src/utils/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ export const toHumanReadableSize = (bytes: number): string => {
return `${size.toFixed(1)} ${units[unitIndex]}`;
};

/**
 * Queries the default WebGPU adapter for an upper bound on model size.
 *
 * The value returned is the adapter's `maxBufferSize` limit (in bytes). It is
 * the largest single GPU buffer the adapter allows, which callers use as a
 * rough proxy for a "memory budget"; it is not a measurement of free VRAM.
 *
 * @returns The adapter's `maxBufferSize` in bytes, or `undefined` when WebGPU
 *   is not exposed by the current environment or no adapter could be obtained.
 */
export const getWebGPUMemoryBudget = async (): Promise<number | undefined> => {
  // `navigator.gpu` is missing in browsers without WebGPU (and in insecure
  // contexts). Report "no budget" instead of throwing a TypeError on access.
  if (typeof navigator === 'undefined' || !navigator.gpu) {
    return undefined;
  }

  // requestAdapter() resolves to null when no suitable adapter exists; the
  // optional chain maps that case to `undefined` as well.
  const adapter = await navigator.gpu.requestAdapter();
  return adapter?.limits.maxBufferSize;
};

export const DebugLogger = {
content: [] as string[],
debug(...args: any) {
Expand Down
Loading
Loading