Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ node_modules
.vscode
/cache
/build
/build-compat
/build-wasm
/docs
/dist
Expand All @@ -18,3 +19,5 @@ tmp.patch

a.out.js
a.out.wasm

/compat/wasm/*.wasm
2 changes: 2 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
/src/wasm-from-cdn.ts
/src/glue/messages.ts

/compat/wasm

*.md
*.mdx
*.json
Expand Down
41 changes: 31 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,62 @@ set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_USE_PTHREADS_INIT 1)
set(THREADS_PREFER_PTHREAD_FLAG ON)

add_compile_options(
set(WLLAMA_COMPILE_OPTIONS
-O3 -msimd128 -DNDEBUG
-flto=full -frtti
-fwasm-exceptions
-pthread
-sMEMORY64=1
)
add_link_options(
-sMEMORY64=1
set(WLLAMA_LINK_OPTIONS
-flto=full
-fwasm-exceptions
--no-entry
-sEXPORT_ALL=1
-sEXPORT_ES6=0
-sMODULARIZE=0
-sINITIAL_MEMORY=128MB
-sMAXIMUM_MEMORY=4096MB
-sALLOW_MEMORY_GROWTH=1
-sFORCE_FILESYSTEM=1
-sEXPORTED_FUNCTIONS=_main,_wllama_malloc,_wllama_start,_wllama_action,_wllama_exit,_wllama_debug
-sEXPORTED_RUNTIME_METHODS=ccall,cwrap
-sEXPORTED_RUNTIME_METHODS=ccall,cwrap,HEAPU8,MEMFS,FS,mmapAlloc,ENV,wasmMemory
-sNO_EXIT_RUNTIME=1
-sIMPORTED_MEMORY=1
-sPTHREAD_POOL_SIZE=Module[\"pthreadPoolSize\"]
-sUSE_PTHREADS=1
-pthread
-sJSPI
-sJSPI_EXPORTS=['wllama_start','wllama_action']
-Wl,--wrap,fopen
-Wl,--wrap,fclose
-Wl,--wrap,fread
-Wl,--wrap,fseek
-Wl,--wrap,ftell
)

if(WLLAMA_COMPAT)
  # Compatibility build:
  #  - uses -fexceptions instead of -fwasm-exceptions, because wasm exceptions
  #    are not compatible with Asyncify (Asyncify is needed for Firefox and Safari)
  #  - does not enable MEMORY64, because it is not compatible with Safari
  list(APPEND WLLAMA_COMPILE_OPTIONS -fexceptions -pthread)
  list(APPEND WLLAMA_LINK_OPTIONS
    -fexceptions
    -sASYNCIFY=1
    -sASYNCIFY_ADD=['wllama_start','wllama_action']
  )
else()
  # Default build: MEMORY64 + wasm exceptions, with JSPI applied to the
  # wllama_start / wllama_action exports.
  list(APPEND WLLAMA_COMPILE_OPTIONS -sMEMORY64=1 -fwasm-exceptions)
  list(APPEND WLLAMA_LINK_OPTIONS
    -sMEMORY64=1
    -fwasm-exceptions
    -sJSPI
    -sJSPI_EXPORTS=['wllama_start','wllama_action']
  )
endif()

# Apply the assembled flag lists at directory scope. These calls come before
# add_subdirectory(llama.cpp) so that the targets defined there are compiled
# and linked with the same options.
add_compile_options(${WLLAMA_COMPILE_OPTIONS})
add_link_options(${WLLAMA_LINK_OPTIONS})

add_subdirectory(llama.cpp)

set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ WebAssembly binding for [llama.cpp](https://github.com/ggerganov/llama.cpp)
For changelog, please visit [releases page](https://github.com/ngxson/wllama/releases)

> [!IMPORTANT]
> **🔥🔥 V3 is out, with WebGPU, multimodal and tool calling support. Read the [V3 release guide](./guides/intro-v3.md)**
> Memory64 is now a requirement, which drops support for Safari. Please follow [this issue](https://github.com/ngxson/wllama/issues/210) for more info.
> **🔥🔥 V3 is out, with WebGPU, multimodal and tool calling support. Read the [V3 release guide](./guides/intro-v3.md)**
>
> For compatibility issues, please refer to [@wllama/wllama-compat](./compat/README.md)

![](./assets/screenshot_0.png)

Expand Down Expand Up @@ -70,6 +71,10 @@ WebGPU support is introduced via [PR #215](https://github.com/ngxson/wllama/pull
Upon updating to V3.1, WebGPU will be enabled automatically. By default, all layers will be offloaded to GPU. If the model is too big to fit into VRAM, you can manually adjust the number of layers via the `n_gpu_layers` parameter of `LoadModelParams`. Example:

```js
// (optional) allows running WebGPU on Safari via compatibility mode
// the second argument 'exclude_firefox' is optional; it disables WebGPU on Firefox (performance there is too poor)
wllama.setCompat('default', 'exclude_firefox');

await wllama.loadModel(files, {
n_gpu_layers: 4, // meaning 4 layers are offloaded to GPU; set to 0 to disable GPU inference
});
Expand Down
88 changes: 88 additions & 0 deletions compat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# @wllama/wllama-compat

Optional package that provides compatibility WASM assets for `@wllama/wllama` on browsers that lack [JSPI](https://github.com/WebAssembly/js-promise-integration) or [MEMORY64](https://github.com/WebAssembly/memory64) support - most notably Safari and older browsers.

## Why this package exists

The default `@wllama/wllama` build relies on two modern WebAssembly features: [JSPI](https://github.com/WebAssembly/js-promise-integration) and [MEMORY64](https://github.com/WebAssembly/memory64).

When either feature is absent, wllama automatically falls back to **compat mode**: a separate WASM build that uses [Asyncify](https://emscripten.org/docs/porting/asyncify.html) instead of JSPI, and drops MEMORY64.

> **Note:** Compat mode has significantly lower performance than the default build. Use it only as a fallback.

## Browser compatibility

| | Chromium | Firefox | Safari |
|---|---|---|---|
| Auto-compat (default, recommended) | ✅ | 🟡 (no WebGPU) | 🟡 (supports WebGPU) |
| Force-compat | ✅ | 🔴 (supports WebGPU) | 🟡 (supports WebGPU) |
| Non-compat mode | ✅ | 🟡 (no WebGPU) | ❌ |

- ✅: Good speed
- 🟡: Acceptable speed
- 🔴: Runs but slow, not usable
- ❌: Does not run at all

### Default behaviour

Out of the box, wllama fetches the compat assets from the jsDelivr CDN when compat mode is needed. If you want to self-host the assets (no external CDN dependency), install this package (see [Using this package](#using-this-package) below).

### Recommended preset

By default (`mode = 'safari'`), compat is disabled on Firefox because WebGPU via compat mode is extremely slow there. This is the recommended behaviour:

```js
wllama.setCompat('default');
```

If you also want compat on Firefox (e.g. to reach users without JSPI enabled), pass `'firefox_safari'`:

```js
wllama.setCompat('default', 'firefox_safari');
```

## Disabling compat mode

To opt out of compat mode completely (e.g. you don't target Safari):

```ts
wllama.setCompat(null);
```

## Using this package

**You only need to install this package if you want to host the compat assets yourself.** By default, assets are pulled from the CDN.

```bash
npm install @wllama/wllama-compat
```

Then copy the assets from `node_modules/@wllama/wllama-compat/wasm/` to your public directory and call `setCompat()` with the URLs pointing to those files:

```ts
import { Wllama } from '@wllama/wllama';

const wllama = new Wllama({ default: '/wasm/wllama.wasm' });

wllama.setCompat({
wasm: '/wllama-compat/wasm/wllama.wasm',
worker: '/wllama-compat/wasm/wllama.js',
});
```

**IMPORTANT**: for Vite, you will need to import the JS as `?raw`

```ts
import compatWasm from '@wllama/wllama-compat/wasm/wllama.wasm?url';
import compatWorker from '@wllama/wllama-compat/wasm/wllama.js?raw'; // IMPORTANT: ?raw, NOT ?url

export const WLLAMA_COMPAT_CONFIG = {
wasm: compatWasm,
worker: {
code: compatWorker,
},
};

const instance = new Wllama(WLLAMA_CONFIG_PATHS, { logger: DebugLogger });
instance.setCompat(WLLAMA_COMPAT_CONFIG);
```
1 change: 1 addition & 0 deletions compat/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// refer to README.md for more
30 changes: 30 additions & 0 deletions compat/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "@wllama/wllama-compat",
"version": "3.2.2",
"description": "Optional package providing compatibility with older browsers for @wllama/wllama",
"main": "index.js",
"type": "module",
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"scripts": {
"upload": "npm publish --access public"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ngxson/wllama.git"
},
"keywords": [
"wasm",
"webassembly",
"llama",
"llm",
"ai",
"rag",
"embeddings",
"generation"
],
"author": "Xuan Son NGUYEN <contact@ngxson.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/ngxson/wllama/issues"
},
"homepage": "https://github.com/ngxson/wllama#readme"
}
1 change: 1 addition & 0 deletions compat/wasm/wllama.js

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions examples/basic/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,17 @@ <h2>Embeddings</h2>
async function startCompletions(modelUrl, files) {
const wllama = new Wllama(CONFIG_PATHS);
// await wllama.cacheManager.clear();

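// note: this is optional. When running the demo locally, we point compat to the local build for debugging; when deployed, we use the default (CDN) assets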
if (window.location.href.match(/localhost|127\.0\.0\.1/)) {
wllama.setCompat({
wasm: '../../compat/wasm/wllama.wasm',
worker: '../../compat/wasm/wllama.js',
});
} else {
wllama.setCompat('default');
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if (files) {
await wllama.loadModel(files);
} else {
Expand Down
9 changes: 9 additions & 0 deletions examples/embeddings/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@
async function main() {
let res, tokens, elapsed, buffer;
const wllama = new Wllama(CONFIG_PATHS);
// when running demo locally, we want to point the compat to local for debugging; when deployed, we point it to CDN
if (window.location.href.match(/localhost|127\.0\.0\.1/)) {
wllama.setCompat({
wasm: '../../compat/wasm/wllama.wasm',
worker: '../../compat/wasm/wllama.js',
});
} else {
wllama.setCompat('default');
}

print(`DEMO EMBEDDINGS`);
print(`Loading model ${MODEL}`);
Expand Down
42 changes: 31 additions & 11 deletions examples/main/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions examples/main/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
"remark-gfm": "^4.0.0"
},
"devDependencies": {
"@types/node": "^25.9.1",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@typescript-eslint/eslint-plugin": "^7.15.0",
"@typescript-eslint/parser": "^7.15.0",
"@vitejs/plugin-react": "^4.3.1",
"@wllama/wllama": "file:../../",
"@wllama/wllama-compat": "file:../../compat",
"autoprefixer": "^10.4.19",
"eslint": "^8.57.0",
"eslint-plugin-react-hooks": "^4.6.2",
Expand Down
Loading
Loading