Commit 16d5e00
[Cache] Support IndexedDB, add useIndexedDBCache in AppConfig (#352)
Add `AppConfig.useIndexedDBCache` to optionally use IndexedDBCache rather than the default Cache API. Also add `examples/cache-usage` to demonstrate the usage of the two caches and cache utils such as deleting a model from cache.

Co-authored-by: Charlie Ruan <[email protected]>
1 parent 489d882 commit 16d5e00
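At a glance, the feature works like this. The sketch below is assembled from the `examples/cache-usage` code added in this commit (model id and option names are taken from that example, not a general recommendation):

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  // Start from the prebuilt config and opt into IndexedDB instead of the Cache API.
  const appConfig = webllm.prebuiltAppConfig;
  appConfig.useIndexedDBCache = true; // false (or unset) keeps the default Cache API

  // Model weights, wasm, and config are downloaded into the chosen storage on first load.
  const engine = await webllm.CreateEngine("Phi2-q4f16_1", { appConfig });

  // The cache utils respect the same flag, so they look in the matching backend.
  console.log(await webllm.hasModelInCache("Phi2-q4f16_1", appConfig));
}

demo();
```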

File tree (8 files changed, +210 -23 lines changed):

- examples/README.md
- examples/cache-usage/README.md
- examples/cache-usage/package.json
- examples/cache-usage/src/cache_usage.html
- examples/cache-usage/src/cache_usage.ts
- src/cache_util.ts
- src/config.ts
- src/engine.ts

examples/README.md

Lines changed: 3 additions & 0 deletions

```diff
@@ -27,6 +27,9 @@ These examples demonstrate various capabilities via WebLLM's OpenAI-like API.
 
 #### Others
 - [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`.
+- [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and
+  users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking
+  whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc.
 
 ## Demo Spaces
 
```
examples/cache-usage/README.md

Lines changed: 23 additions & 0 deletions

````diff
@@ -0,0 +1,23 @@
+# WebLLM Cache Usage
+
+WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`.
+This folder provides an example of how the Cache API and IndexedDB cache are used in WebLLM. We also
+demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc.
+
+For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser.
+
+To inspect the downloaded artifacts in your browser, open the developer console, go to the Application tab,
+and you will find the artifacts under either `IndexedDB` or `Cache storage`.
+
+
+To run the example, run the following under this folder:
+
+```bash
+npm install
+npm start
+```
+
+Note: if you would like to hack the WebLLM core package,
+you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
+instructions in the project to build WebLLM locally. This option is only recommended
+if you would like to hack the WebLLM core package.
````

examples/cache-usage/package.json

Lines changed: 20 additions & 0 deletions

```diff
@@ -0,0 +1,20 @@
+{
+  "name": "cache-usage",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "start": "parcel src/cache_usage.html --port 8888",
+    "build": "parcel build src/cache_usage.html --dist-dir lib"
+  },
+  "devDependencies": {
+    "buffer": "^5.7.1",
+    "parcel": "^2.8.3",
+    "process": "^0.11.10",
+    "tslib": "^2.3.1",
+    "typescript": "^4.9.5",
+    "url": "^0.11.3"
+  },
+  "dependencies": {
+    "@mlc-ai/web-llm": "^0.2.30"
+  }
+}
```
examples/cache-usage/src/cache_usage.html

Lines changed: 24 additions & 0 deletions

```diff
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+<script>
+  webLLMGlobal = {}
+</script>
+
+<body>
+  <h2>WebLLM Test Page</h2>
+  Open console to see output
+  </br>
+  </br>
+  <label id="init-label"> </label>
+
+  <h3>Prompt</h3>
+  <label id="prompt-label"> </label>
+
+  <h3>Response</h3>
+  <label id="generate-label"> </label>
+  </br>
+  <label id="stats-label"> </label>
+
+  <script type="module" src="./cache_usage.ts"></script>
+
+</html>
```
examples/cache-usage/src/cache_usage.ts

Lines changed: 74 additions & 0 deletions

```diff
@@ -0,0 +1,74 @@
+import * as webllm from "@mlc-ai/web-llm";
+
+function setLabel(id: string, text: string) {
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
+}
+
+const initProgressCallback = (report: webllm.InitProgressReport) => {
+  setLabel("init-label", report.text);
+};
+
+async function main() {
+  const appConfig = webllm.prebuiltAppConfig;
+  // CHANGE THIS TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE
+  appConfig.useIndexedDBCache = true;
+
+  if (appConfig.useIndexedDBCache) {
+    console.log("Using IndexedDB Cache");
+  } else {
+    console.log("Using Cache API");
+  }
+
+  // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache
+  const selectedModel = "Phi2-q4f16_1"
+  const engine: webllm.EngineInterface = await webllm.CreateEngine(
+    "Phi2-q4f16_1",
+    { initProgressCallback: initProgressCallback, appConfig: appConfig }
+  );
+
+  const request: webllm.ChatCompletionRequest = {
+    stream: false,
+    messages: [
+      { "role": "user", "content": "Write an analogy between mathematics and a lighthouse." },
+    ],
+    n: 1,
+  };
+  let reply = await engine.chat.completions.create(request);
+  console.log(reply);
+
+  // 2. Check whether model weights are cached
+  let modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
+  console.log("hasModelInCache: ", modelCached);
+  if (!modelCached) {
+    throw Error("Expect hasModelInCache() to be true, but got: " + modelCached);
+  }
+
+  // 3. We reload, and we should see this time it is much faster because the weights are cached.
+  console.log("Reload model start");
+  await engine.reload(selectedModel, undefined, appConfig);
+  console.log("Reload model end");
+  reply = await engine.chat.completions.create(request);
+  console.log(reply);
+
+  // 4. Delete everything about this model from cache
+  // You can also delete only the model library wasm, only the model weights, or only the config file
+  await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
+  modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
+  console.log("After deletion, hasModelInCache: ", modelCached);
+  if (modelCached) {
+    throw Error("Expect hasModelInCache() to be false, but got: " + modelCached);
+  }
+
+  // 5. If we reload, we should expect the model to start downloading again
+  console.log("Reload model start");
+  await engine.reload(selectedModel, undefined, appConfig);
+  console.log("Reload model end");
+  reply = await engine.chat.completions.create(request);
+  console.log(reply);
+}
+
+main();
```
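As the comment in step 4 notes, the cache utils changed in `src/cache_util.ts` below also allow deleting individual artifacts. A minimal sketch of that finer-grained cleanup, assuming those utils and the `AppConfig` type are re-exported by the package in the same way `hasModelInCache` and `deleteModelAllInfoInCache` are used above:

```typescript
import * as webllm from "@mlc-ai/web-llm";

// Sketch only: assumes the finer-grained utils from src/cache_util.ts are re-exported
// by the package, as hasModelInCache and deleteModelAllInfoInCache are in the example above.
async function cleanUp(modelId: string, appConfig: webllm.AppConfig) {
  await webllm.deleteModelWasmInCache(modelId, appConfig);  // only the model library wasm
  await webllm.deleteChatConfigInCache(modelId, appConfig); // only mlc-chat-config.json
  await webllm.deleteModelInCache(modelId, appConfig);      // only the weights and tokenizer files

  // Or remove everything for the model at once, as step 4 above does.
  await webllm.deleteModelAllInfoInCache(modelId, appConfig);
}
```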

src/cache_util.ts

Lines changed: 28 additions & 11 deletions

```diff
@@ -1,10 +1,11 @@
 import * as tvmjs from "tvmjs";
 import {
   AppConfig,
+  ModelRecord,
   prebuiltAppConfig,
 } from "./config";
 
-function findModelRecord(modelId: string, appConfig?: AppConfig) {
+function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord {
   const matchedItem = appConfig?.model_list.find(
     item => item.model_id == modelId
   );
@@ -18,9 +19,10 @@ export async function hasModelInCache(modelId: string, appConfig?: AppConfig): P
   if (appConfig === undefined) {
     appConfig = prebuiltAppConfig;
   }
-  const modelRecord = await findModelRecord(modelId, appConfig);
+  const modelRecord = findModelRecord(modelId, appConfig);
   const modelUrl = modelRecord.model_url;
-  return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model");
+  const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache";
+  return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model", cacheType);
 }
 
 export async function deleteModelAllInfoInCache(modelId: string, appConfig?: AppConfig) {
@@ -42,9 +44,15 @@ export async function deleteModelInCache(modelId: string, appConfig?: AppConfig)
   if (appConfig === undefined) {
     appConfig = prebuiltAppConfig;
   }
-  const modelRecord = await findModelRecord(modelId, appConfig);
-  tvmjs.deleteNDArrayCache(modelRecord.model_url, "webllm/model");
-  const modelCache = new tvmjs.ArtifactCache("webllm/model");
+  const modelRecord = findModelRecord(modelId, appConfig);
+  let modelCache: tvmjs.ArtifactCacheTemplate;
+  if (appConfig.useIndexedDBCache) {
+    tvmjs.deleteNDArrayCache(modelRecord.model_url, "webllm/model", "indexeddb");
+    modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model");
+  } else {
+    tvmjs.deleteNDArrayCache(modelRecord.model_url, "webllm/model", "cache");
+    modelCache = new tvmjs.ArtifactCache("webllm/model");
+  }
   await modelCache.deleteInCache(new URL("tokenizer.model", modelRecord.model_url).href);
   await modelCache.deleteInCache(new URL("tokenizer.json", modelRecord.model_url).href);
 }
@@ -54,19 +62,28 @@ export async function deleteChatConfigInCache(modelId: string, appConfig?: AppCo
   if (appConfig === undefined) {
     appConfig = prebuiltAppConfig;
   }
-  const modelRecord = await findModelRecord(modelId, appConfig);
-  const configCache = new tvmjs.ArtifactCache("webllm/config");
+  const modelRecord = findModelRecord(modelId, appConfig);
+  let configCache: tvmjs.ArtifactCacheTemplate;
+  if (appConfig.useIndexedDBCache) {
+    configCache = new tvmjs.ArtifactIndexedDBCache("webllm/config");
+  } else {
+    configCache = new tvmjs.ArtifactCache("webllm/config");
+  }
   const configUrl = new URL("mlc-chat-config.json", modelRecord.model_url).href;
   await configCache.deleteInCache(configUrl);
 }
 
-
 export async function deleteModelWasmInCache(modelId: string, appConfig?: AppConfig) {
   // delete the wasm in Cache
   if (appConfig === undefined) {
     appConfig = prebuiltAppConfig;
   }
-  const modelRecord = await findModelRecord(modelId, appConfig);
-  const wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
+  const modelRecord = findModelRecord(modelId, appConfig);
+  let wasmCache: tvmjs.ArtifactCacheTemplate;
+  if (appConfig.useIndexedDBCache) {
+    wasmCache = new tvmjs.ArtifactIndexedDBCache("webllm/wasm");
+  } else {
+    wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
+  }
   await wasmCache.deleteInCache(modelRecord.model_lib_url);
 }
```
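The pattern repeated in each util above: both backends implement tvmjs's `ArtifactCacheTemplate`, so the only decision is which concrete cache to construct based on `appConfig.useIndexedDBCache`. A sketch of that selection step in isolation (the `chooseCache` helper is hypothetical and not part of this commit; the scope strings are the ones used above):

```typescript
import * as tvmjs from "tvmjs";
import { AppConfig } from "./config";

// Hypothetical helper (not part of this commit) showing the shared selection pattern:
// both caches implement ArtifactCacheTemplate, so callers only branch on the flag.
function chooseCache(scope: string, appConfig: AppConfig): tvmjs.ArtifactCacheTemplate {
  return appConfig.useIndexedDBCache
    ? new tvmjs.ArtifactIndexedDBCache(scope) // stores artifacts in IndexedDB
    : new tvmjs.ArtifactCache(scope);         // stores artifacts via the Cache API (default)
}

// e.g. chooseCache("webllm/config", appConfig) in place of new tvmjs.ArtifactCache("webllm/config")
```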

src/config.ts

Lines changed: 7 additions & 0 deletions

```diff
@@ -220,9 +220,15 @@ export interface ModelRecord {
  * passed to the load.
  *
  * @param model_list: models to be used.
+ * @param useIndexedDBCache: if true, will use IndexedDBCache to cache models and other artifacts.
+ * If false or unspecified, will use the Cache API. For more information of the two, see:
+ * https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser
+ *
+ * @note Note that the Cache API is more well-tested in WebLLM as of now.
  */
 export interface AppConfig {
   model_list: Array<ModelRecord>;
+  useIndexedDBCache?: boolean;
 }
 
 /**
@@ -243,6 +249,7 @@ export const modelLibURLPrefix =
  * current WebLLM npm version.
  */
 export const prebuiltAppConfig: AppConfig = {
+  useIndexedDBCache: false,
   model_list: [
     // Llama-2
     {
```
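With the new field, a caller can either define a custom `AppConfig` or toggle the flag on `prebuiltAppConfig`. A hedged sketch (the model record below is hypothetical, uses placeholder URLs, and shows only the `ModelRecord` fields that appear in this diff; a real record may need additional fields):

```typescript
import { AppConfig, prebuiltAppConfig } from "./config";

// Hypothetical custom config illustrating the new flag. Only ModelRecord fields
// visible in this diff (model_url, model_id, model_lib_url) are shown, and the
// URLs are placeholders, not real artifacts.
const myAppConfig: AppConfig = {
  useIndexedDBCache: true, // omit or set to false to keep the default Cache API
  model_list: [
    {
      model_url: "https://example.com/my-model/resolve/main/",
      model_id: "MyModel-q4f16_1",
      model_lib_url: "https://example.com/libs/my-model-webgpu.wasm",
    },
  ],
};

// Alternatively, flip the flag on the prebuilt config, as examples/cache-usage does:
prebuiltAppConfig.useIndexedDBCache = true;
```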

src/engine.ts

Lines changed: 31 additions & 12 deletions

```diff
@@ -109,17 +109,29 @@ export class Engine implements EngineInterface {
     if (!modelUrl.startsWith("http")) {
       modelUrl = new URL(modelUrl, baseUrl).href;
     }
-    const configCache = new tvmjs.ArtifactCache("webllm/config");
+
+    let configCache: tvmjs.ArtifactCacheTemplate;
+    if (appConfig.useIndexedDBCache) {
+      configCache = new tvmjs.ArtifactIndexedDBCache("webllm/config");
+    } else {
+      configCache = new tvmjs.ArtifactCache("webllm/config");
+    }
 
     // load config
     const configUrl = new URL("mlc-chat-config.json", modelUrl).href;
     this.config = {
-      ...(await (await configCache.fetchWithCache(configUrl)).json()),
+      ...(await configCache.fetchWithCache(configUrl, "json")),
       ...chatOpts
     } as ChatConfig;
 
     // load tvm wasm
-    const wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
+    let wasmCache: tvmjs.ArtifactCacheTemplate;
+    if (appConfig.useIndexedDBCache) {
+      wasmCache = new tvmjs.ArtifactIndexedDBCache("webllm/wasm");
+    } else {
+      wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
+    }
+
     const wasmUrl = modelRecord.model_lib_url;
     if (wasmUrl === undefined) {
       throw Error("You need to specify `model_lib_url` for each model in `model_list` " +
@@ -135,10 +147,10 @@ export class Engine implements EngineInterface {
         return await fetch(new URL(wasmUrl, baseUrl).href);
       } else {
         // use cache
-        return await wasmCache.fetchWithCache(wasmUrl);
+        return await wasmCache.fetchWithCache(wasmUrl, "arraybuffer");
       }
     };
-    const wasmSource = await (await fetchWasmSource()).arrayBuffer();
+    const wasmSource = await fetchWasmSource();
 
     const tvm = await tvmjs.instantiate(
       new Uint8Array(wasmSource),
@@ -188,9 +200,9 @@ export class Engine implements EngineInterface {
       }
     });
     this.deviceLostIsError = true;
-    const tokenizer = await this.asyncLoadTokenizer(modelUrl, this.config);
-    await tvm.fetchNDArrayCache(modelUrl, tvm.webgpu(), "webllm/model");
-
+    const tokenizer = await this.asyncLoadTokenizer(modelUrl, this.config, appConfig);
+    const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache";
+    await tvm.fetchNDArrayCache(modelUrl, tvm.webgpu(), "webllm/model", cacheType);
     this.pipeline = new LLMChatPipeline(tvm, tokenizer, this.config, this.logitProcessor);
     await this.pipeline?.asyncLoadWebGPUPipelines();
     const tend = performance.now();
@@ -692,12 +704,19 @@ export class Engine implements EngineInterface {
 
   private async asyncLoadTokenizer(
     baseUrl: string,
-    config: ChatConfig
+    config: ChatConfig,
+    appConfig: AppConfig,
   ): Promise<Tokenizer> {
-    const modelCache = new tvmjs.ArtifactCache("webllm/model");
+    let modelCache: tvmjs.ArtifactCacheTemplate;
+    if (appConfig.useIndexedDBCache) {
+      modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model");
+    } else {
+      modelCache = new tvmjs.ArtifactCache("webllm/model");
+    }
+
     if (config.tokenizer_files.includes("tokenizer.json")) {
       const url = new URL("tokenizer.json", baseUrl).href;
-      const model = await (await modelCache.fetchWithCache(url)).arrayBuffer();
+      const model = await modelCache.fetchWithCache(url, "arraybuffer");
       return Tokenizer.fromJSON(model);
     }
     else if (config.tokenizer_files.includes("tokenizer.model")) {
@@ -707,7 +726,7 @@ export class Engine implements EngineInterface {
         "Consider converting `tokenizer.model` to `tokenizer.json` by compiling the model " +
         "with MLC again, or see if MLC's huggingface provides this file.");
       const url = new URL("tokenizer.model", baseUrl).href;
-      const model = await (await modelCache.fetchWithCache(url)).arrayBuffer();
+      const model = await modelCache.fetchWithCache(url, "arraybuffer");
       return Tokenizer.fromSentencePiece(model);
     }
     throw Error("Cannot handle tokenizer files " + config.tokenizer_files)
```
