3 changes: 1 addition & 2 deletions examples/README.md
@@ -46,8 +46,7 @@ These examples demonstrate various capabilities via WebLLM's OpenAI-like API.
#### Others

- [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`.
- [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and
users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking
- [cache-usage](cache-usage): demonstrates how WebLLM supports multiple cache backends. Choose between the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache), [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), or the experimental Chrome [Cross-Origin Storage](https://github.com/explainers-by-googlers/cross-origin-storage) extension via `appConfig.cacheBackend`. Also demonstrates various cache utils such as checking
whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc.
- [simple-chat-upload](simple-chat-upload): demonstrates how to upload local models to WebLLM instead of downloading via a URL link

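As a quick illustration of the backend switch described above, here is a minimal sketch (not part of the example itself) that selects a backend before creating the engine; the `cacheBackend` field and its string values are the ones introduced in this change, and the model ID is one from the prebuilt list:

```ts
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  // Sketch only: `cacheBackend` is the option added in this change.
  const appConfig = webllm.prebuiltAppConfig;
  appConfig.cacheBackend = "indexeddb"; // or "cache" or "cross-origin"

  // Engine creation is unchanged; downloaded artifacts land in the chosen backend.
  const engine = await webllm.CreateMLCEngine(
    "Llama-3.2-1B-Instruct-q4f16_1-MLC",
    { appConfig },
  );
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Hello!" }],
  });
  console.log(reply);
}

demo();
```
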
12 changes: 8 additions & 4 deletions examples/cache-usage/README.md
@@ -1,15 +1,19 @@
# WebLLM Cache Usage

WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`.
This folder provides an example on how Cache and IndexedDB Cache are used in WebLLM. We also
WebLLM supports multiple persistent cache backends. You can pick the classic Cache API, IndexedDB, or the experimental Chrome [Cross-Origin Storage](https://github.com/explainers-by-googlers/cross-origin-storage) extension by
setting `AppConfig.cacheBackend` to `"cache"`, `"indexeddb"`, or `"cross-origin"`.
This folder provides an example of how the different caches are used in WebLLM. We also
demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc.

> **Note:** The cross-origin backend requires installation of the [cross-origin storage browser extension](https://github.com/web-ai-community/cross-origin-storage-extension).

For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser.

To inspect the downloaded artifacts in your browser, open up developer console, go to application,
and you will find the artifacts under either `IndexedDB` or `Cache storage`.
and you will find the artifacts under either `IndexedDB` or `Cache storage`. When `"cross-origin"` is selected,
the extension displays origins and resource hashes.
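
Below is a rough sketch of the cache utility helpers this example exercises. It assumes the helpers exported from `cache_util.ts` are re-exported from the package root, as `hasModelInCache` and `deleteModelAllInfoInCache` are in the example code; deletion is skipped for `"cross-origin"`, which does not support it yet.

```ts
import * as webllm from "@mlc-ai/web-llm";

async function cacheUtilsDemo() {
  const appConfig = webllm.prebuiltAppConfig;
  appConfig.cacheBackend = "cache"; // or "indexeddb" / "cross-origin"
  const modelId = "Llama-3.2-1B-Instruct-q4f16_1-MLC";

  // Check whether this model's weights are already cached.
  console.log(await webllm.hasModelInCache(modelId, appConfig));

  // Deletion helpers (skip these when using the cross-origin backend,
  // which does not support deletion yet):
  await webllm.deleteModelWasmInCache(modelId, appConfig); // only the model library wasm
  await webllm.deleteChatConfigInCache(modelId, appConfig); // only mlc-chat-config.json
  await webllm.deleteModelAllInfoInCache(modelId, appConfig); // weights, config, and wasm
}

cacheUtilsDemo();
```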

To run the exapmle, you can do the following steps under this folder
To run the example, you can do the following steps under this folder

```bash
npm install
4 changes: 2 additions & 2 deletions examples/cache-usage/package.json
@@ -3,12 +3,12 @@
"version": "0.1.0",
"private": true,
"scripts": {
"start": "parcel src/cache_usage.html --port 8888",
"start": "parcel src/cache_usage.html --port 8889",
"build": "parcel build src/cache_usage.html --dist-dir lib"
},
"devDependencies": {
"buffer": "^5.7.1",
"parcel": "^2.8.3",
"parcel": "2.8.3",
"process": "^0.11.10",
"tslib": "^2.3.1",
"typescript": "^4.9.5",
21 changes: 15 additions & 6 deletions examples/cache-usage/src/cache_usage.ts
@@ -14,17 +14,20 @@ const initProgressCallback = (report: webllm.InitProgressReport) => {

async function main() {
const appConfig = webllm.prebuiltAppConfig;
// CHANGE THIS TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE
appConfig.useIndexedDBCache = true;
// CHANGE THIS TO SEE THE EFFECTS OF EACH, CODE BELOW DOES NOT NEED TO CHANGE
appConfig.cacheBackend = "cross-origin"; // "indexeddb" or "cache" or "cross-origin"

if (appConfig.useIndexedDBCache) {
const cacheBackend = appConfig.cacheBackend as string;
if (cacheBackend === "indexeddb") {
console.log("Using IndexedDB Cache");
} else {
} else if (cacheBackend === "cache") {
console.log("Using Cache API");
} else if (cacheBackend === "cross-origin") {
console.log("Using Cross-Origin Storage");
}

// 1. This triggers downloading and caching the model with the selected cache backend
const selectedModel = "phi-2-q4f16_1-MLC";
const selectedModel = "Llama-3.2-1B-Instruct-q4f16_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{ initProgressCallback: initProgressCallback, appConfig: appConfig },
@@ -39,6 +42,7 @@ async function main() {
},
],
n: 1,
temperature: 0,
};
let reply = await engine.chat.completions.create(request);
console.log(reply);
@@ -57,7 +61,12 @@ async function main() {
reply = await engine.chat.completions.create(request);
console.log(reply);

// 4. Delete every thing about this model from cache
// Cross-origin storage currently does not support deletion
if (cacheBackend === "cross-origin") {
return;
}

// 4. Delete everything about this model from cache
// You can also delete only the model library wasm, only the model weights, or only the config file
await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
26 changes: 13 additions & 13 deletions package-lock.json

Some generated files are not rendered by default.

187 changes: 162 additions & 25 deletions src/cache_util.ts
@@ -4,10 +4,157 @@ import {
ChatConfig,
ModelRecord,
prebuiltAppConfig,
getCacheBackend,
} from "./config";
import { cleanModelUrl } from "./support";
import { ModelNotFoundError, UnsupportedTokenizerFilesError } from "./error";
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import CrossOriginStorage from "./cross_origin_storage";
import CrossOriginStorageCache from "./cross_origin_storage_cache";

type CacheScope = "webllm/model" | "webllm/config" | "webllm/wasm";

let crossOriginUnavailableLogged = false;
let crossOriginAvailabilityWait: Promise<void> | null = null;

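// Warns, at most once, that the cross-origin backend was requested but the extension is not
// reachable; the availability probe runs asynchronously so concurrent callers share one check.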
function scheduleCrossOriginFallbackWarning(
logger: (msg: string) => void,
): void {
if (crossOriginUnavailableLogged || crossOriginAvailabilityWait) {
return;
}
crossOriginAvailabilityWait = (async () => {
const available = CrossOriginStorage.isAvailable();
crossOriginAvailabilityWait = null;
if (available || crossOriginUnavailableLogged) {
return;
}
logger(
"Cross-origin storage backend is not yet available; temporarily falling back to the Cache API.",
);
crossOriginUnavailableLogged = true;
})();
}

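// The cross-origin backend is only used when it is explicitly selected and the extension is reachable.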
function useCrossOrigin(appConfig: AppConfig): boolean {
return (
getCacheBackend(appConfig) === "cross-origin" &&
CrossOriginStorage.isAvailable()
);
}

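// Resolves the artifact cache for the configured backend. If cross-origin storage is selected
// but unavailable, this falls back to the Cache API and schedules a one-time warning.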
export function getArtifactCache(
scope: CacheScope,
appConfig: AppConfig,
logger: (msg: string) => void = console.warn,
): tvmjs.ArtifactCacheTemplate {
const backend = getCacheBackend(appConfig);
if (backend === "cross-origin") {
if (CrossOriginStorage.isAvailable()) {
return new CrossOriginStorageCache(scope);
}
scheduleCrossOriginFallbackWarning(logger);
}
if (backend === "indexeddb") {
return new tvmjs.ArtifactIndexedDBCache(scope);
}
return new tvmjs.ArtifactCache(scope);
}

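// True when the tensor-cache manifest and every shard it lists are present in the given cache.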
async function hasTensorCache(
cache: tvmjs.ArtifactCacheTemplate,
tensorCacheUrl: string,
): Promise<boolean> {
const jsonUrl = new URL("tensor-cache.json", tensorCacheUrl).href;
const hasManifest = await cache.hasAllKeys([jsonUrl]);
if (!hasManifest) {
return false;
}
const manifest = await cache.fetchWithCache(jsonUrl, "json");
const records = manifest?.records ?? [];
if (!Array.isArray(records) || records.length === 0) {
return false;
}
const shardUrls = records.map(
(entry: { dataPath: string }) =>
new URL(entry.dataPath, tensorCacheUrl).href,
);
return cache.hasAllKeys(shardUrls);
}

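// Deletes every tensor shard referenced by the manifest, then the manifest itself; used by the
// cross-origin backend in place of tvmjs.deleteTensorCache.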
async function deleteTensorCacheEntries(
cache: tvmjs.ArtifactCacheTemplate,
tensorCacheUrl: string,
): Promise<void> {
const jsonUrl = new URL("tensor-cache.json", tensorCacheUrl).href;
const hasManifest = await cache.hasAllKeys([jsonUrl]);
if (!hasManifest) {
return;
}
let manifest: { records?: Array<{ dataPath: string }> };
try {
manifest = await cache.fetchWithCache(jsonUrl, "json");
} catch (err) {
console.warn(
`Failed to load tensor cache manifest at ${jsonUrl}; skipping deletion.`,
err,
);
return;
}
const records = manifest?.records ?? [];
await Promise.all(
records.map(async (entry) => {
if (!entry?.dataPath) {
return;
}
const dataUrl = new URL(entry.dataPath, tensorCacheUrl).href;
await cache.deleteInCache(dataUrl);
}),
);
await cache.deleteInCache(jsonUrl);
}

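// Fetches (or restores from cache) the model tensors. The built-in backends delegate to
// tvm.fetchTensorCache; the cross-origin backend reads the manifest itself, hands the records
// to the runtime's internal loader, and merges any manifest metadata into the runtime.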
export async function fetchModelArtifacts(
tvm: tvmjs.Instance,
tensorCacheUrl: string,
device: tvmjs.DLDevice,
appConfig: AppConfig,
signal?: AbortSignal,
): Promise<any> {
if (!useCrossOrigin(appConfig)) {
const backend = getCacheBackend(appConfig);
const cacheType = backend === "indexeddb" ? "indexeddb" : "cache";
return tvm.fetchTensorCache(
tensorCacheUrl,
device,
"webllm/model",
cacheType,
signal,
);
}

const artifactCache = getArtifactCache("webllm/model", appConfig);
const jsonUrl = new URL("tensor-cache.json", tensorCacheUrl).href;
const manifest = await artifactCache.fetchWithCache(jsonUrl, "json", signal);
const records = (
Array.isArray(manifest?.records) ? manifest.records : []
) as Array<any>;
await (tvm as any).fetchTensorCacheInternal(
tensorCacheUrl,
records,
device,
artifactCache,
signal,
);
if (manifest?.metadata !== undefined) {
const runtime = tvm as any;
runtime.cacheMetadata = {
...runtime.cacheMetadata,
...(manifest.metadata as Record<string, unknown>),
};
}
return manifest;
}

function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord {
const matchedItem = appConfig?.model_list.find(
@@ -28,7 +175,12 @@ export async function hasModelInCache(
}
const modelRecord = findModelRecord(modelId, appConfig);
const modelUrl = cleanModelUrl(modelRecord.model);
const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache";
if (useCrossOrigin(appConfig)) {
const cache = getArtifactCache("webllm/model", appConfig);
return hasTensorCache(cache, modelUrl);
}
const backend = getCacheBackend(appConfig);
const cacheType = backend === "indexeddb" ? "indexeddb" : "cache";
return tvmjs.hasTensorInCache(modelUrl, "webllm/model", cacheType);
}

@@ -58,13 +210,13 @@ export async function deleteModelInCache(
}
const modelRecord = findModelRecord(modelId, appConfig);
const modelUrl = cleanModelUrl(modelRecord.model);
let modelCache: tvmjs.ArtifactCacheTemplate;
if (appConfig.useIndexedDBCache) {
tvmjs.deleteTensorCache(modelUrl, "webllm/model", "indexeddb");
modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model");
const modelCache = getArtifactCache("webllm/model", appConfig);
if (useCrossOrigin(appConfig)) {
await deleteTensorCacheEntries(modelCache, modelUrl);
} else {
tvmjs.deleteTensorCache(modelUrl, "webllm/model", "cache");
modelCache = new tvmjs.ArtifactCache("webllm/model");
const backend = getCacheBackend(appConfig);
const cacheType = backend === "indexeddb" ? "indexeddb" : "cache";
await tvmjs.deleteTensorCache(modelUrl, "webllm/model", cacheType);
}
await modelCache.deleteInCache(new URL("tokenizer.model", modelUrl).href);
await modelCache.deleteInCache(new URL("tokenizer.json", modelUrl).href);
@@ -79,12 +231,7 @@ export async function deleteChatConfigInCache(
appConfig = prebuiltAppConfig;
}
const modelRecord = findModelRecord(modelId, appConfig);
let configCache: tvmjs.ArtifactCacheTemplate;
if (appConfig.useIndexedDBCache) {
configCache = new tvmjs.ArtifactIndexedDBCache("webllm/config");
} else {
configCache = new tvmjs.ArtifactCache("webllm/config");
}
const configCache = getArtifactCache("webllm/config", appConfig);
const modelUrl = cleanModelUrl(modelRecord.model);
const configUrl = new URL("mlc-chat-config.json", modelUrl).href;
await configCache.deleteInCache(configUrl);
@@ -99,12 +246,7 @@ export async function deleteModelWasmInCache(
appConfig = prebuiltAppConfig;
}
const modelRecord = findModelRecord(modelId, appConfig);
let wasmCache: tvmjs.ArtifactCacheTemplate;
if (appConfig.useIndexedDBCache) {
wasmCache = new tvmjs.ArtifactIndexedDBCache("webllm/wasm");
} else {
wasmCache = new tvmjs.ArtifactCache("webllm/wasm");
}
const wasmCache = getArtifactCache("webllm/wasm", appConfig);
await wasmCache.deleteInCache(modelRecord.model_lib);
}

@@ -122,12 +264,7 @@ export async function asyncLoadTokenizer(
appConfig: AppConfig,
logger: (msg: string) => void = console.log,
): Promise<Tokenizer> {
let modelCache: tvmjs.ArtifactCacheTemplate;
if (appConfig.useIndexedDBCache) {
modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model");
} else {
modelCache = new tvmjs.ArtifactCache("webllm/model");
}
const modelCache = getArtifactCache("webllm/model", appConfig, logger);

if (config.tokenizer_files.includes("tokenizer.json")) {
const url = new URL("tokenizer.json", baseUrl).href;