-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[v4] added wasm cache #1471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+660
−453
Merged
[v4] added wasm cache #1471
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
1a758a0
added wasm cache
nico-martin 1ce23bc
some refactoring of the hub.js and caching of the wasm factory
nico-martin e480fc3
fixed comment
nico-martin 24596d9
added string as cache return
nico-martin 3adc971
fixes after review
nico-martin afef880
resolved conflicts
nico-martin ddad7cb
Merge branch 'v4' into v4-cache-wasm-file
xenova 01ba6ae
Only return if match is found
xenova 154b261
Merge branch 'v4-cache-wasm-file' of https://github.com/huggingface/t…
xenova 29c4010
Return response even if cache doesn't exist
xenova File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ import { env, apis } from '../env.js'; | |
| // In either case, we select the default export if it exists, otherwise we use the named export. | ||
| import * as ONNX_NODE from 'onnxruntime-node'; | ||
| import * as ONNX_WEB from 'onnxruntime-web/webgpu'; | ||
| import { loadWasmBinary, loadWasmFactory } from './utils/cacheWasm.js'; | ||
|
|
||
| export { Tensor } from 'onnxruntime-common'; | ||
|
|
||
|
|
@@ -145,6 +146,79 @@ const IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV; | |
| */ | ||
| let webInitChain = Promise.resolve(); | ||
|
|
||
| /** | ||
| * Promise that resolves when WASM binary has been loaded (if caching is enabled). | ||
| * This ensures we only attempt to load the WASM binary once. | ||
| * @type {Promise<void>|null} | ||
| */ | ||
| let wasmLoadPromise = null; | ||
|
|
||
| /** | ||
| * Ensures the WASM binary is loaded and cached before creating an inference session. | ||
| * Only runs once, even if called multiple times. | ||
| * | ||
| * @returns {Promise<void>} | ||
| */ | ||
| async function ensureWasmLoaded() { | ||
| // If already loading or loaded, return the existing promise | ||
| if (wasmLoadPromise) { | ||
| return wasmLoadPromise; | ||
| } | ||
|
|
||
| const shouldUseWasmCache = | ||
| env.useWasmCache && | ||
| typeof ONNX_ENV?.wasm?.wasmPaths === 'object' && | ||
| ONNX_ENV?.wasm?.wasmPaths?.wasm && | ||
| ONNX_ENV?.wasm?.wasmPaths?.mjs; | ||
|
|
||
| // Check if we should load the WASM binary | ||
| if (!shouldUseWasmCache) { | ||
| wasmLoadPromise = Promise.resolve(); | ||
| return wasmLoadPromise; | ||
| } | ||
|
|
||
| // Start loading the WASM binary | ||
| wasmLoadPromise = (async () => { | ||
| // At this point, we know wasmPaths is an object (not a string) because | ||
| // shouldUseWasmCache checks for wasmPaths.wasm and wasmPaths.mjs | ||
| const urls = /** @type {{ wasm: string, mjs: string }} */ (ONNX_ENV.wasm.wasmPaths); | ||
|
|
||
| // Load and cache both the WASM binary and factory | ||
| await Promise.all([ | ||
| // Load and cache the WASM binary | ||
| urls.wasm | ||
| ? (async () => { | ||
| try { | ||
| const wasmBinary = await loadWasmBinary(urls.wasm); | ||
| if (wasmBinary) { | ||
| ONNX_ENV.wasm.wasmBinary = wasmBinary; | ||
| } | ||
| } catch (err) { | ||
| console.warn('Failed to pre-load WASM binary:', err); | ||
| } | ||
| })() | ||
| : Promise.resolve(), | ||
|
|
||
| // Load and cache the WASM factory | ||
| urls.mjs | ||
| ? (async () => { | ||
| try { | ||
| const wasmFactoryBlob = await loadWasmFactory(urls.mjs); | ||
| if (wasmFactoryBlob) { | ||
| // @ts-ignore | ||
| ONNX_ENV.wasm.wasmPaths.mjs = wasmFactoryBlob; | ||
| } | ||
| } catch (err) { | ||
| console.warn('Failed to pre-load WASM factory:', err); | ||
| } | ||
| })() | ||
| : Promise.resolve(), | ||
| ]); | ||
| })(); | ||
|
|
||
| return wasmLoadPromise; | ||
| } | ||
|
|
||
| /** | ||
| * Create an ONNX inference session. | ||
| * @param {Uint8Array|string} buffer_or_path The ONNX model buffer or path. | ||
|
|
@@ -153,6 +227,7 @@ let webInitChain = Promise.resolve(); | |
| * @returns {Promise<import('onnxruntime-common').InferenceSession & { config: Object}>} The ONNX inference session. | ||
| */ | ||
| export async function createInferenceSession(buffer_or_path, session_options, session_config) { | ||
| await ensureWasmLoaded(); | ||
| const load = () => InferenceSession.create(buffer_or_path, { | ||
| // Set default log level, but allow overriding through session options | ||
| logSeverityLevel: DEFAULT_LOG_LEVEL, | ||
|
|
@@ -209,15 +284,15 @@ if (ONNX_ENV?.wasm) { | |
|
|
||
| ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI | ||
| ? { | ||
| mjs: `${wasmPathPrefix}/ort-wasm-simd-threaded.mjs`, | ||
| wasm: `${wasmPathPrefix}/ort-wasm-simd-threaded.wasm`, | ||
| mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`, | ||
| wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`, | ||
| } | ||
| : wasmPathPrefix; | ||
| : { | ||
| mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`, | ||
| wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`, | ||
| }; | ||
| } | ||
|
|
||
| // TODO: Add support for loading WASM files from cached buffer when we upgrade to [email protected] | ||
| // https://github.com/microsoft/onnxruntime/pull/21534 | ||
|
|
||
| // Users may wish to proxy the WASM backend to prevent the UI from freezing, | ||
| // However, this is not necessary when using WebGPU, so we default to false. | ||
| ONNX_ENV.wasm.proxy = false; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| import { getCache } from '../../utils/cache.js'; | ||
|
|
||
| /** | ||
| * Loads and caches a file from the given URL. | ||
| * @param {string} url The URL of the file to load. | ||
| * @returns {Promise<Response|import('../../utils/hub/FileResponse.js').default|null|string>} The response object, or null if loading failed. | ||
| */ | ||
| async function loadAndCacheFile(url) { | ||
| const fileName = url.split('/').pop(); | ||
|
|
||
| /** @type {import('../../utils/cache.js').CacheInterface|undefined} */ | ||
| let cache; | ||
| try { | ||
| cache = await getCache(); | ||
|
|
||
| // Try to get from cache first | ||
| if (cache) { | ||
| const result = await cache.match(url); | ||
| if (result) { | ||
| return result; | ||
| } | ||
| } | ||
| } catch (error) { | ||
| console.warn(`Failed to load ${fileName} from cache:`, error); | ||
| } | ||
|
|
||
| // If not in cache, fetch it | ||
| const response = await fetch(url); | ||
|
|
||
| if (!response.ok) { | ||
| throw new Error(`Failed to fetch ${fileName}: ${response.status} ${response.statusText}`); | ||
| } | ||
|
|
||
| // Cache the response for future use | ||
| if (cache) { | ||
| try { | ||
| await cache.put(url, response.clone()); | ||
| } catch (e) { | ||
| console.warn(`Failed to cache ${fileName}:`, e); | ||
| } | ||
| } | ||
|
|
||
| return response; | ||
|
|
||
| } | ||
|
|
||
| /** | ||
| * Loads and caches the WASM binary for ONNX Runtime. | ||
| * @param {string} wasmURL The URL of the WASM file to load. | ||
| * @returns {Promise<ArrayBuffer|null>} The WASM binary as an ArrayBuffer, or null if loading failed. | ||
| */ | ||
|
|
||
| export async function loadWasmBinary(wasmURL) { | ||
| const response = await loadAndCacheFile(wasmURL); | ||
| if (!response || typeof response === 'string') return null; | ||
|
|
||
| try { | ||
| return await response.arrayBuffer(); | ||
| } catch (error) { | ||
| console.warn('Failed to read WASM binary:', error); | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Loads and caches the WASM Factory for ONNX Runtime. | ||
| * @param {string} libURL The URL of the WASM Factory to load. | ||
| * @returns {Promise<string|null>} The blob URL of the WASM Factory, or null if loading failed. | ||
| */ | ||
| export async function loadWasmFactory(libURL) { | ||
| const response = await loadAndCacheFile(libURL); | ||
| if (!response || typeof response === 'string') return null; | ||
|
|
||
| try { | ||
| let code = await response.text(); | ||
| // Fix relative paths when loading factory from blob, overwrite import.meta.url with actual baseURL | ||
| const baseUrl = libURL.split('/').slice(0, -1).join('/'); | ||
| code = code.replace(/import\.meta\.url/g, `"${baseUrl}"`); | ||
| const blob = new Blob([code], { type: 'text/javascript' }); | ||
| return URL.createObjectURL(blob); | ||
| } catch (error) { | ||
| console.warn('Failed to read WASM binary:', error); | ||
| return null; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| import { apis, env } from '../env.js'; | ||
| import FileCache from './hub/FileCache.js'; | ||
|
|
||
| /** | ||
| * @typedef {Object} CacheInterface | ||
| * @property {(request: string) => Promise<Response|import('./hub/FileResponse.js').default|undefined|string>} match | ||
| * Checks if a request is in the cache and returns the cached response if found. | ||
| * @property {(request: string, response: Response, progress_callback?: (data: {progress: number, loaded: number, total: number}) => void) => Promise<void>} put | ||
| * Adds a response to the cache. | ||
| */ | ||
|
|
||
| /** | ||
| * Retrieves an appropriate caching backend based on the environment configuration. | ||
| * Attempts to use custom cache, browser cache, or file system cache in that order of priority. | ||
| * @returns {Promise<CacheInterface | null>} | ||
| * @param file_cache_dir {string|null} Path to a directory in which a downloaded pretrained model configuration should be cached if using the file system cache. | ||
| */ | ||
| export async function getCache(file_cache_dir = null) { | ||
| // First, check if the a caching backend is available | ||
| // If no caching mechanism available, will download the file every time | ||
| let cache = null; | ||
| if (env.useCustomCache) { | ||
| // Allow the user to specify a custom cache system. | ||
| if (!env.customCache) { | ||
| throw Error('`env.useCustomCache=true`, but `env.customCache` is not defined.'); | ||
| } | ||
|
|
||
| // Check that the required methods are defined: | ||
| if (!env.customCache.match || !env.customCache.put) { | ||
| throw new Error( | ||
| '`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. ' + | ||
| 'For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache', | ||
| ); | ||
| } | ||
| cache = env.customCache; | ||
| } | ||
|
|
||
| if (!cache && env.useBrowserCache) { | ||
| if (typeof caches === 'undefined') { | ||
| throw Error('Browser cache is not available in this environment.'); | ||
| } | ||
| try { | ||
| // In some cases, the browser cache may be visible, but not accessible due to security restrictions. | ||
| // For example, when running an application in an iframe, if a user attempts to load the page in | ||
| // incognito mode, the following error is thrown: `DOMException: Failed to execute 'open' on 'CacheStorage': | ||
| // An attempt was made to break through the security policy of the user agent.` | ||
| // So, instead of crashing, we just ignore the error and continue without using the cache. | ||
| cache = await caches.open(env.cacheKey); | ||
| } catch (e) { | ||
| console.warn('An error occurred while opening the browser cache:', e); | ||
| } | ||
| } | ||
|
|
||
| if (!cache && env.useFSCache) { | ||
| if (!apis.IS_FS_AVAILABLE) { | ||
| throw Error('File System Cache is not available in this environment.'); | ||
| } | ||
|
|
||
| // If `cache_dir` is not specified, use the default cache directory | ||
| cache = new FileCache(file_cache_dir ?? env.cacheDir); | ||
| } | ||
|
|
||
| return cache; | ||
| } | ||
|
|
||
| /** | ||
| * Searches the cache for any of the provided names and returns the first match found. | ||
| * @param {CacheInterface} cache The cache to search | ||
| * @param {...string} names The names of the items to search for | ||
| * @returns {Promise<import('./hub/FileResponse.js').default|Response|undefined|string>} The item from the cache, or undefined if not found. | ||
| */ | ||
| export async function tryCache(cache, ...names) { | ||
| for (let name of names) { | ||
| try { | ||
| let result = await cache.match(name); | ||
| if (result) return result; | ||
| } catch (e) { | ||
| continue; | ||
| } | ||
| } | ||
| return undefined; | ||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.