Skip to content

Commit 50806a7

Browse files
authored
[js/web] support external data in npm test (#19377)
### Description

Support external data in `npm test`. This allows the test runner to detect whether external data is available in the test folder and, if it is, to load it as external data automatically.

This feature does not parse every model to figure out whether the model has external data. The following comment in the code explains how the test runner decides whether it should parse the model file:

```js
// for performance consideration, we do not parse every model. when we think it's likely to have external
// data, we will parse it. We think it's "likely" when one of the following conditions is met:
// 1. any file in the same folder has the similar file name as the model file
//    (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
// 2. the file size is larger than 1GB
```
1 parent efc17e7 commit 50806a7

File tree

3 files changed

+55
-6
lines changed

3 files changed

+55
-6
lines changed

js/web/script/test-runner-cli.ts

+48-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import * as os from 'os';
1212
import * as path from 'path';
1313
import {inspect} from 'util';
1414

15+
import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
1516
import {bufferToBase64} from '../test/test-shared';
1617
import {Test} from '../test/test-types';
1718

@@ -264,10 +265,12 @@ async function main() {
264265

265266
let modelUrl: string|null = null;
266267
let cases: Test.ModelTestCase[] = [];
268+
let externalData: Array<{data: string; path: string}>|undefined;
267269

268270
npmlog.verbose('TestRunnerCli.Init.Model', `Start to prepare test data from folder: ${testDataRootFolder}`);
269271

270272
try {
273+
const maybeExternalDataFiles: Array<[fileNameWithoutExtension: string, size: number]> = [];
271274
for (const thisPath of fs.readdirSync(testDataRootFolder)) {
272275
const thisFullPath = path.join(testDataRootFolder, thisPath);
273276
const stat = fs.lstatSync(thisFullPath);
@@ -282,6 +285,8 @@ async function main() {
282285
} else {
283286
throw new Error('there are multiple model files under the folder specified');
284287
}
288+
} else {
289+
maybeExternalDataFiles.push([path.parse(thisPath).name, stat.size]);
285290
}
286291
} else if (stat.isDirectory()) {
287292
const dataFiles: string[] = [];
@@ -307,6 +312,34 @@ async function main() {
307312
if (modelUrl === null) {
308313
throw new Error('there are no model file under the folder specified');
309314
}
315+
// for performance consideration, we do not parse every model. when we think it's likely to have external
316+
// data, we will parse it. We think it's "likely" when one of the following conditions is met:
317+
// 1. any file in the same folder has the similar file name as the model file
318+
// (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
319+
// 2. the file size is larger than 1GB
320+
const likelyToHaveExternalData = maybeExternalDataFiles.some(
321+
([fileNameWithoutExtension, size]) =>
322+
path.basename(modelUrl!).startsWith(fileNameWithoutExtension) || size >= 1 * 1024 * 1024 * 1024);
323+
if (likelyToHaveExternalData) {
324+
const model = onnx.ModelProto.decode(fs.readFileSync(path.join(testDataRootFolder, path.basename(modelUrl!))));
325+
const externalDataPathSet = new Set<string>();
326+
for (const initializer of model.graph!.initializer!) {
327+
if (initializer.externalData) {
328+
for (const data of initializer.externalData) {
329+
if (data.key === 'location') {
330+
externalDataPathSet.add(data.value!);
331+
}
332+
}
333+
}
334+
}
335+
externalData = [];
336+
const externalDataPaths = [...externalDataPathSet];
337+
for (const dataPath of externalDataPaths) {
338+
const fullPath = path.resolve(testDataRootFolder, dataPath);
339+
const url = path.join(TEST_DATA_BASE, path.relative(TEST_ROOT, fullPath));
340+
externalData.push({data: url, path: dataPath});
341+
}
342+
}
310343
} catch (e) {
311344
npmlog.error('TestRunnerCli.Init.Model', `Failed to prepare test data. Error: ${inspect(e)}`);
312345
throw e;
@@ -340,9 +373,23 @@ async function main() {
340373
npmlog.verbose('TestRunnerCli.Init.Model', ` Model file: ${modelUrl}`);
341374
npmlog.verbose('TestRunnerCli.Init.Model', ` Backend: ${backend}`);
342375
npmlog.verbose('TestRunnerCli.Init.Model', ` Test set(s): ${cases.length} (${caseCount})`);
376+
if (externalData) {
377+
npmlog.verbose('TestRunnerCli.Init.Model', ` External data: ${externalData.length}`);
378+
for (const data of externalData) {
379+
npmlog.verbose('TestRunnerCli.Init.Model', ` - ${data.path}`);
380+
}
381+
}
343382
npmlog.verbose('TestRunnerCli.Init.Model', '===============================================================');
344383

345-
return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases, ioBinding};
384+
return {
385+
name: path.basename(testDataRootFolder),
386+
platformCondition,
387+
modelUrl,
388+
backend,
389+
cases,
390+
ioBinding,
391+
externalData
392+
};
346393
}
347394

348395
function tryLocateModelTestFolder(searchPattern: string): string {

js/web/test/test-runner.ts

+6-5
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@ async function loadTensors(
138138

139139
async function initializeSession(
140140
modelFilePath: string, backendHint: ort.InferenceSession.ExecutionProviderConfig, ioBindingMode: Test.IOBindingMode,
141-
profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions,
142-
fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
141+
profile: boolean, externalData: ort.InferenceSession.SessionOptions['externalData'],
142+
sessionOptions: ort.InferenceSession.SessionOptions, fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
143143
const preloadModelData: Uint8Array|undefined =
144144
fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined;
145145
Logger.verbose(
@@ -153,7 +153,8 @@ async function initializeSession(
153153
executionProviders: [backendHint],
154154
profiler: profilerConfig,
155155
enableProfiling: profile,
156-
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined
156+
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
157+
externalData
157158
};
158159

159160
let session: ort.InferenceSession;
@@ -246,8 +247,8 @@ export class ModelTestContext {
246247
const executionProviderConfig =
247248
modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
248249
const session = await initializeSession(
249-
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, testOptions?.sessionOptions || {},
250-
this.cache);
250+
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData,
251+
testOptions?.sessionOptions || {}, this.cache);
251252

252253
const initEnd = now();
253254

js/web/test/test-types.ts

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ export declare namespace Test {
6565
export interface ModelTest {
6666
name: string;
6767
modelUrl: string;
68+
externalData?: InferenceSession.SessionOptions['externalData'];
6869
backend?: string; // value should be populated at build time
6970
ioBinding: IOBindingMode;
7071
platformCondition?: PlatformCondition;

0 commit comments

Comments
 (0)