/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

"use strict";

/**
 * @type {import("../../actors/MLEngineParent.sys.mjs")}
 */
const { MLEngineParent } = ChromeUtils.importESModule(
  "resource://gre/actors/MLEngineParent.sys.mjs"
);

const { ModelHub, IndexedDBCache } = ChromeUtils.importESModule(
  "chrome://global/content/ml/ModelHub.sys.mjs"
);

const {
  createEngine,
  // EngineProcess is used by the cleanup helpers below to tear down engines.
  EngineProcess,
  PipelineOptions,
  QuantizationLevel,
  ExecutionPriority,
  InferenceDevice,
  LogLevel,
} = ChromeUtils.importESModule(
  "chrome://global/content/ml/EngineProcess.sys.mjs"
);

// This test suite shares some utility functions with translations as they work
// in a very similar fashion. Eventually, the plan is to unify these two
// components.
Services.scriptloader.loadSubScript(
  "chrome://mochitests/content/browser/toolkit/components/translations/tests/browser/shared-head.js",
  this
);

/**
 * Sets up the stage for a test.
 */
async function setup({ disabled = false, prefs = [], records = null } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
    records,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "All"],
      ["browser.ml.modelCacheTimeout", 1000],
      ["browser.ml.checkForMemory", false],
      ["browser.ml.queueWaitTimeout", 2],
      ["javascript.options.wasm_lazy_tiering", true],
      ...prefs,
    ],
  });

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}

function getDefaultWasmRecords() {
  return [
    {
      name: MLEngineParent.WASM_FILENAME,
      version: MLEngineParent.WASM_MAJOR_VERSION + ".0",
    },
  ];
}

async function createAndMockMLRemoteSettings({
  autoDownloadFromRemoteSettings = false,
  records = null,
} = {}) {
  const wasmRecords = getDefaultWasmRecords().map(({ name, version }) => ({
    id: crypto.randomUUID(),
    name,
    version,
    last_modified: Date.now(),
    schema: Date.now(),
  }));

  const runtime = await createRemoteClient({
    collectionName: "test-translation-wasm",
    records: wasmRecords,
    attachmentMock: true,
    autoDownloadFromRemoteSettings,
  });

  const options = await createRemoteClient({
    records: records || [
      {
        taskName: "moz-echo",
        modelId: "mozilla/distilvit",
        processorId: "mozilla/distilvit",
        tokenizerId: "mozilla/distilvit",
        modelRevision: "main",
        processorRevision: "main",
        tokenizerRevision: "main",
        dtype: "q8",
        id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
      },
    ],
    collectionName: "test-ml-inference-options",
  });

  const allowDeny = await createRemoteClient({
    records: [
      {
        filter: "ALLOW",
        urlPrefix: "https://",
        id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
      },
    ],
    collectionName: "test-ml-allow-deny-list",
  });

  const remoteClients = {
    "ml-onnx-runtime": runtime,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  };

  MLEngineParent.mockRemoteSettings({
    "ml-onnx-runtime": runtime.client,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  });

  return {
    async removeMocks() {
      await runtime.client.attachments.deleteAll();
      await runtime.client.db.clear();
      await options.db.clear();
      await allowDeny.db.clear();
      MLEngineParent.removeMocks();
    },
    remoteClients,
  };
}
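/**
 * Example (illustrative only, never called by the tests): a minimal sketch of
 * how `setup` and the default "moz-echo" record above fit together in a task.
 * The request payload and the `resolvePendingDownloads` call are assumptions:
 * the attachment mock comes from the translations shared-head.js helpers, and
 * because `autoDownloadFromRemoteSettings` is false the mocked wasm download
 * has to be resolved explicitly.
 */
async function exampleEchoUsage() {
  const { cleanup, remoteClients } = await setup();
  const engine = await createEngine({ taskName: "moz-echo" });
  // Start the request, then let the mocked wasm attachment download resolve
  // (assumed helper from the shared attachment mock).
  const inferencePromise = engine.run({ data: "hello" });
  await remoteClients["ml-onnx-runtime"].resolvePendingDownloads(1);
  const result = await inferencePromise;
  await EngineProcess.destroyMLEngine();
  await cleanup();
  return result;
}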
/**
 * Creates a local RemoteSettingsClient for use within tests.
 *
 * @returns {RemoteSettings|AttachmentMock}
 */
async function createRemoteClient({
  records,
  collectionName,
  attachmentMock = false,
  autoDownloadFromRemoteSettings = false,
}) {
  const { RemoteSettings } = ChromeUtils.importESModule(
    "resource://services-settings/remote-settings.sys.mjs"
  );
  const client = RemoteSettings(`${collectionName}-${_remoteSettingsMockId++}`);
  await client.db.clear();
  await client.db.importChanges({}, Date.now(), records);

  if (attachmentMock) {
    return createAttachmentMock(
      client,
      collectionName,
      autoDownloadFromRemoteSettings
    );
  }
  return client;
}

/*
 * Perftest related
 */
const MB_TO_BYTES = 1024 * 1024;

const INIT_START = "initializationStart";
const INIT_END = "initializationEnd";
const RUN_START = "runStart";
const RUN_END = "runEnd";
const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
const INITIALIZATION_LATENCY = "initialization-latency";
const MODEL_RUN_LATENCY = "model-run-latency";
const TOTAL_MEMORY_USAGE = "total-memory-usage";
const COLD_START_PREFIX = "cold-start-";
const ITERATIONS = 10;
const WHEN = "when";
const MEMORY = "memory";

const formatNumber = new Intl.NumberFormat("en-US", {
  maximumSignificantDigits: 4,
}).format;

function median(arr) {
  arr = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(arr.length / 2);

  if (arr.length % 2) {
    return arr[mid];
  }
  return (arr[mid - 1] + arr[mid]) / 2;
}

function stringify(arr) {
  function pad(str) {
    str = str.padStart(7, " ");
    if (str[0] != " ") {
      str = " " + str;
    }
    return str;
  }
  return arr.reduce((acc, elem) => acc + pad(formatNumber(elem)), "");
}

function reportMetrics(journal) {
  let metrics = {};
  let text = "\nResults (ms)\n";
  const names = Object.keys(journal);
  const prefixLen = 1 + Math.max(...names.map(str => str.length));

  for (const name in journal) {
    const med = median(journal[name]);
    text += (name + ":").padEnd(prefixLen, " ") + stringify(journal[name]);
    text += " median " + formatNumber(med) + "\n";
    metrics[name] = med;
  }

  dump(text);
  info(`perfMetrics | ${JSON.stringify(metrics)}`);
}

/**
 * Fetches the latest metric entry with the specified name and retrieves its
 * value for the given key. If multiple metrics share the same name, the
 * function returns the key from the most recent one.
 *
 * @param {Array} metrics - The array of metric objects to search through.
 * @param {string} name - The name of the metric to find.
 * @param {string} key - The key within the metric object whose value should be returned.
 * @returns {*} - The value of the specified key in the latest metric with the
 *   given name, or undefined if no matching metric is found.
 */
function fetchMLMetric(metrics, name, key) {
  const matchingMetrics = metrics.filter(metric => metric.name === name);
  if (matchingMetrics.length === 0) {
    // Return undefined if no match found.
    return undefined;
  }
  const latestMetric = matchingMetrics[matchingMetrics.length - 1];
  return latestMetric[key];
}
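// Illustrative sketch (hypothetical values): `engine.run()` exposes a flat
// list of { name, when } marker entries in `res.metrics`, so a latency is the
// difference between matching end/start markers, e.g.:
//
//   const metrics = [
//     { name: INIT_START, when: 1000 },
//     { name: INIT_END, when: 1250 },
//   ];
//   fetchMLMetric(metrics, INIT_END, WHEN) -
//     fetchMLMetric(metrics, INIT_START, WHEN); // => 250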
function fetchLatencyMetrics(metrics, isFirstRun) {
  const pipelineLatency =
    fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
    fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
  const initLatency =
    fetchMLMetric(metrics, INIT_END, WHEN) -
    fetchMLMetric(metrics, INIT_START, WHEN);
  const runLatency =
    fetchMLMetric(metrics, RUN_END, WHEN) -
    fetchMLMetric(metrics, RUN_START, WHEN);

  return {
    [`${isFirstRun ? COLD_START_PREFIX : ""}${PIPELINE_READY_LATENCY}`]:
      pipelineLatency,
    [`${isFirstRun ? COLD_START_PREFIX : ""}${INITIALIZATION_LATENCY}`]:
      initLatency,
    [`${isFirstRun ? COLD_START_PREFIX : ""}${MODEL_RUN_LATENCY}`]: runLatency,
  };
}

function fetchMetrics(metrics, isFirstRun) {
  return {
    ...fetchLatencyMetrics(metrics, isFirstRun),
  };
}

async function initializeEngine(pipelineOptions) {
  const modelDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
  );
  info(`Model Directory: ${modelDirectory}`);

  const modelHubRootUrl = Services.env.get("MOZ_MODELS_HUB");
  if (!modelHubRootUrl) {
    throw new Error(
      "MOZ_MODELS_HUB is not set, you need to run with --hooks toolkit/components/ml/tests/tools/hook_local_hub.py"
    );
  }

  info(`ModelHubRootUrl: ${modelHubRootUrl}`);

  const { cleanup } = await perfSetup({
    prefs: [["browser.ml.modelHubRootUrl", modelHubRootUrl]],
  });

  info("Get the engine process");
  const mlEngineParent = await EngineProcess.getMLEngineParent();

  info("Get Pipeline Options");
  info("Run the inference");
  return {
    cleanup,
    engine: await mlEngineParent.getEngine(pipelineOptions),
  };
}

function normalizePathForOS(path) {
  if (Services.appinfo.OS === "WINNT") {
    // On Windows, replace forward slashes with backslashes.
    return path.replace(/\//g, "\\");
  }

  // On Unix-like systems, replace backslashes with forward slashes.
  return path.replace(/\\/g, "/");
}

async function perfSetup({ disabled = false, prefs = [] } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "Error"],
      ["browser.ml.modelCacheTimeout", 1000],
      ["browser.ml.checkForMemory", false],
      ["javascript.options.wasm_lazy_tiering", true],
      ...prefs,
    ],
  });

  const artifactDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}`
  );

  async function pathExists(path) {
    try {
      return await IOUtils.exists(path);
    } catch (e) {
      return false;
    }
  }

  // Stop immediately if this fails.
  if (!artifactDirectory) {
    throw new Error(
      "The wasm artifact directory is not set. This usually happens when running locally. " +
        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
        "Place them in a directory and rerun the test with the environment variable 'MOZ_FETCHES_DIR' " +
        "set such that all the files are directly inside 'MOZ_FETCHES_DIR'."
    );
  }

  if (!PathUtils.isAbsolute(artifactDirectory)) {
    throw new Error(
      "Please provide an absolute path for 'MOZ_FETCHES_DIR' and not a relative path."
    );
  }

  async function download(record) {
    const recordPath = normalizePathForOS(
      `${artifactDirectory}/${record.name}`
    );

    // Stop immediately if this fails.
    if (!(await pathExists(recordPath))) {
      throw new Error(
        `The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
          "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
          `Place them in the directory <${artifactDirectory}> ` +
          `such that <${recordPath}> exists.`
      );
    }
    return {
      buffer: (await IOUtils.read(recordPath)).buffer,
    };
  }

  remoteClients["ml-onnx-runtime"].client.attachments.download = download;

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}

/**
 * Returns the total memory usage in MiB for the inference process.
 */
async function getTotalMemoryUsage() {
  let mgr = Cc["@mozilla.org/memory-reporter-manager;1"].getService(
    Ci.nsIMemoryReporterManager
  );

  let total = 0;
  const handleReport = (
    aProcess,
    aPath,
    _aKind,
    _aUnits,
    aAmount,
    _aDescription
  ) => {
    if (aProcess.startsWith("inference")) {
      if (aPath.startsWith("explicit")) {
        total += aAmount;
      }
    }
  };

  await new Promise(r =>
    mgr.getReportsExtended(
      handleReport,
      null,
      r,
      null,
      /* anonymized = */ false,
      /* minimizeMemoryUsage = */ true,
      null
    )
  );

  return Math.round(total / 1024 / 1024);
}

/**
 * Runs an inference given the options and arguments.
 */
async function runInference(pipelineOptions, request, isFirstRun = false) {
  const { cleanup, engine } = await initializeEngine(pipelineOptions);
  let metrics = {};
  try {
    const res = await engine.run(request);
    metrics = fetchMetrics(res.metrics, isFirstRun);
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${TOTAL_MEMORY_USAGE}`] =
      await getTotalMemoryUsage();
  } finally {
    await EngineProcess.destroyMLEngine();
    await cleanup();
  }
  return metrics;
}

/**
 * Runs a performance test for the given name, options, and arguments and
 * reports the results for perfherder.
 */
async function perfTest(
  name,
  options,
  request,
  iterations = ITERATIONS,
  addColdStart = false
) {
  name = name.toUpperCase();

  let METRICS = [
    `${name}-${PIPELINE_READY_LATENCY}`,
    `${name}-${INITIALIZATION_LATENCY}`,
    `${name}-${MODEL_RUN_LATENCY}`,
    `${name}-${TOTAL_MEMORY_USAGE}`,
    ...(addColdStart
      ? [
          `${name}-${COLD_START_PREFIX}${PIPELINE_READY_LATENCY}`,
          `${name}-${COLD_START_PREFIX}${INITIALIZATION_LATENCY}`,
          `${name}-${COLD_START_PREFIX}${MODEL_RUN_LATENCY}`,
          `${name}-${COLD_START_PREFIX}${TOTAL_MEMORY_USAGE}`,
        ]
      : []),
  ];

  const journal = {};
  for (let metric of METRICS) {
    journal[metric] = [];
  }

  const pipelineOptions = new PipelineOptions(options);
  let nIterations = addColdStart ? iterations + 1 : iterations;
  for (let i = 0; i < nIterations; i++) {
    const shouldAddColdStart = addColdStart && i === 0;
    let metrics = await runInference(
      pipelineOptions,
      request,
      shouldAddColdStart
    );

    for (let [metricName, metricVal] of Object.entries(metrics)) {
      if (metricVal === null || metricVal === undefined || metricVal < 0) {
        metricVal = 0;
      }
      journal[`${name}-${metricName}`].push(metricVal);
    }
  }
  // Record at least one assertion so the harness does not flag the test.
  Assert.ok(true);
  reportMetrics(journal);
}