Bug 1964665 - Add a perftest for speecht5-tts r=firefox-ai-ml-reviewers,sparky,perftest-reviewers,tarek

This patch adds a perftest for the ONNX speecht5-tts q8 quantized model as part of our ongoing exploration and benchmarking of TTS tasks for the transform media type project. The goals are 1) add the first perftest for the text-to-speech task as a reference for future tests; 2) benchmark core performance metrics like latency and memory usage to assess realistic inference conditions (note that streaming audio output is not yet tested). This helps establish a baseline for comparing candidate TTS models (as well as different quantization levels) for future integration decisions. The test runs directly in transformers.js without any modification to the pipeline.

Differential Revision: https://phabricator.services.mozilla.com/D248133
This commit is contained in:
Tom Zhang
2025-05-13 15:48:50 +00:00
committed by vbaungally@mozilla.com
parent edbda0a1b9
commit ed3a91e4fa
9 changed files with 237 additions and 0 deletions

View File

@@ -66,6 +66,7 @@ suites:
"ML Smart Tab Clustering": ""
"browser_ml_smart_tab_perf.js": ""
"browser_ml_llama_summarizer_perf.js": ""
"ML Speech T5 TTS": ""
intl/benchmarks/test/xpcshell:
description: "Performance tests running through XPCShell for Intl code"

View File

@@ -98,3 +98,30 @@ smollm2-360-instruct-gguf:
revision: 593b5a2e04c8f3e4ee880263f93e0bd2901ad47f
path-prefix: "onnx-models/HuggingFaceTB/SmolLM2-360M-Instruct-GGUF/main/"
artifact-name: smollm2-360-instruct-gguf.tar.zst
# Fetch entry for the speecht5_tts model weights: a git fetch of the Hugging Face
# repo below, pinned to a fixed revision, with artifact name speecht5_tts.tar.zst.
# NOTE(review): path-prefix presumably mirrors the "<org>/<model>/<revision>" layout
# that the perftest requests from the local model hub - confirm.
speecht5-tts:
description: microsoft/speecht5_tts with ONNX weights to be compatible with Transformers.js
fetch:
type: git
repo: https://huggingface.co/Xenova/speecht5_tts
revision: 73001d8bf5a9a7532c9cede52b5d3c21936939b1
path-prefix: "onnx-models/Xenova/speecht5_tts/main/"
artifact-name: speecht5_tts.tar.zst
# Fetch entry for the HiFi-GAN vocoder repo used together with speecht5_tts: a git
# fetch pinned to a fixed revision, with artifact name speecht5_tts_hifigan.tar.zst.
# NOTE(review): path-prefix presumably mirrors the "<org>/<model>/<revision>" layout
# that the perftest requests from the local model hub - confirm.
speecht5-tts-vocoder:
description: vocoder for speecht5_tts
fetch:
type: git
repo: https://huggingface.co/Xenova/speecht5_hifigan
revision: 05bbb2dea568f207f71113f71c9ecc216840238e
path-prefix: "onnx-models/Xenova/speecht5_hifigan/main/"
artifact-name: speecht5_tts_hifigan.tar.zst
# Fetch entry for the speaker embedding used by speecht5_tts. Note that this is a
# Hugging Face *datasets* repo (transformers.js-docs), fetched via git at a pinned
# revision, with artifact name speecht5_tts_speaker_embedding.tar.zst.
# NOTE(review): the whole docs dataset repo is fetched; presumably only
# speaker_embeddings.bin is needed by the test - confirm.
speecht5-tts-speaker:
description: speaker embedding for speecht5_tts
fetch:
type: git
repo: https://huggingface.co/datasets/Xenova/transformers.js-docs
revision: 4cc21eccd7f4ad1174c2f721c02468e43bf39b60
path-prefix: "onnx-models/Xenova/transformers.js-docs/main/"
artifact-name: speecht5_tts_speaker_embedding.tar.zst

View File

@@ -666,3 +666,32 @@ ml-perf-smart-tab-cluster:
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_smart_tab_clustering_perf.js
# Perftest job that runs browser_ml_speecht5_tts.js through the mozperftest runner
# with the mochitest flavor. This variant passes firefox/firefox-bin (under
# MOZ_FETCHES_DIR) as the mochitest binary.
ml-perf-speecht5-tts:
# Artifacts fetched for the job: ort.jsep.wasm plus the three speecht5-tts
# fetches (model, vocoder, speaker embedding).
fetches:
fetch:
- ort.jsep.wasm
- speecht5-tts
- speecht5-tts-vocoder
- speecht5-tts-speaker
description: Run ML speecht5-tts Model
# NOTE(review): max-run-time is presumably in seconds (900 = 15 minutes) - confirm.
worker:
max-run-time: 900
treeherder:
symbol: perftest(ml-perf-speecht5)
tier: 2
attributes:
batch: false
cron: false
run-on-projects: [autoland, mozilla-central]
# The command creates the artifacts directory next to MOZ_FETCHES_DIR, then invokes
# the runner from inside MOZ_FETCHES_DIR with the local-hub hooks script.
run:
command: >-
mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
cd $MOZ_FETCHES_DIR &&
python3 python/mozperftest/mozperftest/runner.py
--mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
--flavor mochitest
--output $MOZ_FETCHES_DIR/../artifacts
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_speecht5_tts.js

View File

@@ -593,3 +593,32 @@ ml-perf-smart-tab-cluster:
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_smart_tab_clustering_perf.js
# Perftest job that runs browser_ml_speecht5_tts.js through the mozperftest runner
# with the mochitest flavor. This variant passes target.dmg (under MOZ_FETCHES_DIR)
# as the mochitest binary.
# NOTE(review): presumably the runner unpacks the .dmg itself before launching - confirm.
ml-perf-speecht5-tts:
# Artifacts fetched for the job: ort.jsep.wasm plus the three speecht5-tts
# fetches (model, vocoder, speaker embedding).
fetches:
fetch:
- ort.jsep.wasm
- speecht5-tts
- speecht5-tts-vocoder
- speecht5-tts-speaker
description: Run ML speecht5-tts Model
treeherder:
symbol: perftest(ml-perf-speecht5)
tier: 2
# NOTE(review): max-run-time is presumably in seconds (900 = 15 minutes) - confirm.
worker:
max-run-time: 900
attributes:
batch: false
cron: false
run-on-projects: [autoland, mozilla-central]
# The command creates the artifacts directory next to MOZ_FETCHES_DIR, then invokes
# the runner from inside MOZ_FETCHES_DIR with the local-hub hooks script.
run:
command: >-
mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
cd $MOZ_FETCHES_DIR &&
python3 python/mozperftest/mozperftest/runner.py
--mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
--flavor mochitest
--output $MOZ_FETCHES_DIR/../artifacts
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_speecht5_tts.js

View File

@@ -268,3 +268,32 @@ ml-perf-smart-tab-cluster:
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_smart_tab_clustering_perf.js
# Perftest job that runs browser_ml_speecht5_tts.js through the mozperftest runner
# with the mochitest flavor. This variant passes firefox/firefox.exe (under
# MOZ_FETCHES_DIR) as the mochitest binary.
ml-perf-speecht5-tts:
# Artifacts fetched for the job: ort.jsep.wasm plus the three speecht5-tts
# fetches (model, vocoder, speaker embedding).
fetches:
fetch:
- ort.jsep.wasm
- speecht5-tts
- speecht5-tts-vocoder
- speecht5-tts-speaker
description: Run ML speecht5-tts Model
# NOTE(review): max-run-time is presumably in seconds (900 = 15 minutes) - confirm.
worker:
max-run-time: 900
treeherder:
symbol: perftest(ml-perf-speecht5)
tier: 2
attributes:
batch: false
cron: false
run-on-projects: [autoland, mozilla-central]
# The command creates the artifacts directory next to MOZ_FETCHES_DIR, then invokes
# the runner from inside MOZ_FETCHES_DIR with the local-hub hooks script.
run:
command: >-
mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
cd $MOZ_FETCHES_DIR &&
python3 python/mozperftest/mozperftest/runner.py
--mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
--flavor mochitest
--output $MOZ_FETCHES_DIR/../artifacts
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_speecht5_tts.js

View File

@@ -541,3 +541,32 @@ ml-perf-smart-tab-cluster:
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_smart_tab_clustering_perf.js
# Perftest job that runs browser_ml_speecht5_tts.js through the mozperftest runner
# with the mochitest flavor. This variant passes firefox/firefox.exe (under
# MOZ_FETCHES_DIR) as the mochitest binary.
ml-perf-speecht5-tts:
# Artifacts fetched for the job: ort.jsep.wasm plus the three speecht5-tts
# fetches (model, vocoder, speaker embedding).
fetches:
fetch:
- ort.jsep.wasm
- speecht5-tts
- speecht5-tts-vocoder
- speecht5-tts-speaker
description: Run ML speecht5-tts Model
# NOTE(review): max-run-time is presumably in seconds (900 = 15 minutes) - confirm.
worker:
max-run-time: 900
treeherder:
symbol: perftest(ml-perf-speecht5)
tier: 2
attributes:
batch: false
cron: false
run-on-projects: [autoland, mozilla-central]
# The command creates the artifacts directory next to MOZ_FETCHES_DIR, then invokes
# the runner from inside MOZ_FETCHES_DIR with the local-hub hooks script.
run:
command: >-
mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
cd $MOZ_FETCHES_DIR &&
python3 python/mozperftest/mozperftest/runner.py
--mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
--flavor mochitest
--output $MOZ_FETCHES_DIR/../artifacts
--hooks toolkit/components/ml/tests/tools/hooks_local_hub.py
--mochitest-name-change
toolkit/components/ml/tests/browser/browser_ml_speecht5_tts.js

View File

@@ -439,6 +439,24 @@ browser_ml_smart_tab_clustering_perf.js
**Testing Smart Tab Clustering**
browser_ml_speecht5_tts.js
==========================
:owner: GenAI Team
:name: ML Speech T5 TTS
:Default options:
::
--perfherder
--perfherder-metrics name:latency,unit:ms,shouldAlert:False, name:memory,unit:MiB,shouldAlert:False
--verbose
--manifest perftest.toml
--manifest-flavor browser-chrome
--try-platform linux, mac, win
**Testing Speech T5 TTS**
browser_ml_autofill_perf.js
===========================

View File

@@ -0,0 +1,72 @@
/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
"use strict";
// Perftest metadata for the Speech T5 text-to-speech benchmark: owner, display
// name, description and the default runner options.
// NOTE(review): presumably read by the perftest tooling (and its doc generator)
// rather than by this test's own code, so keep it a plain object literal - confirm.
const perfMetadata = {
owner: "GenAI Team",
name: "ML Speech T5 TTS",
description: "Testing Speech T5 TTS",
options: {
default: {
perfherder: true,
// Two metrics are declared, latency (ms) and memory (MiB); neither alerts.
perfherder_metrics: [
{
name: "latency",
unit: "ms",
shouldAlert: false,
},
{
name: "memory",
unit: "MiB",
shouldAlert: false,
},
],
verbose: true,
manifest: "perftest.toml",
manifest_flavor: "browser-chrome",
try_platform: ["linux", "mac", "win"],
},
},
};
// Ask the test harness for a longer timeout, since this benchmark is long-running.
// NOTE(review): 250 is presumably a multiplier of the harness's default per-test
// timeout - confirm against the mochitest documentation.
requestLongerTimeout(250);
// NOTE(review): `sinon` is not referenced anywhere in the visible part of this
// file - confirm whether this import is still needed.
const { sinon } = ChromeUtils.importESModule(
"resource://testing-common/Sinon.sys.mjs"
);
// Text-to-speech model tests
/**
 * Benchmarks the "text-to-speech" pipeline with the q8 variant of
 * Xenova/speecht5_tts by handing a single fixed sentence to perfTest().
 *
 * The speaker embedding and vocoder locations are built from the
 * MOZ_MODELS_HUB environment variable.
 * NOTE(review): MOZ_MODELS_HUB is presumably set by the hooks_local_hub.py
 * perftest hook to the root of the locally served model hub - confirm.
 */
add_task(async function test_ml_tts() {
  // Read the hub root once and log it, so that a missing or wrong value is
  // visible in the test output when model files fail to resolve.
  const modelHubRoot = Services.env.get("MOZ_MODELS_HUB");
  info(`MOZ_MODELS_HUB | ${modelHubRoot}`);

  const options = new PipelineOptions({
    taskName: "text-to-speech",
    modelId: "Xenova/speecht5_tts",
    modelHubUrlTemplate: "{model}/{revision}",
    modelRevision: "main",
    dtype: "q8",
    // 2 minutes, expressed in milliseconds.
    timeoutMS: 2 * 60 * 1000,
  });

  // `args` carries the text to synthesize; `options` carries the per-run
  // settings (speaker embedding file and vocoder location).
  const request = {
    args: ["The one ring to rule them all."],
    options: {
      speaker_embeddings: `${modelHubRoot}/Xenova/transformers.js-docs/main/speaker_embeddings.bin`,
      vocoder: `${modelHubRoot}/Xenova/speecht5_hifigan`,
    },
  };

  // NOTE(review): the semantics of iterations / addColdStart / trackPeakMemory
  // are defined by the perfTest() helper, which is not visible here - confirm.
  await perfTest({
    name: "speecht5_tts",
    options,
    request,
    iterations: 10,
    addColdStart: true,
    trackPeakMemory: false,
  });
});

View File

@@ -26,6 +26,9 @@ disabled = "Disabled as we want to run this only as perftest, not regular CI"
["browser_ml_smart_tab_perf.js"]
disabled = "Disabled as we want to run this only as perftest, not regular CI"
["browser_ml_speecht5_tts.js"]
disabled = "Disabled as we want to run this only as perftest, not regular CI"
["browser_ml_suggest_feature_perf.js"]
disabled = "Disabled as we want to run this only as perftest, not regular CI"