Bug 1947840 - Collect Peak Memory Usage in Translations Perftests r=translations-reviewers,perftest-reviewers,afinder,gregtatum

Updates the Translations perftests to collect peak memory usage
in addition to the stabilized memory usage metric that it already
collects.

Differential Revision: https://phabricator.services.mozilla.com/D237955
This commit is contained in:
Erik Nordin
2025-02-21 01:12:03 +00:00
parent 1760896732
commit 5d5dc92224
4 changed files with 293 additions and 41 deletions

View File

@@ -39,7 +39,13 @@ const perfMetadata = {
lowerIsBetter: false,
},
{
name: "total-memory-usage",
name: "peak-memory-usage",
unit: "MiB",
shouldAlert: true,
lowerIsBetter: true,
},
{
name: "stabilized-memory-usage",
unit: "MiB",
shouldAlert: true,
lowerIsBetter: true,
@@ -60,9 +66,9 @@ const perfMetadata = {
};
/**
* Request 2x longer timeout for this test.
* Request 4x longer timeout for this test.
*/
requestLongerTimeout(2);
requestLongerTimeout(4);
/**
* Runs the translations benchmark tests from Spanish to English.
@@ -70,8 +76,10 @@ requestLongerTimeout(2);
add_task(async function test_translations_performance_es_en() {
// Benchmark translating the Spanish benchmark page ("es") into English ("en").
await TranslationsBencher.benchmarkTranslation({
page: SPANISH_BENCHMARK_PAGE_URL,
runCount: 5,
sourceLanguage: "es",
targetLanguage: "en",
// The number of speed-sampling runs to perform.
speedBenchCount: 5,
// The number of memory-sampling runs to perform.
memoryBenchCount: 5,
// The interval in milliseconds between memory usage samples.
memorySampleInterval: 10,
});
});

View File

@@ -404,11 +404,30 @@ class TranslationsBencher {
static METRIC_TOKENS_PER_SECOND = "tokens-per-second";
/**
* The metric base name for total memory usage in the inference process.
* The metric base name for peak memory usage in the inference process.
*
* We often see a spike in memory usage when models initialize that eventually
* stabilizes as the inference process continues running. As such, it is important
* that we collect two memory metrics during our benchmarks.
*
* @see {TranslationsBencher.METRIC_STABILIZED_MEMORY_USAGE}
*
* @type {string}
*/
static METRIC_TOTAL_MEMORY_USAGE = "total-memory-usage";
static METRIC_PEAK_MEMORY_USAGE = "peak-memory-usage";
/**
* The metric base name for stabilized memory usage in the inference process.
*
* We often see a spike in memory usage when models initialize that eventually
* stabilizes as the inference process continues running. As such, it is important
* that we collect two memory metrics during our benchmarks.
*
* @see {TranslationsBencher.METRIC_PEAK_MEMORY_USAGE}
*
* @type {string}
*/
static METRIC_STABILIZED_MEMORY_USAGE = "stabilized-memory-usage";
/**
* The metric base name for total translation time.
@@ -437,6 +456,12 @@ class TranslationsBencher {
* A class that gathers and reports metrics to perftest.
*/
static Journal = class {
/**
* A map of collected metrics, where the key is the metric name
* and the value is an array of all recorded values.
*
* @type {Record<string, number[]>}
*/
#metrics = {};
/**
@@ -449,7 +474,8 @@ class TranslationsBencher {
if (!this.#metrics[metricName]) {
this.#metrics[metricName] = [];
}
this.#metrics[metricName].push(value);
this.#metrics[metricName].push(Number(value.toFixed(3)));
}
/**
@@ -483,21 +509,122 @@ class TranslationsBencher {
};
/**
* Benchmarks the translation process and reports metrics to perftest.
* A class to track peak memory usage during translation via sampled intervals.
*/
static PeakMemorySampler = class {
/**
* The peak recorded memory in mebibytes (MiB).
*
* @type {number}
*/
#peakMemoryMiB = 0;
/**
* The interval id for the memory sample timer.
*
* @type {number|null}
*/
#intervalId = null;
/**
* The interval at which memory usage is sampled in milliseconds.
*
* @type {number}
*/
#interval;
/**
* Constructs a PeakMemorySampler.
*
* @param {number} interval - The interval in milliseconds between memory samples.
*/
constructor(interval) {
this.#interval = interval;
}
/**
* Collects the current inference process memory usage and updates
* the peak memory measurement if the current usage exceeds the previous peak.
*
* @returns {Promise<void>}
*/
async #collectMemorySample() {
const currentMemoryMiB =
await TranslationsBencher.#getInferenceProcessTotalMemoryUsage();
if (currentMemoryMiB > this.#peakMemoryMiB) {
this.#peakMemoryMiB = currentMemoryMiB;
}
}
/**
* Starts the interval timer to begin sampling a new peak memory usage.
*/
start() {
if (this.#intervalId !== null) {
throw new Error(
"Attempt to start a PeakMemorySampler that was already running."
);
}
this.#peakMemoryMiB = 0;
this.#intervalId = setInterval(() => {
this.#collectMemorySample().catch(console.error);
}, this.#interval);
}
/**
* Stops the interval timer from continuing to sample peak memory usage.
*/
stop() {
if (this.#intervalId === null) {
throw new Error(
"Attempt to stop a PeakMemorySampler that was not running."
);
}
clearInterval(this.#intervalId);
this.#intervalId = null;
this.#collectMemorySample();
}
/**
* Returns the peak recorded memory usage in mebibytes (MiB).
*
* @returns {number}
*/
getPeakRecordedMemoryUsage() {
if (this.#intervalId) {
throw new Error(
"Attempt to retrieve peak recorded memory usage while the memory sampler is running."
);
}
return this.#peakMemoryMiB;
}
};
/**
* Benchmarks the translation process (both memory usage and speed)
* and reports metrics to perftest. It runs one full translation for
* each memory sample, and then one full translation for each speed sample.
*
* @param {object} options - The benchmark options.
* @param {string} options.page - The URL of the page to test.
* @param {number} options.runCount - The number of runs to perform.
* @param {string} options.sourceLanguage - The BCP-47 language tag for the source language.
* @param {string} options.targetLanguage - The BCP-47 language tag for the target language.
* @param {number} options.speedBenchCount - The number of speed-sampling runs to perform.
* @param {number} options.memoryBenchCount - The number of memory-sampling runs to perform.
* @param {number} [options.memorySampleInterval] - The interval in milliseconds between memory usage samples.
*
* @returns {Promise<void>} Resolves when benchmarking is complete.
*/
static async benchmarkTranslation({
page,
runCount,
sourceLanguage,
targetLanguage,
speedBenchCount,
memoryBenchCount,
memorySampleInterval = 10,
}) {
const { wordCount, tokenCount, pageLanguage } =
TranslationsBencher.#PAGE_DATA[page] ?? {};
@@ -540,7 +667,135 @@ class TranslationsBencher {
const journal = new TranslationsBencher.Journal();
for (let runNumber = 0; runNumber < runCount; ++runNumber) {
await TranslationsBencher.#benchmarkTranslationMemory({
page,
journal,
sourceLanguage,
targetLanguage,
memoryBenchCount,
memorySampleInterval,
});
await TranslationsBencher.#benchmarkTranslationSpeed({
page,
journal,
sourceLanguage,
targetLanguage,
wordCount,
tokenCount,
speedBenchCount,
});
journal.reportMetrics();
}
/**
* Benchmarks memory usage by measuring peak and stabilized memory usage
* across multiple runs of the translation process.
*
* @param {object} options - The benchmark options.
* @param {string} options.page - The URL of the page to test.
* @param {TranslationsBencher.Journal} options.journal - The shared metrics journal.
* @param {string} options.sourceLanguage - The BCP-47 language tag for the source language.
* @param {string} options.targetLanguage - The BCP-47 language tag for the target language.
* @param {number} options.memoryBenchCount - The number of runs to perform for memory sampling.
* @param {number} options.memorySampleInterval - The interval in milliseconds between memory samples.
*
* @returns {Promise<void>} Resolves when memory benchmarking is complete.
*/
static async #benchmarkTranslationMemory({
page,
journal,
sourceLanguage,
targetLanguage,
memoryBenchCount,
memorySampleInterval,
}) {
for (let runNumber = 0; runNumber < memoryBenchCount; ++runNumber) {
const { cleanup, runInPage } = await loadTestPage({
page,
endToEndTest: true,
languagePairs: [
{ fromLang: sourceLanguage, toLang: "en" },
{ fromLang: "en", toLang: targetLanguage },
],
prefs: [["browser.translations.logLevel", "Error"]],
});
// Create a new PeakMemorySampler using the provided interval.
const peakMemorySampler = new TranslationsBencher.PeakMemorySampler(
memorySampleInterval
);
await TranslationsBencher.#injectTranslationCompleteObserver(runInPage);
await FullPageTranslationsTestUtils.assertTranslationsButton(
{ button: true, circleArrows: false, locale: false, icon: true },
"The button is available."
);
await FullPageTranslationsTestUtils.openPanel({
onOpenPanel: FullPageTranslationsTestUtils.assertPanelViewDefault,
});
await FullPageTranslationsTestUtils.changeSelectedFromLanguage({
langTag: sourceLanguage,
});
await FullPageTranslationsTestUtils.changeSelectedToLanguage({
langTag: targetLanguage,
});
const translationCompleteTimestampPromise =
TranslationsBencher.#getTranslationCompleteTimestampPromise(runInPage);
peakMemorySampler.start();
await FullPageTranslationsTestUtils.clickTranslateButton();
await translationCompleteTimestampPromise;
peakMemorySampler.stop();
const peakMemoryMiB = peakMemorySampler.getPeakRecordedMemoryUsage();
const stabilizedMemoryMiB =
await TranslationsBencher.#getInferenceProcessTotalMemoryUsage();
journal.pushMetrics([
[TranslationsBencher.METRIC_PEAK_MEMORY_USAGE, peakMemoryMiB],
[
TranslationsBencher.METRIC_STABILIZED_MEMORY_USAGE,
stabilizedMemoryMiB,
],
]);
await cleanup();
}
}
/**
* Benchmarks speed by measuring engine init time, words per second, tokens per second,
* and total translation time across multiple runs.
*
* @param {object} options - The benchmark options.
* @param {string} options.page - The URL of the page to test.
* @param {TranslationsBencher.Journal} options.journal - The shared metrics journal.
* @param {string} options.sourceLanguage - The BCP-47 language tag for the source language.
* @param {string} options.targetLanguage - The BCP-47 language tag for the target language.
* @param {number} options.wordCount - The total word count of the page.
* @param {number} options.tokenCount - The total token count of the page.
* @param {number} options.speedBenchCount - The number of runs to perform for speed sampling.
*
* @returns {Promise<void>} Resolves when speed benchmarking is complete.
*/
static async #benchmarkTranslationSpeed({
page,
journal,
sourceLanguage,
targetLanguage,
wordCount,
tokenCount,
speedBenchCount,
}) {
for (let runNumber = 0; runNumber < speedBenchCount; ++runNumber) {
const { tab, cleanup, runInPage } = await loadTestPage({
page,
endToEndTest: true,
@@ -589,37 +844,18 @@ class TranslationsBencher {
const wordsPerSecond = wordCount / translationTimeSeconds;
const tokensPerSecond = tokenCount / translationTimeSeconds;
const totalMemoryMB =
await TranslationsBencher.#getInferenceProcessTotalMemoryUsage();
const decimalPrecision = 3;
journal.pushMetrics([
[
TranslationsBencher.METRIC_ENGINE_INIT_TIME,
Number(initTimeMilliseconds.toFixed(decimalPrecision)),
],
[
TranslationsBencher.METRIC_WORDS_PER_SECOND,
Number(wordsPerSecond.toFixed(decimalPrecision)),
],
[
TranslationsBencher.METRIC_TOKENS_PER_SECOND,
Number(tokensPerSecond.toFixed(decimalPrecision)),
],
[
TranslationsBencher.METRIC_TOTAL_MEMORY_USAGE,
Number(totalMemoryMB.toFixed(decimalPrecision)),
],
[TranslationsBencher.METRIC_ENGINE_INIT_TIME, initTimeMilliseconds],
[TranslationsBencher.METRIC_WORDS_PER_SECOND, wordsPerSecond],
[TranslationsBencher.METRIC_TOKENS_PER_SECOND, tokensPerSecond],
[
TranslationsBencher.METRIC_TOTAL_TRANSLATION_TIME,
Number(translationTimeSeconds.toFixed(decimalPrecision)),
translationTimeSeconds,
],
]);
await cleanup();
}
journal.reportMetrics();
}
/**
@@ -702,12 +938,12 @@ class TranslationsBencher {
}
/**
* Returns the total memory used by the inference process in megabytes.
* Returns the total memory used by the inference process in mebibytes (MiB).
*
* @returns {Promise<number>} The total memory usage in megabytes.
* @returns {Promise<number>} The total memory usage in mebibytes.
*/
static async #getInferenceProcessTotalMemoryUsage() {
const inferenceProcessInfo = await getInferenceProcessInfo();
const inferenceProcessInfo = await fetchInferenceProcessInfo();
return bytesToMebibytes(inferenceProcessInfo.memory);
}
}

View File

@@ -43,7 +43,7 @@ browser_translations_perf_es_en.js
::
--perfherder
--perfherder-metrics name:engine-init-time,unit:ms,shouldAlert:True,lowerIsBetter:True, name:words-per-second,unit:WPS,shouldAlert:True,lowerIsBetter:False, name:tokens-per-second,unit:TPS,shouldAlert:True,lowerIsBetter:False, name:total-memory-usage,unit:MiB,shouldAlert:True,lowerIsBetter:True, name:total-translation-time,unit:s,shouldAlert:True,lowerIsBetter:True
--perfherder-metrics name:engine-init-time,unit:ms,shouldAlert:True,lowerIsBetter:True, name:words-per-second,unit:WPS,shouldAlert:True,lowerIsBetter:False, name:tokens-per-second,unit:TPS,shouldAlert:True,lowerIsBetter:False, name:peak-memory-usage,unit:MiB,shouldAlert:True,lowerIsBetter:True, name:stabilized-memory-usage,unit:MiB,shouldAlert:True,lowerIsBetter:True, name:total-translation-time,unit:s,shouldAlert:True,lowerIsBetter:True
--verbose
--manifest perftest.toml
--manifest-flavor browser-chrome

View File

@@ -15,9 +15,17 @@ const { TranslationsPanelShared } = ChromeUtils.importESModule(
const { TranslationsUtils } = ChromeUtils.importESModule(
"chrome://global/content/translations/TranslationsUtils.mjs"
);
const { getInferenceProcessInfo } = ChromeUtils.importESModule(
"chrome://global/content/ml/Utils.sys.mjs"
);
// This is a bit silly, but ml/tests/browser/head.js relies on this function:
// https://searchfox.org/mozilla-central/rev/14f68f084d6a3bc438a3f973ed81d3a4dbab9629/toolkit/components/ml/tests/browser/head.js#23-25
//
// And it also pulls in the entirety of this file.
// https://searchfox.org/mozilla-central/rev/14f68f084d6a3bc438a3f973ed81d3a4dbab9629/toolkit/components/ml/tests/browser/head.js#41-46
//
// So we can't have a naming conflict of a variable defined twice like this.
// https://bugzilla.mozilla.org/show_bug.cgi?id=1949530
const { getInferenceProcessInfo: fetchInferenceProcessInfo } =
ChromeUtils.importESModule("chrome://global/content/ml/Utils.sys.mjs");
// Avoid about:blank's non-standard behavior.
const BLANK_PAGE =