Backed out changeset 8d6f7bc74d08 (bug 1917532) Backed out changeset 7c963e72cf06 (bug 1575506) Backed out changeset d5e110187781 (bug 1917530) Backed out changeset 1d2325ffded6 (bug 1917530) Backed out changeset 8a361e37e32c (bug 1917530) Backed out changeset c4011d92c7f7 (bug 1917530) Backed out changeset 1a45047dfb3c (bug 1917530) Backed out changeset ed6b35444c45 (bug 1917530)
771 lines
22 KiB
JavaScript
771 lines
22 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
"use strict";
|
|
|
|
/**
|
|
* @typedef {import("../translations").Bergamot} Bergamot
|
|
* @typedef {import("../translations").LanguageTranslationModelFiles} LanguageTranslationModelFiles
|
|
*/
|
|
|
|
/* global loadBergamot */
|
|
importScripts("chrome://global/content/translations/bergamot-translator.js");
|
|
|
|
// Respect the preference "browser.translations.logLevel".
|
|
let _loggingLevel = "Error";
|
|
function log(...args) {
|
|
if (_loggingLevel !== "Error" && _loggingLevel !== "Warn") {
|
|
console.log("Translations:", ...args);
|
|
}
|
|
}
|
|
function trace(...args) {
|
|
if (_loggingLevel === "Trace" || _loggingLevel === "All") {
|
|
console.log("Translations:", ...args);
|
|
}
|
|
}
|
|
|
|
// Throw Promise rejection errors so that they are visible in the console.
|
|
self.addEventListener("unhandledrejection", event => {
|
|
throw event.reason;
|
|
});
|
|
|
|
/**
|
|
* The alignment for each file type, file type strings should be same as in the
|
|
* model registry.
|
|
*/
|
|
const MODEL_FILE_ALIGNMENTS = {
|
|
model: 256,
|
|
lex: 64,
|
|
vocab: 64,
|
|
qualityModel: 64,
|
|
srcvocab: 64,
|
|
trgvocab: 64,
|
|
};
|
|
|
|
/**
|
|
* Initialize the engine, and get it ready to handle translation requests.
|
|
* The "initialize" message must be received before any other message handling
|
|
* requests will be processed.
|
|
*/
|
|
addEventListener("message", handleInitializationMessage);
|
|
|
|
async function handleInitializationMessage({ data }) {
|
|
const startTime = performance.now();
|
|
if (data.type !== "initialize") {
|
|
console.error(
|
|
"The TranslationEngine worker received a message before it was initialized."
|
|
);
|
|
return;
|
|
}
|
|
|
|
try {
|
|
const { fromLanguage, toLanguage, enginePayload, logLevel, innerWindowId } =
|
|
data;
|
|
|
|
if (!fromLanguage) {
|
|
throw new Error('Worker initialization missing "fromLanguage"');
|
|
}
|
|
if (!toLanguage) {
|
|
throw new Error('Worker initialization missing "toLanguage"');
|
|
}
|
|
|
|
if (logLevel) {
|
|
// Respect the "browser.translations.logLevel" preference.
|
|
_loggingLevel = logLevel;
|
|
}
|
|
|
|
let engine;
|
|
if (enginePayload.isMocked) {
|
|
// The engine is testing mode, and no Bergamot wasm is available.
|
|
engine = new MockedEngine(fromLanguage, toLanguage);
|
|
} else {
|
|
const { bergamotWasmArrayBuffer, languageModelFiles } = enginePayload;
|
|
const bergamot = await BergamotUtils.initializeWasm(
|
|
bergamotWasmArrayBuffer
|
|
);
|
|
engine = new Engine(
|
|
fromLanguage,
|
|
toLanguage,
|
|
bergamot,
|
|
languageModelFiles
|
|
);
|
|
}
|
|
|
|
ChromeUtils.addProfilerMarker(
|
|
"TranslationsWorker",
|
|
{ startTime, innerWindowId },
|
|
"Translations engine loaded."
|
|
);
|
|
|
|
handleMessages(engine);
|
|
postMessage({ type: "initialization-success" });
|
|
} catch (error) {
|
|
console.error(error);
|
|
postMessage({ type: "initialization-error", error: error?.message });
|
|
}
|
|
|
|
removeEventListener("message", handleInitializationMessage);
|
|
}
|
|
|
|
/**
|
|
* Sets up the message handling for the worker.
|
|
*
|
|
* @param {Engine | MockedEngine} engine
|
|
*/
|
|
function handleMessages(engine) {
|
|
let discardPromise;
|
|
addEventListener("message", async ({ data }) => {
|
|
try {
|
|
if (data.type === "initialize") {
|
|
throw new Error("The Translations engine must not be re-initialized.");
|
|
}
|
|
if (data.type === "translation-request") {
|
|
// Only show these messages when "All" logging is on, since there are so many
|
|
// of them.
|
|
trace("Received message", data);
|
|
} else {
|
|
log("Received message", data);
|
|
}
|
|
|
|
switch (data.type) {
|
|
case "translation-request": {
|
|
const { sourceText, messageId, isHTML, innerWindowId } = data;
|
|
if (discardPromise) {
|
|
// Wait for messages to be discarded if there are any.
|
|
await discardPromise;
|
|
}
|
|
try {
|
|
const { whitespaceBefore, whitespaceAfter, cleanedSourceText } =
|
|
cleanText(sourceText);
|
|
|
|
// Add a translation to the work queue, and when it returns, post the message
|
|
// back. The translation may never return if the translations are discarded
|
|
// before it have time to be run. In this case this await is just never
|
|
// resolved, and the postMessage is never run.
|
|
let targetText = await engine.translate(
|
|
cleanedSourceText,
|
|
isHTML,
|
|
innerWindowId
|
|
);
|
|
|
|
// Ensure the whitespace is retained.
|
|
targetText = whitespaceBefore + targetText + whitespaceAfter;
|
|
|
|
// This logging level can be very verbose and slow, so only do it under the
|
|
// "Trace" level, which is the most verbose. Set the logging level to "Info" to avoid
|
|
// these, and get all of the other logs.
|
|
trace("Translation complete", {
|
|
sourceText,
|
|
targetText,
|
|
isHTML,
|
|
innerWindowId,
|
|
});
|
|
|
|
postMessage({
|
|
type: "translation-response",
|
|
targetText,
|
|
messageId,
|
|
});
|
|
} catch (error) {
|
|
console.error(error);
|
|
let message = "An error occurred in the engine worker.";
|
|
if (typeof error?.message === "string") {
|
|
message = error.message;
|
|
}
|
|
let stack = "(no stack)";
|
|
if (typeof error?.stack === "string") {
|
|
stack = error.stack;
|
|
}
|
|
postMessage({
|
|
type: "translation-error",
|
|
error: { message, stack },
|
|
messageId,
|
|
innerWindowId,
|
|
});
|
|
}
|
|
break;
|
|
}
|
|
case "discard-translation-queue": {
|
|
ChromeUtils.addProfilerMarker(
|
|
"TranslationsWorker",
|
|
{ innerWindowId: data.innerWindowId },
|
|
"Translations discard requested"
|
|
);
|
|
|
|
discardPromise = engine.discardTranslations(data.innerWindowId);
|
|
await discardPromise;
|
|
discardPromise = null;
|
|
|
|
// Signal to the "message" listeners in the main thread to stop listening.
|
|
postMessage({
|
|
type: "translations-discarded",
|
|
});
|
|
break;
|
|
}
|
|
default:
|
|
console.warn("Unknown message type:", data.type);
|
|
}
|
|
} catch (error) {
|
|
// Ensure the unexpected errors are surfaced in the console.
|
|
console.error(error);
|
|
}
|
|
});
|
|
}
|
|
|
|
/**
|
|
* The Engine is created once for a language pair. The initialization process copies the
|
|
* ArrayBuffers for the language buffers from JS-managed ArrayBuffers, to aligned
|
|
* internal memory for the wasm heap.
|
|
*
|
|
* After this the ArrayBuffers are discarded and GC'd. This file should be managed
|
|
* from the TranslationsEngine class on the main thread.
|
|
*
|
|
* This class starts listening for messages only after the Bergamot engine has been
|
|
* fully initialized.
|
|
*/
|
|
class Engine {
|
|
/**
|
|
* @param {string} fromLanguage
|
|
* @param {string} toLanguage
|
|
* @param {Bergamot} bergamot
|
|
* @param {Array<LanguageTranslationModelFiles>} languageTranslationModelFiles
|
|
*/
|
|
constructor(
|
|
fromLanguage,
|
|
toLanguage,
|
|
bergamot,
|
|
languageTranslationModelFiles
|
|
) {
|
|
/** @type {string} */
|
|
this.fromLanguage = fromLanguage;
|
|
/** @type {string} */
|
|
this.toLanguage = toLanguage;
|
|
/** @type {Bergamot} */
|
|
this.bergamot = bergamot;
|
|
/** @type {Bergamot["TranslationModel"][]} */
|
|
this.languageTranslationModels = languageTranslationModelFiles.map(
|
|
languageTranslationModelFiles =>
|
|
BergamotUtils.constructSingleTranslationModel(
|
|
bergamot,
|
|
languageTranslationModelFiles
|
|
)
|
|
);
|
|
|
|
/** @type {Bergamot["BlockingService"]} */
|
|
this.translationService = new bergamot.BlockingService({
|
|
// Caching is disabled (see https://github.com/mozilla/firefox-translations/issues/288)
|
|
cacheSize: 0,
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Run the translation models to perform a batch of message translations. The
|
|
* promise is rejected when the sync version of this function throws an error.
|
|
* This function creates an async interface over the synchronous translation
|
|
* mechanism. This allows other microtasks such as message handling to still work
|
|
* even though the translations are CPU-intensive.
|
|
*
|
|
* @param {string} sourceText
|
|
* @param {boolean} isHTML
|
|
* @param {number} innerWindowId - This is required
|
|
*
|
|
* @returns {Promise<string>}sourceText
|
|
*/
|
|
translate(sourceText, isHTML, innerWindowId) {
|
|
return this.#getWorkQueue(innerWindowId).runTask(() =>
|
|
this.#syncTranslate(sourceText, isHTML, innerWindowId)
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Map each innerWindowId to its own WorkQueue. This makes it easy to shut down
|
|
* an entire queue of work when the page is unloaded.
|
|
*
|
|
* @type {Map<number, WorkQueue>}
|
|
*/
|
|
#workQueues = new Map();
|
|
|
|
/**
|
|
* Get or create a `WorkQueue` that is unique to an `innerWindowId`.
|
|
*
|
|
* @param {number} innerWindowId
|
|
* @returns {WorkQueue}
|
|
*/
|
|
#getWorkQueue(innerWindowId) {
|
|
let workQueue = this.#workQueues.get(innerWindowId);
|
|
if (workQueue) {
|
|
return workQueue;
|
|
}
|
|
workQueue = new WorkQueue(innerWindowId);
|
|
this.#workQueues.set(innerWindowId, workQueue);
|
|
return workQueue;
|
|
}
|
|
|
|
/**
|
|
* Cancels any in-progress translations by removing the work queue.
|
|
*
|
|
* @param {number} innerWindowId
|
|
*/
|
|
discardTranslations(innerWindowId) {
|
|
let workQueue = this.#workQueues.get(innerWindowId);
|
|
if (workQueue) {
|
|
workQueue.cancelWork();
|
|
this.#workQueues.delete(innerWindowId);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Run the translation models to perform a translation. This
|
|
* blocks the worker thread until it is completed.
|
|
*
|
|
* @param {string} sourceText
|
|
* @param {boolean} isHTML
|
|
* @param {number} innerWindowId
|
|
* @returns {string}
|
|
*/
|
|
#syncTranslate(sourceText, isHTML, innerWindowId) {
|
|
const startTime = performance.now();
|
|
let response;
|
|
const { messages, options } = BergamotUtils.getTranslationArgs(
|
|
this.bergamot,
|
|
sourceText,
|
|
isHTML
|
|
);
|
|
try {
|
|
if (messages.size() === 0) {
|
|
return [];
|
|
}
|
|
|
|
/** @type {Bergamot["VectorResponse"]} */
|
|
let responses;
|
|
|
|
if (this.languageTranslationModels.length === 1) {
|
|
responses = this.translationService.translate(
|
|
this.languageTranslationModels[0],
|
|
messages,
|
|
options
|
|
);
|
|
} else if (this.languageTranslationModels.length === 2) {
|
|
responses = this.translationService.translateViaPivoting(
|
|
this.languageTranslationModels[0],
|
|
this.languageTranslationModels[1],
|
|
messages,
|
|
options
|
|
);
|
|
} else {
|
|
throw new Error(
|
|
"Too many models were provided to the translation worker."
|
|
);
|
|
}
|
|
|
|
// Report on the time it took to do this translation.
|
|
ChromeUtils.addProfilerMarker(
|
|
"TranslationsWorker",
|
|
{ startTime, innerWindowId },
|
|
`Translated ${sourceText.length} code units.`
|
|
);
|
|
|
|
const targetText = responses.get(0).getTranslatedText();
|
|
return targetText;
|
|
} finally {
|
|
// Free up any memory that was allocated. This will always run.
|
|
messages?.delete();
|
|
options?.delete();
|
|
response?.delete();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Static utilities to help work with the Bergamot wasm module.
|
|
*/
|
|
class BergamotUtils {
|
|
/**
|
|
* Construct a single translation model.
|
|
*
|
|
* @param {Bergamot} bergamot
|
|
* @param {LanguageTranslationModelFiles} languageTranslationModelFiles
|
|
* @returns {Bergamot["TranslationModel"]}
|
|
*/
|
|
static constructSingleTranslationModel(
|
|
bergamot,
|
|
languageTranslationModelFiles
|
|
) {
|
|
log(`Constructing translation model.`);
|
|
|
|
const { model, lex, vocab, qualityModel, srcvocab, trgvocab } =
|
|
BergamotUtils.allocateModelMemory(
|
|
bergamot,
|
|
languageTranslationModelFiles
|
|
);
|
|
|
|
// Transform the bytes to mb, like "10.2mb"
|
|
const getMemory = memory => `${Math.floor(memory.size() / 100_000) / 10}mb`;
|
|
|
|
let memoryLog = `Model memory sizes in wasm heap:`;
|
|
memoryLog += `\n Model: ${getMemory(model)}`;
|
|
memoryLog += `\n Shortlist: ${getMemory(lex)}`;
|
|
|
|
// Set up the vocab list, which could either be a single "vocab" model, or a
|
|
// "srcvocab" and "trgvocab" pair.
|
|
const vocabList = new bergamot.AlignedMemoryList();
|
|
|
|
if (vocab) {
|
|
vocabList.push_back(vocab);
|
|
memoryLog += `\n Vocab: ${getMemory(vocab)}`;
|
|
} else if (srcvocab && trgvocab) {
|
|
vocabList.push_back(srcvocab);
|
|
vocabList.push_back(trgvocab);
|
|
memoryLog += `\n Src Vocab: ${getMemory(srcvocab)}`;
|
|
memoryLog += `\n Trg Vocab: ${getMemory(trgvocab)}`;
|
|
} else {
|
|
throw new Error("Vocabulary key is not found.");
|
|
}
|
|
|
|
if (qualityModel) {
|
|
memoryLog += `\n QualityModel: ${getMemory(qualityModel)}\n`;
|
|
}
|
|
|
|
const config = BergamotUtils.generateTextConfig({
|
|
"beam-size": "1",
|
|
normalize: "1.0",
|
|
"word-penalty": "0",
|
|
"max-length-break": "128",
|
|
"mini-batch-words": "1024",
|
|
workspace: "128",
|
|
"max-length-factor": "2.0",
|
|
"skip-cost": (!qualityModel).toString(),
|
|
"cpu-threads": "0",
|
|
quiet: "true",
|
|
"quiet-translation": "true",
|
|
"gemm-precision":
|
|
languageTranslationModelFiles.model.record.name.endsWith("intgemm8.bin")
|
|
? "int8shiftAll"
|
|
: "int8shiftAlphaAll",
|
|
alignment: "soft",
|
|
});
|
|
|
|
log(`Bergamot translation model config: ${config}`);
|
|
log(memoryLog);
|
|
|
|
return new bergamot.TranslationModel(
|
|
config,
|
|
model,
|
|
lex,
|
|
vocabList,
|
|
qualityModel ?? null
|
|
);
|
|
}
|
|
|
|
/**
|
|
* The models must be placed in aligned memory that the Bergamot wasm module has access
|
|
* to. This function copies over the model blobs into this memory space.
|
|
*
|
|
* @param {Bergamot} bergamot
|
|
* @param {LanguageTranslationModelFiles} languageTranslationModelFiles
|
|
* @returns {LanguageTranslationModelFilesAligned}
|
|
*/
|
|
static allocateModelMemory(bergamot, languageTranslationModelFiles) {
|
|
/** @type {LanguageTranslationModelFilesAligned} */
|
|
const results = {};
|
|
|
|
for (const [fileType, file] of Object.entries(
|
|
languageTranslationModelFiles
|
|
)) {
|
|
const alignment = MODEL_FILE_ALIGNMENTS[fileType];
|
|
if (!alignment) {
|
|
throw new Error(`Unknown file type: "${fileType}"`);
|
|
}
|
|
|
|
const alignedMemory = new bergamot.AlignedMemory(
|
|
file.buffer.byteLength,
|
|
alignment
|
|
);
|
|
|
|
alignedMemory.getByteArrayView().set(new Uint8Array(file.buffer));
|
|
|
|
results[fileType] = alignedMemory;
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* Initialize the Bergamot translation engine. It is a wasm compiled version of the
|
|
* Marian translation software. The wasm is delivered remotely to cut down on binary size.
|
|
*
|
|
* https://github.com/mozilla/bergamot-translator/
|
|
*
|
|
* @param {ArrayBuffer} wasmBinary
|
|
* @returns {Promise<Bergamot>}
|
|
*/
|
|
static initializeWasm(wasmBinary) {
|
|
return new Promise((resolve, reject) => {
|
|
/** @type {number} */
|
|
let start = performance.now();
|
|
|
|
/** @type {Bergamot} */
|
|
const bergamot = loadBergamot({
|
|
// This is the amount of memory that a simple run of Bergamot uses, in bytes.
|
|
INITIAL_MEMORY: 234_291_200,
|
|
print: log,
|
|
onAbort() {
|
|
reject(new Error("Error loading Bergamot wasm module."));
|
|
},
|
|
onRuntimeInitialized: async () => {
|
|
const duration = performance.now() - start;
|
|
log(
|
|
`Bergamot wasm runtime initialized in ${duration / 1000} seconds.`
|
|
);
|
|
// Await at least one microtask so that the captured `bergamot` variable is
|
|
// fully initialized.
|
|
await Promise.resolve();
|
|
resolve(bergamot);
|
|
},
|
|
wasmBinary,
|
|
});
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Maps the Bergamot Vector to a JS array
|
|
*
|
|
* @param {Bergamot["Vector"]} vector
|
|
* @param {Function} fn
|
|
* @returns {Array}
|
|
*/
|
|
static mapVector(vector, fn) {
|
|
const result = [];
|
|
for (let index = 0; index < vector.size(); index++) {
|
|
result.push(fn(vector.get(index), index));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Generate a config for the Marian translation service. It requires specific whitespace.
|
|
*
|
|
* https://marian-nmt.github.io/docs/cmd/marian-decoder/
|
|
*
|
|
* @param {Record<string, string>} config
|
|
* @returns {string}
|
|
*/
|
|
static generateTextConfig(config) {
|
|
const indent = " ";
|
|
let result = "\n";
|
|
|
|
for (const [key, value] of Object.entries(config)) {
|
|
result += `${indent}${key}: ${value}\n`;
|
|
}
|
|
|
|
return result + indent;
|
|
}
|
|
|
|
/**
|
|
* JS objects need to be translated into wasm objects to configure the translation engine.
|
|
*
|
|
* @param {Bergamot} bergamot
|
|
* @param {string} sourceText
|
|
* @returns {{ messages: Bergamot["VectorString"], options: Bergamot["VectorResponseOptions"] }}
|
|
*/
|
|
static getTranslationArgs(bergamot, sourceText, isHTML) {
|
|
const messages = new bergamot.VectorString();
|
|
const options = new bergamot.VectorResponseOptions();
|
|
|
|
// Empty paragraphs break the translation.
|
|
if (sourceText) {
|
|
messages.push_back(sourceText);
|
|
options.push_back({
|
|
qualityScores: false,
|
|
alignment: true,
|
|
html: isHTML,
|
|
});
|
|
}
|
|
|
|
return { messages, options };
|
|
}
|
|
}
|
|
|
|
/**
|
|
* For testing purposes, provide a fully mocked engine. This allows for easy integration
|
|
* testing of the UI, without having to rely on downloading remote models and remote
|
|
* wasm binaries.
|
|
*/
|
|
class MockedEngine {
|
|
/**
|
|
* @param {string} fromLanguage
|
|
* @param {string} toLanguage
|
|
*/
|
|
constructor(fromLanguage, toLanguage) {
|
|
/** @type {string} */
|
|
this.fromLanguage = fromLanguage;
|
|
/** @type {string} */
|
|
this.toLanguage = toLanguage;
|
|
}
|
|
|
|
/**
|
|
* Create a fake translation of the text.
|
|
*
|
|
* @param {string} sourceText
|
|
* @param {bool} isHTML
|
|
* @returns {string}
|
|
*/
|
|
translate(sourceText, isHTML) {
|
|
// Note when an HTML translations is requested.
|
|
let html = isHTML ? ", html" : "";
|
|
const targetText = sourceText.toUpperCase();
|
|
return `${targetText} [${this.fromLanguage} to ${this.toLanguage}${html}]`;
|
|
}
|
|
|
|
discardTranslations() {}
|
|
}
|
|
|
|
/**
|
|
* This class takes tasks that may block the thread's event loop, and has them yield
|
|
* after a time budget via setTimeout calls to allow other code to execute.
|
|
*/
|
|
class WorkQueue {
|
|
#TIME_BUDGET = 100; // ms
|
|
#RUN_IMMEDIATELY_COUNT = 20;
|
|
|
|
/** @type {Array<{task: Function, resolve: Function}>} */
|
|
#tasks = [];
|
|
#isRunning = false;
|
|
#isWorkCancelled = false;
|
|
#runImmediately = this.#RUN_IMMEDIATELY_COUNT;
|
|
|
|
/**
|
|
* @param {number} innerWindowId
|
|
*/
|
|
constructor(innerWindowId) {
|
|
this.innerWindowId = innerWindowId;
|
|
}
|
|
|
|
/**
|
|
* Run the task and return the result.
|
|
*
|
|
* @template {any} T
|
|
* @param {() => T} task
|
|
* @returns {Promise<T>}
|
|
*/
|
|
runTask(task) {
|
|
if (this.#runImmediately > 0) {
|
|
// Run the first N translations immediately, most likely these are the user-visible
|
|
// translations on the page, as they are sent in first. The setTimeout of 0 can
|
|
// still delay the translations noticeably.
|
|
this.#runImmediately--;
|
|
return Promise.resolve(task());
|
|
}
|
|
return new Promise((resolve, reject) => {
|
|
this.#tasks.push({ task, resolve, reject });
|
|
this.#run().catch(error => console.error(error));
|
|
});
|
|
}
|
|
|
|
/**
|
|
* The internal run function.
|
|
*/
|
|
async #run() {
|
|
if (this.#isRunning) {
|
|
// The work queue is already running.
|
|
return;
|
|
}
|
|
|
|
this.#isRunning = true;
|
|
|
|
// Measure the timeout
|
|
let lastTimeout = null;
|
|
|
|
let tasksInBatch = 0;
|
|
const addProfilerMarker = () => {
|
|
ChromeUtils.addProfilerMarker(
|
|
"TranslationsWorker WorkQueue",
|
|
{ startTime: lastTimeout, innerWindowId: this.innerWindowId },
|
|
`WorkQueue processed ${tasksInBatch} tasks`
|
|
);
|
|
};
|
|
|
|
while (this.#tasks.length !== 0) {
|
|
if (this.#isWorkCancelled) {
|
|
// The work was already cancelled.
|
|
break;
|
|
}
|
|
const now = performance.now();
|
|
|
|
if (lastTimeout === null) {
|
|
lastTimeout = now;
|
|
// Allow other work to get on the queue.
|
|
await new Promise(resolve => setTimeout(resolve, 0));
|
|
} else if (now - lastTimeout > this.#TIME_BUDGET) {
|
|
// Perform a timeout with no effective wait. This clears the current
|
|
// promise queue from the event loop.
|
|
await new Promise(resolve => setTimeout(resolve, 0));
|
|
addProfilerMarker();
|
|
lastTimeout = performance.now();
|
|
}
|
|
|
|
// Check this between every `await`.
|
|
if (this.#isWorkCancelled || !this.#tasks.length) {
|
|
break;
|
|
}
|
|
|
|
tasksInBatch++;
|
|
const { task, resolve, reject } = this.#tasks.shift();
|
|
try {
|
|
const result = await task();
|
|
|
|
// Check this between every `await`.
|
|
if (this.#isWorkCancelled) {
|
|
break;
|
|
}
|
|
// The work is done, resolve the original task.
|
|
resolve(result);
|
|
} catch (error) {
|
|
reject(error);
|
|
}
|
|
}
|
|
addProfilerMarker();
|
|
this.#isRunning = false;
|
|
}
|
|
|
|
async cancelWork() {
|
|
this.#isWorkCancelled = true;
|
|
this.#tasks = [];
|
|
await new Promise(resolve => setTimeout(resolve, 0));
|
|
this.#isWorkCancelled = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This regex matches the whitespace before and after a text, so that it is preserved.
|
|
*/
|
|
const whitespaceRegex = /^(\s*)(.*?)(\s*)$/s;
|
|
// Include newlines in .*
|
|
// (^^^) (^^^) Match the whitespace at the beginning and end.
|
|
// (^^^) /s Non-greedily match the text (including newlines.)
|
|
|
|
/**
|
|
* Do any cleaning steps for the text that are required before sending it into
|
|
* the translation engine.
|
|
*
|
|
* @param {string} sourceText
|
|
* @returns {{ whitespaceBefore: string, whitespaceAfter: string, cleanedSourceText: string }}
|
|
*/
|
|
function cleanText(sourceText) {
|
|
// Whitespace at the beginning or end can confuse translations, but can affect the
|
|
// presentation of the final result.
|
|
const result = whitespaceRegex.exec(sourceText);
|
|
if (!result) {
|
|
throw new Error("The whitespace regex should always return a result.");
|
|
}
|
|
const whitespaceBefore = result[1];
|
|
const whitespaceAfter = result[3];
|
|
let cleanedSourceText = result[2];
|
|
|
|
// Remove any soft hyphens, as they will break tokenization.
|
|
cleanedSourceText = cleanedSourceText.replaceAll("\u00AD", "");
|
|
|
|
return { whitespaceBefore, whitespaceAfter, cleanedSourceText };
|
|
}
|