tubestation/browser/components/backup/Archive.worker.mjs
Stephen Thompson ad5c1d8ebf Bug 1906169 - Error plumbing for profile backup web worker r=backup-reviewers,mconley
Some of the Firefox profile backup code runs in a web worker, so errors thrown in the worker do not automatically keep their full context when caught on the main thread.

This change introduces a BackupError type for throwing errors with causes specific to Firefox profile backup. The new error type is wired into the PromiseWorker machinery in the Firefox codebase so that error details can be serialized and deserialized across the worker boundary.

Differential Revision: https://phabricator.services.mozilla.com/D215920
2024-07-08 17:49:45 +00:00
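
For illustration, the pattern below shows one way an error type can be made to
survive the worker boundary. This is a hedged sketch, not the BackupError
implementation that landed in D215920: the toMsg()/fromMsg() pair and the
ExceptionHandlers registration mirror how other Firefox errors (notably
OS.File.Error) have historically been plumbed through PromiseWorker, and the
property names here are assumptions.

    // Sketch only; names and wiring are illustrative assumptions.
    class BackupError extends Error {
      constructor(message, cause) {
        super(message);
        this.name = "BackupError";
        this.cause = cause;
      }

      // Flatten into a structured-clone-friendly object before the worker
      // posts the failure back to the main thread.
      toMsg() {
        return { exn: this.name, message: this.message, cause: this.cause };
      }

      // Rebuild a real BackupError from the flattened form on the
      // main-thread side.
      static fromMsg(msg) {
        return new BackupError(msg.message, msg.cause);
      }
    }

    // Main-thread side: register a deserializer with the PromiseWorker so
    // rejections arrive as BackupError instances rather than plain objects.
    worker.ExceptionHandlers.BackupError = BackupError.fromMsg;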

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";
// The ArchiveUtils module is designed to be imported in both worker and
// main thread contexts.
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
import { ArchiveEncryptor } from "resource:///modules/backup/ArchiveEncryption.sys.mjs";
import { BackupError } from "resource:///modules/backup/BackupError.mjs";
import { ERRORS } from "resource:///modules/backup/BackupConstants.mjs";

/**
 * An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
 * lifting of dealing with single-file archives for backups, to avoid doing
 * much on the main thread. This is mostly important for single-file archive
 * _creation_, as this is supposed to occur silently in the background without
 * the user noticing any degradation in performance.
 */
class ArchiveWorker {
  #worker = null;

  constructor() {
    // Connect this instance to the PromiseWorker machinery.
    this.#connectToPromiseWorker();
  }
  /**
   * Generates a boundary string that can be used to separate sections in a
   * multipart/mixed MIME message.
   *
   * See https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
   *
   * @returns {string}
   */
  #generateBoundary() {
    return (
      "----=_Part_" +
      new Date().getTime() +
      "_" +
      Math.random().toString(36).slice(2, 12) +
      "_" +
      Math.random().toString(36).slice(2, 12)
    );
  }
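
  // As an illustrative sketch only: with a timestamp of 1720460985000 and two
  // random base36 suffixes, the boundary above might come out as
  // "----=_Part_1720460985000_k3j9x2m1ab_q8w7e6r5ty" (values hypothetical).
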
  /**
   * Calculates how many base64 bytes will be generated from some number of
   * unencoded bytes. This presumes that the base64 bytes include a newline
   * terminator at the end.
   *
   * @param {number} bytes
   *   The number of bytes to be converted to base64.
   * @param {boolean} encrypting
   *   True if encryption via ArchiveEncryptor is being applied.
   * @returns {number}
   */
  #computeChunkBase64Bytes(bytes, encrypting) {
    if (encrypting) {
      bytes += ArchiveUtils.TAG_LENGTH_BYTES;
    }
    return 4 * Math.ceil(bytes / 3) + 1;
  }
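
  // A worked example of the formula above, assuming (hypothetically) that
  // ArchiveUtils.TAG_LENGTH_BYTES is 16: a 1000-byte chunk without encryption
  // needs 4 * Math.ceil(1000 / 3) + 1 = 1337 base64 bytes including the
  // trailing newline; with encryption the chunk grows to 1016 bytes, needing
  // 4 * Math.ceil(1016 / 3) + 1 = 1357.
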
  /**
   * @typedef {object} EncryptionArgs
   * @property {CryptoKey} publicKey
   *   The RSA-OAEP public key that will be used to derive keys for encrypting
   *   the backup.
   * @property {CryptoKey} backupAuthKey
   *   The AES-GCM key that will be used to authenticate the owner of the
   *   backup.
   * @property {Uint8Array} wrappedSecrets
   *   The encrypted backup secrets computed by ArchiveEncryptionState.
   * @property {Uint8Array} salt
   *   A salt computed for the PBKDF2 stretching of the recovery code.
   * @property {Uint8Array} nonce
   *   A nonce computed when wrapping the private key and OSKeyStore secret.
   */
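
  // A sketch of how an EncryptionArgs bundle could be assembled with Web
  // Crypto. The real keys and secrets come from ArchiveEncryptionState; the
  // parameters below are illustrative assumptions, not the backup code's
  // actual configuration:
  //
  //   let { publicKey } = await crypto.subtle.generateKey(
  //     {
  //       name: "RSA-OAEP",
  //       modulusLength: 2048,
  //       publicExponent: new Uint8Array([1, 0, 1]),
  //       hash: "SHA-256",
  //     },
  //     true,
  //     ["encrypt", "decrypt"]
  //   );
  //   let backupAuthKey = await crypto.subtle.generateKey(
  //     { name: "AES-GCM", length: 256 },
  //     true,
  //     ["encrypt", "decrypt"]
  //   );
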
  /**
   * Constructs a single-file archive for a backup on the filesystem. A
   * single-file archive is a specially crafted HTML document that includes,
   * among other things, an inlined multipart/mixed MIME message within a
   * document comment.
   *
   * @param {object} params
   *   Arguments that are described in more detail below.
   * @param {string} params.archivePath
   *   The path on the file system to write the single-file archive to.
   * @param {string} params.markup
   *   The HTML markup to insert into the archive file before the HTML
   *   comment block. This is the markup that will be rendered if the HTML
   *   file is opened in a web browser.
   * @param {object} params.backupMetadata
   *   The metadata associated with this backup. This is a copy of the
   *   metadata object that is contained within the compressed backup's
   *   manifest.
   * @param {string} params.compressedBackupSnapshotPath
   *   The path on the file system where the compressed backup file is
   *   located.
   * @param {EncryptionArgs} [params.encryptionArgs=undefined]
   *   Optional EncryptionArgs, which will be used to encrypt this archive.
   * @param {number} params.chunkSize
   *   The size of the chunks to break the byte stream into for encoding.
   * @returns {Promise<boolean>}
   *   Resolves with true once the archive has been fully written.
   */
  async constructArchive({
    archivePath,
    markup,
    backupMetadata,
    compressedBackupSnapshotPath,
    encryptionArgs,
    chunkSize,
  }) {
    let encryptor = null;
    if (encryptionArgs) {
      encryptor = await ArchiveEncryptor.initialize(
        encryptionArgs.publicKey,
        encryptionArgs.backupAuthKey
      );
    }

    let boundary = this.#generateBoundary();

    let jsonBlock;
    if (encryptor) {
      jsonBlock = await encryptor.confirm(
        backupMetadata,
        encryptionArgs.wrappedSecrets,
        encryptionArgs.salt,
        encryptionArgs.nonce
      );
    } else {
      jsonBlock = {
        version: ArchiveUtils.SCHEMA_VERSION,
        encConfig: null,
        meta: backupMetadata,
      };
    }

    let serializedJsonBlock = JSON.stringify(jsonBlock);
    let textEncoder = new TextEncoder();
    let jsonBlockLength = textEncoder.encode(serializedJsonBlock).length;

    // Once we get the ability to stream to the filesystem from IOUtils in a
    // worker, we should use that instead of appending each of these chunks.
    //
    // This isn't supposed to be some kind of generalized MIME message
    // generator, so we're happy to construct it by hand here. Note that the
    // blank lines in the template below are significant: MIME requires them
    // between headers and body, and the marker regex in parseArchiveHeader
    // expects a blank line after the Content-Type header.
    await IOUtils.writeUTF8(archivePath, markup);
    await IOUtils.writeUTF8(
      archivePath,
      `
${ArchiveUtils.INLINE_MIME_START_MARKER}
Content-Type: multipart/mixed; boundary="${boundary}"

--${boundary}
Content-Type: application/json; charset=utf-8
Content-Disposition: attachment; filename="archive.json"
Content-Length: ${jsonBlockLength}

${serializedJsonBlock}
`,
      { mode: "append" }
    );

    let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
      compressedBackupSnapshotPath
    );
    let totalBytesToRead = compressedBackupSnapshotFile.size;

    // To calculate the Content-Length of the base64 block, we start by
    // computing how many newlines we'll be adding...
    let totalNewlines = Math.ceil(totalBytesToRead / chunkSize);
    // Next, we determine how many full-sized chunks of chunkSize we'll be
    // using, and multiply that by the number of base64 bytes that such a
    // chunk will require.
    let fullSizeChunks = totalNewlines - 1;
    let fullSizeChunkBase64Bytes = this.#computeChunkBase64Bytes(
      chunkSize,
      !!encryptor
    );
    let totalBase64Bytes = fullSizeChunks * fullSizeChunkBase64Bytes;

    // Finally, if there are any leftover bytes that are less than chunkSize,
    // determine how many bytes those will require, and add that to our
    // total.
    let leftoverChunkBytes = totalBytesToRead % chunkSize;
    if (leftoverChunkBytes) {
      totalBase64Bytes += this.#computeChunkBase64Bytes(
        leftoverChunkBytes,
        !!encryptor
      );
    } else {
      // We divided perfectly by chunkSize, so add another
      // fullSizeChunkBase64Bytes to the total.
      totalBase64Bytes += fullSizeChunkBase64Bytes;
    }
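
    // A worked example of the totals above (no encryption): for
    // totalBytesToRead = 1000 and chunkSize = 300, totalNewlines = 4 and
    // fullSizeChunks = 3. Each full chunk costs
    // 4 * Math.ceil(300 / 3) + 1 = 401 base64 bytes, so the three full
    // chunks cost 1203; the 100 leftover bytes cost
    // 4 * Math.ceil(100 / 3) + 1 = 137, for a total Content-Length of 1340.
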
    await IOUtils.writeUTF8(
      archivePath,
      `--${boundary}
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="archive.zip"
Content-Transfer-Encoding: base64
Content-Length: ${totalBase64Bytes}

`,
      { mode: "append" }
    );

    // And now we read in the bytes of the compressed file, base64 encode
    // them, and append them to the document. This is also where encryption
    // is applied, chunk by chunk, when an encryptor is present.
    let currentIndex = 0;
    while (currentIndex < totalBytesToRead) {
      let bytesToRead = Math.min(chunkSize, totalBytesToRead - currentIndex);
      if (bytesToRead <= 0) {
        throw new BackupError(
          "Failed to calculate the right number of bytes to read.",
          ERRORS.FILE_SYSTEM_ERROR
        );
      }
      let buffer = new Uint8Array(bytesToRead);
      compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);

      let bytesToWrite;
      if (encryptor) {
        // Check against the total rather than the chunk size so that the
        // final chunk is flagged even when the file size is an exact
        // multiple of chunkSize.
        let isLastChunk = currentIndex + bytesToRead >= totalBytesToRead;
        bytesToWrite = await encryptor.encrypt(buffer, isLastChunk);
      } else {
        bytesToWrite = buffer;
      }

      // We're very intentionally newline-separating these blocks here, as
      // these blocks may have been run through encryption, and the same
      // blocks must be run through decryption to unpack the archive.
      // Newline-separation makes it easier to identify and manage these
      // blocks.
      await IOUtils.writeUTF8(
        archivePath,
        ArchiveUtils.arrayToBase64(bytesToWrite) + "\n",
        {
          mode: "append",
        }
      );
      currentIndex += bytesToRead;
    }

    await IOUtils.writeUTF8(
      archivePath,
      `
--${boundary}
${ArchiveUtils.INLINE_MIME_END_MARKER}
`,
      { mode: "append" }
    );

    compressedBackupSnapshotFile.close();
    return true;
  }
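
  // For reference, the overall shape of the emitted archive is roughly the
  // following (a sketch; the boundary value is illustrative, and the
  // DOCTYPE/version header is part of the caller-supplied markup):
  //
  //   <!DOCTYPE html>
  //   <!-- Version: N -->
  //   ...renderable HTML markup...
  //   ${ArchiveUtils.INLINE_MIME_START_MARKER}
  //   Content-Type: multipart/mixed; boundary="----=_Part_..."
  //
  //   ------=_Part_...
  //   Content-Type: application/json; charset=utf-8
  //   ...archive.json metadata block...
  //   ------=_Part_...
  //   Content-Type: application/octet-stream
  //   Content-Transfer-Encoding: base64
  //   ...newline-separated base64 chunks...
  //   ------=_Part_...
  //   ${ArchiveUtils.INLINE_MIME_END_MARKER}
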
  /**
   * @typedef {object} ArchiveHeaderResult
   * @property {string} contentType
   *   The value of the Content-Type for the inlined MIME message.
   * @property {number} startByteOffset
   *   The byte offset within the archive file where the inlined MIME message
   *   begins.
   */

  /**
   * Given a path to a single-file archive HTML file, this method will sniff
   * the header of the file to make sure it matches one that we support. If
   * successful, it will resolve with the contentType of the inlined MIME
   * message, as well as the byte offset at which the start of the inlined
   * MIME message can be read.
   *
   * @param {string} archivePath
   *   The path to a single-file archive HTML file.
   * @returns {Promise<ArchiveHeaderResult, Error>}
   */
  parseArchiveHeader(archivePath) {
    // We expect the first bytes of the file to indicate that this is an HTML5
    // file and to give us a version number we can handle.
    let syncReadFile = IOUtils.openFileForSyncReading(archivePath);
    let totalBytes = syncReadFile.size;

    // This seems like a reasonable minimum number of bytes to read in to get
    // at the header. If the header data isn't in there, then it's a corrupt
    // file.
    const MAX_BYTES_TO_READ = 256;
    // Clamp to the file size so that files smaller than MAX_BYTES_TO_READ
    // don't produce a negative read length.
    let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
    let headerBuffer = new Uint8Array(headerBytesToRead);
    syncReadFile.readBytesInto(headerBuffer, 0);

    let textDecoder = new TextDecoder();
    let decodedHeader = textDecoder.decode(headerBuffer);

    const EXPECTED_HEADER =
      /^<!DOCTYPE html>[\r\n]+<!-- Version: (\d+) -->[\r\n]+/;
    let headerMatches = decodedHeader.match(EXPECTED_HEADER);
    if (!headerMatches) {
      throw new BackupError("Corrupt archive header", ERRORS.CORRUPTED_ARCHIVE);
    }

    let version = parseInt(headerMatches[1], 10);
    // In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is where
    // we could place migrations / handlers for older archive versions.
    if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
      throw new BackupError(
        "Unsupported archive version: " + version,
        ERRORS.UNSUPPORTED_BACKUP_VERSION
      );
    }

    // Now we have to scan forward, looking for the INLINE_MIME_START_MARKER
    // and the Content-Type, which appears just before the MIME message.
    //
    // We scan by reading bytes into a buffer rather than reading in the whole
    // file, since the file could be quite large (100s of MB).
    let currentIndex = headerBuffer.byteLength;
    let startByteOffset = 0;

    // We keep the old buffer around, and always join it with the buffer that
    // contains the recently read-in bytes. That way, we can account for the
    // possibility that the INLINE_MIME_START_MARKER and Content-Type were
    // only half-loaded into the prior or current buffer.
    let oldBuffer = headerBuffer;
    let priorIndex = 0;
    let contentType = null;

    const EXPECTED_MARKER = new RegExp(
      `${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
    );
    let textEncoder = new TextEncoder();

    while (currentIndex < totalBytes) {
      let bytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes - currentIndex);
      // This shouldn't happen, but better safe than sorry.
      if (bytesToRead <= 0) {
        throw new BackupError(
          "Failed to calculate the proper number of bytes to read: " +
            bytesToRead,
          ERRORS.UNKNOWN
        );
      }

      let buffer = new Uint8Array(bytesToRead);
      syncReadFile.readBytesInto(buffer, currentIndex);

      let combinedBuffer = new Uint8Array(
        oldBuffer.byteLength + buffer.byteLength
      );
      combinedBuffer.set(oldBuffer, 0);
      combinedBuffer.set(buffer, oldBuffer.byteLength);

      // Now we look for the inline MIME marker, and try to extract the
      // Content-Type for it.
      let decodedString = textDecoder.decode(combinedBuffer);
      let markerMatches = decodedString.match(EXPECTED_MARKER);
      if (markerMatches) {
        // If we found it, we want to find the byte index for the point
        // immediately after the match. You'd think we could use
        // decodedString.search for this, but unfortunately search returns
        // character indexes and not byte indexes (and Unicode characters,
        // which might be displayed in the markup of the page, are multiple
        // bytes long). To work around this, we use a TextEncoder to encode
        // everything leading up to the marker, and count the number of
        // bytes. Then we count the number of bytes in our match. The sum of
        // these two values, plus the priorIndex, gives us the byte index of
        // the point right after our regular expression match in a
        // Unicode-character compatible way.
        //
        // This all presumes that the archive file was encoded as UTF-8.
        // Since we control the generation of this file, this is a safe
        // assumption.
        let match = markerMatches[0];
        let matchIndex = decodedString.indexOf(match);
        let substringUpToMatch = decodedString.slice(0, matchIndex);
        let substringUpToMatchBytes =
          textEncoder.encode(substringUpToMatch).byteLength;
        let matchBytes = textEncoder.encode(match).byteLength;
        startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
        contentType = markerMatches[1];
        break;
      }

      priorIndex = currentIndex;
      currentIndex += bytesToRead;
      oldBuffer = buffer;
    }

    // Release the file handle now that we're done scanning.
    syncReadFile.close();
    return { startByteOffset, contentType };
  }
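
  // As an illustrative example (values hypothetical), a successful parse
  // returns something like:
  //   { startByteOffset: 1634, contentType: "multipart/mixed; boundary=..." }
  // If the marker is never found, the method returns a null contentType and
  // a startByteOffset of 0.
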
  /**
   * Implements the standard boilerplate to make this class work as a
   * PromiseWorker.
   */
  #connectToPromiseWorker() {
    this.#worker = new PromiseWorker.AbstractWorker();
    this.#worker.dispatch = (method, args = []) => {
      if (!this[method]) {
        throw new BackupError(
          "Method does not exist: " + method,
          ERRORS.INTERNAL_ERROR
        );
      }
      return this[method](...args);
    };
    this.#worker.close = () => self.close();
    this.#worker.postMessage = (message, ...transfers) => {
      self.postMessage(message, ...transfers);
    };

    self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
    self.addEventListener("message", msg => this.#worker.handleMessage(msg));
    self.addEventListener("unhandledrejection", function (error) {
      throw error.reason;
    });
  }
}
new ArchiveWorker();
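
// A minimal sketch of how the main thread might drive this worker, assuming
// the BasePromiseWorker API from resource://gre/modules/PromiseWorker.sys.mjs
// (the exact wiring used by the backup service may differ):
//
//   let worker = new BasePromiseWorker(
//     "resource:///modules/backup/Archive.worker.mjs",
//     { type: "module" }
//   );
//   let { startByteOffset, contentType } = await worker.post(
//     "parseArchiveHeader",
//     [archivePath]
//   );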