/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";

// The ArchiveUtils module is designed to be imported in both worker and
// main thread contexts.
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";

/**
 * An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
 * lifting of dealing with single-file archives for backups, to avoid doing
 * much on the main thread. This is mostly important for single-file archive
 * _creation_, as this is supposed to occur silently in the background without
 * the user noticing any degradation in performance.
 */
class ArchiveWorker {
  #worker = null;

  constructor() {
    // Connect the provider to the worker.
    this.#connectToPromiseWorker();
  }

  /**
   * Generates a boundary string that can be used to separate sections in a
   * multipart/mixed MIME message.
   *
   * See https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
   *
   * @returns {string}
   */
  #generateBoundary() {
    return (
      "----=_Part_" +
      new Date().getTime() +
      "_" +
      Math.random().toString(36).slice(2, 12) +
      "_" +
      Math.random().toString(36).slice(2, 12)
    );
  }

  /**
   * Counts how many bytes at the very start of a buffer are UTF-8
   * continuation bytes (bit pattern 10xxxxxx). Such bytes appear when a
   * fixed-size read window begins in the middle of a multi-byte character.
   *
   * @param {Uint8Array} bytes
   *   The buffer to inspect.
   * @returns {number}
   *   The number of leading continuation bytes.
   */
  #countLeadingContinuationBytes(bytes) {
    let count = 0;
    while (count < bytes.length && (bytes[count] & 0xc0) === 0x80) {
      count++;
    }
    return count;
  }

  /**
   * Constructs a single-file archive for a backup on the filesystem. A
   * single-file archive is a specially crafted HTML document that includes,
   * among other things, an inlined multipart/mixed MIME message within a
   * document comment.
   *
   * @param {object} params
   *   Arguments that are described in more detail below.
   * @param {string} params.archivePath
   *   The path on the file system to write the single-file archive.
   * @param {string} params.templateURI
   *   A URI pointing to the HTML template that will be used for the viewable
   *   part of the document. The inlined MIME message will be appended after
   *   the contents of this template.
   * @param {object} params.backupMetadata
   *   The metadata associated with this backup. This is a copy of the metadata
   *   object that is contained within the compressed backups' manifest.
   * @param {string} params.compressedBackupSnapshotPath
   *   The path on the file system where the compressed backup file is located.
   * @returns {Promise<boolean>}
   *   Resolves with true once the whole archive has been written. Rejects if
   *   any read or write fails.
   */
  async constructArchive({
    archivePath,
    templateURI,
    backupMetadata,
    compressedBackupSnapshotPath,
  }) {
    // We can get at the template content by using a sync XHR, which is fine
    // to do in a Worker.
    let templateXhr = new XMLHttpRequest();
    templateXhr.open("GET", templateURI, false);
    templateXhr.responseType = "text";
    templateXhr.send(null);
    let template = templateXhr.responseText;

    let boundary = this.#generateBoundary();

    // Serialize once so that the advertised Content-Length is guaranteed to
    // describe exactly the bytes that get written.
    let serializedMetadata = JSON.stringify(backupMetadata);
    let textEncoder = new TextEncoder();
    let metadataLength = textEncoder.encode(serializedMetadata).length;

    // Once we get the ability to stream to the filesystem from IOUtils in a
    // worker, we should use that instead of appending each of these chunks.
    //
    // This isn't supposed to be some kind of generalized MIME message
    // generator, so we're happy to construct it by hand here. The sections are
    // assembled with explicit "\n" joins so that the line structure that
    // parseArchiveHeader depends on is visible in the code.
    await IOUtils.writeUTF8(archivePath, template);
    await IOUtils.writeUTF8(
      archivePath,
      [
        "",
        ArchiveUtils.INLINE_MIME_START_MARKER,
        `Content-Type: multipart/mixed; boundary="${boundary}"`,
        "",
        `--${boundary}`,
        "Content-Type: application/json; charset=utf-8",
        'Content-Disposition: attachment; filename="archive.json"',
        `Content-Length: ${metadataLength}`,
        "",
        serializedMetadata,
        "",
      ].join("\n"),
      { mode: "append" }
    );

    let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
      compressedBackupSnapshotPath
    );

    // Make sure the read handle is released even if one of the writes below
    // fails part-way through.
    try {
      let totalBytes = compressedBackupSnapshotFile.size;
      let chunkSize = ArchiveUtils.ARCHIVE_CHUNK_MAX_BYTES_SIZE;

      // To calculate the Content-Length of the base64 block, we start by
      // computing how many newlines we'll be adding (one per chunk)...
      let totalNewlines = Math.ceil(totalBytes / chunkSize);

      // ...and then add how many base64 bytes we're adding. For base64
      // encoding, 4 bytes are used to encode 3 bytes. Each chunk is encoded
      // on its own, so the rounding has to be applied per chunk rather than
      // to the file as a whole; otherwise the total is too small whenever
      // the chunk size is not a multiple of 3.
      // NOTE(review): assumes ArchiveUtils.arrayToBase64 emits padded base64,
      // which is the same assumption the 4 * ceil(n / 3) formula always made.
      let base64Length = byteCount => 4 * Math.ceil(byteCount / 3);
      let fullChunks = Math.floor(totalBytes / chunkSize);
      let trailingBytes = totalBytes % chunkSize;
      let totalBase64Bytes =
        fullChunks * base64Length(chunkSize) +
        base64Length(trailingBytes) +
        totalNewlines;

      await IOUtils.writeUTF8(
        archivePath,
        [
          `--${boundary}`,
          "Content-Type: application/octet-stream",
          'Content-Disposition: attachment; filename="archive.zip"',
          "Content-Transfer-Encoding: base64",
          `Content-Length: ${totalBase64Bytes}`,
          "",
          "",
        ].join("\n"),
        { mode: "append" }
      );

      // And now we read in the bytes of the compressed file, base64 encode
      // them, and append them to the document. Down the line, this is also
      // where encryption will be done.
      let currentIndex = 0;
      while (currentIndex < totalBytes) {
        let bytesToRead = Math.min(chunkSize, totalBytes - currentIndex);
        if (bytesToRead <= 0) {
          throw new Error(
            "Failed to calculate the right number of bytes to read."
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);

        // We're very intentionally newline-separating these blocks here, as
        // these blocks may have been run through encryption, and the same
        // blocks must be run through decryption to unpack the archive.
        // Newline-separation makes it easier to identify and manage these
        // blocks.
        await IOUtils.writeUTF8(
          archivePath,
          ArchiveUtils.arrayToBase64(buffer) + "\n",
          {
            mode: "append",
          }
        );

        currentIndex += bytesToRead;
      }

      await IOUtils.writeUTF8(
        archivePath,
        ["", `--${boundary}`, ArchiveUtils.INLINE_MIME_END_MARKER, ""].join(
          "\n"
        ),
        { mode: "append" }
      );
    } finally {
      compressedBackupSnapshotFile.close();
    }

    return true;
  }

  /**
   * @typedef {object} ArchiveHeaderResult
   * @property {string} contentType
   *   The value of the Content-Type for the inlined MIME message.
   * @property {number} startByteOffset
   *   The byte offset within the archive file where the inlined MIME message
   *   begins.
   */

  /**
   * Given a path to a single-file archive HTML file, this method will sniff
   * the header of the file to make sure it matches one that we support. If
   * successful, it will return the contentType of the inline MIME message, as
   * well as the byte offset for which the start of the inlined MIME message
   * can be read from.
   *
   * If the header is valid but the inline MIME start marker is never found,
   * the result has a startByteOffset of 0 and a contentType of null.
   *
   * @param {string} archivePath
   *   The path to a single-file archive HTML file.
   * @returns {ArchiveHeaderResult}
   * @throws {Error}
   *   If the header is missing or malformed, or if the archive version is not
   *   ArchiveUtils.ARCHIVE_FILE_VERSION.
   */
  parseArchiveHeader(archivePath) {
    // We expect the first bytes of the file to indicate that this is an HTML5
    // file and to give us a version number we can handle.
    let syncReadFile = IOUtils.openFileForSyncReading(archivePath);

    // The handle must be closed on every exit path, including the throws for
    // corrupt or unsupported archives.
    try {
      let totalBytes = syncReadFile.size;

      // This seems like a reasonable minimum number of bytes to read in to get
      // at the header. If the header data isn't in there, then it's a corrupt
      // file.
      const MAX_BYTES_TO_READ = 256;
      // Never ask for more than the file holds. Files shorter than
      // MAX_BYTES_TO_READ simply fail the header check below.
      let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
      let headerBuffer = new Uint8Array(headerBytesToRead);
      syncReadFile.readBytesInto(headerBuffer, 0);

      let textDecoder = new TextDecoder();
      let decodedHeader = textDecoder.decode(headerBuffer);
      // NOTE(review): the doctype and version-comment text of this pattern was
      // reconstructed; the capture group is required because the version is
      // read from headerMatches[1] below. Confirm it against the first two
      // lines of the archive HTML template.
      const EXPECTED_HEADER = /^<!DOCTYPE html>\n<!-- Version: (\d+) -->\n/;
      let headerMatches = decodedHeader.match(EXPECTED_HEADER);
      if (!headerMatches) {
        throw new Error("Corrupt archive header");
      }

      let version = parseInt(headerMatches[1], 10);
      // In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is where
      // we could place migrations / handlers for older archive versions.
      if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
        throw new Error("Unsupported archive version: " + version);
      }

      // Now we have to scan forward, looking for the INLINE_MIME_START_MARKER
      // and the Content-Type, which appears just before the MIME message.
      //
      // We scan by reading bytes into a buffer rather than reading in the
      // whole file, since the file could be quite large (100s of MB).
      let currentIndex = headerBuffer.byteLength;
      let startByteOffset = 0;
      // We keep the old buffer around, and always join it with the buffer that
      // contains the recently read-in bytes. That way, we can account for the
      // possibility that the INLINE_MIME_START_MARKER and Content-Type were
      // only half-loaded in prior or current buffer.
      let oldBuffer = headerBuffer;
      // File offset at which oldBuffer begins.
      let priorIndex = 0;
      let contentType = null;
      const EXPECTED_MARKER = new RegExp(
        `${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
      );

      let textEncoder = new TextEncoder();
      while (currentIndex < totalBytes) {
        let bytesToRead = Math.min(
          MAX_BYTES_TO_READ,
          totalBytes - currentIndex
        );

        // This shouldn't happen, but better safe than sorry.
        if (bytesToRead <= 0) {
          throw new Error(
            "Failed to calculate the proper number of bytes to read: " +
              bytesToRead
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        syncReadFile.readBytesInto(buffer, currentIndex);

        let combinedBuffer = new Uint8Array(
          oldBuffer.byteLength + buffer.byteLength
        );
        combinedBuffer.set(oldBuffer, 0);
        combinedBuffer.set(buffer, oldBuffer.byteLength);

        // The window may begin in the middle of a multi-byte UTF-8 character.
        // Decoding those stray continuation bytes would turn each of them into
        // U+FFFD, which re-encodes to 3 bytes and would inflate the offset we
        // compute below. Skip them before decoding and account for them
        // explicitly instead.
        let skippedBytes = this.#countLeadingContinuationBytes(combinedBuffer);

        // Now we look for the inline MIME marker, and try to extract the
        // Content-Type for it.
        let decodedString = textDecoder.decode(
          combinedBuffer.subarray(skippedBytes)
        );
        let markerMatches = decodedString.match(EXPECTED_MARKER);

        if (markerMatches) {
          // If we found it, we want to find the byte index for the point
          // immediately after the match. The match index is a character index
          // and not a byte index (and Unicode characters, which might be
          // displayed in the markup of the page, are multiple bytes long). To
          // work around this, we use a TextEncoder to encode everything
          // leading up to the marker, and count the number of bytes. Then we
          // count the number of bytes in our match. The sum of these two
          // values, plus the priorIndex and any skipped leading bytes, gives
          // us the byte index of the point right after our regular expression
          // match in a Unicode-character compatible way.
          //
          // This all presumes that the archive file was encoded as UTF-8.
          // Since we control the generation of this file, this is a safe
          // assumption.
          let substringUpToMatch = decodedString.slice(0, markerMatches.index);
          let substringUpToMatchBytes =
            textEncoder.encode(substringUpToMatch).byteLength;
          let matchBytes = textEncoder.encode(markerMatches[0]).byteLength;

          startByteOffset =
            priorIndex + skippedBytes + substringUpToMatchBytes + matchBytes;
          contentType = markerMatches[1];
          break;
        }

        priorIndex = currentIndex;
        currentIndex += bytesToRead;
        oldBuffer = buffer;
      }

      return { startByteOffset, contentType };
    } finally {
      syncReadFile.close();
    }
  }

  /**
   * Implements the standard boilerplate to make this class work as a
   * PromiseWorker.
   */
  #connectToPromiseWorker() {
    this.#worker = new PromiseWorker.AbstractWorker();
    // Route incoming method names to the public methods on this instance.
    this.#worker.dispatch = (method, args = []) => {
      if (!this[method]) {
        throw new Error("Method does not exist: " + method);
      }
      return this[method](...args);
    };
    this.#worker.close = () => self.close();
    this.#worker.postMessage = (message, ...transfers) => {
      self.postMessage(message, ...transfers);
    };

    self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
    self.addEventListener("message", msg => this.#worker.handleMessage(msg));
    self.addEventListener("unhandledrejection", function (error) {
      throw error.reason;
    });
  }
}

new ArchiveWorker();