There are a number of interesting things going on in this patch that I think are worth highlighting here for my reviewers: 1. The single-file archive format is an HTML file that uses an inlined multipart/mixed MIME message within an HTML document comment in order to embed the backup data into the archive. 2. We use the multipart/mixed nsIStreamConverter to extract the JSON and binary data from the MIME block. 3. We use an Archive Worker to do the archive creation, allowing us to do the work of construction off of the main thread. 4. The Archive Worker is only parsing the header and getting the byte offset of the MIME block. Extraction is happening in the parent process. This is mainly for simplicity for now, since the Archive Worker cannot invoke an nsIStreamConverter. Down the line, if we determine that we'd prefer the Archive Worker do the base64 decoding off of the main thread, we may need to use a MessageChannel to send the bytes from the nsIStreamConverter to it, and add stream-writing support to IOUtils so that the Archive Worker can take care of sending the decoded bytes to disk. 5. The patch doesn't expose the extraction mechanism in any way except through the debug interface right now. That will come down the line. In the meantime, this mechanism can be manually tested in the debug interface by creating a backup, which should also create an "archive.html" file in the backups folder. Using the "Extract from archive" button in the debug tool will let you select that HTML file and extract the ZIP as a file in the backups folder called "extraction.zip". 6. The test template contains Unicode characters because certain locales might involve us writing Unicode characters in the HTML template when generating the archive. The fun part about that is calculating where the byte offset is for the MIME block! See the comment in the Archive.worker.mjs script for how that works. Differential Revision: https://phabricator.services.mozilla.com/D211588
338 lines
12 KiB
JavaScript
338 lines
12 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";
|
|
|
|
// The ArchiveUtils module is designed to be imported in both worker and
|
|
// main thread contexts.
|
|
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
|
|
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
|
|
|
|
/**
 * An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
 * lifting of dealing with single-file archives for backups, to avoid doing
 * much on the main thread. This is mostly important for single-file archive
 * _creation_, as this is supposed to occur silently in the background without
 * the user noticing any degradation in performance.
 */
class ArchiveWorker {
  #worker = null;

  constructor() {
    // Connect the provider to the worker.
    this.#connectToPromiseWorker();
  }

  /**
   * Generates a boundary string that can be used to separate sections in a
   * multipart/mixed MIME message.
   *
   * See https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
   *
   * @returns {string}
   */
  #generateBoundary() {
    return (
      "----=_Part_" +
      new Date().getTime() +
      "_" +
      Math.random().toString(36).slice(2, 12) +
      "_" +
      Math.random().toString(36).slice(2, 12)
    );
  }

  /**
   * Constructs a single-file archive for a backup on the filesystem. A
   * single-file archive is a specially crafted HTML document that includes,
   * among other things, an inlined multipart/mixed MIME message within a
   * document comment.
   *
   * @param {object} params
   *   Arguments that are described in more detail below.
   * @param {string} params.archivePath
   *   The path on the file system to write the single-file archive.
   * @param {string} params.templateURI
   *   A URI pointing to the HTML template that will be used for the viewable
   *   part of the document. The inlined MIME message will be appended after
   *   the contents of this template.
   * @param {object} params.backupMetadata
   *   The metadata associated with this backup. This is a copy of the metadata
   *   object that is contained within the compressed backups' manifest.
   * @param {string} params.compressedBackupSnapshotPath
   *   The path on the file system where the compressed backup file is located.
   * @returns {Promise<undefined>}
   */
  async constructArchive({
    archivePath,
    templateURI,
    backupMetadata,
    compressedBackupSnapshotPath,
  }) {
    // We can get at the template content by using a sync XHR, which is fine
    // to do in a Worker.
    let templateXhr = new XMLHttpRequest();
    // Using a synchronous XHR in a worker is fine.
    templateXhr.open("GET", templateURI, false);
    templateXhr.responseType = "text";
    templateXhr.send(null);
    let template = templateXhr.responseText;

    let boundary = this.#generateBoundary();
    // Serialize once, and use the same string for both the Content-Length
    // computation and the part body, so the two can never disagree.
    let serializedMetadata = JSON.stringify(backupMetadata);
    let textEncoder = new TextEncoder();
    // Content-Length is measured in bytes, not UTF-16 code units, so encode
    // as UTF-8 first.
    let metadataLength = textEncoder.encode(serializedMetadata).length;

    // Once we get the ability to stream to the filesystem from IOUtils in a
    // worker, we should use that instead of appending each of these chunks.
    //
    // This isn't supposed to be some kind of generalized MIME message
    // generator, so we're happy to construct it by hand here.
    await IOUtils.writeUTF8(archivePath, template);
    await IOUtils.writeUTF8(
      archivePath,
      `
${ArchiveUtils.INLINE_MIME_START_MARKER}
Content-Type: multipart/mixed; boundary="${boundary}"

--${boundary}
Content-Type: application/json; charset=utf-8
Content-Disposition: attachment; filename="archive.json"
Content-Length: ${metadataLength}

${serializedMetadata}
`,
      { mode: "append" }
    );

    let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
      compressedBackupSnapshotPath
    );
    // Make sure the file handle is released even if one of the appends below
    // throws (e.g. the disk fills up mid-write).
    try {
      let totalBytes = compressedBackupSnapshotFile.size;

      // To calculate the Content-Length of the base64 block, we start by
      // computing how many newlines we'll be adding (one per chunk)...
      let totalNewlines = Math.ceil(
        totalBytes / ArchiveUtils.ARCHIVE_CHUNK_MAX_BYTES_SIZE
      );
      // And then add that to how many base64 bytes we're adding. For base64
      // encoding, 4 bytes are used to encode 3 bytes.
      let totalBase64Bytes = 4 * Math.ceil(totalBytes / 3) + totalNewlines;

      await IOUtils.writeUTF8(
        archivePath,
        `--${boundary}
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="archive.zip"
Content-Transfer-Encoding: base64
Content-Length: ${totalBase64Bytes}

`,
        { mode: "append" }
      );

      // And now we read in the bytes of the compressed file, base64 encode
      // them, and append them to the document. Down the line, this is also
      // where encryption will be done.
      let currentIndex = 0;
      while (currentIndex < totalBytes) {
        let bytesToRead = Math.min(
          ArchiveUtils.ARCHIVE_CHUNK_MAX_BYTES_SIZE,
          totalBytes - currentIndex
        );
        // The loop condition guarantees a positive value, but better safe
        // than sorry.
        if (bytesToRead <= 0) {
          throw new Error(
            "Failed to calculate the right number of bytes to read."
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);

        // We're very intentionally newline-separating these blocks here, as
        // these blocks may have been run through encryption, and the same
        // blocks must be run through decryption to unpack the archive.
        // Newline-separation makes it easier to identify and manage these
        // blocks.
        await IOUtils.writeUTF8(
          archivePath,
          ArchiveUtils.arrayToBase64(buffer) + "\n",
          {
            mode: "append",
          }
        );

        currentIndex += bytesToRead;
      }

      // NOTE(review): a strictly RFC 2046-compliant close delimiter would be
      // `--${boundary}--`; the extractor appears to tolerate this form —
      // confirm before changing, since it alters the on-disk format.
      await IOUtils.writeUTF8(
        archivePath,
        `
--${boundary}
${ArchiveUtils.INLINE_MIME_END_MARKER}
`,
        { mode: "append" }
      );
    } finally {
      compressedBackupSnapshotFile.close();
    }

    return true;
  }

  /**
   * @typedef {object} ArchiveHeaderResult
   * @property {string} contentType
   *   The value of the Content-Type for the inlined MIME message.
   * @property {number} startByteOffset
   *   The byte offset within the archive file where the inlined MIME message
   *   begins.
   */

  /**
   * Given a path to a single-file archive HTML file, this method will sniff
   * the header of the file to make sure it matches one that we support. If
   * successful, it will resolve with the contentType of the inline MIME
   * message, as well as the byte offset for which the start of the inlined
   * MIME message can be read from.
   *
   * Note that if no inline MIME marker is found by end-of-file, this resolves
   * with `{ startByteOffset: 0, contentType: null }` — callers must check for
   * a null contentType.
   *
   * @param {string} archivePath
   *   The path to a single-file archive HTML file.
   * @returns {Promise<ArchiveHeaderResult, Error>}
   */
  parseArchiveHeader(archivePath) {
    // We expect the first bytes of the file to indicate that this is an HTML5
    // file and to give us a version number we can handle.
    let syncReadFile = IOUtils.openFileForSyncReading(archivePath);
    // Ensure the handle is closed on every exit path, including the throws
    // below for corrupt or unsupported archives.
    try {
      let totalBytes = syncReadFile.size;

      // This seems like a reasonable minimum number of bytes to read in to
      // get at the header. If the header data isn't in there, then it's a
      // corrupt file.
      const MAX_BYTES_TO_READ = 256;
      // Read up to MAX_BYTES_TO_READ, clamped to the file size so that small
      // files don't result in a negative (and thus invalid) buffer length.
      let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
      let headerBuffer = new Uint8Array(headerBytesToRead);
      syncReadFile.readBytesInto(headerBuffer, 0);

      let textDecoder = new TextDecoder();
      let decodedHeader = textDecoder.decode(headerBuffer);
      const EXPECTED_HEADER = /^<!DOCTYPE html>\n<!-- Version: (\d+) -->\n/;
      let headerMatches = decodedHeader.match(EXPECTED_HEADER);
      if (!headerMatches) {
        throw new Error("Corrupt archive header");
      }

      let version = parseInt(headerMatches[1], 10);
      // In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is
      // where we could place migrations / handlers for older archive
      // versions.
      if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
        throw new Error("Unsupported archive version: " + version);
      }

      // Now we have to scan forward, looking for the INLINE_MIME_MARKER_START
      // and the Content-Type, which appears just before the MIME message.
      //
      // We scan by reading bytes into a buffer rather than reading in the
      // whole file, since the file could be quite large (100s of MB).
      let currentIndex = headerBuffer.byteLength;

      let startByteOffset = 0;
      // We keep the old buffer around, and always join it with the buffer
      // that contains the recently read-in bytes. That way, we can account
      // for the possibility that the INLINE_MIME_START_MARKER and
      // Content-Type were only half-loaded in the prior or current buffer.
      let oldBuffer = headerBuffer;
      let priorIndex = 0;
      let contentType = null;
      const EXPECTED_MARKER = new RegExp(
        `${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
      );

      let textEncoder = new TextEncoder();
      while (currentIndex < totalBytes) {
        let bytesToRead = Math.min(
          MAX_BYTES_TO_READ,
          totalBytes - currentIndex
        );

        // This shouldn't happen, but better safe than sorry.
        if (bytesToRead <= 0) {
          throw new Error(
            "Failed to calculate the proper number of bytes to read: " +
              bytesToRead
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        syncReadFile.readBytesInto(buffer, currentIndex);

        let combinedBuffer = new Uint8Array(
          oldBuffer.byteLength + buffer.byteLength
        );
        combinedBuffer.set(oldBuffer, 0);
        combinedBuffer.set(buffer, oldBuffer.byteLength);

        // Now we look for the inline MIME marker, and try to extract the
        // Content-Type for it.
        let decodedString = textDecoder.decode(combinedBuffer);
        let markerMatches = decodedString.match(EXPECTED_MARKER);

        if (markerMatches) {
          // If we found it, we want to find the byte index for the point
          // immediately after the match. You'd think we could use
          // decodedString.search for this, but unfortunately search returns
          // character indexes and not byte indexes (and Unicode characters,
          // which might be displayed in the markup of the page, are multiple
          // bytes long). To work around this, we use a TextEncoder to encode
          // everything leading up to the marker, and count the number of
          // bytes. Then we count the number of bytes in our match. The sum of
          // these two values, plus the priorIndex gives us the byte index of
          // the point right after our regular expression match in a
          // Unicode-character compatible way.
          //
          // This all presumes that the archive file was encoded as UTF-8.
          // Since we control the generation of this file, this is a safe
          // assumption.
          let match = markerMatches[0];
          let matchIndex = decodedString.indexOf(match);
          let substringUpToMatch = decodedString.slice(0, matchIndex);
          let substringUpToMatchBytes =
            textEncoder.encode(substringUpToMatch).byteLength;
          let matchBytes = textEncoder.encode(markerMatches[0]).byteLength;
          startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
          contentType = markerMatches[1];
          break;
        }

        priorIndex = currentIndex;
        currentIndex += bytesToRead;
        oldBuffer = buffer;
      }
      return { startByteOffset, contentType };
    } finally {
      syncReadFile.close();
    }
  }

  /**
   * Implements the standard boilerplate to make this class work as a
   * PromiseWorker.
   */
  #connectToPromiseWorker() {
    this.#worker = new PromiseWorker.AbstractWorker();
    this.#worker.dispatch = (method, args = []) => {
      if (!this[method]) {
        throw new Error("Method does not exist: " + method);
      }
      return this[method](...args);
    };
    this.#worker.close = () => self.close();
    this.#worker.postMessage = (message, ...transfers) => {
      self.postMessage(message, ...transfers);
    };

    self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
    self.addEventListener("message", msg => this.#worker.handleMessage(msg));
    self.addEventListener("unhandledrejection", function (error) {
      throw error.reason;
    });
  }
}
|
|
|
|
// Instantiate immediately so the worker registers its PromiseWorker message
// handlers as soon as this script loads; the instance lives for the lifetime
// of the worker via the listeners it attaches to `self`.
new ArchiveWorker();
|