Files
tubestation/browser/components/urlbar/private/RemoteSettingsClient.sys.mjs

456 lines
14 KiB
JavaScript

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
import { BaseFeature } from "resource:///modules/urlbar/private/BaseFeature.sys.mjs";
const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
EventEmitter: "resource://gre/modules/EventEmitter.sys.mjs",
TaskQueue: "resource:///modules/UrlbarUtils.sys.mjs",
UrlbarPrefs: "resource:///modules/UrlbarPrefs.sys.mjs",
});
XPCOMUtils.defineLazyModuleGetters(lazy, {
RemoteSettings: "resource://services-settings/remote-settings.js",
});
const RS_COLLECTION = "quicksuggest";
// Categories that should show "Firefox Suggest" instead of "Sponsored"
const NONSPONSORED_IAB_CATEGORIES = new Set(["5 - Education"]);
// Default score for remote settings suggestions.
const DEFAULT_SUGGESTION_SCORE = 0.2;
// Entries are added to the `#resultsByKeyword` map in chunks, and each chunk
// will add at most this many entries.
const ADD_RESULTS_CHUNK_SIZE = 1000;
const TELEMETRY_LATENCY = "FX_URLBAR_QUICK_SUGGEST_REMOTE_SETTINGS_LATENCY_MS";
/**
* Fetches the suggestions data from RemoteSettings and builds the structures
* to provide suggestions for UrlbarProviderQuickSuggest.
*/
export class RemoteSettingsClient extends BaseFeature {
/**
* @returns {number}
* The default score for remote settings suggestions, a value in the range
* [0, 1]. All suggestions require a score that can be used for comparison,
* so if a remote settings suggestion does not have one, it's assigned this
* value.
*/
static get DEFAULT_SUGGESTION_SCORE() {
return DEFAULT_SUGGESTION_SCORE;
}
constructor() {
super();
this.#taskQueue = new lazy.TaskQueue();
this.#emitter = new lazy.EventEmitter();
}
get shouldEnable() {
return (
lazy.UrlbarPrefs.get("suggest.quicksuggest.nonsponsored") ||
lazy.UrlbarPrefs.get("suggest.quicksuggest.sponsored")
);
}
get enablingPreferences() {
return [
"suggest.quicksuggest.nonsponsored",
"suggest.quicksuggest.sponsored",
];
}
/**
* @returns {EventEmitter}
* The client will emit events on this object.
*/
get emitter() {
return this.#emitter;
}
/**
* @returns {Promise}
* Resolves when any ongoing updates to the suggestions data are done.
*/
get readyPromise() {
return this.#taskQueue.emptyPromise;
}
/**
* @returns {object}
* Global quick suggest configuration stored in remote settings. When the
* config changes the `emitter` property will emit a "config-set" event. The
* config is an object that looks like this:
*
* {
* best_match: {
* min_search_string_length,
* blocked_suggestion_ids,
* },
* impression_caps: {
* nonsponsored: {
* lifetime,
* custom: [
* { interval_s, max_count },
* ],
* },
* sponsored: {
* lifetime,
* custom: [
* { interval_s, max_count },
* ],
* },
* },
* }
*/
get config() {
return this.#config;
}
enable(enabled) {
this.#queueSettingsSetup(enabled);
}
/**
* Fetches remote settings suggestions.
*
* @param {string} searchString
* The search string.
* @returns {Array}
* The remote settings suggestions. If there are no matches, an empty array
* is returned.
*/
async fetch(searchString) {
let suggestions;
let stopwatchInstance = {};
TelemetryStopwatch.start(TELEMETRY_LATENCY, stopwatchInstance);
try {
suggestions = await this.#fetchHelper(searchString);
TelemetryStopwatch.finish(TELEMETRY_LATENCY, stopwatchInstance);
} catch (error) {
TelemetryStopwatch.cancel(TELEMETRY_LATENCY, stopwatchInstance);
this.logger.error("Error fetching suggestions: " + error);
}
return suggestions || [];
}
/**
* Helper for `fetch()` that actually looks up the matching suggestions.
*
* @param {string} phrase
* The search string.
* @returns {Array}
* The matched suggestion objects. If there are no matches, an empty array
* is returned.
*/
async #fetchHelper(phrase) {
this.logger.info("Handling query: " + JSON.stringify(phrase));
phrase = phrase.toLowerCase();
let object = this.#resultsByKeyword.get(phrase);
if (!object) {
return [];
}
// `object` will be a single result object if there's only one match or an
// array of result objects if there's more than one match.
let results = [object].flat();
// Start each icon fetch at the same time and wait for them all to finish.
let icons = await Promise.all(
results.map(({ icon }) => this.#fetchIcon(icon))
);
return results.map(result => ({
full_keyword: this.getFullKeyword(phrase, result.keywords),
title: result.title,
url: result.url,
click_url: result.click_url,
impression_url: result.impression_url,
block_id: result.id,
advertiser: result.advertiser,
iab_category: result.iab_category,
is_sponsored: !NONSPONSORED_IAB_CATEGORIES.has(result.iab_category),
score:
typeof result.score == "number"
? result.score
: DEFAULT_SUGGESTION_SCORE,
source: "remote-settings",
icon: icons.shift(),
position: result.position,
is_top_pick: !!result.is_top_pick,
_test_is_best_match: result._test_is_best_match,
}));
}
/**
* Gets the full keyword (i.e., suggestion) for a result and query. The data
* doesn't include full keywords, so we make our own based on the result's
* keyword phrases and a particular query. We use two heuristics:
*
* (1) Find the first keyword phrase that has more words than the query. Use
* its first `queryWords.length` words as the full keyword. e.g., if the
* query is "moz" and `result.keywords` is ["moz", "mozi", "mozil",
* "mozill", "mozilla", "mozilla firefox"], pick "mozilla firefox", pop
* off the "firefox" and use "mozilla" as the full keyword.
* (2) If there isn't any keyword phrase with more words, then pick the
* longest phrase. e.g., pick "mozilla" in the previous example (assuming
* the "mozilla firefox" phrase isn't there). That might be the query
* itself.
*
* @param {string} query
* The query string that matched `result`.
* @param {Array} keywords
* An array of result keywords.
* @returns {string}
* The full keyword.
*/
getFullKeyword(query, keywords) {
let longerPhrase;
let trimmedQuery = query.trim();
let queryWords = trimmedQuery.split(" ");
for (let phrase of keywords) {
if (phrase.startsWith(query)) {
let trimmedPhrase = phrase.trim();
let phraseWords = trimmedPhrase.split(" ");
// As an exception to (1), if the query ends with a space, then look for
// phrases with one more word so that the suggestion includes a word
// following the space.
let extra = query.endsWith(" ") ? 1 : 0;
let len = queryWords.length + extra;
if (len < phraseWords.length) {
// We found a phrase with more words.
return phraseWords.slice(0, len).join(" ");
}
if (
query.length < phrase.length &&
(!longerPhrase || longerPhrase.length < trimmedPhrase.length)
) {
// We found a longer phrase with the same number of words.
longerPhrase = trimmedPhrase;
}
}
}
return longerPhrase || trimmedQuery;
}
/**
* Queues a task to ensure our remote settings client is initialized or torn
* down as appropriate.
*
* @param {boolean} enabled
* Whether the feature should be enabled.
*/
#queueSettingsSetup(enabled) {
this.#taskQueue.queue(() => {
if (enabled && !this.#rs) {
this.#onSettingsSync = (...args) => this.#queueSettingsSync(...args);
this.#rs = lazy.RemoteSettings(RS_COLLECTION);
this.#rs.on("sync", this.#onSettingsSync);
this.#queueSettingsSync();
} else if (!enabled && this.#rs) {
this.#rs.off("sync", this.#onSettingsSync);
this.#rs = null;
this.#onSettingsSync = null;
}
});
}
/**
* Queues a task to populate the results map from the remote settings data
* plus any other work that needs to be done on sync.
*
* @param {object} [event]
* The event object passed to the "sync" event listener if you're calling
* this from the listener.
*/
async #queueSettingsSync(event = null) {
await this.#taskQueue.queue(async () => {
if (!this.#rs || this._test_ignoreSettingsSync) {
return;
}
// Remove local files of deleted records
if (event?.data?.deleted) {
await Promise.all(
event.data.deleted
.filter(d => d.attachment)
.map(entry =>
Promise.all([
this.#rs.attachments.deleteDownloaded(entry), // type: data
this.#rs.attachments.deleteFromDisk(entry), // type: icon
])
)
);
}
let dataType = lazy.UrlbarPrefs.get("quickSuggestRemoteSettingsDataType");
this.logger.debug("Loading data with type: " + dataType);
let [configArray, data] = await Promise.all([
this.#rs.get({ filters: { type: "configuration" } }),
this.#rs.get({ filters: { type: dataType } }),
this.#rs
.get({ filters: { type: "icon" } })
.then(icons =>
Promise.all(icons.map(i => this.#rs.attachments.downloadToDisk(i)))
),
]);
this.logger.debug("Got configuration: " + JSON.stringify(configArray));
this.#setConfig(configArray?.[0]?.configuration || {});
this.#resultsByKeyword.clear();
this.logger.debug(`Got data with ${data.length} records`);
for (let record of data) {
let { buffer } = await this.#rs.attachments.download(record);
let results = JSON.parse(new TextDecoder("utf-8").decode(buffer));
this.logger.debug(`Adding ${results.length} results`);
await this.#addResults(results);
}
});
}
/**
* Sets the quick suggest config and emits a "config-set" event.
*
* @param {object} config
* The config object.
*/
#setConfig(config) {
this.#config = config || {};
this.#emitter.emit("config-set");
}
/**
* Adds a list of result objects to the results map. This method is also used
* by tests to set up mock suggestions.
*
* @param {Array} results
* Array of result objects.
*/
async #addResults(results) {
// There can be many results, and each result can have many keywords. To
// avoid blocking the main thread for too long, update the map in chunks,
// and to avoid blocking the UI and other higher priority work, do each
// chunk only when the main thread is idle. During each chunk, we'll add at
// most `_addResultsChunkSize` entries to the map.
let resultIndex = 0;
let keywordIndex = 0;
// Keep adding chunks until all results have been fully added.
while (resultIndex < results.length) {
await new Promise(resolve => {
Services.tm.idleDispatchToMainThread(() => {
// Keep updating the map until the current chunk is done.
let indexInChunk = 0;
while (
indexInChunk < this._addResultsChunkSize &&
resultIndex < results.length
) {
let result = results[resultIndex];
if (keywordIndex == result.keywords.length) {
resultIndex++;
keywordIndex = 0;
continue;
}
// If the keyword's only result is `result`, store it directly as
// the value. Otherwise store an array of results. For details, see
// the `#resultsByKeyword` comment.
let keyword = result.keywords[keywordIndex];
let object = this.#resultsByKeyword.get(keyword);
if (!object) {
this.#resultsByKeyword.set(keyword, result);
} else if (!Array.isArray(object)) {
this.#resultsByKeyword.set(keyword, [object, result]);
} else {
object.push(result);
}
keywordIndex++;
indexInChunk++;
}
// The current chunk is done.
resolve();
});
});
}
}
/**
* Fetch the icon from RemoteSettings attachments.
*
* @param {string} path
* The icon's remote settings path.
*/
async #fetchIcon(path) {
if (!path || !this.#rs) {
return null;
}
let record = (
await this.#rs.get({
filters: { id: `icon-${path}` },
})
).pop();
if (!record) {
return null;
}
return this.#rs.attachments.downloadToDisk(record);
}
get _test_rs() {
return this.#rs;
}
get _test_resultsByKeyword() {
return this.#resultsByKeyword;
}
_test_setConfig(config) {
this.#setConfig(config);
}
async _test_addResults(results) {
await this.#addResults(results);
}
// The RemoteSettings client.
#rs = null;
// Task queue for serializing access to remote settings and related data.
// Methods in this class should use this when they need to to modify or access
// the settings client. It ensures settings accesses are serialized, do not
// overlap, and happen only one at a time. It also lets clients, especially
// tests, use this class without having to worry about whether a settings sync
// or initialization is ongoing; see `readyPromise`.
#taskQueue = null;
// Configuration data synced from remote settings. See the `config` getter.
#config = {};
// Maps each keyword in the dataset to one or more results for the keyword. If
// only one result uses a keyword, the keyword's value in the map will be the
// result object. If more than one result uses the keyword, the value will be
// an array of the results. The reason for not always using an array is that
// we expect the vast majority of keywords to be used by only one result, and
// since there are potentially very many keywords and results and we keep them
// in memory all the time, we want to save as much memory as possible.
#resultsByKeyword = new Map();
// This is only defined as a property so that tests can override it.
_addResultsChunkSize = ADD_RESULTS_CHUNK_SIZE;
#onSettingsSync = null;
#emitter = null;
}