Bug 1960959 - Improve lookup performance of url classifier exception list. r=bvandersloot
Switching the data structure from an array to nested hash maps. Differential Revision: https://phabricator.services.mozilla.com/D247625
This commit is contained in:
committed by
ezuehlcke@mozilla.com
parent
e01fee1518
commit
eda1e20339
@@ -5,10 +5,14 @@
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "UrlClassifierExceptionList.h"
|
||||
#include "nsIEffectiveTLDService.h"
|
||||
#include "nsIUrlClassifierExceptionListEntry.h"
|
||||
#include "nsIURI.h"
|
||||
#include "mozilla/net/UrlClassifierCommon.h"
|
||||
#include "mozilla/ProfilerMarkers.h"
|
||||
#include "nsNetCID.h"
|
||||
#include "nsServiceManagerUtils.h"
|
||||
#include "mozilla/RustRegex.h"
|
||||
|
||||
namespace mozilla::net {
|
||||
|
||||
@@ -25,12 +29,58 @@ UrlClassifierExceptionList::AddEntry(
|
||||
nsIUrlClassifierExceptionListEntry* aEntry) {
|
||||
NS_ENSURE_ARG_POINTER(aEntry);
|
||||
|
||||
nsAutoCString entryString;
|
||||
Unused << aEntry->Describe(entryString);
|
||||
UC_LOG_DEBUG(("UrlClassifierExceptionList::%s - Adding entry: %s",
|
||||
__FUNCTION__, entryString.get()));
|
||||
// From the url patterns in the entry, extract the site and top level site.
|
||||
// They are used as keys in the exception entry maps.
|
||||
|
||||
nsAutoCString urlPattern;
|
||||
nsresult rv = aEntry->GetUrlPattern(urlPattern);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
nsAutoCString site;
|
||||
rv = GetSchemelessSiteFromUrlPattern(urlPattern, site);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// We must be able to parse a site from the url pattern.
|
||||
NS_ENSURE_TRUE(!site.IsEmpty(), NS_ERROR_INVALID_ARG);
|
||||
|
||||
nsAutoCString topLevelUrlPattern;
|
||||
rv = aEntry->GetTopLevelUrlPattern(topLevelUrlPattern);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
nsAutoCString topLevelSite;
|
||||
rv = GetSchemelessSiteFromUrlPattern(topLevelUrlPattern, topLevelSite);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// topLevelUrlPattern is not mandatory, but if topLevelUrlPattern is set,
|
||||
// topLevelSite populated as well.
|
||||
NS_ENSURE_TRUE(topLevelUrlPattern.IsEmpty() == topLevelSite.IsEmpty(),
|
||||
NS_ERROR_INVALID_ARG);
|
||||
|
||||
if (MOZ_LOG_TEST(UrlClassifierCommon::sLog, LogLevel::Debug)) {
|
||||
nsAutoCString entryString;
|
||||
Unused << aEntry->Describe(entryString);
|
||||
UC_LOG_DEBUG(("UrlClassifierExceptionList::%s - Adding entry: %s",
|
||||
__FUNCTION__, entryString.get()));
|
||||
}
|
||||
|
||||
// If the top level site is empty, the exception applies across all top
|
||||
// level sites. Store it in the global exceptions map.
|
||||
if (topLevelSite.IsEmpty()) {
|
||||
mGlobalExceptions.LookupOrInsert(site).AppendElement(aEntry);
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
// Otherwise, store it in the site specific exception map.
|
||||
mExceptions
|
||||
// Outer map keyed by top level site.
|
||||
// topLevelSite may be the empty string. We still use that a key. These
|
||||
// entries apply to all top-level sites.
|
||||
.LookupOrInsert(topLevelSite)
|
||||
// Inner map keyed by site of the load.
|
||||
.LookupOrInsert(site)
|
||||
// Append the entry.
|
||||
.AppendElement(aEntry);
|
||||
|
||||
mEntries.AppendElement(aEntry);
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -53,13 +103,77 @@ UrlClassifierExceptionList::Matches(nsIURI* aURI, nsIURI* aTopLevelURI,
|
||||
aTopLevelURI ? aTopLevelURI->GetSpecOrDefault().get() : "null",
|
||||
aIsPrivateBrowsing));
|
||||
|
||||
for (auto& entry : mEntries) {
|
||||
// Get the eTLD service so we can compute sites from URIs.
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsIEffectiveTLDService> eTLDService(
|
||||
do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID, &rv));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// If given, compute the (schemeless) site from the top level URI.
|
||||
// If not we will leave it empty and only look for global exceptions.
|
||||
nsAutoCString aTopLevelSite;
|
||||
if (aTopLevelURI) {
|
||||
rv = eTLDService->GetSchemelessSite(aTopLevelURI, aTopLevelSite);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
|
||||
// Compute the (schemeless) site from the URI of the load.
|
||||
nsAutoCString aSite;
|
||||
rv = eTLDService->GetSchemelessSite(aURI, aSite);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// Get the list of exceptions that apply to the current load.
|
||||
// We need to check both global and site specific exceptions
|
||||
|
||||
// 1. Check global exceptions, which apply to all top level sites and lookup
|
||||
// entries matching the current load (aSite).
|
||||
ExceptionEntryArray* globalExceptions =
|
||||
mGlobalExceptions.Lookup(aSite).DataPtrOrNull();
|
||||
|
||||
*aResult = ExceptionListMatchesLoad(globalExceptions, aURI, aTopLevelURI,
|
||||
aIsPrivateBrowsing);
|
||||
if (*aResult) {
|
||||
// We found a match, no need to check the site specific exceptions.
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
// 2. Get exceptions which apply only to the current top level site.
|
||||
SiteToEntries* topLevelSiteToEntries =
|
||||
mExceptions.Lookup(aTopLevelSite).DataPtrOrNull();
|
||||
if (topLevelSiteToEntries) {
|
||||
ExceptionEntryArray* siteSpecificExceptions =
|
||||
topLevelSiteToEntries->Lookup(aSite).DataPtrOrNull();
|
||||
|
||||
*aResult = ExceptionListMatchesLoad(siteSpecificExceptions, aURI,
|
||||
aTopLevelURI, aIsPrivateBrowsing);
|
||||
if (*aResult) {
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(*aResult)) {
|
||||
UC_LOG_DEBUG(("%s - No match found", __FUNCTION__));
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
bool UrlClassifierExceptionList::ExceptionListMatchesLoad(
|
||||
ExceptionEntryArray* aExceptions, nsIURI* aURI, nsIURI* aTopLevelURI,
|
||||
bool aIsPrivateBrowsing) {
|
||||
MOZ_ASSERT(aURI);
|
||||
|
||||
if (!aExceptions) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& entry : *aExceptions) {
|
||||
bool match = false;
|
||||
nsresult rv =
|
||||
entry->Matches(aURI, aTopLevelURI, aIsPrivateBrowsing, aResult);
|
||||
entry->Matches(aURI, aTopLevelURI, aIsPrivateBrowsing, &match);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
continue;
|
||||
}
|
||||
if (*aResult) {
|
||||
if (match) {
|
||||
// Match found, return immediately.
|
||||
if (MOZ_LOG_TEST(UrlClassifierCommon::sLog, LogLevel::Debug)) {
|
||||
nsAutoCString entryString;
|
||||
@@ -69,20 +183,66 @@ UrlClassifierExceptionList::Matches(nsIURI* aURI, nsIURI* aTopLevelURI,
|
||||
"entry: %s",
|
||||
__FUNCTION__, entryString.get()));
|
||||
}
|
||||
return NS_OK;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// No match found, return false.
|
||||
UC_LOG_DEBUG(("%s - No match found", __FUNCTION__));
|
||||
NS_IMETHODIMP
|
||||
UrlClassifierExceptionList::GetSchemelessSiteFromUrlPattern(
|
||||
const nsACString& aUrlPattern, nsACString& aSite) {
|
||||
if (aUrlPattern.IsEmpty()) {
|
||||
aSite.Truncate();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
// Extract the host portion from the url pattern. This regex only supports url
|
||||
// patterns with a host.
|
||||
mozilla::RustRegex regex("://(?:\\*\\.)?([^/*]+)");
|
||||
mozilla::RustRegexCaptures captures = regex.FindCaptures(aUrlPattern);
|
||||
NS_ENSURE_TRUE(captures.IsValid(), NS_ERROR_INVALID_ARG);
|
||||
|
||||
// Get the host from the first capture group
|
||||
auto maybeMatch = captures[1];
|
||||
NS_ENSURE_TRUE(maybeMatch, NS_ERROR_INVALID_ARG);
|
||||
|
||||
nsAutoCString host;
|
||||
host.Assign(Substring(aUrlPattern, maybeMatch->start,
|
||||
maybeMatch->end - maybeMatch->start));
|
||||
NS_ENSURE_TRUE(!host.IsEmpty(), NS_ERROR_INVALID_ARG);
|
||||
|
||||
// Get the eTLD service to convert host to schemeless site
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsIEffectiveTLDService> eTLDService(
|
||||
do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID, &rv));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
return eTLDService->GetSchemelessSiteFromHost(host, aSite);
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
UrlClassifierExceptionList::TestGetEntries(
|
||||
nsTArray<RefPtr<nsIUrlClassifierExceptionListEntry>>& aEntries) {
|
||||
aEntries = mEntries.Clone();
|
||||
// Global entries (not top-level specific)
|
||||
for (const auto& entry : mGlobalExceptions) {
|
||||
const ExceptionEntryArray& entries = entry.GetData();
|
||||
aEntries.AppendElements(entries);
|
||||
}
|
||||
|
||||
// Site specific entries.
|
||||
// Iterate through the outer map (top-level sites)
|
||||
for (const auto& outerEntry : mExceptions) {
|
||||
const SiteToEntries& innerMap = outerEntry.GetData();
|
||||
|
||||
// Iterate through the inner map (sites to exception entries)
|
||||
for (const auto& innerEntry : innerMap) {
|
||||
const ExceptionEntryArray& entries = innerEntry.GetData();
|
||||
// Append all entries from this array to the result
|
||||
aEntries.AppendElements(entries);
|
||||
}
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
} // namespace mozilla::net
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#ifndef mozilla_UrlClassifierExceptionList_h
|
||||
#define mozilla_UrlClassifierExceptionList_h
|
||||
|
||||
#include "nsHashKeys.h"
|
||||
#include "nsTHashMap.h"
|
||||
#include "nsIUrlClassifierExceptionList.h"
|
||||
#include "nsISupports.h"
|
||||
#include "nsTArray.h"
|
||||
@@ -27,8 +29,35 @@ class UrlClassifierExceptionList final : public nsIUrlClassifierExceptionList {
|
||||
private:
|
||||
~UrlClassifierExceptionList() = default;
|
||||
|
||||
// A list of exception entries
|
||||
using ExceptionEntryArray =
|
||||
nsTArray<RefPtr<nsIUrlClassifierExceptionListEntry>>;
|
||||
|
||||
// A map from (schemeless) site to a list of exception entries.
|
||||
using SiteToEntries = nsTHashMap<nsCStringHashKey, ExceptionEntryArray>;
|
||||
|
||||
// Helper method to check if any exception in the array matches the given
|
||||
// load.
|
||||
static bool ExceptionListMatchesLoad(ExceptionEntryArray* aExceptions,
|
||||
nsIURI* aURI, nsIURI* aTopLevelURI,
|
||||
bool aIsPrivateBrowsing);
|
||||
|
||||
// Helper method to extract the schemeless site from a URL pattern.
|
||||
NS_IMETHODIMP GetSchemelessSiteFromUrlPattern(const nsACString& aUrlPattern,
|
||||
nsACString& aSite);
|
||||
|
||||
// The feature this exception list is for, e.g. "tracking-protection".
|
||||
nsCString mFeature;
|
||||
nsTArray<RefPtr<nsIUrlClassifierExceptionListEntry>> mEntries;
|
||||
|
||||
// A two stage hash map to store the (top level) site-specific exception
|
||||
// entries.
|
||||
// The outer hash map key is the top level (schemeless) site.
|
||||
// The inner hash map key is the (schemeless) site of the load to be checked.
|
||||
nsTHashMap<nsCStringHashKey, SiteToEntries> mExceptions;
|
||||
|
||||
// A map of exception list entries which apply across all top level sites.
|
||||
// The hash map key is the (schemeless) site of the load to be checked.
|
||||
nsTHashMap<nsCStringHashKey, ExceptionEntryArray> mGlobalExceptions;
|
||||
};
|
||||
|
||||
} // namespace mozilla::net
|
||||
|
||||
@@ -110,33 +110,35 @@ add_task(async function test_list_changes() {
|
||||
|
||||
let entries = list.testGetEntries();
|
||||
Assert.equal(entries.length, 3, "Has three items in the list");
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
entries[1].urlPattern,
|
||||
"*://example.com/*",
|
||||
"First item is example.com"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
entries[2].urlPattern,
|
||||
"*://MOZILLA.ORG/*",
|
||||
"Second item is mozilla.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].topLevelUrlPattern,
|
||||
entries[2].topLevelUrlPattern,
|
||||
"*://example.com/*",
|
||||
"Top level url pattern of second item is correctly set."
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].isPrivateBrowsingOnly,
|
||||
entries[2].isPrivateBrowsingOnly,
|
||||
true,
|
||||
"isPrivateBrowsingOnly flag of second item is correctly set."
|
||||
);
|
||||
Assert.deepEqual(
|
||||
entries[1].filterContentBlockingCategories,
|
||||
entries[2].filterContentBlockingCategories,
|
||||
["standard"],
|
||||
"filterContentBlockingCategories of second item is correctly set."
|
||||
);
|
||||
Assert.equal(
|
||||
entries[2].urlPattern,
|
||||
entries[0].urlPattern,
|
||||
"*://*.example.org/*",
|
||||
"Third item is *.example.org"
|
||||
);
|
||||
@@ -151,6 +153,8 @@ add_task(async function test_list_changes() {
|
||||
|
||||
Assert.equal(entries.length, 4, "Has four items in the list");
|
||||
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
"*://example.com/*",
|
||||
@@ -162,12 +166,12 @@ add_task(async function test_list_changes() {
|
||||
"Second item is mozilla.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[3].urlPattern,
|
||||
entries[0].urlPattern,
|
||||
"*://*.example.org/*",
|
||||
"Third item is *.example.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
entries[3].urlPattern,
|
||||
"*://test.com/*",
|
||||
"Fourth item is test.com"
|
||||
);
|
||||
@@ -183,33 +187,36 @@ add_task(async function test_list_changes() {
|
||||
|
||||
entries = list.testGetEntries();
|
||||
Assert.equal(entries.length, 6, "Has six items in the list");
|
||||
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
entries[4].urlPattern,
|
||||
"*://test.com/*",
|
||||
"First item is test.com"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
entries[5].urlPattern,
|
||||
"*://whatever.com/*",
|
||||
"Second item is whatever.com"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[2].urlPattern,
|
||||
entries[0].urlPattern,
|
||||
"*://*.abc.com/*",
|
||||
"Third item is *.abc.com"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[3].urlPattern,
|
||||
entries[2].urlPattern,
|
||||
"*://example.com/*",
|
||||
"Fourth item is example.com"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[4].urlPattern,
|
||||
entries[3].urlPattern,
|
||||
"*://MOZILLA.ORG/*",
|
||||
"Fifth item is mozilla.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[5].urlPattern,
|
||||
entries[1].urlPattern,
|
||||
"*://*.example.org/*",
|
||||
"Sixth item is *.example.org"
|
||||
);
|
||||
@@ -288,16 +295,19 @@ add_task(async function test_list_init_data() {
|
||||
|
||||
list = await waitForEvent(updateEvent, "update");
|
||||
let entries = list.testGetEntries();
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
|
||||
Assert.equal(entries.length, 2, "Has two items in the list");
|
||||
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
"*://tracking.example.com/*",
|
||||
"First item is tracking.example.com"
|
||||
"*://*.tracking.org/*",
|
||||
"First item is *.tracking.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
"*://*.tracking.org/*",
|
||||
"Second item is *.tracking.org"
|
||||
"*://tracking.example.com/*",
|
||||
"Second item is tracking.example.com"
|
||||
);
|
||||
|
||||
// Register another feature after ExceptionListService got the initial data.
|
||||
@@ -311,16 +321,18 @@ add_task(async function test_list_init_data() {
|
||||
|
||||
list = await promise;
|
||||
entries = list.testGetEntries();
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
|
||||
Assert.equal(entries.length, 2, "Has two items in the list");
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
"*://social.example.com/*",
|
||||
"First item is social.example.com"
|
||||
"*://MOZILLA.ORG/*",
|
||||
"First item is mozilla.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
"*://MOZILLA.ORG/*",
|
||||
"Second item is mozilla.org"
|
||||
"*://social.example.com/*",
|
||||
"Second item is social.example.com"
|
||||
);
|
||||
|
||||
// Test registering a feature after ExceptionListService recieved the synced data.
|
||||
@@ -360,16 +372,17 @@ add_task(async function test_list_init_data() {
|
||||
list = await promise;
|
||||
|
||||
entries = list.testGetEntries();
|
||||
entries = entries.sort((a, b) => a.urlPattern.localeCompare(b.urlPattern));
|
||||
Assert.equal(entries.length, 2, "Has two items in the list");
|
||||
Assert.equal(
|
||||
entries[0].urlPattern,
|
||||
"*://fingerprinting.example.com/*",
|
||||
"First item is fingerprinting.example.com"
|
||||
"*://*.fingerprinting.org/*",
|
||||
"First item is *.fingerprinting.org"
|
||||
);
|
||||
Assert.equal(
|
||||
entries[1].urlPattern,
|
||||
"*://*.fingerprinting.org/*",
|
||||
"Second item is *.fingerprinting.org"
|
||||
"*://fingerprinting.example.com/*",
|
||||
"Second item is fingerprinting.example.com"
|
||||
);
|
||||
|
||||
exceptionListService.unregisterExceptionListObserver(
|
||||
|
||||
Reference in New Issue
Block a user