Bug 1967381 - Optimize Document::RecomputeLanguageFromCharset. a=dmeehan DONTBUILD

Original Revision: https://phabricator.services.mozilla.com/D251598

Differential Revision: https://phabricator.services.mozilla.com/D264905
This commit is contained in:
Henri Sivonen
2025-09-23 14:49:10 +00:00
committed by dmeehan@mozilla.com
parent 35da1c77c5
commit 8c188f23b4
11 changed files with 167 additions and 112 deletions

View File

@@ -276,6 +276,7 @@
#include "mozilla/gfx/ScaleFactor.h" #include "mozilla/gfx/ScaleFactor.h"
#include "mozilla/glean/DomMetrics.h" #include "mozilla/glean/DomMetrics.h"
#include "mozilla/glean/DomUseCounterMetrics.h" #include "mozilla/glean/DomUseCounterMetrics.h"
#include "mozilla/intl/EncodingToLang.h"
#include "mozilla/intl/LocaleService.h" #include "mozilla/intl/LocaleService.h"
#include "mozilla/ipc/IdleSchedulerChild.h" #include "mozilla/ipc/IdleSchedulerChild.h"
#include "mozilla/ipc/MessageChannel.h" #include "mozilla/ipc/MessageChannel.h"
@@ -417,7 +418,6 @@
#include "nsIXULRuntime.h" #include "nsIXULRuntime.h"
#include "nsImageLoadingContent.h" #include "nsImageLoadingContent.h"
#include "nsImportModule.h" #include "nsImportModule.h"
#include "nsLanguageAtomService.h"
#include "nsLayoutUtils.h" #include "nsLayoutUtils.h"
#include "nsMimeTypes.h" #include "nsMimeTypes.h"
#include "nsNetCID.h" #include "nsNetCID.h"
@@ -1500,6 +1500,7 @@ Document::Document(const char* aContentType)
mInteractiveWidgetMode( mInteractiveWidgetMode(
InteractiveWidgetUtils::DefaultInteractiveWidgetMode()), InteractiveWidgetUtils::DefaultInteractiveWidgetMode()),
mHeaderData(nullptr), mHeaderData(nullptr),
mLanguageFromCharset(nullptr),
mServoRestyleRootDirtyBits(0), mServoRestyleRootDirtyBits(0),
mThrowOnDynamicMarkupInsertionCounter(0), mThrowOnDynamicMarkupInsertionCounter(0),
mIgnoreOpensDuringUnloadCounter(0), mIgnoreOpensDuringUnloadCounter(0),
@@ -19746,7 +19747,7 @@ nsAtom* Document::GetLanguageForStyle() const {
if (nsAtom* lang = GetContentLanguageAsAtomForStyle()) { if (nsAtom* lang = GetContentLanguageAsAtomForStyle()) {
return lang; return lang;
} }
return mLanguageFromCharset.get(); return mLanguageFromCharset;
} }
void Document::GetContentLanguageForBindings(DOMString& aString) const { void Document::GetContentLanguageForBindings(DOMString& aString) const {
@@ -19755,7 +19756,7 @@ void Document::GetContentLanguageForBindings(DOMString& aString) const {
const LangGroupFontPrefs* Document::GetFontPrefsForLang( const LangGroupFontPrefs* Document::GetFontPrefsForLang(
nsAtom* aLanguage, bool* aNeedsToCache) const { nsAtom* aLanguage, bool* aNeedsToCache) const {
nsAtom* lang = aLanguage ? aLanguage : mLanguageFromCharset.get(); nsAtom* lang = aLanguage ? aLanguage : mLanguageFromCharset;
return StaticPresData::Get()->GetFontPrefsForLang(lang, aNeedsToCache); return StaticPresData::Get()->GetFontPrefsForLang(lang, aNeedsToCache);
} }
@@ -19763,7 +19764,7 @@ void Document::DoCacheAllKnownLangPrefs() {
MOZ_ASSERT(mMayNeedFontPrefsUpdate); MOZ_ASSERT(mMayNeedFontPrefsUpdate);
RefPtr<nsAtom> lang = GetLanguageForStyle(); RefPtr<nsAtom> lang = GetLanguageForStyle();
StaticPresData* data = StaticPresData::Get(); StaticPresData* data = StaticPresData::Get();
data->GetFontPrefsForLang(lang ? lang.get() : mLanguageFromCharset.get()); data->GetFontPrefsForLang(lang ? lang.get() : mLanguageFromCharset);
data->GetFontPrefsForLang(nsGkAtoms::x_math); data->GetFontPrefsForLang(nsGkAtoms::x_math);
// https://bugzilla.mozilla.org/show_bug.cgi?id=1362599#c12 // https://bugzilla.mozilla.org/show_bug.cgi?id=1362599#c12
data->GetFontPrefsForLang(nsGkAtoms::Unicode); data->GetFontPrefsForLang(nsGkAtoms::Unicode);
@@ -19774,29 +19775,14 @@ void Document::DoCacheAllKnownLangPrefs() {
} }
void Document::RecomputeLanguageFromCharset() { void Document::RecomputeLanguageFromCharset() {
RefPtr<nsAtom> language; nsAtom* language = mozilla::intl::EncodingToLang::Lookup(mCharacterSet);
// Optimize the default character sets.
if (mCharacterSet == WINDOWS_1252_ENCODING) {
language = nsGkAtoms::x_western;
} else {
nsLanguageAtomService* service = nsLanguageAtomService::GetService();
if (mCharacterSet == UTF_8_ENCODING) {
language = nsGkAtoms::Unicode;
} else {
language = service->LookupCharSet(mCharacterSet);
}
if (language == nsGkAtoms::Unicode) {
language = service->GetLocaleLanguage();
}
}
if (language == mLanguageFromCharset) { if (language == mLanguageFromCharset) {
return; return;
} }
mMayNeedFontPrefsUpdate = true; mMayNeedFontPrefsUpdate = true;
mLanguageFromCharset = std::move(language); mLanguageFromCharset = language;
} }
nsICookieJarSettings* Document::CookieJarSettings() { nsICookieJarSettings* Document::CookieJarSettings() {

View File

@@ -5489,7 +5489,9 @@ class Document : public nsINode,
nsTHashSet<RefPtr<nsAtom>> mLanguagesUsed; nsTHashSet<RefPtr<nsAtom>> mLanguagesUsed;
// TODO(emilio): Is this hot enough to warrant to be cached? // TODO(emilio): Is this hot enough to warrant to be cached?
RefPtr<nsAtom> mLanguageFromCharset; // EncodingToLang.cpp keeps the atom alive until shutdown, so
// no need for a RefPtr.
nsAtom* mLanguageFromCharset;
// Restyle root for servo's style system. // Restyle root for servo's style system.
// //

View File

@@ -0,0 +1,66 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/intl/EncodingToLang.h"
#include "nsGkAtoms.h"
#include "nsLanguageAtomService.h"
using namespace mozilla;
using namespace mozilla::intl;
// Parallel arrays of Encoding and corresponding Lang atoms,
// in rough order of frequency.
// Unfortunately, the `mozilla::NotNull` hack that was used to
// declare the encoding pointers in C++ does not allow putting
// the pointers in a static array without a run-time initializer,
// so our options are:
// 1. Putting the pointers in a static array in Rust, at a distance.
// 2. Run-time initializer.
// 3. Using pointer pointers, as seen here.
//
// The table is generated from EncodingsByFrequency.inc: each
// `_(encoding, lang)` row expands to the address of the encoding
// constant, and the `lang` column is ignored for this array.
// Entry i here pairs with entry i of sLangs.
const mozilla::NotNull<const mozilla::Encoding *> *
EncodingToLang::kEncodingsByRoughFrequency[] = {
#define _(encoding, lang) &encoding,
#include "EncodingsByFrequency.inc"
#undef _
};
// This one isn't constant, as it gets adjusted during Initialize().
// The table is generated from EncodingsByFrequency.inc: each
// `_(encoding, lang)` row expands to its `lang` column, so entries
// line up index-for-index with kEncodingsByRoughFrequency.
// Rows whose `lang` is nullptr stay null until Initialize() fills
// them in with the same atom it stores in entry 0.
// static
nsAtom* EncodingToLang::sLangs[] = {
#define _(encoding, lang) lang,
#include "EncodingsByFrequency.inc"
#undef _
};
// static
// Returns the language atom stored in sLangs at the same index at which
// aEncoding appears in kEncodingsByRoughFrequency.
nsAtom* EncodingToLang::Lookup(NotNull<const mozilla::Encoding*> aEncoding) {
  // A plain front-to-back scan is enough: the table is ordered by rough
  // frequency, so in the vast majority of cases the match is found at the
  // first or second entry.
  for (size_t idx = 0; idx < std::size(kEncodingsByRoughFrequency); ++idx) {
    const auto& candidate = *kEncodingsByRoughFrequency[idx];
    if (candidate == aEncoding) {
      return sLangs[idx];
    }
  }
  MOZ_ASSERT(false, "The encoding is always supposed to be found in the array");
  // If execution continues past the assertion, fall back to the first entry.
  return sLangs[0];
}
// static
void EncodingToLang::Initialize() {
sLangs[0] = nsLanguageAtomService::GetService()->GetLocaleLanguage();
// We logically hold a strong ref to the first occurrence
// and a non-owning pointer to the rest.
NS_ADDREF(sLangs[0]);
for (size_t i = 1; i < std::size(sLangs); ++i) {
if (!sLangs[i]) {
sLangs[i] = sLangs[0];
}
}
}
// static
// Releases the strong reference that Initialize() took on sLangs[0].
// Only entry 0 is touched here; the other entries that share the same
// atom are non-owning and are left as they are.
void EncodingToLang::Shutdown() { NS_RELEASE(sLangs[0]); }

View File

@@ -0,0 +1,34 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_intl_EncodingToLang_h
#define mozilla_intl_EncodingToLang_h
#include "nsAtom.h"
#include "mozilla/Encoding.h"
namespace mozilla::intl {

// Static-only helper that maps a character encoding to the language atom
// used for font matching. All state lives in the two private tables below;
// the class is never meant to be instantiated.
class EncodingToLang {
 public:
  // Call once from nsLayoutStatics::Initialize()
  static void Initialize();

  // Call once from nsLayoutStatics::Shutdown()
  static void Shutdown();

  // Looks up a font-matching language atom by encoding.
  // The atom will be kept alive until nsLayoutStatics::Shutdown(),
  // which is why it's a raw pointer.
  static nsAtom* Lookup(mozilla::NotNull<const mozilla::Encoding*> aEncoding);

 private:
  // Language atoms, one per entry of kEncodingsByRoughFrequency.
  // Not const because Initialize() adjusts it.
  static nsAtom* sLangs[];

  // Pointers to the encoding constants, in rough order of frequency.
  static const mozilla::NotNull<const mozilla::Encoding*>*
      kEncodingsByRoughFrequency[];
};

}  // namespace mozilla::intl
#endif // mozilla_intl_EncodingToLang_h

View File

@@ -0,0 +1,50 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// List of encodings and their corresponding Lang atoms for font selection.
// nullptr indicates that the browser UI locale language will be used.
// These are in roughly estimated frequency order, sometimes ordered by group.
// The main thing is that UTF-8 is first and windows-1252 is second,
// followed by other encodings that were a legacy default somewhere.
_(UTF_8_ENCODING, nullptr)
_(WINDOWS_1252_ENCODING, nsGkAtoms::x_western)
_(GBK_ENCODING, nsGkAtoms::zh_cn)
_(SHIFT_JIS_ENCODING, nsGkAtoms::Japanese)
_(BIG5_ENCODING, nsGkAtoms::zh_tw)
_(EUC_KR_ENCODING, nsGkAtoms::ko)
_(WINDOWS_1250_ENCODING, nsGkAtoms::x_western)
_(WINDOWS_1251_ENCODING, nsGkAtoms::x_cyrillic)
_(WINDOWS_1253_ENCODING, nsGkAtoms::el)
_(WINDOWS_1254_ENCODING, nsGkAtoms::x_western)
_(WINDOWS_1255_ENCODING, nsGkAtoms::he)
_(WINDOWS_1256_ENCODING, nsGkAtoms::ar)
_(WINDOWS_1257_ENCODING, nsGkAtoms::x_western)
_(WINDOWS_1258_ENCODING, nsGkAtoms::x_western)
_(WINDOWS_874_ENCODING, nsGkAtoms::th)
_(ISO_8859_2_ENCODING, nsGkAtoms::x_western)
_(EUC_JP_ENCODING, nsGkAtoms::Japanese)
_(GB18030_ENCODING, nsGkAtoms::zh_cn)
_(UTF_16BE_ENCODING, nullptr)
_(UTF_16LE_ENCODING, nullptr)
_(ISO_2022_JP_ENCODING, nsGkAtoms::Japanese)
_(ISO_8859_3_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_4_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_5_ENCODING, nsGkAtoms::x_cyrillic)
_(ISO_8859_6_ENCODING, nsGkAtoms::ar)
_(ISO_8859_7_ENCODING, nsGkAtoms::el)
_(ISO_8859_8_ENCODING, nsGkAtoms::he)
_(ISO_8859_8_I_ENCODING, nsGkAtoms::he)
_(ISO_8859_10_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_13_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_14_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_15_ENCODING, nsGkAtoms::x_western)
_(ISO_8859_16_ENCODING, nsGkAtoms::x_western)
_(KOI8_R_ENCODING, nsGkAtoms::x_cyrillic)
_(KOI8_U_ENCODING, nsGkAtoms::x_cyrillic)
_(MACINTOSH_ENCODING, nsGkAtoms::x_western)
_(REPLACEMENT_ENCODING, nullptr)
_(IBM866_ENCODING, nsGkAtoms::x_cyrillic)
_(X_MAC_CYRILLIC_ENCODING, nsGkAtoms::x_cyrillic)
_(X_USER_DEFINED_ENCODING, nullptr)

View File

@@ -1,40 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# x-unicode is assumed for encodings not listed here
Big5=zh-TW
EUC-JP=ja
EUC-KR=ko
gb18030=zh-CN
GBK=zh-CN
IBM866=x-cyrillic
ISO-2022-JP=ja
ISO-8859-3=x-western
ISO-8859-4=x-western
ISO-8859-5=x-cyrillic
ISO-8859-6=ar
ISO-8859-7=el
ISO-8859-8=he
ISO-8859-8-I=he
ISO-8859-10=x-western
ISO-8859-13=x-western
ISO-8859-14=x-western
ISO-8859-15=x-western
ISO-8859-16=x-western
ISO-8859-2=x-western
KOI8-R=x-cyrillic
KOI8-U=x-cyrillic
Shift_JIS=ja
windows-1250=x-western
windows-1251=x-cyrillic
windows-1252=x-western
windows-1253=el
windows-1254=x-western
windows-1255=he
windows-1256=ar
windows-1257=x-western
windows-1258=x-western
windows-874=th
x-mac-cyrillic=x-cyrillic

View File

@@ -35,6 +35,7 @@ EXPORTS += [
EXPORTS.mozilla.intl += [ EXPORTS.mozilla.intl += [
"AppDateTimeFormat.h", "AppDateTimeFormat.h",
"EncodingToLang.h",
"LocaleService.h", "LocaleService.h",
"MozLocaleBindings.h", "MozLocaleBindings.h",
"OSPreferences.h", "OSPreferences.h",
@@ -43,6 +44,7 @@ EXPORTS.mozilla.intl += [
UNIFIED_SOURCES += [ UNIFIED_SOURCES += [
"AppDateTimeFormat.cpp", "AppDateTimeFormat.cpp",
"EncodingToLang.cpp",
"LocaleService.cpp", "LocaleService.cpp",
"nsLanguageAtomService.cpp", "nsLanguageAtomService.cpp",
"nsUConvPropertySearch.cpp", "nsUConvPropertySearch.cpp",
@@ -68,13 +70,6 @@ RESOURCE_FILES += [
"language.properties", "language.properties",
] ]
prefixes = ("encodingsgroups",)
for prefix in prefixes:
input_file = prefix + ".properties"
header = prefix + ".properties.h"
GeneratedFile(header, script="props2arrays.py", inputs=[input_file])
if CONFIG["ENABLE_TESTS"]: if CONFIG["ENABLE_TESTS"]:
DIRS += ["tests/gtest"] DIRS += ["tests/gtest"]

View File

@@ -20,10 +20,6 @@
using namespace mozilla; using namespace mozilla;
using mozilla::intl::OSPreferences; using mozilla::intl::OSPreferences;
static constexpr nsUConvProp encodingsGroups[] = {
#include "encodingsgroups.properties.h"
};
// List of mozilla internal x-* tags that map to themselves (see bug 256257) // List of mozilla internal x-* tags that map to themselves (see bug 256257)
static constexpr nsStaticAtom* kLangGroups[] = { static constexpr nsStaticAtom* kLangGroups[] = {
// This list must be sorted! // This list must be sorted!
@@ -110,18 +106,6 @@ nsStaticAtom* nsLanguageAtomService::LookupLanguage(
return GetLanguageGroup(lang); return GetLanguageGroup(lang);
} }
already_AddRefed<nsAtom> nsLanguageAtomService::LookupCharSet(
NotNull<const Encoding*> aEncoding) {
nsAutoCString charset;
aEncoding->Name(charset);
nsAutoCString group;
if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
encodingsGroups, std::size(encodingsGroups), charset, group))) {
return RefPtr<nsAtom>(nsGkAtoms::Unicode).forget();
}
return NS_Atomize(group);
}
nsAtom* nsLanguageAtomService::GetLocaleLanguage() { nsAtom* nsLanguageAtomService::GetLocaleLanguage() {
{ {
AutoReadLock lock(mLock); AutoReadLock lock(mLock);

View File

@@ -33,7 +33,6 @@ class nsLanguageAtomService final {
static void Shutdown(); static void Shutdown();
nsStaticAtom* LookupLanguage(const nsACString& aLanguage); nsStaticAtom* LookupLanguage(const nsACString& aLanguage);
already_AddRefed<nsAtom> LookupCharSet(NotNull<const Encoding*> aCharSet);
nsAtom* GetLocaleLanguage(); nsAtom* GetLocaleLanguage();
// Returns the language group that the specified language is a part of, // Returns the language group that the specified language is a part of,

View File

@@ -1,26 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Converts a .properties file of "key=value" lines into a comma-separated
# list of C aggregate initializers of the form { "key", "value", len(value) }.
#   header:   writable file object that receives the generated text.
#   propFile: path of the .properties file to read.
def main(header, propFile):
mappings = {}
with open(propFile) as f:
for line in f:
line = line.strip()
# Lines starting with "#" are treated as comments and skipped.
if not line.startswith("#"):
# Split on the first "=" only, so a value may itself contain "=".
parts = line.split("=", 1)
# Ignore lines without "=" and lines with an empty key.
if len(parts) == 2 and len(parts[0]) > 0:
mappings[parts[0].strip()] = parts[1].strip()
keys = mappings.keys()
header.write("// This is a generated file. Please do not edit.\n")
header.write("// Please edit the corresponding .properties file instead.\n")
# Entries are emitted in sorted key order; the third field is the
# length of the value string.
entries = [
'{ "%s", "%s", %d }' % (key, mappings[key], len(mappings[key]))
for key in sorted(keys)
]
header.write(",\n".join(entries) + "\n")

View File

@@ -10,6 +10,7 @@
#include "nscore.h" #include "nscore.h"
#include "mozilla/intl/AppDateTimeFormat.h" #include "mozilla/intl/AppDateTimeFormat.h"
#include "mozilla/intl/EncodingToLang.h"
#include "mozilla/dom/ServiceWorkerRegistrar.h" #include "mozilla/dom/ServiceWorkerRegistrar.h"
#include "nsAttrValue.h" #include "nsAttrValue.h"
#include "nsComputedDOMStyle.h" #include "nsComputedDOMStyle.h"
@@ -299,6 +300,8 @@ nsresult nsLayoutStatics::Initialize() {
} }
#endif #endif
mozilla::intl::EncodingToLang::Initialize();
return NS_OK; return NS_OK;
} }
@@ -401,4 +404,6 @@ void nsLayoutStatics::Shutdown() {
RestoreTabContentObserver::Shutdown(); RestoreTabContentObserver::Shutdown();
mozilla::intl::LineBreakCache::Shutdown(); mozilla::intl::LineBreakCache::Shutdown();
mozilla::intl::EncodingToLang::Shutdown();
} }