// Patch summary. (Text placed before the first "diff --git" header is not
// part of any hunk.)
//
// Purpose: replace the charset -> font-language lookup that went through
// nsLanguageAtomService::LookupCharSet() and the generated
// encodingsgroups.properties.h table with a static table,
// mozilla::intl::EncodingToLang (intl/locale/EncodingToLang.h and .cpp),
// whose rows come from intl/locale/EncodingsByFrequency.inc.
//
// What the hunks do:
//  - Document::RecomputeLanguageFromCharset() now just calls
//    EncodingToLang::Lookup(mCharacterSet). Document::mLanguageFromCharset
//    changes from a RefPtr to a raw nsAtom* and is initialized to nullptr in
//    the constructor; the .get() calls on it are dropped accordingly.
//  - EncodingToLang::Lookup() is a linear scan over the encodings listed in
//    EncodingsByFrequency.inc (UTF-8 first, windows-1252 second) and returns
//    the atom at the same index of sLangs.
//  - EncodingToLang::Initialize() stores
//    nsLanguageAtomService::GetLocaleLanguage() in sLangs[0], takes one
//    reference on it, and copies that pointer into every slot that is
//    nullptr in the .inc. EncodingToLang::Shutdown() releases sLangs[0].
//  - nsLayoutStatics::Initialize() / Shutdown() call the two functions above.
//  - Removed: nsLanguageAtomService::LookupCharSet(), the encodingsGroups
//    array, encodingsgroups.properties, props2arrays.py, and the moz.build
//    GeneratedFile rule that produced encodingsgroups.properties.h.
//
// NOTE(review): MACINTOSH_ENCODING maps to nsGkAtoms::x_western in the new
//   table, but "macintosh" was not listed in the removed
//   encodingsgroups.properties, so the removed code resolved it to the locale
//   language. Every other listed encoding appears to keep its old mapping.
//   Confirm this change is intended.
// NOTE(review): the locale language is sampled once in Initialize(). Whether
//   GetLocaleLanguage() can return a different atom later is not visible in
//   this patch; if it can, the cached slots would go stale. Confirm.
// NOTE(review): Lookup() returns sLangs[0] after MOZ_ASSERT(false, ...) when
//   the encoding is not found, so builds without assertions silently fall
//   back to the locale language.
// NOTE(review): EncodingToLang.h closes the namespace with "};" - the
//   semicolon is redundant.
// NOTE(review): this copy of the patch looks damaged in transit: line breaks
//   are collapsed and angle-bracketed template arguments appear to be missing
//   (e.g. "RefPtr lang", "NotNull aEncoding", "nsTHashSet> mLanguagesUsed").
//   Re-export the patch from version control before trying to apply it.
diff --git a/dom/base/Document.cpp b/dom/base/Document.cpp index 011a4ea07164..3eefcc592890 100644 --- a/dom/base/Document.cpp +++ b/dom/base/Document.cpp @@ -276,6 +276,7 @@ #include "mozilla/gfx/ScaleFactor.h" #include "mozilla/glean/DomMetrics.h" #include "mozilla/glean/DomUseCounterMetrics.h" +#include "mozilla/intl/EncodingToLang.h" #include "mozilla/intl/LocaleService.h" #include "mozilla/ipc/IdleSchedulerChild.h" #include "mozilla/ipc/MessageChannel.h" @@ -417,7 +418,6 @@ #include "nsIXULRuntime.h" #include "nsImageLoadingContent.h" #include "nsImportModule.h" -#include "nsLanguageAtomService.h" #include "nsLayoutUtils.h" #include "nsMimeTypes.h" #include "nsNetCID.h" @@ -1500,6 +1500,7 @@ Document::Document(const char* aContentType) mInteractiveWidgetMode( InteractiveWidgetUtils::DefaultInteractiveWidgetMode()), mHeaderData(nullptr), + mLanguageFromCharset(nullptr), mServoRestyleRootDirtyBits(0), mThrowOnDynamicMarkupInsertionCounter(0), mIgnoreOpensDuringUnloadCounter(0), @@ -19746,7 +19747,7 @@ nsAtom* Document::GetLanguageForStyle() const { if (nsAtom* lang = GetContentLanguageAsAtomForStyle()) { return lang; } - return mLanguageFromCharset.get(); + return mLanguageFromCharset; } void Document::GetContentLanguageForBindings(DOMString& aString) const { @@ -19755,7 +19756,7 @@ void Document::GetContentLanguageForBindings(DOMString& aString) const { const LangGroupFontPrefs* Document::GetFontPrefsForLang( nsAtom* aLanguage, bool* aNeedsToCache) const { - nsAtom* lang = aLanguage ? aLanguage : mLanguageFromCharset.get(); + nsAtom* lang = aLanguage ? aLanguage : mLanguageFromCharset; return StaticPresData::Get()->GetFontPrefsForLang(lang, aNeedsToCache); } @@ -19763,7 +19764,7 @@ void Document::DoCacheAllKnownLangPrefs() { MOZ_ASSERT(mMayNeedFontPrefsUpdate); RefPtr lang = GetLanguageForStyle(); StaticPresData* data = StaticPresData::Get(); - data->GetFontPrefsForLang(lang ? lang.get() : mLanguageFromCharset.get()); + data->GetFontPrefsForLang(lang ? 
lang.get() : mLanguageFromCharset); data->GetFontPrefsForLang(nsGkAtoms::x_math); // https://bugzilla.mozilla.org/show_bug.cgi?id=1362599#c12 data->GetFontPrefsForLang(nsGkAtoms::Unicode); @@ -19774,29 +19775,14 @@ void Document::DoCacheAllKnownLangPrefs() { } void Document::RecomputeLanguageFromCharset() { - RefPtr language; - // Optimize the default character sets. - if (mCharacterSet == WINDOWS_1252_ENCODING) { - language = nsGkAtoms::x_western; - } else { - nsLanguageAtomService* service = nsLanguageAtomService::GetService(); - if (mCharacterSet == UTF_8_ENCODING) { - language = nsGkAtoms::Unicode; - } else { - language = service->LookupCharSet(mCharacterSet); - } - - if (language == nsGkAtoms::Unicode) { - language = service->GetLocaleLanguage(); - } - } + nsAtom* language = mozilla::intl::EncodingToLang::Lookup(mCharacterSet); if (language == mLanguageFromCharset) { return; } mMayNeedFontPrefsUpdate = true; - mLanguageFromCharset = std::move(language); + mLanguageFromCharset = language; } nsICookieJarSettings* Document::CookieJarSettings() { diff --git a/dom/base/Document.h b/dom/base/Document.h index f9f8089ad484..32c23b19f333 100644 --- a/dom/base/Document.h +++ b/dom/base/Document.h @@ -5489,7 +5489,9 @@ class Document : public nsINode, nsTHashSet> mLanguagesUsed; // TODO(emilio): Is this hot enough to warrant to be cached? - RefPtr mLanguageFromCharset; + // EncodingToLang.cpp keeps the atom alive until shutdown, so + // no need for a RefPtr. + nsAtom* mLanguageFromCharset; // Restyle root for servo's style system. // diff --git a/intl/locale/EncodingToLang.cpp b/intl/locale/EncodingToLang.cpp new file mode 100644 index 000000000000..712eb454c4a7 --- /dev/null +++ b/intl/locale/EncodingToLang.cpp @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/intl/EncodingToLang.h" +#include "nsGkAtoms.h" +#include "nsLanguageAtomService.h" + +using namespace mozilla; +using namespace mozilla::intl; + +// Parallel arrays of Encoding and corresponding Lang atoms, +// in rough order of frequency. + +// Unfortunately, the `mozilla::NotNull` hack that was used to +// declare the encoding pointers in C++ does not allow putting +// the pointers in a static array without a run-time initializer, +// so our options are: +// 1. Putting the pointers in a static array in Rust, at a distance. +// 2. Run-time initializer. +// 3. Using pointer pointers, as seen here. +const mozilla::NotNull * + EncodingToLang::kEncodingsByRoughFrequency[] = { +#define _(encoding, lang) &encoding, +#include "EncodingsByFrequency.inc" +#undef _ +}; + +// This one isn't constant, as it gets adjusted during Initialize(). +// static +nsAtom* EncodingToLang::sLangs[] = { +#define _(encoding, lang) lang, +#include "EncodingsByFrequency.inc" +#undef _ +}; + +// static +nsAtom* EncodingToLang::Lookup(NotNull aEncoding) { + // Linear search should be fine, since in the vast, vast majority of cases, + // the search stops at the first or second item. + unsigned int i = 0; + for (; i < std::size(kEncodingsByRoughFrequency); i++) { + if (*kEncodingsByRoughFrequency[i] == aEncoding) { + return sLangs[i]; + } + } + MOZ_ASSERT(false, "The encoding is always supposed to be found in the array"); + return sLangs[0]; +} + +// static +void EncodingToLang::Initialize() { + sLangs[0] = nsLanguageAtomService::GetService()->GetLocaleLanguage(); + // We logically hold a strong ref to the first occurrence + // and a non-owning pointer to the rest. 
+ NS_ADDREF(sLangs[0]); + for (size_t i = 1; i < std::size(sLangs); ++i) { + if (!sLangs[i]) { + sLangs[i] = sLangs[0]; + } + } +} + +// static +void EncodingToLang::Shutdown() { NS_RELEASE(sLangs[0]); } diff --git a/intl/locale/EncodingToLang.h b/intl/locale/EncodingToLang.h new file mode 100644 index 000000000000..f54fa7937489 --- /dev/null +++ b/intl/locale/EncodingToLang.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_intl_EncodingToLang_h +#define mozilla_intl_EncodingToLang_h + +#include "nsAtom.h" +#include "mozilla/Encoding.h" + +namespace mozilla::intl { + +class EncodingToLang { + public: + // Call once from nsLayoutStatics::Initialize() + static void Initialize(); + // Call once from nsLayoutStatics::Shutdown() + static void Shutdown(); + + // Looks up a font matching language atom by encoding. + // The atom will be kept alive until nsLayoutStatics::Shutdown(), + // which is why it's a raw pointer. + static nsAtom* Lookup(mozilla::NotNull aEncoding); + + private: + static nsAtom* sLangs[]; + static const mozilla::NotNull * + kEncodingsByRoughFrequency[]; +}; + +}; // namespace mozilla::intl + +#endif // mozilla_intl_EncodingToLang_h diff --git a/intl/locale/EncodingsByFrequency.inc b/intl/locale/EncodingsByFrequency.inc new file mode 100644 index 000000000000..6a65dd39a8ee --- /dev/null +++ b/intl/locale/EncodingsByFrequency.inc @@ -0,0 +1,50 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// List of encodings and their corresponding Lang atoms for font selection. 
+// nullptr indicates that the browser UI locale language will be used. + +// These are in roughly-estimated frequency order sometimes by group. +// The main thing is that UTF-8 is first and windows-1252 is second, +// followed by other encodings that were a legacy default somewhere. + _(UTF_8_ENCODING, nullptr) + _(WINDOWS_1252_ENCODING, nsGkAtoms::x_western) + _(GBK_ENCODING, nsGkAtoms::zh_cn) + _(SHIFT_JIS_ENCODING, nsGkAtoms::Japanese) + _(BIG5_ENCODING, nsGkAtoms::zh_tw) + _(EUC_KR_ENCODING, nsGkAtoms::ko) + _(WINDOWS_1250_ENCODING, nsGkAtoms::x_western) + _(WINDOWS_1251_ENCODING, nsGkAtoms::x_cyrillic) + _(WINDOWS_1253_ENCODING, nsGkAtoms::el) + _(WINDOWS_1254_ENCODING, nsGkAtoms::x_western) + _(WINDOWS_1255_ENCODING, nsGkAtoms::he) + _(WINDOWS_1256_ENCODING, nsGkAtoms::ar) + _(WINDOWS_1257_ENCODING, nsGkAtoms::x_western) + _(WINDOWS_1258_ENCODING, nsGkAtoms::x_western) + _(WINDOWS_874_ENCODING, nsGkAtoms::th) + _(ISO_8859_2_ENCODING, nsGkAtoms::x_western) + _(EUC_JP_ENCODING, nsGkAtoms::Japanese) + _(GB18030_ENCODING, nsGkAtoms::zh_cn) + _(UTF_16BE_ENCODING, nullptr) + _(UTF_16LE_ENCODING, nullptr) + _(ISO_2022_JP_ENCODING, nsGkAtoms::Japanese) + _(ISO_8859_3_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_4_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_5_ENCODING, nsGkAtoms::x_cyrillic) + _(ISO_8859_6_ENCODING, nsGkAtoms::ar) + _(ISO_8859_7_ENCODING, nsGkAtoms::el) + _(ISO_8859_8_ENCODING, nsGkAtoms::he) + _(ISO_8859_8_I_ENCODING, nsGkAtoms::he) + _(ISO_8859_10_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_13_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_14_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_15_ENCODING, nsGkAtoms::x_western) + _(ISO_8859_16_ENCODING, nsGkAtoms::x_western) + _(KOI8_R_ENCODING, nsGkAtoms::x_cyrillic) + _(KOI8_U_ENCODING, nsGkAtoms::x_cyrillic) + _(MACINTOSH_ENCODING, nsGkAtoms::x_western) + _(REPLACEMENT_ENCODING, nullptr) + _(IBM866_ENCODING, nsGkAtoms::x_cyrillic) + _(X_MAC_CYRILLIC_ENCODING, nsGkAtoms::x_cyrillic) + 
_(X_USER_DEFINED_ENCODING, nullptr) diff --git a/intl/locale/encodingsgroups.properties b/intl/locale/encodingsgroups.properties deleted file mode 100644 index f631bfa64f5c..000000000000 --- a/intl/locale/encodingsgroups.properties +++ /dev/null @@ -1,40 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# x-unicode is assumed for encodings not listed here - -Big5=zh-TW -EUC-JP=ja -EUC-KR=ko -gb18030=zh-CN -GBK=zh-CN -IBM866=x-cyrillic -ISO-2022-JP=ja -ISO-8859-3=x-western -ISO-8859-4=x-western -ISO-8859-5=x-cyrillic -ISO-8859-6=ar -ISO-8859-7=el -ISO-8859-8=he -ISO-8859-8-I=he -ISO-8859-10=x-western -ISO-8859-13=x-western -ISO-8859-14=x-western -ISO-8859-15=x-western -ISO-8859-16=x-western -ISO-8859-2=x-western -KOI8-R=x-cyrillic -KOI8-U=x-cyrillic -Shift_JIS=ja -windows-1250=x-western -windows-1251=x-cyrillic -windows-1252=x-western -windows-1253=el -windows-1254=x-western -windows-1255=he -windows-1256=ar -windows-1257=x-western -windows-1258=x-western -windows-874=th -x-mac-cyrillic=x-cyrillic diff --git a/intl/locale/moz.build b/intl/locale/moz.build index 10e7c5cfb172..366a0f327ee6 100644 --- a/intl/locale/moz.build +++ b/intl/locale/moz.build @@ -35,6 +35,7 @@ EXPORTS += [ EXPORTS.mozilla.intl += [ "AppDateTimeFormat.h", + "EncodingToLang.h", "LocaleService.h", "MozLocaleBindings.h", "OSPreferences.h", @@ -43,6 +44,7 @@ EXPORTS.mozilla.intl += [ UNIFIED_SOURCES += [ "AppDateTimeFormat.cpp", + "EncodingToLang.cpp", "LocaleService.cpp", "nsLanguageAtomService.cpp", "nsUConvPropertySearch.cpp", @@ -68,13 +70,6 @@ RESOURCE_FILES += [ "language.properties", ] -prefixes = ("encodingsgroups",) - -for prefix in prefixes: - input_file = prefix + ".properties" - header = prefix + ".properties.h" - GeneratedFile(header, script="props2arrays.py", inputs=[input_file]) - if CONFIG["ENABLE_TESTS"]: DIRS += 
["tests/gtest"] diff --git a/intl/locale/nsLanguageAtomService.cpp b/intl/locale/nsLanguageAtomService.cpp index 7f62f86afb65..754624afd2bd 100644 --- a/intl/locale/nsLanguageAtomService.cpp +++ b/intl/locale/nsLanguageAtomService.cpp @@ -20,10 +20,6 @@ using namespace mozilla; using mozilla::intl::OSPreferences; -static constexpr nsUConvProp encodingsGroups[] = { -#include "encodingsgroups.properties.h" -}; - // List of mozilla internal x-* tags that map to themselves (see bug 256257) static constexpr nsStaticAtom* kLangGroups[] = { // This list must be sorted! @@ -110,18 +106,6 @@ nsStaticAtom* nsLanguageAtomService::LookupLanguage( return GetLanguageGroup(lang); } -already_AddRefed nsLanguageAtomService::LookupCharSet( - NotNull aEncoding) { - nsAutoCString charset; - aEncoding->Name(charset); - nsAutoCString group; - if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( - encodingsGroups, std::size(encodingsGroups), charset, group))) { - return RefPtr(nsGkAtoms::Unicode).forget(); - } - return NS_Atomize(group); -} - nsAtom* nsLanguageAtomService::GetLocaleLanguage() { { AutoReadLock lock(mLock); diff --git a/intl/locale/nsLanguageAtomService.h b/intl/locale/nsLanguageAtomService.h index de0961233d05..3550c32a18d7 100644 --- a/intl/locale/nsLanguageAtomService.h +++ b/intl/locale/nsLanguageAtomService.h @@ -33,7 +33,6 @@ class nsLanguageAtomService final { static void Shutdown(); nsStaticAtom* LookupLanguage(const nsACString& aLanguage); - already_AddRefed LookupCharSet(NotNull aCharSet); nsAtom* GetLocaleLanguage(); // Returns the language group that the specified language is a part of, diff --git a/intl/locale/props2arrays.py b/intl/locale/props2arrays.py deleted file mode 100644 index 3a31fdb80b33..000000000000 --- a/intl/locale/props2arrays.py +++ /dev/null @@ -1,26 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -def main(header, propFile): - mappings = {} - - with open(propFile) as f: - for line in f: - line = line.strip() - if not line.startswith("#"): - parts = line.split("=", 1) - if len(parts) == 2 and len(parts[0]) > 0: - mappings[parts[0].strip()] = parts[1].strip() - - keys = mappings.keys() - - header.write("// This is a generated file. Please do not edit.\n") - header.write("// Please edit the corresponding .properties file instead.\n") - - entries = [ - '{ "%s", "%s", %d }' % (key, mappings[key], len(mappings[key])) - for key in sorted(keys) - ] - header.write(",\n".join(entries) + "\n") diff --git a/layout/build/nsLayoutStatics.cpp b/layout/build/nsLayoutStatics.cpp index d7ae81e06475..44d22e090185 100644 --- a/layout/build/nsLayoutStatics.cpp +++ b/layout/build/nsLayoutStatics.cpp @@ -10,6 +10,7 @@ #include "nscore.h" #include "mozilla/intl/AppDateTimeFormat.h" +#include "mozilla/intl/EncodingToLang.h" #include "mozilla/dom/ServiceWorkerRegistrar.h" #include "nsAttrValue.h" #include "nsComputedDOMStyle.h" @@ -299,6 +300,8 @@ nsresult nsLayoutStatics::Initialize() { } #endif + mozilla::intl::EncodingToLang::Initialize(); + return NS_OK; } @@ -401,4 +404,6 @@ void nsLayoutStatics::Shutdown() { RestoreTabContentObserver::Shutdown(); mozilla::intl::LineBreakCache::Shutdown(); + + mozilla::intl::EncodingToLang::Shutdown(); }