Patch summary (free text ahead of the first file header; patch tools skip it).

  * test_providerUnifiedComplete.js: in test_diacritics, the bookmark URL
    gains "%CC%83" (the UTF-8 encoding of U+0303 COMBINING TILDE) after "g".
  * nsUnicharUtils.cpp: adds a 4-argument GetLowerUTF8Codepoint_inline
    overload. When aMatchDiacritics is false it keeps decoding until it gets
    a code point for which mozilla::unicode::IsCombiningDiacritic() is false;
    when true it returns the first decoded code point.
    CaseInsensitiveUTF8CharsEqual now uses this overload for both operands.
  * nsUnicodeProperties.cpp: GetNaked asserts that its argument is not a
    combining diacritic, and the early-out for characters in the U_GC_M_MASK
    general categories is removed.

NOTE(review): the new loop sets aStr = *aNext and decodes again without
  comparing aStr to aEnd first. Confirm the 3-argument overload tolerates
  aStr == aEnd (for example, input that ends in a combining diacritic).
NOTE(review): the context lines "static HashMap nakedCharCache;" and
  "HashMap::Ptr entry" look like they lost their template arguments.
  Confirm against the target file before applying.
NOTE(review): indentation and blank context lines were reconstructed from
  the hunk line counts. Apply with whitespace-tolerant matching if the
  context does not match exactly.

diff --git a/browser/components/urlbar/tests/unit/test_providerUnifiedComplete.js b/browser/components/urlbar/tests/unit/test_providerUnifiedComplete.js
index 2789e8d7c323..078539b4329b 100644
--- a/browser/components/urlbar/tests/unit/test_providerUnifiedComplete.js
+++ b/browser/components/urlbar/tests/unit/test_providerUnifiedComplete.js
@@ -212,7 +212,7 @@ add_task(async function test_diacritics() {
   let context = createContext(searchString, { isPrivate: false });
 
   await PlacesUtils.bookmarks.insert({
-    url: "https://bookmark.mozilla.org/%C3%A3gu%C4%A9",
+    url: "https://bookmark.mozilla.org/%C3%A3g%CC%83u%C4%A9",
     title: "Test bookmark with accents in path",
     parentGuid: PlacesUtils.bookmarks.unfiledGuid,
   });
diff --git a/intl/unicharutil/util/nsUnicharUtils.cpp b/intl/unicharutil/util/nsUnicharUtils.cpp
index 9cc579c4ecbf..edc3f9c01bb5 100644
--- a/intl/unicharutil/util/nsUnicharUtils.cpp
+++ b/intl/unicharutil/util/nsUnicharUtils.cpp
@@ -434,6 +434,23 @@ int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
   return 0;
 }
 
+static MOZ_ALWAYS_INLINE uint32_t
+GetLowerUTF8Codepoint_inline(const char* aStr, const char* aEnd,
+                             const char** aNext, bool aMatchDiacritics) {
+  uint32_t c;
+  for (;;) {
+    c = GetLowerUTF8Codepoint_inline(aStr, aEnd, aNext);
+    if (aMatchDiacritics) {
+      break;
+    }
+    if (!mozilla::unicode::IsCombiningDiacritic(c)) {
+      break;
+    }
+    aStr = *aNext;
+  }
+  return c;
+}
+
 bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                                    const char* aLeftEnd, const char* aRightEnd,
                                    const char** aLeftNext,
@@ -445,14 +462,15 @@ bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
   NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd.");
   NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd.");
 
-  uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext);
+  uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext,
+                                                   aMatchDiacritics);
   if (MOZ_UNLIKELY(leftChar == uint32_t(-1))) {
     *aErr = true;
     return false;
   }
 
-  uint32_t rightChar =
-      GetLowerUTF8Codepoint_inline(aRight, aRightEnd, aRightNext);
+  uint32_t rightChar = GetLowerUTF8Codepoint_inline(
+      aRight, aRightEnd, aRightNext, aMatchDiacritics);
   if (MOZ_UNLIKELY(rightChar == uint32_t(-1))) {
     *aErr = true;
     return false;
diff --git a/intl/unicharutil/util/nsUnicodeProperties.cpp b/intl/unicharutil/util/nsUnicodeProperties.cpp
index 9003f19cd71d..492b5ea009a7 100644
--- a/intl/unicharutil/util/nsUnicodeProperties.cpp
+++ b/intl/unicharutil/util/nsUnicodeProperties.cpp
@@ -315,6 +315,9 @@ uint32_t GetNaked(uint32_t aCh) {
   static const UNormalizer2* normalizer;
   static HashMap nakedCharCache;
 
+  NS_ASSERTION(!IsCombiningDiacritic(aCh),
+               "This character needs to be skipped");
+
   HashMap::Ptr entry = nakedCharCache.lookup(aCh);
   if (entry.found()) {
     return entry->value();
@@ -340,13 +343,6 @@ uint32_t GetNaked(uint32_t aCh) {
     return aCh;
   }
 
-  if (u_getIntPropertyValue(aCh, UCHAR_GENERAL_CATEGORY) & U_GC_M_MASK) {
-    // The character is itself a combining character, and we don't want to use
-    // its decomposition into multiple combining characters.
-    baseChar = aCh;
-    goto cache;
-  }
-
   if (NS_IS_HIGH_SURROGATE(decomposition[0])) {
     baseChar = SURROGATE_TO_UCS4(decomposition[0], decomposition[1]);
     combiners = decomposition + 2;