Backed out changeset a59c7ce8c81e (bug 1957384) for causing reftest failures

This commit is contained in:
Cristina Horotan
2025-04-11 22:19:36 +03:00
parent 0ffb03c79d
commit f349b08b9a
3 changed files with 59 additions and 124 deletions

View File

@@ -1919,6 +1919,33 @@ nsIBidiKeyboard* nsContentUtils::GetBidiKeyboard() {
return sBidiKeyboard;
}
/**
 * Tests whether aChar falls in one of the Unicode general categories that
 * CSS counts as punctuation belonging to the first-letter. Every punctuation
 * category (P*) is accepted; CSS2, by contrast, left out Pc and Pd.
 *
 * https://www.w3.org/TR/css-pseudo-4/#first-letter-pseudo
 * "Punctuation (i.e, characters that belong to the Punctuation (P*) Unicode
 * general category [UAX44]) [...]"
 *
 * @param aChar the character value to classify.
 * @return true for Pc, Pd, Pe, Pf, Pi, Po and Ps; false for anything else.
 */
// static
bool nsContentUtils::IsFirstLetterPunctuation(uint32_t aChar) {
  const auto category = mozilla::unicode::GetGeneralCategory(aChar);
  return category == HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION ||  // Pc
         category == HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION ||     // Pd
         category == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION ||    // Pe
         category == HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION ||    // Pf
         category == HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION ||  // Pi
         category == HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION ||    // Po
         category == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;       // Ps
}
// static
bool nsContentUtils::IsAlphanumeric(uint32_t aChar) {
nsUGenCategory cat = mozilla::unicode::GetGenCategory(aChar);

View File

@@ -809,6 +809,11 @@ class nsContentUtils {
static const nsDependentSubstring TrimWhitespace(const nsAString& aStr,
bool aTrimTrailing = true);
/**
* Returns true if aChar is in any Unicode punctuation class (P*), i.e.
* Pc, Pd, Pe, Pf, Pi, Po, or Ps.
*/
static bool IsFirstLetterPunctuation(uint32_t aChar);
/**
* Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No
*/

View File

@@ -8402,61 +8402,15 @@ std::pair<int32_t, int32_t> nsTextFrame::GetOffsets() const {
return std::make_pair(GetContentOffset(), GetContentEnd());
}
// Returns true when aChar's Unicode general category is any punctuation
// category: Pc, Pd, Pe, Pf, Pi, Po or Ps. Every other category yields false.
static bool IsFirstLetterPrefixPunctuation(uint32_t aChar) {
  const auto category = mozilla::unicode::GetGeneralCategory(aChar);
  return category == HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION ||  // Pc
         category == HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION ||     // Pd
         category == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION ||    // Pe
         category == HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION ||    // Pf
         category == HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION ||  // Pi
         category == HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION ||    // Po
         category == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;       // Ps
}
// Returns true when aChar's Unicode general category is Pc, Pe, Pf, Pi or Po.
// Dash (Pd) and open (Ps) punctuation are deliberately absent from this list,
// so they yield false, as does every non-punctuation category.
static bool IsFirstLetterSuffixPunctuation(uint32_t aChar) {
  const auto category = mozilla::unicode::GetGeneralCategory(aChar);
  return category == HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION ||  // Pc
         category == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION ||    // Pe
         category == HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION ||    // Pf
         category == HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION ||  // Pi
         category == HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;      // Po
}
static int32_t FindEndOfPrefixPunctuationRun(const nsTextFragment* aFrag,
const gfxTextRun* aTextRun,
gfxSkipCharsIterator* aIter,
int32_t aOffset, int32_t aStart,
int32_t aEnd) {
static int32_t FindEndOfPunctuationRun(const nsTextFragment* aFrag,
const gfxTextRun* aTextRun,
gfxSkipCharsIterator* aIter,
int32_t aOffset, int32_t aStart,
int32_t aEnd) {
int32_t i;
for (i = aStart; i < aEnd - aOffset; ++i) {
if (IsFirstLetterPrefixPunctuation(
aFrag->ScalarValueAt(AssertedCast<uint32_t>(aOffset + i)))) {
aIter->SetOriginalOffset(aOffset + i);
FindClusterEnd(aTextRun, aEnd, aIter);
i = aIter->GetOriginalOffset() - aOffset;
} else {
break;
}
}
return i;
}
static int32_t FindEndOfSuffixPunctuationRun(const nsTextFragment* aFrag,
const gfxTextRun* aTextRun,
gfxSkipCharsIterator* aIter,
int32_t aOffset, int32_t aStart,
int32_t aEnd) {
int32_t i;
for (i = aStart; i < aEnd - aOffset; ++i) {
if (IsFirstLetterSuffixPunctuation(
if (nsContentUtils::IsFirstLetterPunctuation(
aFrag->ScalarValueAt(AssertedCast<uint32_t>(aOffset + i)))) {
aIter->SetOriginalOffset(aOffset + i);
FindClusterEnd(aTextRun, aEnd, aIter);
@@ -8486,6 +8440,7 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
const gfxTextRun* aTextRun, int32_t aOffset,
const gfxSkipCharsIterator& aIter,
int32_t* aLength) {
int32_t i;
int32_t length = *aLength;
int32_t endOffset = aOffset + length;
gfxSkipCharsIterator iter(aIter);
@@ -8509,39 +8464,25 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
return false;
};
// Skip any trimmable leading whitespace.
int32_t i = GetTrimmableWhitespaceCount(aFrag, aOffset, length, 1);
while (true) {
// Scan past any leading punctuation. This leaves `j` at the first
// non-punctuation character.
int32_t j = FindEndOfPrefixPunctuationRun(aFrag, aTextRun, &iter, aOffset,
i, endOffset);
if (j == length) {
return false;
}
// skip leading whitespace, then consume clusters that start with punctuation
i = FindEndOfPunctuationRun(
aFrag, aTextRun, &iter, aOffset,
GetTrimmableWhitespaceCount(aFrag, aOffset, length, 1), endOffset);
if (i == length) {
return false;
}
// Scan past any Unicode whitespace characters after punctuation.
while (j < length) {
char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + j));
// The spec says to allow "characters that belong to the `Zs` Unicode
// general category _other than_ U+3000" here.
if (unicode::GetGeneralCategory(ch) ==
HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR &&
ch != 0x3000) {
++j;
} else {
break;
}
}
if (j == length) {
return false;
}
if (j == i) {
// If no whitespace was found, we've finished the first-letter prefix;
// if there was some, then go back to check for more punctuation.
// skip space/no-break-space after punctuation
while (i < length) {
char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + i));
if (ch == ' ' || ch == CH_NBSP) {
++i;
} else {
break;
}
i = j;
}
if (i == length) {
return false;
}
// If the next character is not a letter, number or symbol, there is no
@@ -8554,7 +8495,7 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
return true;
}
// Consume another cluster (the actual first letter):
// consume another cluster (the actual first letter)
// For complex scripts such as Indic and SEAsian, where first-letter
// should extend to entire orthographic "syllable" clusters, we don't
@@ -8625,12 +8566,9 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
break;
}
// NOTE that FindClusterEnd sets the iterator to the last character that is
// part of the cluster, NOT to the first character beyond it.
iter.SetOriginalOffset(aOffset + i);
FindClusterEnd(aTextRun, endOffset, &iter, allowSplitLigature);
// Index of the last character included in the first-letter cluster.
i = iter.GetOriginalOffset() - aOffset;
// Heuristic for Indic scripts that like to form conjuncts:
@@ -8678,44 +8616,9 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
}
}
// When we reach here, `i` points to the last character of the first-letter
// cluster, NOT to the first character beyond it. Advance to the next char,
// ready to check for following whitespace/punctuation:
++i;
while (i < length) {
// Skip over whitespace, except for word separator characters, before the
// check for following punctuation. But remember the position before the
// whitespace, in case we need to reset.
const int32_t preWS = i;
while (i < length) {
char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + i));
// The spec says the first-letter suffix includes "any intervening
// typographic space -- characters belonging to the Zs Unicode general
// category other than U+3000 IDEOGRAPHIC SPACE or a word separator",
// where "word separator" includes U+0020 and U+00A0.
if (ch == 0x0020 || ch == 0x00A0 || ch == 0x3000 ||
unicode::GetGeneralCategory(ch) !=
HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR) {
break;
} else {
++i;
}
}
// Consume clusters that start with punctuation.
const int32_t prePunct = i;
i = FindEndOfSuffixPunctuationRun(aFrag, aTextRun, &iter, aOffset, i,
endOffset);
// If we didn't find punctuation here, then we also don't want to include
// any preceding whitespace, so reset our index.
if (i == prePunct) {
i = preWS;
break;
}
}
// consume clusters that start with punctuation
i = FindEndOfPunctuationRun(aFrag, aTextRun, &iter, aOffset, i + 1,
endOffset);
if (i < length) {
*aLength = i;
}