Backed out changeset a59c7ce8c81e (bug 1957384) for causing reftest failures

2025-04-11 22:19:36 +03:00
parent 0ffb03c79d
commit f349b08b9a
3 changed files with 59 additions and 124 deletions
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@@ -1919,6 +1919,33 @@ nsIBidiKeyboard* nsContentUtils::GetBidiKeyboard() {
  return sBidiKeyboard;
 }

+/**
+ * This is used to determine whether a character is in one of the classes
+ * which CSS says should be part of the first-letter.  Currently, that is
+ * all punctuation classes (P*).  Note that this is a change from CSS2
+ * which excluded Pc and Pd.
+ *
+ * https://www.w3.org/TR/css-pseudo-4/#first-letter-pseudo
+ * "Punctuation (i.e, characters that belong to the Punctuation (P*) Unicode
+ *  general category [UAX44]) [...]"
+ */
+
+// static
+bool nsContentUtils::IsFirstLetterPunctuation(uint32_t aChar) {
+  switch (mozilla::unicode::GetGeneralCategory(aChar)) {
+    case HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: /* Pc */
+    case HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION:    /* Pd */
+    case HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION:   /* Pe */
+    case HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION:   /* Pf */
+    case HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: /* Pi */
+    case HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION:   /* Po */
+    case HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION:    /* Ps */
+      return true;
+    default:
+      return false;
+  }
+}
+
 // static
 bool nsContentUtils::IsAlphanumeric(uint32_t aChar) {
  nsUGenCategory cat = mozilla::unicode::GetGenCategory(aChar);
--- a/dom/base/nsContentUtils.h
+++ b/dom/base/nsContentUtils.h
@@ -809,6 +809,11 @@ class nsContentUtils {
  static const nsDependentSubstring TrimWhitespace(const nsAString& aStr,
                                                   bool aTrimTrailing = true);

+  /**
+   * Returns true if aChar is of class Ps, Pi, Po, Pf, or Pe.
+   */
+  static bool IsFirstLetterPunctuation(uint32_t aChar);
+
  /**
   * Returns true if aChar is of class Lu, Ll, Lt, Lm, Lo, Nd, Nl or No
   */
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -8402,61 +8402,15 @@ std::pair<int32_t, int32_t> nsTextFrame::GetOffsets() const {
  return std::make_pair(GetContentOffset(), GetContentEnd());
 }

-static bool IsFirstLetterPrefixPunctuation(uint32_t aChar) {
-  switch (mozilla::unicode::GetGeneralCategory(aChar)) {
-    case HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: /* Pc */
-    case HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION:    /* Pd */
-    case HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION:   /* Pe */
-    case HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION:   /* Pf */
-    case HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: /* Pi */
-    case HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION:   /* Po */
-    case HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION:    /* Ps */
-      return true;
-    default:
-      return false;
-  }
-}
-
-static bool IsFirstLetterSuffixPunctuation(uint32_t aChar) {
-  switch (mozilla::unicode::GetGeneralCategory(aChar)) {
-    case HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: /* Pc */
-    case HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION:   /* Pe */
-    case HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION:   /* Pf */
-    case HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: /* Pi */
-    case HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION:   /* Po */
-      return true;
-    default:
-      return false;
-  }
-}
-
-static int32_t FindEndOfPrefixPunctuationRun(const nsTextFragment* aFrag,
-                                             const gfxTextRun* aTextRun,
-                                             gfxSkipCharsIterator* aIter,
-                                             int32_t aOffset, int32_t aStart,
-                                             int32_t aEnd) {
+static int32_t FindEndOfPunctuationRun(const nsTextFragment* aFrag,
+                                       const gfxTextRun* aTextRun,
+                                       gfxSkipCharsIterator* aIter,
+                                       int32_t aOffset, int32_t aStart,
+                                       int32_t aEnd) {
  int32_t i;
-  for (i = aStart; i < aEnd - aOffset; ++i) {
-    if (IsFirstLetterPrefixPunctuation(
-            aFrag->ScalarValueAt(AssertedCast<uint32_t>(aOffset + i)))) {
-      aIter->SetOriginalOffset(aOffset + i);
-      FindClusterEnd(aTextRun, aEnd, aIter);
-      i = aIter->GetOriginalOffset() - aOffset;
-    } else {
-      break;
-    }
-  }
-  return i;
-}

-static int32_t FindEndOfSuffixPunctuationRun(const nsTextFragment* aFrag,
-                                             const gfxTextRun* aTextRun,
-                                             gfxSkipCharsIterator* aIter,
-                                             int32_t aOffset, int32_t aStart,
-                                             int32_t aEnd) {
-  int32_t i;
  for (i = aStart; i < aEnd - aOffset; ++i) {
-    if (IsFirstLetterSuffixPunctuation(
+    if (nsContentUtils::IsFirstLetterPunctuation(
            aFrag->ScalarValueAt(AssertedCast<uint32_t>(aOffset + i)))) {
      aIter->SetOriginalOffset(aOffset + i);
      FindClusterEnd(aTextRun, aEnd, aIter);
@@ -8486,6 +8440,7 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
                                 const gfxTextRun* aTextRun, int32_t aOffset,
                                 const gfxSkipCharsIterator& aIter,
                                 int32_t* aLength) {
+  int32_t i;
  int32_t length = *aLength;
  int32_t endOffset = aOffset + length;
  gfxSkipCharsIterator iter(aIter);
@@ -8509,39 +8464,25 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
    return false;
  };

-  // Skip any trimmable leading whitespace.
-  int32_t i = GetTrimmableWhitespaceCount(aFrag, aOffset, length, 1);
-  while (true) {
-    // Scan past any leading punctuation. This leaves `j` at the first
-    // non-punctuation character.
-    int32_t j = FindEndOfPrefixPunctuationRun(aFrag, aTextRun, &iter, aOffset,
-                                              i, endOffset);
-    if (j == length) {
-      return false;
-    }
+  // skip leading whitespace, then consume clusters that start with punctuation
+  i = FindEndOfPunctuationRun(
+      aFrag, aTextRun, &iter, aOffset,
+      GetTrimmableWhitespaceCount(aFrag, aOffset, length, 1), endOffset);
+  if (i == length) {
+    return false;
+  }

-    // Scan past any Unicode whitespace characters after punctuation.
-    while (j < length) {
-      char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + j));
-      // The spec says to allow "characters that belong to the `Zs` Unicode
-      // general category _other than_ U+3000" here.
-      if (unicode::GetGeneralCategory(ch) ==
-              HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR &&
-          ch != 0x3000) {
-        ++j;
-      } else {
-        break;
-      }
-    }
-    if (j == length) {
-      return false;
-    }
-    if (j == i) {
-      // If no whitespace was found, we've finished the first-letter prefix;
-      // if there was some, then go back to check for more punctuation.
+  // skip space/no-break-space after punctuation
+  while (i < length) {
+    char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + i));
+    if (ch == ' ' || ch == CH_NBSP) {
+      ++i;
+    } else {
      break;
    }
-    i = j;
+  }
+  if (i == length) {
+    return false;
  }

  // If the next character is not a letter, number or symbol, there is no
@@ -8554,7 +8495,7 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
    return true;
  }

-  // Consume another cluster (the actual first letter):
+  // consume another cluster (the actual first letter)

  // For complex scripts such as Indic and SEAsian, where first-letter
  // should extend to entire orthographic "syllable" clusters, we don't
@@ -8625,12 +8566,9 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
      break;
  }

-  // NOTE that FindClusterEnd sets the iterator to the last character that is
-  // part of the cluster, NOT to the first character beyond it.
  iter.SetOriginalOffset(aOffset + i);
  FindClusterEnd(aTextRun, endOffset, &iter, allowSplitLigature);

-  // Index of the last character included in the first-letter cluster.
  i = iter.GetOriginalOffset() - aOffset;

  // Heuristic for Indic scripts that like to form conjuncts:
@@ -8678,44 +8616,9 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
    }
  }

-  // When we reach here, `i` points to the last character of the first-letter
-  // cluster, NOT to the first character beyond it. Advance to the next char,
-  // ready to check for following whitespace/punctuation:
-  ++i;
-
-  while (i < length) {
-    // Skip over whitespace, except for word separator characters, before the
-    // check for following punctuation. But remember the position before the
-    // whitespace, in case we need to reset.
-    const int32_t preWS = i;
-    while (i < length) {
-      char16_t ch = aFrag->CharAt(AssertedCast<uint32_t>(aOffset + i));
-      // The spec says the first-letter suffix includes "any intervening
-      // typographic space -- characters belonging to the Zs Unicode general
-      // category other than U+3000 IDEOGRAPHIC SPACE or a word separator",
-      // where "word separator" includes U+0020 and U+00A0.
-      if (ch == 0x0020 || ch == 0x00A0 || ch == 0x3000 ||
-          unicode::GetGeneralCategory(ch) !=
-              HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR) {
-        break;
-      } else {
-        ++i;
-      }
-    }
-
-    // Consume clusters that start with punctuation.
-    const int32_t prePunct = i;
-    i = FindEndOfSuffixPunctuationRun(aFrag, aTextRun, &iter, aOffset, i,
-                                      endOffset);
-
-    // If we didn't find punctuation here, then we also don't want to include
-    // any preceding whitespace, so reset our index.
-    if (i == prePunct) {
-      i = preWS;
-      break;
-    }
-  }
-
+  // consume clusters that start with punctuation
+  i = FindEndOfPunctuationRun(aFrag, aTextRun, &iter, aOffset, i + 1,
+                              endOffset);
  if (i < length) {
    *aLength = i;
  }