Bug 1521723 - Apply hyphenate-limit-chars settings to the potential breaks found by the hyphenator. r=layout-reviewers,emilio

Differential Revision: https://phabricator.services.mozilla.com/D234960
2025-01-29 17:01:54 +00:00
parent 143c12f00d
commit 922837f10b
3 changed files with 135 additions and 22 deletions
--- a/dom/base/nsLineBreaker.cpp
+++ b/dom/base/nsLineBreaker.cpp
@@ -64,15 +64,7 @@ static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
 }
 nsLineBreaker::nsLineBreaker()
-    : mCurrentWordLanguage(nullptr),
+    : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
      mCurrentWordContainsMixedLang(false),
      mScriptIsChineseOrJapanese(false),
      mAfterBreakableSpace(false),
      mBreakHere(false),
      mWordBreak(WordBreakRule::Normal),
      mLineBreak(LineBreakRule::Auto),
      mWordContinuation(false),
      mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
 nsLineBreaker::~nsLineBreaker() {
  NS_ASSERTION(mCurrentWord.Length() == 0,
@@ -422,14 +414,88 @@ void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
                                          const char16_t* aTextStart,
                                          const char16_t* aTextLimit,
                                          uint8_t* aBreakState) {
  // Flags auto-hyphenation opportunities for the word [aTextStart, aTextLimit)
  // in aBreakState, subject to the mHyphenateLimit{Word,Start,End} limits.
  // aBreakState is indexed by UTF-16 code-unit offset from aTextStart.

  // Early-return for words that are definitely too short to hyphenate.
  if (aTextLimit - aTextStart < mHyphenateLimitWord) {
    return;
  }
  nsDependentSubstring string(aTextStart, aTextLimit);
  AutoTArray<bool, 200> hyphens;
-  if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
+  if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
-    for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
+    return;
      if (hyphens[i]) {
        aBreakState[i + 1] =
            gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
  }
  // Keep track of the length seen so far, in terms of characters that are
  // countable for hyphenate-limit-chars purposes.
  uint32_t length = 0;
  // Each entry pairs the aBreakState index of a hyphen break set below with
  // the countable length of the word up to that break, so that breaks can be
  // revoked later once the total countable length is known.
  AutoTArray<std::pair<uint32_t, uint32_t>, 16> positionAndLength;
  for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
    // Get current character, converting surrogate pairs to UCS4 for char
    // category lookup.
    uint32_t ch = string[i];
    if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
        NS_IS_LOW_SURROGATE(string[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
    }
    // According to CSS Text, "Nonspacing combining marks (Unicode General
    // Category Mn) and intra-word punctuation (Unicode General Category P*)
    // do not count towards the minimum."
    // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
    // We also don't count Control or Format categories.
    using intl::GeneralCategory;
    switch (UnicodeProperties::CharType(ch)) {
      case GeneralCategory::Nonspacing_Mark:
      case GeneralCategory::Dash_Punctuation:
      case GeneralCategory::Open_Punctuation:
      case GeneralCategory::Close_Punctuation:
      case GeneralCategory::Connector_Punctuation:
      case GeneralCategory::Other_Punctuation:
      case GeneralCategory::Initial_Punctuation:
      case GeneralCategory::Final_Punctuation:
      case GeneralCategory::Control:
      case GeneralCategory::Format:
      case GeneralCategory::Surrogate:
        break;
      default:
        ++length;
        break;
    }
    // Don't accept any breaks until we're far enough into the word.
    // (hyphens[i] is consulted for the code unit at index i, and the resulting
    // break is recorded at aBreakState[i + 1].)
    if (length >= mHyphenateLimitStart && hyphens[i]) {
      MOZ_ASSERT(aBreakState[i + 1] ==
                     gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
      aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
      // Keep track of hyphen position and "countable" length of the word.
      positionAndLength.AppendElement(
          std::pair<uint32_t, uint32_t>(i + 1, length));
    }
    // If the character was outside the BMP, skip past the low surrogate.
    if (!IS_IN_BMP(ch)) {
      ++i;
    }
  }
  // NOTE(review): this increment is unconditional. If the word ends with a
  // surrogate pair, the loop above already processed that character at its
  // high surrogate (and skipped the low surrogate), so it looks like it gets
  // counted a second time here. The final character also bypasses the General
  // Category filter applied inside the loop. TODO confirm this is intended.
  ++length;  // Account for the last character (not counted by the loop above).
  if (length < mHyphenateLimitWord) {
    // After discounting combining marks, punctuation, controls, etc., the word
    // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
    // forget them.
    while (!positionAndLength.IsEmpty()) {
      auto [lastPos, lastLen] = positionAndLength.PopLastElement();
      aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
  } else {
    // Check if trailing fragment is too short; if so, remove the last hyphen
    // break(s) that we set, until the fragment will be long enough.
    while (!positionAndLength.IsEmpty()) {
      auto [lastPos, lastLen] = positionAndLength.PopLastElement();
      // length - lastLen is the countable length that would follow this break.
      if (length - lastLen >= mHyphenateLimitEnd) {
        break;
      }
      aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
  }
 }
--- a/dom/base/nsLineBreaker.h
+++ b/dom/base/nsLineBreaker.h
@@ -217,6 +217,16 @@ class nsLineBreaker {
    mWordContinuation = aContinuation;
  }
  /**
   * Record the limits taken from the CSS `hyphenate-limit-chars` property.
   * Each argument is capped at 255 before it is stored.
   *
   * @param aWordLength  minimum word length for auto-hyphenation
   * @param aStartLength minimum number of characters before a hyphen break
   * @param aEndLength   minimum number of characters after a hyphen break
   */
  void SetHyphenateLimitChars(uint32_t aWordLength, uint32_t aStartLength,
                              uint32_t aEndLength) {
    // Cap a requested limit at the largest value the stored members accept.
    const auto capAt255 = [](uint32_t aRequested) -> uint32_t {
      return aRequested > 255u ? 255u : aRequested;
    };
    mHyphenateLimitWord = capAt255(aWordLength);
    mHyphenateLimitStart = capAt255(aStartLength);
    mHyphenateLimitEnd = capAt255(aEndLength);
  }
 private:
  // This is a list of text sources that make up the "current word" (i.e.,
  // run of text which does not contain any whitespace). All the mLengths
@@ -262,25 +272,38 @@ class nsLineBreaker {
  AutoTArray<char16_t, 100> mCurrentWord;
  // All the items that contribute to mCurrentWord
  AutoTArray<TextItem, 2> mTextItems;
-  nsAtom* mCurrentWordLanguage;
+  nsAtom* mCurrentWordLanguage = nullptr;
-  bool mCurrentWordContainsMixedLang;
+
  // Constraints from CSS `hyphenate-limit-chars` property, to block the use of
  // auto-hyphenation if the word is too short, or at positions too near the
  // beginning/end of the word.
  // (Note that per CSS Text spec, these counts ignore combining marks, etc.,
  // so they are not purely codepoint or character counts.)
  // (Zero values would have no effect; but text-frame code will update the
  // values from CSS before calling the line-breaker.)
  uint8_t mHyphenateLimitWord = 0;   // Min word length to auto-hyphenate
  uint8_t mHyphenateLimitStart = 0;  // Min number of chars before the break
  uint8_t mHyphenateLimitEnd = 0;    // Min number of chars after the break
  bool mCurrentWordContainsMixedLang = false;
  bool mCurrentWordMightBeBreakable = false;
-  bool mScriptIsChineseOrJapanese;
+  bool mScriptIsChineseOrJapanese = false;
  // True if the previous character was breakable whitespace
-  bool mAfterBreakableSpace;
+  bool mAfterBreakableSpace = false;
  // True if a break must be allowed at the current position because
  // a run of breakable whitespace ends here
-  bool mBreakHere;
+  bool mBreakHere = false;
  // Break rules for letters from the "word-break" property.
-  mozilla::intl::WordBreakRule mWordBreak;
+  mozilla::intl::WordBreakRule mWordBreak =
      mozilla::intl::WordBreakRule::Normal;
  // Line breaking strictness from the "line-break" property.
-  mozilla::intl::LineBreakRule mLineBreak;
+  mozilla::intl::LineBreakRule mLineBreak = mozilla::intl::LineBreakRule::Auto;
  // Should the text be treated as continuing a word-in-progress (for purposes
  // of initial capitalization)? Normally this is set to false whenever we
  // start using a linebreaker, but it may be set to true if the line-breaker
  // has been explicitly flushed mid-word.
-  bool mWordContinuation;
+  bool mWordContinuation = false;
  // True if using old line segmenter.
  const bool mLegacyBehavior;
 };
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -1733,6 +1733,30 @@ void BuildTextRunsScanner::AccumulateRunInfo(nsTextFrame* aFrame) {
    mLineBreakBeforeFrames.AppendElement(aFrame);
    mStartOfLine = false;
  }
  // Default limits used by `hyphenate-limit-chars` for `auto` components, as
  // suggested by the CSS Text spec.
  // TODO: consider making these sensitive to the context, e.g. increasing the
  // values for long line lengths to reduce the tendency to hyphenate too much.
  const uint32_t kDefaultHyphenateTotalWordLength = 5;
  const uint32_t kDefaultHyphenatePreBreakLength = 2;
  const uint32_t kDefaultHyphenatePostBreakLength = 2;
  const auto& hyphenateLimitChars = aFrame->StyleText()->mHyphenateLimitChars;
  uint32_t pre =
      hyphenateLimitChars.pre_hyphen_length.IsAuto()
          ? kDefaultHyphenatePreBreakLength
          : std::max(0, hyphenateLimitChars.pre_hyphen_length.AsNumber());
  uint32_t post =
      hyphenateLimitChars.post_hyphen_length.IsAuto()
          ? kDefaultHyphenatePostBreakLength
          : std::max(0, hyphenateLimitChars.post_hyphen_length.AsNumber());
  uint32_t total =
      hyphenateLimitChars.total_word_length.IsAuto()
          ? kDefaultHyphenateTotalWordLength
          : std::max(0, hyphenateLimitChars.total_word_length.AsNumber());
  total = std::max(total, pre + post);
  mLineBreaker.SetHyphenateLimitChars(total, pre, post);
 }
 static bool HasTerminalNewline(const nsTextFrame* aFrame) {