diff --git a/dom/base/nsLineBreaker.cpp b/dom/base/nsLineBreaker.cpp
index ab3bfee2747c..711885934e5e 100644
--- a/dom/base/nsLineBreaker.cpp
+++ b/dom/base/nsLineBreaker.cpp
@@ -64,15 +64,7 @@ static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
 }
 
 nsLineBreaker::nsLineBreaker()
-    : mCurrentWordLanguage(nullptr),
-      mCurrentWordContainsMixedLang(false),
-      mScriptIsChineseOrJapanese(false),
-      mAfterBreakableSpace(false),
-      mBreakHere(false),
-      mWordBreak(WordBreakRule::Normal),
-      mLineBreak(LineBreakRule::Auto),
-      mWordContinuation(false),
-      mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
+    : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
 
 nsLineBreaker::~nsLineBreaker() {
   NS_ASSERTION(mCurrentWord.Length() == 0,
@@ -422,14 +414,104 @@ void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
                                           const char16_t* aTextStart,
                                           const char16_t* aTextLimit,
                                           uint8_t* aBreakState) {
+  // Early-return for words that are definitely too short to hyphenate.
+  if (aTextLimit - aTextStart < mHyphenateLimitWord) {
+    return;
+  }
+
   nsDependentSubstring string(aTextStart, aTextLimit);
   AutoTArray<bool, 200> hyphens;
-  if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
-    for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
-      if (hyphens[i]) {
-        aBreakState[i + 1] =
-            gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
+  if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
+    return;
+  }
+
+  // According to CSS Text, "Nonspacing combining marks (Unicode General
+  // Category Mn) and intra-word punctuation (Unicode General Category P*)
+  // do not count towards the minimum."
+  // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
+  // We also don't count Control or Format categories.
+  auto isCountable = [](uint32_t aCh) {
+    using intl::GeneralCategory;
+    switch (UnicodeProperties::CharType(aCh)) {
+      case GeneralCategory::Nonspacing_Mark:
+      case GeneralCategory::Dash_Punctuation:
+      case GeneralCategory::Open_Punctuation:
+      case GeneralCategory::Close_Punctuation:
+      case GeneralCategory::Connector_Punctuation:
+      case GeneralCategory::Other_Punctuation:
+      case GeneralCategory::Initial_Punctuation:
+      case GeneralCategory::Final_Punctuation:
+      case GeneralCategory::Control:
+      case GeneralCategory::Format:
+      case GeneralCategory::Surrogate:
+        return false;
+      default:
+        return true;
+    }
+  };
+
+  // Keep track of the length seen so far, in terms of characters that are
+  // countable for hyphenate-limit-chars purposes.
+  uint32_t length = 0;
+  AutoTArray<std::pair<uint32_t, uint32_t>, 16> positionAndLength;
+  // The loop stops before the last code unit, as no break is recorded after
+  // it. The index outlives the loop so that we can tell afterwards whether
+  // that code unit was already consumed as the low half of a surrogate pair.
+  uint32_t i = 0;
+  for (; i + 1 < string.Length(); ++i) {
+    // Get current character, converting surrogate pairs to UCS4 for char
+    // category lookup.
+    uint32_t ch = string[i];
+    if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
+        NS_IS_LOW_SURROGATE(string[i + 1])) {
+      ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
+    }
+
+    if (isCountable(ch)) {
+      ++length;
+    }
+
+    // Don't accept any breaks until we're far enough into the word.
+    if (length >= mHyphenateLimitStart && hyphens[i]) {
+      MOZ_ASSERT(aBreakState[i + 1] ==
+                 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
+      aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
+      // Keep track of hyphen position and "countable" length of the word.
+      positionAndLength.AppendElement(
+          std::pair<uint32_t, uint32_t>(i + 1, length));
+    }
+
+    // If the character was outside the BMP, skip past the low surrogate.
+    if (!IS_IN_BMP(ch)) {
+      ++i;
+    }
+  }
+
+  // Account for the last code unit, which the loop above never visits as a
+  // character in its own right. If it was the low half of a surrogate pair,
+  // i is already past it and the pair has been counted; otherwise it is
+  // subject to the same countability test as every other character.
+  if (i < string.Length() && isCountable(string[i])) {
+    ++length;
+  }
+
+  if (length < mHyphenateLimitWord) {
+    // After discounting combining marks, punctuation, controls, etc., the word
+    // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
+    // forget them.
+    while (!positionAndLength.IsEmpty()) {
+      auto [lastPos, lastLen] = positionAndLength.PopLastElement();
+      aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
+    }
+  } else {
+    // Check if trailing fragment is too short; if so, remove the last hyphen
+    // break(s) that we set, until the fragment will be long enough.
+    while (!positionAndLength.IsEmpty()) {
+      auto [lastPos, lastLen] = positionAndLength.PopLastElement();
+      if (length - lastLen >= mHyphenateLimitEnd) {
+        break;
       }
+      aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
     }
   }
 }
diff --git a/dom/base/nsLineBreaker.h b/dom/base/nsLineBreaker.h
index 7cce282793f6..bbea9e31cb2f 100644
--- a/dom/base/nsLineBreaker.h
+++ b/dom/base/nsLineBreaker.h
@@ -217,6 +217,20 @@ class nsLineBreaker {
     mWordContinuation = aContinuation;
   }
 
+  /**
+   * Set the hyphenate-limit-chars values. Values are clamped to be <= 255.
+   *
+   * @param aWordLength Minimum word length to auto-hyphenate.
+   * @param aStartLength Minimum number of chars before the break.
+   * @param aEndLength Minimum number of chars after the break.
+   */
+  void SetHyphenateLimitChars(uint32_t aWordLength, uint32_t aStartLength,
+                              uint32_t aEndLength) {
+    mHyphenateLimitWord = std::min(255u, aWordLength);
+    mHyphenateLimitStart = std::min(255u, aStartLength);
+    mHyphenateLimitEnd = std::min(255u, aEndLength);
+  }
+
  private:
   // This is a list of text sources that make up the "current word" (i.e.,
   // run of text which does not contain any whitespace). All the mLengths
@@ -262,25 +276,38 @@ class nsLineBreaker {
   AutoTArray<char16_t, 100> mCurrentWord;
   // All the items that contribute to mCurrentWord
   AutoTArray<TextItem, 2> mTextItems;
-  nsAtom* mCurrentWordLanguage;
-  bool mCurrentWordContainsMixedLang;
+  nsAtom* mCurrentWordLanguage = nullptr;
+
+  // Constraints from CSS `hyphenate-limit-chars` property, to block the use of
+  // auto-hyphenation if the word is too short, or at positions too near the
+  // beginning/end of the word.
+  // (Note that per CSS Text spec, these counts ignore combining marks, etc.,
+  // so they are not purely codepoint or character counts.)
+  // (Zero values would have no effect; but text-frame code will update the
+  // values from CSS before calling the line-breaker.)
+  uint8_t mHyphenateLimitWord = 0;   // Min word length to auto-hyphenate
+  uint8_t mHyphenateLimitStart = 0;  // Min number of chars before the break
+  uint8_t mHyphenateLimitEnd = 0;    // Min number of chars after the break
+
+  bool mCurrentWordContainsMixedLang = false;
   bool mCurrentWordMightBeBreakable = false;
-  bool mScriptIsChineseOrJapanese;
+  bool mScriptIsChineseOrJapanese = false;
 
   // True if the previous character was breakable whitespace
-  bool mAfterBreakableSpace;
+  bool mAfterBreakableSpace = false;
   // True if a break must be allowed at the current position because
   // a run of breakable whitespace ends here
-  bool mBreakHere;
+  bool mBreakHere = false;
   // Break rules for letters from the "word-break" property.
-  mozilla::intl::WordBreakRule mWordBreak;
+  mozilla::intl::WordBreakRule mWordBreak =
+      mozilla::intl::WordBreakRule::Normal;
   // Line breaking strictness from the "line-break" property.
-  mozilla::intl::LineBreakRule mLineBreak;
+  mozilla::intl::LineBreakRule mLineBreak = mozilla::intl::LineBreakRule::Auto;
   // Should the text be treated as continuing a word-in-progress (for purposes
   // of initial capitalization)? Normally this is set to false whenever we
   // start using a linebreaker, but it may be set to true if the line-breaker
   // has been explicitly flushed mid-word.
-  bool mWordContinuation;
+  bool mWordContinuation = false;
   // True if using old line segmenter.
   const bool mLegacyBehavior;
 };
diff --git a/layout/generic/nsTextFrame.cpp b/layout/generic/nsTextFrame.cpp
index 3dfffc3d9781..3893a0826635 100644
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -1733,6 +1733,30 @@ void BuildTextRunsScanner::AccumulateRunInfo(nsTextFrame* aFrame) {
     mLineBreakBeforeFrames.AppendElement(aFrame);
     mStartOfLine = false;
   }
+
+  // Default limits used by `hyphenate-limit-chars` for `auto` components, as
+  // suggested by the CSS Text spec.
+  // TODO: consider making these sensitive to the context, e.g. increasing the
+  // values for long line lengths to reduce the tendency to hyphenate too much.
+  const uint32_t kDefaultHyphenateTotalWordLength = 5;
+  const uint32_t kDefaultHyphenatePreBreakLength = 2;
+  const uint32_t kDefaultHyphenatePostBreakLength = 2;
+
+  const auto& hyphenateLimitChars = aFrame->StyleText()->mHyphenateLimitChars;
+  uint32_t pre =
+      hyphenateLimitChars.pre_hyphen_length.IsAuto()
+          ? kDefaultHyphenatePreBreakLength
+          : std::max(0, hyphenateLimitChars.pre_hyphen_length.AsNumber());
+  uint32_t post =
+      hyphenateLimitChars.post_hyphen_length.IsAuto()
+          ? kDefaultHyphenatePostBreakLength
+          : std::max(0, hyphenateLimitChars.post_hyphen_length.AsNumber());
+  uint32_t total =
+      hyphenateLimitChars.total_word_length.IsAuto()
+          ? kDefaultHyphenateTotalWordLength
+          : std::max(0, hyphenateLimitChars.total_word_length.AsNumber());
+  total = std::max(total, pre + post);
+  mLineBreaker.SetHyphenateLimitChars(total, pre, post);
 }
 
 static bool HasTerminalNewline(const nsTextFrame* aFrame) {