Bug 1521723 - Apply hyphenate-limit-chars settings to the potential breaks found by the hyphenator. r=layout-reviewers,emilio

Differential Revision: https://phabricator.services.mozilla.com/D234960
This commit is contained in:
Jonathan Kew
2025-01-29 17:01:54 +00:00
parent 143c12f00d
commit 922837f10b
3 changed files with 135 additions and 22 deletions

View File

@@ -64,15 +64,7 @@ static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
} }
// Constructor, shown here as fused old/new diff columns. On the new side the
// only member initialized in the constructor is mLegacyBehavior, which is set
// to the negation of StaticPrefs::intl_icu4x_segmenter_enabled(). The members
// named on the removed lines below are given in-class default initializers in
// the class declaration instead.
nsLineBreaker::nsLineBreaker() nsLineBreaker::nsLineBreaker()
: mCurrentWordLanguage(nullptr), : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
mCurrentWordContainsMixedLang(false),
mScriptIsChineseOrJapanese(false),
mAfterBreakableSpace(false),
mBreakHere(false),
mWordBreak(WordBreakRule::Normal),
mLineBreak(LineBreakRule::Auto),
mWordContinuation(false),
mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
nsLineBreaker::~nsLineBreaker() { nsLineBreaker::~nsLineBreaker() {
NS_ASSERTION(mCurrentWord.Length() == 0, NS_ASSERTION(mCurrentWord.Length() == 0,
@@ -422,14 +414,88 @@ void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
const char16_t* aTextStart, const char16_t* aTextStart,
const char16_t* aTextLimit, const char16_t* aTextLimit,
uint8_t* aBreakState) { uint8_t* aBreakState) {
// Early-return for words that are definitely too short to hyphenate.
if (aTextLimit - aTextStart < mHyphenateLimitWord) {
return;
}
nsDependentSubstring string(aTextStart, aTextLimit); nsDependentSubstring string(aTextStart, aTextLimit);
AutoTArray<bool, 200> hyphens; AutoTArray<bool, 200> hyphens;
if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) { if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
for (uint32_t i = 0; i + 1 < string.Length(); ++i) { return;
if (hyphens[i]) { }
aBreakState[i + 1] =
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; // Keep track of the length seen so far, in terms of characters that are
// countable for hyphenate-limit-chars purposes.
uint32_t length = 0;
AutoTArray<std::pair<uint32_t, uint32_t>, 16> positionAndLength;
for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
// Get current character, converting surrogate pairs to UCS4 for char
// category lookup.
uint32_t ch = string[i];
if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
NS_IS_LOW_SURROGATE(string[i + 1])) {
ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
}
// According to CSS Text, "Nonspacing combining marks (Unicode General
// Category Mn) and intra-word punctuation (Unicode General Category P*)
// do not count towards the minimum."
// (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
// We also don't count Control or Format categories.
using intl::GeneralCategory;
switch (UnicodeProperties::CharType(ch)) {
case GeneralCategory::Nonspacing_Mark:
case GeneralCategory::Dash_Punctuation:
case GeneralCategory::Open_Punctuation:
case GeneralCategory::Close_Punctuation:
case GeneralCategory::Connector_Punctuation:
case GeneralCategory::Other_Punctuation:
case GeneralCategory::Initial_Punctuation:
case GeneralCategory::Final_Punctuation:
case GeneralCategory::Control:
case GeneralCategory::Format:
case GeneralCategory::Surrogate:
break;
default:
++length;
break;
}
// Don't accept any breaks until we're far enough into the word.
if (length >= mHyphenateLimitStart && hyphens[i]) {
MOZ_ASSERT(aBreakState[i + 1] ==
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
// Keep track of hyphen position and "countable" length of the word.
positionAndLength.AppendElement(
std::pair<uint32_t, uint32_t>(i + 1, length));
}
// If the character was outside the BMP, skip past the low surrogate.
if (!IS_IN_BMP(ch)) {
++i;
}
}
++length; // Account for the last character (not counted by the loop above).
if (length < mHyphenateLimitWord) {
// After discounting combining marks, punctuation, controls, etc., the word
// was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
// forget them.
while (!positionAndLength.IsEmpty()) {
auto [lastPos, lastLen] = positionAndLength.PopLastElement();
aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
}
} else {
// Check if trailing fragment is too short; if so, remove the last hyphen
// break(s) that we set, until the fragment will be long enough.
while (!positionAndLength.IsEmpty()) {
auto [lastPos, lastLen] = positionAndLength.PopLastElement();
if (length - lastLen >= mHyphenateLimitEnd) {
break;
} }
aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
} }
} }
} }

View File

@@ -217,6 +217,16 @@ class nsLineBreaker {
mWordContinuation = aContinuation; mWordContinuation = aContinuation;
} }
/**
 * Record the three `hyphenate-limit-chars` thresholds used when filtering
 * auto-hyphenation breaks. The limits are stored in 8-bit fields, so any
 * value above 255 is stored as 255.
 *
 * @param aWordLength   Minimum word length for auto-hyphenation.
 * @param aStartLength  Minimum number of characters before a break.
 * @param aEndLength    Minimum number of characters after a break.
 */
void SetHyphenateLimitChars(uint32_t aWordLength, uint32_t aStartLength,
                            uint32_t aEndLength) {
  constexpr uint32_t kLargestLimit = 255u;
  const uint32_t wordLimit = std::min(kLargestLimit, aWordLength);
  const uint32_t startLimit = std::min(kLargestLimit, aStartLength);
  const uint32_t endLimit = std::min(kLargestLimit, aEndLength);
  mHyphenateLimitWord = static_cast<uint8_t>(wordLimit);
  mHyphenateLimitStart = static_cast<uint8_t>(startLimit);
  mHyphenateLimitEnd = static_cast<uint8_t>(endLimit);
}
private: private:
// This is a list of text sources that make up the "current word" (i.e., // This is a list of text sources that make up the "current word" (i.e.,
// run of text which does not contain any whitespace). All the mLengths // run of text which does not contain any whitespace). All the mLengths
@@ -262,25 +272,38 @@ class nsLineBreaker {
AutoTArray<char16_t, 100> mCurrentWord; AutoTArray<char16_t, 100> mCurrentWord;
// All the items that contribute to mCurrentWord // All the items that contribute to mCurrentWord
AutoTArray<TextItem, 2> mTextItems; AutoTArray<TextItem, 2> mTextItems;
nsAtom* mCurrentWordLanguage; nsAtom* mCurrentWordLanguage = nullptr;
bool mCurrentWordContainsMixedLang;
// Constraints from CSS `hyphenate-limit-chars` property, to block the use of
// auto-hyphenation if the word is too short, or at positions too near the
// beginning/end of the word.
// (Note that per CSS Text spec, these counts ignore combining marks, etc.,
// so they are not purely codepoint or character counts.)
// (Zero values would have no effect; but text-frame code will update the
// values from CSS before calling the line-breaker.)
// Each limit is held in 8 bits, which is why SetHyphenateLimitChars() clamps
// the values it is given to 255 before storing them here.
uint8_t mHyphenateLimitWord = 0; // Min word length to auto-hyphenate
uint8_t mHyphenateLimitStart = 0; // Min number of chars before the break
uint8_t mHyphenateLimitEnd = 0; // Min number of chars after the break
bool mCurrentWordContainsMixedLang = false;
bool mCurrentWordMightBeBreakable = false; bool mCurrentWordMightBeBreakable = false;
bool mScriptIsChineseOrJapanese; bool mScriptIsChineseOrJapanese = false;
// True if the previous character was breakable whitespace // True if the previous character was breakable whitespace
bool mAfterBreakableSpace; bool mAfterBreakableSpace = false;
// True if a break must be allowed at the current position because // True if a break must be allowed at the current position because
// a run of breakable whitespace ends here // a run of breakable whitespace ends here
bool mBreakHere; bool mBreakHere = false;
// Break rules for letters from the "word-break" property. // Break rules for letters from the "word-break" property.
mozilla::intl::WordBreakRule mWordBreak; mozilla::intl::WordBreakRule mWordBreak =
mozilla::intl::WordBreakRule::Normal;
// Line breaking strictness from the "line-break" property. // Line breaking strictness from the "line-break" property.
mozilla::intl::LineBreakRule mLineBreak; mozilla::intl::LineBreakRule mLineBreak = mozilla::intl::LineBreakRule::Auto;
// Should the text be treated as continuing a word-in-progress (for purposes // Should the text be treated as continuing a word-in-progress (for purposes
// of initial capitalization)? Normally this is set to false whenever we // of initial capitalization)? Normally this is set to false whenever we
// start using a linebreaker, but it may be set to true if the line-breaker // start using a linebreaker, but it may be set to true if the line-breaker
// has been explicitly flushed mid-word. // has been explicitly flushed mid-word.
bool mWordContinuation; bool mWordContinuation = false;
// True if using old line segmenter. // True if using old line segmenter.
const bool mLegacyBehavior; const bool mLegacyBehavior;
}; };

View File

@@ -1733,6 +1733,30 @@ void BuildTextRunsScanner::AccumulateRunInfo(nsTextFrame* aFrame) {
mLineBreakBeforeFrames.AppendElement(aFrame); mLineBreakBeforeFrames.AppendElement(aFrame);
mStartOfLine = false; mStartOfLine = false;
} }
// Default limits used by `hyphenate-limit-chars` for `auto` components, as
// suggested by the CSS Text spec.
// TODO: consider making these sensitive to the context, e.g. increasing the
// values for long line lengths to reduce the tendency to hyphenate too much.
const uint32_t kDefaultHyphenateTotalWordLength = 5;
const uint32_t kDefaultHyphenatePreBreakLength = 2;
const uint32_t kDefaultHyphenatePostBreakLength = 2;
const auto& hyphenateLimitChars = aFrame->StyleText()->mHyphenateLimitChars;
uint32_t pre =
hyphenateLimitChars.pre_hyphen_length.IsAuto()
? kDefaultHyphenatePreBreakLength
: std::max(0, hyphenateLimitChars.pre_hyphen_length.AsNumber());
uint32_t post =
hyphenateLimitChars.post_hyphen_length.IsAuto()
? kDefaultHyphenatePostBreakLength
: std::max(0, hyphenateLimitChars.post_hyphen_length.AsNumber());
uint32_t total =
hyphenateLimitChars.total_word_length.IsAuto()
? kDefaultHyphenateTotalWordLength
: std::max(0, hyphenateLimitChars.total_word_length.AsNumber());
total = std::max(total, pre + post);
mLineBreaker.SetHyphenateLimitChars(total, pre, post);
} }
static bool HasTerminalNewline(const nsTextFrame* aFrame) { static bool HasTerminalNewline(const nsTextFrame* aFrame) {