Bug 1521723 - Apply hyphenate-limit-chars settings to the potential breaks found by the hyphenator. r=layout-reviewers,emilio
Differential Revision: https://phabricator.services.mozilla.com/D234960
This commit is contained in:
@@ -64,15 +64,7 @@ static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
nsLineBreaker::nsLineBreaker()
|
nsLineBreaker::nsLineBreaker()
|
||||||
: mCurrentWordLanguage(nullptr),
|
: mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
|
||||||
mCurrentWordContainsMixedLang(false),
|
|
||||||
mScriptIsChineseOrJapanese(false),
|
|
||||||
mAfterBreakableSpace(false),
|
|
||||||
mBreakHere(false),
|
|
||||||
mWordBreak(WordBreakRule::Normal),
|
|
||||||
mLineBreak(LineBreakRule::Auto),
|
|
||||||
mWordContinuation(false),
|
|
||||||
mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
|
|
||||||
|
|
||||||
nsLineBreaker::~nsLineBreaker() {
|
nsLineBreaker::~nsLineBreaker() {
|
||||||
NS_ASSERTION(mCurrentWord.Length() == 0,
|
NS_ASSERTION(mCurrentWord.Length() == 0,
|
||||||
@@ -422,14 +414,88 @@ void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
|
|||||||
const char16_t* aTextStart,
|
const char16_t* aTextStart,
|
||||||
const char16_t* aTextLimit,
|
const char16_t* aTextLimit,
|
||||||
uint8_t* aBreakState) {
|
uint8_t* aBreakState) {
|
||||||
|
// Early-return for words that are definitely too short to hyphenate.
|
||||||
|
if (aTextLimit - aTextStart < mHyphenateLimitWord) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
nsDependentSubstring string(aTextStart, aTextLimit);
|
nsDependentSubstring string(aTextStart, aTextLimit);
|
||||||
AutoTArray<bool, 200> hyphens;
|
AutoTArray<bool, 200> hyphens;
|
||||||
if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
|
if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
|
||||||
for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
|
return;
|
||||||
if (hyphens[i]) {
|
|
||||||
aBreakState[i + 1] =
|
|
||||||
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep track of the length seen so far, in terms of characters that are
|
||||||
|
// countable for hyphenate-limit-chars purposes.
|
||||||
|
uint32_t length = 0;
|
||||||
|
AutoTArray<std::pair<uint32_t, uint32_t>, 16> positionAndLength;
|
||||||
|
for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
|
||||||
|
// Get current character, converting surrogate pairs to UCS4 for char
|
||||||
|
// category lookup.
|
||||||
|
uint32_t ch = string[i];
|
||||||
|
if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
|
||||||
|
NS_IS_LOW_SURROGATE(string[i + 1])) {
|
||||||
|
ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// According to CSS Text, "Nonspacing combining marks (Unicode General
|
||||||
|
// Category Mn) and intra-word punctuation (Unicode General Category P*)
|
||||||
|
// do not count towards the minimum."
|
||||||
|
// (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
|
||||||
|
// We also don't count Control, Format, or Surrogate categories.
|
||||||
|
using intl::GeneralCategory;
|
||||||
|
switch (UnicodeProperties::CharType(ch)) {
|
||||||
|
case GeneralCategory::Nonspacing_Mark:
|
||||||
|
case GeneralCategory::Dash_Punctuation:
|
||||||
|
case GeneralCategory::Open_Punctuation:
|
||||||
|
case GeneralCategory::Close_Punctuation:
|
||||||
|
case GeneralCategory::Connector_Punctuation:
|
||||||
|
case GeneralCategory::Other_Punctuation:
|
||||||
|
case GeneralCategory::Initial_Punctuation:
|
||||||
|
case GeneralCategory::Final_Punctuation:
|
||||||
|
case GeneralCategory::Control:
|
||||||
|
case GeneralCategory::Format:
|
||||||
|
case GeneralCategory::Surrogate:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
++length;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't accept any breaks until we're far enough into the word.
|
||||||
|
if (length >= mHyphenateLimitStart && hyphens[i]) {
|
||||||
|
MOZ_ASSERT(aBreakState[i + 1] ==
|
||||||
|
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
|
||||||
|
aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
|
||||||
|
// Record the break position and the "countable" length of the word up to it.
|
||||||
|
positionAndLength.AppendElement(
|
||||||
|
std::pair<uint32_t, uint32_t>(i + 1, length));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the character was outside the BMP, skip past the low surrogate.
|
||||||
|
if (!IS_IN_BMP(ch)) {
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++length; // Account for the last character (not counted by the loop above).
|
||||||
|
|
||||||
|
if (length < mHyphenateLimitWord) {
|
||||||
|
// After discounting combining marks, punctuation, controls, etc., the word
|
||||||
|
// was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
|
||||||
|
// forget them.
|
||||||
|
while (!positionAndLength.IsEmpty()) {
|
||||||
|
auto [lastPos, lastLen] = positionAndLength.PopLastElement();
|
||||||
|
aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Check if trailing fragment is too short; if so, remove the last hyphen
|
||||||
|
// break(s) that we set, until the fragment is long enough.
|
||||||
|
while (!positionAndLength.IsEmpty()) {
|
||||||
|
auto [lastPos, lastLen] = positionAndLength.PopLastElement();
|
||||||
|
if (length - lastLen >= mHyphenateLimitEnd) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
aBreakState[lastPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -217,6 +217,16 @@ class nsLineBreaker {
|
|||||||
mWordContinuation = aContinuation;
|
mWordContinuation = aContinuation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the hyphenate-limit-chars values. Values are clamped to be <= 255 so that they fit in the uint8_t members that store them.
|
||||||
|
*/
|
||||||
|
void SetHyphenateLimitChars(uint32_t aWordLength, uint32_t aStartLength,
|
||||||
|
uint32_t aEndLength) {
|
||||||
|
mHyphenateLimitWord = std::min(255u, aWordLength);
|
||||||
|
mHyphenateLimitStart = std::min(255u, aStartLength);
|
||||||
|
mHyphenateLimitEnd = std::min(255u, aEndLength);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// This is a list of text sources that make up the "current word" (i.e.,
|
// This is a list of text sources that make up the "current word" (i.e.,
|
||||||
// run of text which does not contain any whitespace). All the mLengths
|
// run of text which does not contain any whitespace). All the mLengths
|
||||||
@@ -262,25 +272,38 @@ class nsLineBreaker {
|
|||||||
AutoTArray<char16_t, 100> mCurrentWord;
|
AutoTArray<char16_t, 100> mCurrentWord;
|
||||||
// All the items that contribute to mCurrentWord
|
// All the items that contribute to mCurrentWord
|
||||||
AutoTArray<TextItem, 2> mTextItems;
|
AutoTArray<TextItem, 2> mTextItems;
|
||||||
nsAtom* mCurrentWordLanguage;
|
nsAtom* mCurrentWordLanguage = nullptr;
|
||||||
bool mCurrentWordContainsMixedLang;
|
|
||||||
|
// Constraints from CSS `hyphenate-limit-chars` property, to block the use of
|
||||||
|
// auto-hyphenation if the word is too short, or at positions too near the
|
||||||
|
// beginning/end of the word.
|
||||||
|
// (Note that per CSS Text spec, these counts ignore combining marks, etc.,
|
||||||
|
// so they are not purely codepoint or character counts.)
|
||||||
|
// (Zero values would have no effect; but text-frame code will update the
|
||||||
|
// values from CSS before calling the line-breaker.)
|
||||||
|
uint8_t mHyphenateLimitWord = 0; // Min word length to auto-hyphenate
|
||||||
|
uint8_t mHyphenateLimitStart = 0; // Min number of chars before the break
|
||||||
|
uint8_t mHyphenateLimitEnd = 0; // Min number of chars after the break
|
||||||
|
|
||||||
|
bool mCurrentWordContainsMixedLang = false;
|
||||||
bool mCurrentWordMightBeBreakable = false;
|
bool mCurrentWordMightBeBreakable = false;
|
||||||
bool mScriptIsChineseOrJapanese;
|
bool mScriptIsChineseOrJapanese = false;
|
||||||
|
|
||||||
// True if the previous character was breakable whitespace
|
// True if the previous character was breakable whitespace
|
||||||
bool mAfterBreakableSpace;
|
bool mAfterBreakableSpace = false;
|
||||||
// True if a break must be allowed at the current position because
|
// True if a break must be allowed at the current position because
|
||||||
// a run of breakable whitespace ends here
|
// a run of breakable whitespace ends here
|
||||||
bool mBreakHere;
|
bool mBreakHere = false;
|
||||||
// Break rules for letters from the "word-break" property.
|
// Break rules for letters from the "word-break" property.
|
||||||
mozilla::intl::WordBreakRule mWordBreak;
|
mozilla::intl::WordBreakRule mWordBreak =
|
||||||
|
mozilla::intl::WordBreakRule::Normal;
|
||||||
// Line breaking strictness from the "line-break" property.
|
// Line breaking strictness from the "line-break" property.
|
||||||
mozilla::intl::LineBreakRule mLineBreak;
|
mozilla::intl::LineBreakRule mLineBreak = mozilla::intl::LineBreakRule::Auto;
|
||||||
// Should the text be treated as continuing a word-in-progress (for purposes
|
// Should the text be treated as continuing a word-in-progress (for purposes
|
||||||
// of initial capitalization)? Normally this is set to false whenever we
|
// of initial capitalization)? Normally this is set to false whenever we
|
||||||
// start using a linebreaker, but it may be set to true if the line-breaker
|
// start using a linebreaker, but it may be set to true if the line-breaker
|
||||||
// has been explicitly flushed mid-word.
|
// has been explicitly flushed mid-word.
|
||||||
bool mWordContinuation;
|
bool mWordContinuation = false;
|
||||||
// True if using old line segmenter.
|
// True if using old line segmenter.
|
||||||
const bool mLegacyBehavior;
|
const bool mLegacyBehavior;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1733,6 +1733,30 @@ void BuildTextRunsScanner::AccumulateRunInfo(nsTextFrame* aFrame) {
|
|||||||
mLineBreakBeforeFrames.AppendElement(aFrame);
|
mLineBreakBeforeFrames.AppendElement(aFrame);
|
||||||
mStartOfLine = false;
|
mStartOfLine = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Default limits used by `hyphenate-limit-chars` for `auto` components, as
|
||||||
|
// suggested by the CSS Text spec.
|
||||||
|
// TODO: consider making these sensitive to the context, e.g. increasing the
|
||||||
|
// values for long line lengths to reduce the tendency to hyphenate too much.
|
||||||
|
const uint32_t kDefaultHyphenateTotalWordLength = 5;
|
||||||
|
const uint32_t kDefaultHyphenatePreBreakLength = 2;
|
||||||
|
const uint32_t kDefaultHyphenatePostBreakLength = 2;
|
||||||
|
|
||||||
|
const auto& hyphenateLimitChars = aFrame->StyleText()->mHyphenateLimitChars;
|
||||||
|
uint32_t pre =
|
||||||
|
hyphenateLimitChars.pre_hyphen_length.IsAuto()
|
||||||
|
? kDefaultHyphenatePreBreakLength
|
||||||
|
: std::max(0, hyphenateLimitChars.pre_hyphen_length.AsNumber());
|
||||||
|
uint32_t post =
|
||||||
|
hyphenateLimitChars.post_hyphen_length.IsAuto()
|
||||||
|
? kDefaultHyphenatePostBreakLength
|
||||||
|
: std::max(0, hyphenateLimitChars.post_hyphen_length.AsNumber());
|
||||||
|
uint32_t total =
|
||||||
|
hyphenateLimitChars.total_word_length.IsAuto()
|
||||||
|
? kDefaultHyphenateTotalWordLength
|
||||||
|
: std::max(0, hyphenateLimitChars.total_word_length.AsNumber());
|
||||||
|
total = std::max(total, pre + post);
|
||||||
|
mLineBreaker.SetHyphenateLimitChars(total, pre, post);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool HasTerminalNewline(const nsTextFrame* aFrame) {
|
static bool HasTerminalNewline(const nsTextFrame* aFrame) {
|
||||||
|
|||||||
Reference in New Issue
Block a user