diff --git a/intl/components/src/UnicodeProperties.h b/intl/components/src/UnicodeProperties.h index 6737d33ad124..3fde1c09fd1b 100644 --- a/intl/components/src/UnicodeProperties.h +++ b/intl/components/src/UnicodeProperties.h @@ -171,6 +171,23 @@ class UnicodeProperties final { return u_hasBinaryProperty(aCh, prop); } + /** + * Check if the East Asian Width of aCh is Fullwidth, Halfwidth or Wide. + */ + static inline bool IsEastAsianWidthFHW(uint32_t aCh) { + switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) { + case U_EA_FULLWIDTH: + case U_EA_HALFWIDTH: + case U_EA_WIDE: + return true; + case U_EA_AMBIGUOUS: + case U_EA_NARROW: + case U_EA_NEUTRAL: + return false; + } + return false; + } + /** * Check if the width of aCh is full width, half width or wide * excluding emoji. @@ -224,6 +241,24 @@ class UnicodeProperties final { return false; } + /** + * Check if the general category of aCh is any punctuation category (P*). + */ + static inline bool IsPunctuation(uint32_t aCh) { + switch (CharType(aCh)) { + case GeneralCategory::Dash_Punctuation: + case GeneralCategory::Open_Punctuation: + case GeneralCategory::Close_Punctuation: + case GeneralCategory::Connector_Punctuation: + case GeneralCategory::Other_Punctuation: + case GeneralCategory::Initial_Punctuation: + case GeneralCategory::Final_Punctuation: + return true; + default: + return false; + } + } + /** * Check if the CharType of aCh is math or other symbol. 
*/ diff --git a/intl/unicharutil/util/nsUnicharUtils.cpp b/intl/unicharutil/util/nsUnicharUtils.cpp index 31ac06a52df6..913a27fa4344 100644 --- a/intl/unicharutil/util/nsUnicharUtils.cpp +++ b/intl/unicharutil/util/nsUnicharUtils.cpp @@ -520,6 +520,11 @@ bool IsSegmentBreakSkipChar(uint32_t u) { intl::UnicodeProperties::GetScriptCode(u) != intl::Script::HANGUL; } +bool IsEastAsianPunctuation(uint32_t u) { + return intl::UnicodeProperties::IsEastAsianWidthFHW(u) && + intl::UnicodeProperties::IsPunctuation(u); +} + bool IsPunctuationForWordSelect(char16_t aCh) { const uint8_t cat = unicode::GetGeneralCategory(aCh); switch (cat) { diff --git a/intl/unicharutil/util/nsUnicharUtils.h b/intl/unicharutil/util/nsUnicharUtils.h index 693e192e411f..9717be9938fa 100644 --- a/intl/unicharutil/util/nsUnicharUtils.h +++ b/intl/unicharutil/util/nsUnicharUtils.h @@ -153,7 +153,12 @@ namespace mozilla { */ uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr); +/** + * Tests used in CSS Segment Break Transformation to determine whether a + * newline is discardable. 
+ */ bool IsSegmentBreakSkipChar(uint32_t u); +bool IsEastAsianPunctuation(uint32_t u); /** * Return true for all Punctuation categories (Unicode general category P?), diff --git a/layout/generic/nsTextFrame.cpp b/layout/generic/nsTextFrame.cpp index d477df7443b1..77b65b6d13b8 100644 --- a/layout/generic/nsTextFrame.cpp +++ b/layout/generic/nsTextFrame.cpp @@ -1322,6 +1322,7 @@ BuildTextRunsScanner::FindBoundaryResult BuildTextRunsScanner::FindBoundaries( uint32_t start = textFrame->GetContentOffset(); uint32_t length = textFrame->GetContentLength(); const void* text; + const nsAtom* language = textFrame->StyleFont()->mLanguage; if (frag->Is2b()) { // It is possible that we may end up removing all whitespace in // a piece of text because of The White Space Processing Rules, @@ -1336,7 +1337,7 @@ BuildTextRunsScanner::FindBoundaryResult BuildTextRunsScanner::FindBoundaries( char16_t* bufStart = aState->mBuffer.Elements(); char16_t* bufEnd = nsTextFrameUtils::TransformText( frag->Get2b() + start, length, bufStart, compression, &incomingFlags, - &skipChars, &analysisFlags); + &skipChars, &analysisFlags, language); text = bufStart; length = bufEnd - bufStart; } else { @@ -2318,6 +2319,7 @@ already_AddRefed BuildTextRunsScanner::BuildTextRunForFrames( int32_t contentStart = mappedFlow->mStartFrame->GetContentOffset(); int32_t contentEnd = mappedFlow->GetContentEnd(); int32_t contentLength = contentEnd - contentStart; + const nsAtom* language = f->StyleFont()->mLanguage; TextRunMappedFlow* newFlow = &userMappedFlows[i]; newFlow->mStartFrame = mappedFlow->mStartFrame; @@ -2339,7 +2341,7 @@ already_AddRefed BuildTextRunsScanner::BuildTextRunForFrames( char16_t* bufStart = static_cast(aTextBuffer); char16_t* bufEnd = nsTextFrameUtils::TransformText( frag->Get2b() + contentStart, contentLength, bufStart, compression, - &mNextRunContextInfo, &skipChars, &analysisFlags); + &mNextRunContextInfo, &skipChars, &analysisFlags, language); aTextBuffer = bufEnd; 
currentTransformedTextOffset = bufEnd - static_cast(textPtr); @@ -2356,7 +2358,7 @@ already_AddRefed BuildTextRunsScanner::BuildTextRunForFrames( uint8_t* end = nsTextFrameUtils::TransformText( reinterpret_cast(frag->Get1b()) + contentStart, contentLength, bufStart, compression, &mNextRunContextInfo, - &skipChars, &analysisFlags); + &skipChars, &analysisFlags, language); aTextBuffer = ExpandBuffer(static_cast(aTextBuffer), tempBuf.Elements(), end - tempBuf.Elements()); @@ -2367,7 +2369,7 @@ already_AddRefed BuildTextRunsScanner::BuildTextRunForFrames( uint8_t* end = nsTextFrameUtils::TransformText( reinterpret_cast(frag->Get1b()) + contentStart, contentLength, bufStart, compression, &mNextRunContextInfo, - &skipChars, &analysisFlags); + &skipChars, &analysisFlags, language); aTextBuffer = end; currentTransformedTextOffset = end - static_cast(textPtr); @@ -2618,6 +2620,7 @@ bool BuildTextRunsScanner::SetupLineBreakerContext(gfxTextRun* aTextRun) { } gfxSkipChars skipChars; + const nsAtom* language = mMappedFlows[0].mStartFrame->StyleFont()->mLanguage; for (uint32_t i = 0; i < mMappedFlows.Length(); ++i) { MappedFlow* mappedFlow = &mMappedFlows[i]; @@ -2639,7 +2642,7 @@ bool BuildTextRunsScanner::SetupLineBreakerContext(gfxTextRun* aTextRun) { char16_t* bufStart = static_cast(textPtr); char16_t* bufEnd = nsTextFrameUtils::TransformText( frag->Get2b() + contentStart, contentLength, bufStart, compression, - &mNextRunContextInfo, &skipChars, &analysisFlags); + &mNextRunContextInfo, &skipChars, &analysisFlags, language); textPtr = bufEnd; } else { if (mDoubleByteText) { @@ -2653,7 +2656,7 @@ bool BuildTextRunsScanner::SetupLineBreakerContext(gfxTextRun* aTextRun) { uint8_t* end = nsTextFrameUtils::TransformText( reinterpret_cast(frag->Get1b()) + contentStart, contentLength, bufStart, compression, &mNextRunContextInfo, - &skipChars, &analysisFlags); + &skipChars, &analysisFlags, language); textPtr = ExpandBuffer(static_cast(textPtr), tempBuf.Elements(), end - 
tempBuf.Elements()); } else { @@ -2661,7 +2664,7 @@ bool BuildTextRunsScanner::SetupLineBreakerContext(gfxTextRun* aTextRun) { uint8_t* end = nsTextFrameUtils::TransformText( reinterpret_cast(frag->Get1b()) + contentStart, contentLength, bufStart, compression, &mNextRunContextInfo, - &skipChars, &analysisFlags); + &skipChars, &analysisFlags, language); textPtr = end; } } @@ -5371,7 +5374,7 @@ void nsTextFrame::UnionAdditionalOverflow(nsPresContext* aPresContext, nscoord topOrLeft(nscoord_MAX), bottomOrRight(nscoord_MIN); typedef gfxFont::Metrics Metrics; auto accumulateDecorationRect = - [&](const LineDecoration& dec, gfxFloat Metrics::*lineSize, + [&](const LineDecoration& dec, gfxFloat Metrics::* lineSize, mozilla::StyleTextDecorationLine lineType) { params.style = dec.mStyle; // If the style is solid, let's include decoration line rect of @@ -7119,7 +7122,7 @@ void nsTextFrame::DrawTextRunAndDecorations( typedef gfxFont::Metrics Metrics; auto paintDecorationLine = [&](const LineDecoration& dec, - gfxFloat Metrics::*lineSize, + gfxFloat Metrics::* lineSize, StyleTextDecorationLine lineType) { if (dec.mStyle == StyleTextDecorationStyle::None) { return; diff --git a/layout/generic/nsTextFrameUtils.cpp b/layout/generic/nsTextFrameUtils.cpp index c6af6e7b04aa..05c9c40f6320 100644 --- a/layout/generic/nsTextFrameUtils.cpp +++ b/layout/generic/nsTextFrameUtils.cpp @@ -86,7 +86,8 @@ static CharT* TransformWhiteSpaces( const CharT* aText, uint32_t aLength, uint32_t aBegin, uint32_t aEnd, bool aHasSegmentBreak, bool& aInWhitespace, CharT* aOutput, nsTextFrameUtils::Flags& aFlags, - nsTextFrameUtils::CompressionMode aCompression, gfxSkipChars* aSkipChars) { + nsTextFrameUtils::CompressionMode aCompression, gfxSkipChars* aSkipChars, + bool aLangIsJapaneseOrChinese) { MOZ_ASSERT(aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE || aCompression == nsTextFrameUtils::COMPRESS_WHITESPACE_NEWLINE, "whitespaces should be skippable!!"); @@ -126,10 +127,15 @@ static CharT* 
TransformWhiteSpaces( } } while (IsDefaultIgnorable(ucs4after) && pos < aLength); - // Discard newlines between characters that have F, W, or H - // EastAsianWidth property and neither side is Hangul. + // Discard newlines between characters that have F, W, or H EastAsianWidth + // property and neither side is Hangul. + // For Japanese/Chinese, also discard if *either* character is punctuation + // with F, W, or H EastAsianWidth (halfwidth forms included). isSegmentBreakSkippable = - IsSegmentBreakSkipChar(ucs4before) && IsSegmentBreakSkipChar(ucs4after); + (IsSegmentBreakSkipChar(ucs4before) && + IsSegmentBreakSkipChar(ucs4after)) || + (aLangIsJapaneseOrChinese && (IsEastAsianPunctuation(ucs4before) || + IsEastAsianPunctuation(ucs4after))); } for (uint32_t i = aBegin; i < aEnd; ++i) { @@ -201,12 +207,10 @@ static CharT* TransformWhiteSpaces( } template -CharT* nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength, - CharT* aOutput, - CompressionMode aCompression, - uint8_t* aIncomingFlags, - gfxSkipChars* aSkipChars, - Flags* aAnalysisFlags) { +CharT* nsTextFrameUtils::TransformText( + const CharT* aText, uint32_t aLength, CharT* aOutput, + CompressionMode aCompression, uint8_t* aIncomingFlags, + gfxSkipChars* aSkipChars, Flags* aAnalysisFlags, const nsAtom* aLanguage) { Flags flags = Flags(); #ifdef DEBUG int32_t skipCharsOffset = aSkipChars->GetOriginalCharCount(); @@ -247,6 +251,19 @@ CharT* nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength, } *aIncomingFlags &= ~INCOMING_WHITESPACE; } else { + bool langIsJapaneseOrChinese = [=]() { + if (!aLanguage || aLanguage->GetLength() < 2) { + return false; + } + const char16_t* text = aLanguage->GetUTF16String(); + if ((ToLowerCaseASCII(text[0]) == char16_t('j') && + ToLowerCaseASCII(text[1]) == char16_t('a')) || + (ToLowerCaseASCII(text[0]) == char16_t('z') && + ToLowerCaseASCII(text[1]) == char16_t('h'))) { + return aLanguage->GetLength() == 2 || text[2] == '-'; + } + return false; + }(); bool inWhitespace = 
(*aIncomingFlags & INCOMING_WHITESPACE) != 0; uint32_t i; for (i = 0; i < aLength; ++i) { @@ -284,9 +301,9 @@ CharT* nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength, j--; } if (j > i) { - aOutput = TransformWhiteSpaces(aText, aLength, i, j, hasSegmentBreak, - inWhitespace, aOutput, flags, - aCompression, aSkipChars); + aOutput = TransformWhiteSpaces( + aText, aLength, i, j, hasSegmentBreak, inWhitespace, aOutput, + flags, aCompression, aSkipChars, langIsJapaneseOrChinese); } // We need to keep KeepChar()/SkipChar() in order, so process the // last white space first, then process the trailing discardables. @@ -347,11 +364,11 @@ CharT* nsTextFrameUtils::TransformText(const CharT* aText, uint32_t aLength, template uint8_t* nsTextFrameUtils::TransformText( const uint8_t* aText, uint32_t aLength, uint8_t* aOutput, CompressionMode aCompression, uint8_t* aIncomingFlags, - gfxSkipChars* aSkipChars, Flags* aAnalysisFlags); + gfxSkipChars* aSkipChars, Flags* aAnalysisFlags, const nsAtom* aLanguage); template char16_t* nsTextFrameUtils::TransformText( const char16_t* aText, uint32_t aLength, char16_t* aOutput, CompressionMode aCompression, uint8_t* aIncomingFlags, - gfxSkipChars* aSkipChars, Flags* aAnalysisFlags); + gfxSkipChars* aSkipChars, Flags* aAnalysisFlags, const nsAtom* aLanguage); template bool nsTextFrameUtils::IsSkippableCharacterForTransformText( uint8_t aChar); template bool nsTextFrameUtils::IsSkippableCharacterForTransformText( diff --git a/layout/generic/nsTextFrameUtils.h b/layout/generic/nsTextFrameUtils.h index 5b9edf271e9c..6d77e7573c17 100644 --- a/layout/generic/nsTextFrameUtils.h +++ b/layout/generic/nsTextFrameUtils.h @@ -10,6 +10,7 @@ #include "gfxSkipChars.h" #include "nsBidiUtils.h" +class nsAtom; class nsIContent; struct nsStyleText; @@ -121,12 +122,15 @@ class nsTextFrameUtils { * @param aIncomingFlags a flag indicating whether there was whitespace * or an Arabic character preceding this text. 
We set it to indicate if * there's an Arabic character or whitespace preceding the end of this text. + * @param aLanguage Content language (used to select Japanese/Chinese behavior + * at punctuation, see https://bugzilla.mozilla.org/show_bug.cgi?id=1935148). */ template static CharT* TransformText(const CharT* aText, uint32_t aLength, CharT* aOutput, CompressionMode aCompression, uint8_t* aIncomingFlags, gfxSkipChars* aSkipChars, - nsTextFrameUtils::Flags* aAnalysisFlags); + nsTextFrameUtils::Flags* aAnalysisFlags, + const nsAtom* aLanguage); /** * Returns whether aChar is a character that nsTextFrameUtils::TransformText diff --git a/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001-ref.html b/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001-ref.html new file mode 100644 index 000000000000..d339aebda9d2 --- /dev/null +++ b/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001-ref.html @@ -0,0 +1,7 @@ + + +Segment break transformation: CJK punctuation + + +

本システムはサポート切れのブラウザに対応しません。Internet Explorerをお使いの場合、Edge・Chrome・Firefoxなどに移行してください。(EdgeはChromium版をお使いください)

+

ユーザメイ「ジョン・スミス」、ID「smith」ノアカウントヲショウキョシマス。y/N

+ diff --git a/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001.html b/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001.html new file mode 100644 index 000000000000..68e4589625db --- /dev/null +++ b/testing/web-platform/tests/css/css-text/line-breaking/segment-break-transformation-punctuation-001.html @@ -0,0 +1,29 @@ + + +Segment break transformation: CJK punctuation + + + + + +

+ 本システムはサポート切れのブラウザに対応しません。 + Internet Explorerをお使いの場合、 + Edge + ・ + Chrome + ・ + Firefoxなどに移行してください。 + (EdgeはChromium版をお使いください) +

+

+ ユーザメイ + 「ジョン + ・ + スミス」 + 、 + ID + 「smith」 + ノアカウントヲショウキョシマス。 + y/N +