From 178aa5a5c827de59d42c8316d7a8f1c295eb829f Mon Sep 17 00:00:00 2001 From: Makoto Kato Date: Mon, 7 Aug 2023 06:23:59 +0000 Subject: [PATCH] Bug 1719535 - Part 9. Trim ASCII space at the tail for content serializer. r=TYLin The XML and plain text serializers use the line break segmenter to wrap text. The new segmenter, which is compatible with UAX#14, has different rules from the old segmenter. The old segmenter has a break opportunity before an ASCII space, but UAX#14 does not (https://www.unicode.org/reports/tr14/#LB7). So we have to trim ASCII spaces at the tail when wrapping text. Depends on D167677 Differential Revision: https://phabricator.services.mozilla.com/D180773 --- dom/serializers/nsPlainTextSerializer.cpp | 14 +++++++++++++- dom/serializers/nsXMLContentSerializer.cpp | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/dom/serializers/nsPlainTextSerializer.cpp b/dom/serializers/nsPlainTextSerializer.cpp index f64dd902bd5f..3cc7489062cb 100644 --- a/dom/serializers/nsPlainTextSerializer.cpp +++ b/dom/serializers/nsPlainTextSerializer.cpp @@ -131,7 +131,19 @@ int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent( // mContent until we find a width less than or equal to wrap column. uint32_t width = 0; intl::LineBreakIteratorUtf16 lineBreakIter(mContent); - while (const Maybe nextGoodSpace = lineBreakIter.Next()) { + while (Maybe nextGoodSpace = lineBreakIter.Next()) { + // Trim space at the tail. UAX#14 doesn't have break opportunity for + // ASCII space at the tail. + const Maybe originalNextGoodSpace = nextGoodSpace; + while (*nextGoodSpace > 0 && + mContent.CharAt(*nextGoodSpace - 1) == 0x20) { + nextGoodSpace = Some(*nextGoodSpace - 1); + } + if (*nextGoodSpace == 0) { + // Restore the original nextGoodSpace. 
+ nextGoodSpace = originalNextGoodSpace; + } + width += GetUnicharStringWidth(Span( mContent.get() + goodSpace, *nextGoodSpace - goodSpace)); if (prefixwidth + width > aWrapColumn) { diff --git a/dom/serializers/nsXMLContentSerializer.cpp b/dom/serializers/nsXMLContentSerializer.cpp index 08cf7fda56a3..ab0fcdf4135b 100644 --- a/dom/serializers/nsXMLContentSerializer.cpp +++ b/dom/serializers/nsXMLContentSerializer.cpp @@ -1560,6 +1560,19 @@ bool nsXMLContentSerializer::AppendWrapped_NonWhitespaceSequence( MOZ_ASSERT(nextWrapPosition.isSome(), "We should've exited the loop when reaching the end of " "text in the previous iteration!"); + + // Trim space at the tail. UAX#14 doesn't have break opportunity + // for ASCII space at the tail. + const Maybe originalNextWrapPosition = nextWrapPosition; + while (*nextWrapPosition > 0 && + subSeq.at(*nextWrapPosition - 1) == 0x20) { + nextWrapPosition = Some(*nextWrapPosition - 1); + } + if (*nextWrapPosition == 0) { + // Restore the original nextWrapPosition. + nextWrapPosition = originalNextWrapPosition; + } + if (aSequenceStart + *nextWrapPosition > aPos) { break; }