Bug 1940947 - Exclude Korean currency symbol (WON SIGN) from being treated like Chinese/Japanese characters during CSS segment break transformation. r=dshin

Differential Revision: https://phabricator.services.mozilla.com/D235669
This commit is contained in:
Jonathan Kew
2025-01-31 12:26:07 +00:00
parent 59c8cc2e64
commit dc59173f76
4 changed files with 69 additions and 29 deletions

View File

@@ -515,9 +515,16 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr) {
return hash; return hash;
} }
// The Korean Won currency sign has East Asian Width = HALFWIDTH, and
// Script = COMMON (rather than HANGUL), but we don't want to treat it like
// Chinese/Japanese half-width characters for segment break transformation,
// so we exclude it individually in the two functions here.
static constexpr uint32_t kWonCurrencySign = 0x20A9;
// Reports whether code point |u| counts as a "skip" character for segment break
// transformation. All of the following must hold:
//   - IsEastAsianWidthFHWexcludingEmoji(u) is true (going by the helper's name,
//     East Asian Width F/H/W with emoji excluded -- confirm against its
//     definition),
//   - the script code of |u| is not HANGUL,
//   - |u| is not kWonCurrencySign.
bool IsSegmentBreakSkipChar(uint32_t u) { bool IsSegmentBreakSkipChar(uint32_t u) {
return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) && return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) &&
intl::UnicodeProperties::GetScriptCode(u) != intl::Script::HANGUL; intl::UnicodeProperties::GetScriptCode(u) != intl::Script::HANGUL &&
u != kWonCurrencySign;
} }
bool IsEastAsianPunctuation(uint32_t u) { bool IsEastAsianPunctuation(uint32_t u) {
@@ -527,7 +534,9 @@ bool IsEastAsianPunctuation(uint32_t u) {
// two characters consistent behavior. // two characters consistent behavior.
constexpr uint32_t kFullwidthTilde = 0xFF5E; constexpr uint32_t kFullwidthTilde = 0xFF5E;
return intl::UnicodeProperties::IsEastAsianWidthFHW(u) && return intl::UnicodeProperties::IsEastAsianWidthFHW(u) &&
(intl::UnicodeProperties::IsPunctuation(u) || u == kFullwidthTilde); ((intl::UnicodeProperties::IsPunctuation(u) &&
u != kWonCurrencySign) ||
u == kFullwidthTilde);
} }
bool IsPunctuationForWordSelect(char16_t aCh) { bool IsPunctuationForWordSelect(char16_t aCh) {

View File

@@ -0,0 +1,8 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Segment break transformation reference</title>
<!-- Reference rendering: the Japanese sentences are written on one source line,
     with a literal space on each side of every currency amount and nowhere
     else, so no segment break transformation is involved here. -->
<p>Each currency amount should have space before and after; there should be no other spaces.</p>
<!-- word-spacing: 1em enlarges every space in this paragraph, presumably so
     that a missing or extra space is easy to see. -->
<p lang="ja" style="font-family: sans-serif; word-spacing: 1em;">
価格は $1.00 です。価格は ¥150 です。価格は €1.00 です。価格は ₩1500 です。
</p>

View File

@@ -0,0 +1,23 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Segment break transformation: currency symbols</title>
<link rel="author" href="mailto:jkew@mozilla.com">
<!-- Reftest: this page must render the same as the page named in rel="match". -->
<link rel="match" href="segment-break-transformation-currency-001-ref.html">
<link rel="help" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1940947">
<meta name="flags" content="should" />
<p>Each currency amount should have space before and after; there should be no other spaces.</p>
<!-- In the paragraph below, each Japanese phrase and each currency amount is on
     its own source line, so every boundary between them is a line break rather
     than a literal space. Per the instruction text above, the breaks next to a
     currency amount ($, ¥, € and ₩ cases) should render as a space, and the
     breaks between two Japanese phrases should render as nothing.
     The line breaks inside that paragraph are the test input; do not reflow it. -->
<p lang="ja" style="font-family: sans-serif; word-spacing: 1em;">
価格は
$1.00
です。
価格は
¥150
です。
価格は
€1.00
です。
価格は
₩1500
です。
</p>

View File

@@ -2,7 +2,7 @@
<html lang="en" > <html lang="en" >
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>Won and halfwidth characters around line break</title> <title>Halfwidth characters around line break</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'> <link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='https://drafts.csswg.org/css-text-3/#line-break-transform'> <link rel='help' href='https://drafts.csswg.org/css-text-3/#line-break-transform'>
<meta name="assert" content="If the East Asian Width property of both the character before and after the line feed is F or H and neither side is Hangul, then the segment break is removed."> <meta name="assert" content="If the East Asian Width property of both the character before and after the line feed is F or H and neither side is Hangul, then the segment break is removed.">
@@ -16,77 +16,77 @@
</head> </head>
<body> <body>
<div id='log'></div> <div id='log'></div>
<div id='test1' class="test"><span> <div id='test1' class="test"><span>ノコリ
</span></div> </span></div>
<div id='test2' class="test"><span>&#x20;&#x20;&#x20; <div id='test2' class="test"><span>ノコリ&#x20;&#x20;&#x20;
</span></div> </span></div>
<div id='test3' class="test"><span> <div id='test3' class="test"><span>ノコリ
&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;</span></div>
<div id='test4' class="test"><span>&#x20;&#x20;&#x20; <div id='test4' class="test"><span>ノコリ&#x20;&#x20;&#x20;
&#x20;&#x20;&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;&#x20;&#x20;</span></div>
<div id='test5' class="test"><span> <div id='test5' class="test"><span>ノコリ
</span></div> </span></div>
<div id='test6' class="test"><span>&#x20;&#x20; <div id='test6' class="test"><span>ノコリ&#x20;&#x20;
&#x20;&#x20;&#x20; &#x20;&#x20;&#x20;
&#x20;&#x20;&#x20; &#x20;&#x20;&#x20;
&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;</span></div>
<div id="ref1" class="ref"><span></span></div> <div id="ref1" class="ref"><span>ノコリ</span></div>
<div id='test7' class="test"><span> <div id='test7' class="test"><span>
</span></div> </span></div>
<div id='test8' class="test"><span>&#x20;&#x20;&#x20; <div id='test8' class="test"><span>&#x20;&#x20;&#x20;
</span></div> </span></div>
<div id='test9' class="test"><span> <div id='test9' class="test"><span>
&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;&#x20;</span></div>
<div id='test10' class="test"><span>&#x20;&#x20;&#x20; <div id='test10' class="test"><span>&#x20;&#x20;&#x20;
&#x20;&#x20;&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;&#x20;&#x20;</span></div>
<div id='test11' class="test"><span> <div id='test11' class="test"><span>
</span></div> </span></div>
<div id='test12' class="test"><span>&#x20;&#x20; <div id='test12' class="test"><span>&#x20;&#x20;
&#x20;&#x20;&#x20; &#x20;&#x20;&#x20;
&#x20;&#x20;&#x20; &#x20;&#x20;&#x20;
&#x20;&#x20;&#x20;</span></div> &#x20;&#x20;&#x20;</span></div>
<div id="ref2" class="ref"><span></span></div> <div id="ref2" class="ref"><span></span></div>
<script> <script>
test(function() { test(function() {
assert_equals(document.getElementById('test1').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test1').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "linebreak only "); }, "linebreak only ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test2').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test2').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "spaces linebreak "); }, "spaces linebreak ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test3').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test3').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "linebreak spaces "); }, "linebreak spaces ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test4').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test4').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "spaces linebreak spaces "); }, "spaces linebreak spaces ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test5').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test5').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "multiple linebreaks "); }, "multiple linebreaks ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test6').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth); assert_equals(document.getElementById('test6').firstChild.offsetWidth, document.getElementById('ref1').firstChild.offsetWidth);
}, "multiple linebreaks + spaces "); }, "multiple linebreaks + spaces ノコリ");
test(function() { test(function() {
assert_equals(document.getElementById('test7').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test7').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "linebreak only "); }, "linebreak only ");
test(function() { test(function() {
assert_equals(document.getElementById('test8').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test8').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "spaces linebreak "); }, "spaces linebreak ");
test(function() { test(function() {
assert_equals(document.getElementById('test9').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test9').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "linebreak spaces "); }, "linebreak spaces ");
test(function() { test(function() {
assert_equals(document.getElementById('test10').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test10').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "spaces linebreak spaces "); }, "spaces linebreak spaces ");
test(function() { test(function() {
assert_equals(document.getElementById('test11').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test11').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "multiple linebreaks "); }, "multiple linebreaks ");
test(function() { test(function() {
assert_equals(document.getElementById('test12').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth); assert_equals(document.getElementById('test12').firstChild.offsetWidth, document.getElementById('ref2').firstChild.offsetWidth);
}, "multiple linebreaks + spaces "); }, "multiple linebreaks + spaces ");
</script> </script>
<!-- Notes: <!-- Notes:
The assertion will fail if space is produced for any line in the test paragraph. The assertion will fail if space is produced for any line in the test paragraph.