Files
tubestation/intl/components/gtest/TestScript.cpp
André Bargull dac1ab71ed Bug 1927706 - Part 13: Update expected test results for non-SpiderMonkey code. r=platform-i18n-reviewers,gregtatum
The "ar" locale without a region subtag now defaults to Latin script. Replace with "ar-EG"
to ensure we test Arabic script, too.

Change "space-cluster-2.html" to use U+0311 INVERTED BREVE to avoid getting the
Cyrillic variant glyph for BREVE in Consolas. See Bug 1927706, comment #24 for a
detailed comment on why updating ICU to Unicode 16 broke this test case on Windows.

Differential Revision: https://phabricator.services.mozilla.com/D227214
2024-10-31 10:26:31 +00:00

63 lines
2.4 KiB
C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
namespace mozilla::intl {
TEST(IntlScript, GetExtensions) {
  // Checks the script extensions that UnicodeProperties::GetExtensions reports
  // for a selection of code points: two contiguous ranges, a few individual
  // characters, and one value past the end of the Unicode code space.
  UnicodeProperties::ScriptExtensionVector extensions;

  // Every code point in U+0000..U+0040 is expected to be Common.
  constexpr char32_t kLastCommon = 0x0040;
  for (char32_t codePoint = 0; codePoint <= kLastCommon; ++codePoint) {
    ASSERT_TRUE(UnicodeProperties::GetExtensions(codePoint, extensions).isOk());
    ASSERT_EQ(extensions.length(), 1u);
    ASSERT_EQ(Script(extensions[0]), Script::COMMON);
  }

  // Every code point in U+0332..U+0341 is expected to be Inherited.
  constexpr char32_t kFirstInherited = 0x0332;
  constexpr char32_t kLastInherited = 0x0341;
  for (char32_t codePoint = kFirstInherited; codePoint <= kLastInherited;
       ++codePoint) {
    ASSERT_TRUE(UnicodeProperties::GetExtensions(codePoint, extensions).isOk());
    ASSERT_EQ(extensions.length(), 1u);
    ASSERT_EQ(Script(extensions[0]), Script::INHERITED);
  }

  // U+1CF7 has the script code Common, yet its only script extension is Beng.
  ASSERT_TRUE(UnicodeProperties::GetExtensions(0x1cf7, extensions).isOk());
  ASSERT_EQ(extensions.length(), 1u);
  ASSERT_EQ(Script(extensions[0]), Script::BENGALI);

  // U+063F (ؿ), see https://unicode-table.com/en/063F/
  // It has no script extension of its own, so the plain script code comes
  // back instead.
  ASSERT_TRUE(UnicodeProperties::GetExtensions(0x063f, extensions).isOk());
  ASSERT_EQ(extensions.length(), 1u);
  ASSERT_EQ(Script(extensions[0]), Script::ARABIC);

  // U+FF65 ('・', Halfwidth Katakana Middle Dot), see
  // https://unicode-table.com/en/FF65/
  // Its script extensions are Bopo, Hani, Hang, Hira, Kana and Yiii; they are
  // compared below in the order this test expects the vector to hold them.
  ASSERT_TRUE(UnicodeProperties::GetExtensions(0xff65, extensions).isOk());
  const Script kMiddleDotScripts[] = {
      Script::BOPOMOFO, Script::HAN,      Script::HANGUL,
      Script::HIRAGANA, Script::KATAKANA, Script::YI,
  };
  ASSERT_EQ(extensions.length(), 6u);
  size_t slot = 0;
  for (const Script expected : kMiddleDotScripts) {
    ASSERT_EQ(Script(extensions[slot]), expected);
    ++slot;
  }

  // 0x10ffff is the largest code point, which makes 0x110000 invalid. The
  // call still succeeds and yields Script::UNKNOWN as the single entry.
  ASSERT_TRUE(UnicodeProperties::GetExtensions(0x110000, extensions).isOk());
  ASSERT_EQ(extensions.length(), 1u);
  ASSERT_EQ(Script(extensions[0]), Script::UNKNOWN);
}
} // namespace mozilla::intl