Backed out 19 changesets (bug 1927706) for causing reftest failures @ space-cluster-2.html

Backed out changeset 3877f1fa62f5 (bug 1927706)
Backed out changeset 233c6ebf84a2 (bug 1927706)
Backed out changeset 07e5871d5fa3 (bug 1927706)
Backed out changeset 84ef75087931 (bug 1927706)
Backed out changeset f89b916619e1 (bug 1927706)
Backed out changeset b82d9d622315 (bug 1927706)
Backed out changeset b0d2c5711865 (bug 1927706)
Backed out changeset 9529dda25bd9 (bug 1927706)
Backed out changeset 40b7907d7fc8 (bug 1927706)
Backed out changeset c549655dbd73 (bug 1927706)
Backed out changeset c5cc289771b3 (bug 1927706)
Backed out changeset 8ef66f7822c4 (bug 1927706)
Backed out changeset dff6d37fb2fe (bug 1927706)
Backed out changeset 083a0b3da643 (bug 1927706)
Backed out changeset 06649ac72a19 (bug 1927706)
Backed out changeset 019f7533abbc (bug 1927706)
Backed out changeset f1539604c459 (bug 1927706)
Backed out changeset 578667f1f0d4 (bug 1927706)
Backed out changeset 8ed1e7e7d4ab (bug 1927706)
This commit is contained in:
Alexandru Marc
2024-10-30 11:19:58 +02:00
parent 177f1e9683
commit 52a93f69fa
2681 changed files with 578943 additions and 485608 deletions

View File

@@ -22,4 +22,4 @@
# changes to stick? As of bug 928195, this shouldn't be necessary! Please
# don't change CLOBBER for WebIDL changes any more.
Bug 1927706 - Update to ICU 76 requires clobber
Modified build files in third_party/libwebrtc - Bug 1925119 - add dcsctp to libwebrtc build. r?bwc!

View File

@@ -120,8 +120,6 @@ sources = [
"/intl/icu/source/common/uloc.cpp",
"/intl/icu/source/common/uloc_keytype.cpp",
"/intl/icu/source/common/uloc_tag.cpp",
"/intl/icu/source/common/ulocale.cpp",
"/intl/icu/source/common/ulocbuilder.cpp",
"/intl/icu/source/common/umapfile.cpp",
"/intl/icu/source/common/umath.cpp",
"/intl/icu/source/common/umutablecptrie.cpp",
@@ -163,7 +161,6 @@ sources = [
"/intl/icu/source/common/utext.cpp",
"/intl/icu/source/common/utf_impl.cpp",
"/intl/icu/source/common/util.cpp",
"/intl/icu/source/common/util_props.cpp",
"/intl/icu/source/common/utrace.cpp",
"/intl/icu/source/common/utrie.cpp",
"/intl/icu/source/common/utrie2.cpp",
@@ -203,6 +200,7 @@ other_sources = [
"/intl/icu/source/common/usc_impl.cpp",
"/intl/icu/source/common/ushape.cpp",
"/intl/icu/source/common/ustr_wcs.cpp",
"/intl/icu/source/common/util_props.cpp",
]
EXPORTS.unicode += [
"/intl/icu/source/common/unicode/appendable.h",
@@ -273,8 +271,6 @@ EXPORTS.unicode += [
"/intl/icu/source/common/unicode/uiter.h",
"/intl/icu/source/common/unicode/uldnames.h",
"/intl/icu/source/common/unicode/uloc.h",
"/intl/icu/source/common/unicode/ulocale.h",
"/intl/icu/source/common/unicode/ulocbuilder.h",
"/intl/icu/source/common/unicode/umachine.h",
"/intl/icu/source/common/unicode/umisc.h",
"/intl/icu/source/common/unicode/umutablecptrie.h",

View File

@@ -18,7 +18,6 @@ DEFINES["UCONFIG_NO_TRANSLITERATION"] = True
DEFINES["UCONFIG_NO_REGULAR_EXPRESSIONS"] = True
DEFINES["UCONFIG_NO_BREAK_ITERATION"] = True
DEFINES["UCONFIG_NO_IDNA"] = True
DEFINES["UCONFIG_NO_MF2"] = True
# We don't need to pass data to and from legacy char* APIs.
DEFINES["U_CHARSET_IS_UTF8"] = True

View File

@@ -70,17 +70,6 @@ sources = [
"/intl/icu/source/i18n/measunit.cpp",
"/intl/icu/source/i18n/measunit_extra.cpp",
"/intl/icu/source/i18n/measure.cpp",
"/intl/icu/source/i18n/messageformat2.cpp",
"/intl/icu/source/i18n/messageformat2_arguments.cpp",
"/intl/icu/source/i18n/messageformat2_checker.cpp",
"/intl/icu/source/i18n/messageformat2_data_model.cpp",
"/intl/icu/source/i18n/messageformat2_errors.cpp",
"/intl/icu/source/i18n/messageformat2_evaluation.cpp",
"/intl/icu/source/i18n/messageformat2_formattable.cpp",
"/intl/icu/source/i18n/messageformat2_formatter.cpp",
"/intl/icu/source/i18n/messageformat2_function_registry.cpp",
"/intl/icu/source/i18n/messageformat2_parser.cpp",
"/intl/icu/source/i18n/messageformat2_serializer.cpp",
"/intl/icu/source/i18n/msgfmt.cpp",
"/intl/icu/source/i18n/nfrs.cpp",
"/intl/icu/source/i18n/nfrule.cpp",
@@ -289,12 +278,6 @@ EXPORTS.unicode += [
"/intl/icu/source/i18n/unicode/measfmt.h",
"/intl/icu/source/i18n/unicode/measunit.h",
"/intl/icu/source/i18n/unicode/measure.h",
"/intl/icu/source/i18n/unicode/messageformat2.h",
"/intl/icu/source/i18n/unicode/messageformat2_arguments.h",
"/intl/icu/source/i18n/unicode/messageformat2_data_model.h",
"/intl/icu/source/i18n/unicode/messageformat2_data_model_names.h",
"/intl/icu/source/i18n/unicode/messageformat2_formattable.h",
"/intl/icu/source/i18n/unicode/messageformat2_function_registry.h",
"/intl/icu/source/i18n/unicode/msgfmt.h",
"/intl/icu/source/i18n/unicode/nounit.h",
"/intl/icu/source/i18n/unicode/numberformatter.h",

View File

@@ -588,9 +588,6 @@ void gfxPlatformMac::GetCommonFallbackFonts(uint32_t aCh, Script aRunScript,
case Script::WANCHO:
aFontList.AppendElement("Noto Sans Wancho");
break;
case Script::ARABIC_NASTALIQ:
aFontList.AppendElement("Noto Nastaliq Urdu");
break;
// Script codes for which no commonly-installed font is currently known.
// Probably future macOS versions will add Noto fonts for many of these,
@@ -649,13 +646,6 @@ void gfxPlatformMac::GetCommonFallbackFonts(uint32_t aCh, Script aRunScript,
case Script::VITHKUQI:
case Script::KAWI:
case Script::NAG_MUNDARI:
case Script::GARAY:
case Script::GURUNG_KHEMA:
case Script::KIRAT_RAI:
case Script::OL_ONAL:
case Script::SUNUWAR:
case Script::TODHRI:
case Script::TULU_TIGALARI:
break;
}

View File

@@ -59,7 +59,7 @@ TEST(IntlDateTimeFormat, Style_ar_utf8)
DateTimeFormat::StyleBag style;
style.time = Some(DateTimeFormat::Style::Medium);
auto dtFormat = testStyle("ar-EG", style);
auto dtFormat = testStyle("ar", style);
TestBuffer<char> buffer;
dtFormat->TryFormat(DATE, buffer).unwrap();
@@ -71,7 +71,7 @@ TEST(IntlDateTimeFormat, Style_ar_utf16)
DateTimeFormat::StyleBag style;
style.time = Some(DateTimeFormat::Style::Medium);
auto dtFormat = testStyle("ar-EG", style);
auto dtFormat = testStyle("ar", style);
TestBuffer<char16_t> buffer;
dtFormat->TryFormat(DATE, buffer).unwrap();
@@ -567,8 +567,6 @@ TEST(IntlDateTimeFormat, TryFormatToParts)
TEST(IntlDateTimeFormat, SetStartTimeIfGregorian)
{
using namespace std::literals;
DateTimeFormat::StyleBag style{};
style.date = Some(DateTimeFormat::Style::Long);
@@ -597,40 +595,27 @@ TEST(IntlDateTimeFormat, SetStartTimeIfGregorian)
MakeStringSpan(locale), style, gen.get(), timeZone)
.unwrap();
const char* Dec22_1581;
const char* Jan01_1582;
const char* Jan01_1583;
if (locale == "en-US-u-ca-iso8601"sv) {
Dec22_1581 = "1581 December 22";
Jan01_1582 = "1582 January 1";
Jan01_1583 = "1583 January 1";
} else {
Dec22_1581 = "December 22, 1581";
Jan01_1582 = "January 1, 1582";
Jan01_1583 = "January 1, 1583";
}
TestBuffer<char> buffer;
// Before the default Gregorian change date, so interpreted in the Julian
// calendar, which is December 22, 1581.
dtFormat->TryFormat(FirstJanuary1582, buffer).unwrap();
ASSERT_TRUE(buffer.verboseMatches(Dec22_1581));
ASSERT_TRUE(buffer.verboseMatches("December 22, 1581"));
// After default Gregorian change date, so January 1, 1583.
dtFormat->TryFormat(FirstJanuary1582 + oneYear, buffer).unwrap();
ASSERT_TRUE(buffer.verboseMatches(Jan01_1583));
ASSERT_TRUE(buffer.verboseMatches("January 1, 1583"));
// Adjust the start time to use a proleptic Gregorian calendar.
dtFormat->SetStartTimeIfGregorian(StartOfTime);
// Now interpreted in proleptic Gregorian calendar at January 1, 1582.
dtFormat->TryFormat(FirstJanuary1582, buffer).unwrap();
ASSERT_TRUE(buffer.verboseMatches(Jan01_1582));
ASSERT_TRUE(buffer.verboseMatches("January 1, 1582"));
// Still January 1, 1583.
dtFormat->TryFormat(FirstJanuary1582 + oneYear, buffer).unwrap();
ASSERT_TRUE(buffer.verboseMatches(Jan01_1583));
ASSERT_TRUE(buffer.verboseMatches("January 1, 1583"));
}
}
} // namespace mozilla::intl

View File

@@ -42,17 +42,12 @@ TEST(IntlLocaleCanonicalizer, CanonicalizeICULevel1)
// Removes the .utf8 ends
CheckLocaleResult(ascii, "ar-MA.utf8", "ar_MA");
// Rejects non parseable ASCII inputs.
ASSERT_EQ(
LocaleCanonicalizer::CanonicalizeICULevel1(
"abcdefghijlkmnopqrstuvwxyzABCDEFGHIJLKMNOPQRSTUVWXYZ-_.0123456789",
ascii)
.unwrapErr(),
ICUError::InternalError);
ASSERT_EQ(
LocaleCanonicalizer::CanonicalizeICULevel1("exotic ascii:", ascii)
.unwrapErr(),
ICUError::InternalError);
// Allows valid ascii inputs
CheckLocaleResult(
ascii,
"abcdefghijlkmnopqrstuvwxyzABCDEFGHIJLKMNOPQRSTUVWXYZ-_.0123456789",
"abcdefghijlkmnopqrstuvwxyzabcdefghijlkmnopqrstuvwxyz__");
CheckLocaleResult(ascii, "exotic ascii:", "exotic ascii:");
// Does not accept non-ascii inputs.
ASSERT_EQ(LocaleCanonicalizer::CanonicalizeICULevel1("👍", ascii).unwrapErr(),

View File

@@ -27,7 +27,7 @@ TEST(IntlNumberFormat, Basic)
ASSERT_EQ(std::u16string_view(res16), u"1,234.56");
UniquePtr<NumberFormat> nfAr =
NumberFormat::TryCreate("ar-EG", options).unwrap();
NumberFormat::TryCreate("ar", options).unwrap();
ASSERT_TRUE(nfAr->format(1234.56, buf8).isOk());
ASSERT_EQ(buf8.get_string_view(), "١٬٢٣٤٫٥٦");
ASSERT_TRUE(nfAr->format(1234.56, buf16).isOk());

View File

@@ -13,7 +13,7 @@ TEST(IntlNumberingSystem, GetName)
auto numbers_en = NumberingSystem::TryCreate("en").unwrap();
ASSERT_EQ(numbers_en->GetName().unwrap(), MakeStringSpan("latn"));
auto numbers_ar = NumberingSystem::TryCreate("ar-EG").unwrap();
auto numbers_ar = NumberingSystem::TryCreate("ar").unwrap();
ASSERT_EQ(numbers_ar->GetName().unwrap(), MakeStringSpan("arab"));
auto numbers_ff_Adlm = NumberingSystem::TryCreate("ff-Adlm").unwrap();

View File

@@ -38,7 +38,7 @@ TEST(IntlRelativeTimeFormat, Basic)
rtf->format(1.2, RelativeTimeFormat::FormatUnit::Day, buf16).isOk());
ASSERT_EQ(buf16.get_string_view(), u"dentro de 1,2 días");
res = RelativeTimeFormat::TryCreate("ar-EG", options);
res = RelativeTimeFormat::TryCreate("ar", options);
ASSERT_TRUE(res.isOk());
rtf = res.unwrap();
buf8.clear();

View File

@@ -18,8 +18,8 @@ TEST(IntlScript, GetExtensions)
ASSERT_EQ(Script(extensions[0]), Script::COMMON);
}
// 0x0332..0x0341 are Inherited.
for (char32_t ch = 0x332; ch < 0x0342; ch++) {
// 0x0300..0x0341 are Inherited.
for (char32_t ch = 0x300; ch < 0x0341; ch++) {
ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::INHERITED);

View File

@@ -462,10 +462,9 @@ class DisplayNames final {
// Normally this type of operation wouldn't be safe, but ASCII characters
// all take 1 byte in UTF-8 encoding, and can be zero padded to be valid
// UTF-16. Currency codes are all three ASCII letters.
// Normalize to upper case so we can easily detect the fallback case.
char16_t currency[] = {AsciiAlphaToUpperCase(aCurrency[0]),
AsciiAlphaToUpperCase(aCurrency[1]),
AsciiAlphaToUpperCase(aCurrency[2]), u'\0'};
char16_t currency[] = {static_cast<char16_t>(aCurrency[0]),
static_cast<char16_t>(aCurrency[1]),
static_cast<char16_t>(aCurrency[2]), u'\0'};
UCurrNameStyle style;
switch (mOptions.style) {
@@ -489,15 +488,19 @@ class DisplayNames final {
return Err(DisplayNamesError::InternalError);
}
// No localized currency name was found when the error code is
// U_USING_DEFAULT_WARNING and the returned string is equal to the (upper
// case transformed) currency code. When `aFallback` is `Fallback::Code`,
// we don't have to perform any additional work, because ICU already
// returned the currency code in its normalized, upper case form.
if (aFallback == DisplayNames::Fallback::None &&
status == U_USING_DEFAULT_WARNING && length == 3 &&
std::u16string_view{name, 3} == std::u16string_view{currency, 3}) {
if (aBuffer.length() != 0) {
if (status == U_USING_DEFAULT_WARNING) {
// A resource bundle lookup returned a result from the root locale.
if (aFallback == DisplayNames::Fallback::Code) {
// Return the canonicalized input when no localized currency name was
// found. Canonical case for currency is upper case.
if (!aBuffer.reserve(3)) {
return Err(DisplayNamesError::OutOfMemory);
}
aBuffer.data()[0] = AsciiAlphaToUpperCase(currency[0]);
aBuffer.data()[1] = AsciiAlphaToUpperCase(currency[1]);
aBuffer.data()[2] = AsciiAlphaToUpperCase(currency[2]);
aBuffer.written(3);
} else if (aBuffer.length() != 0) {
// Ensure an empty string is in the buffer when there is no fallback.
aBuffer.written(0);
}

View File

@@ -1,6 +1,6 @@
// Generated by make_intl_data.py. DO NOT EDIT.
// Version: CLDR-46
// URL: https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Version: CLDR-43
// URL: https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
@@ -99,8 +99,8 @@ static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
#endif
// Mappings from language subtags to preferred values.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@@ -121,93 +121,91 @@ bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
}
if (language.Length() == 3) {
static const char languages[418][4] = {
"aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajp", "ajt", "aju",
"aka", "alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm",
"aue", "ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam",
"baq", "baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih",
"bis", "bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul",
"bur", "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu",
"chv", "cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu",
"cre", "cwd", "cym", "cze", "daf", "dan", "dap", "deu", "dgo", "dhd",
"dik", "diq", "dit", "div", "djl", "dkl", "drh", "drr", "dud", "duj",
"dut", "dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", "epo", "esk",
"est", "eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre",
"fry", "fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", "ger", "gfx",
"ggn", "ggo", "ggr", "gio", "gla", "gle", "glg", "gli", "glv", "gno",
"gom", "gre", "grn", "gti", "gug", "guj", "guv", "gya", "hat", "hau",
"hdn", "hea", "heb", "her", "him", "hin", "hmo", "hrr", "hrv", "hun",
"hye", "ibi", "ibo", "ice", "ido", "iii", "ike", "iku", "ile", "ill",
"ilw", "ina", "ind", "ipk", "isl", "ita", "izi", "jar", "jav", "jeg",
"jpn", "kal", "kan", "kas", "kat", "kau", "kaz", "kdv", "kgc", "kgd",
"kgh", "kgm", "khk", "khm", "kik", "kin", "kir", "kmr", "knc", "kng",
"koj", "kom", "kon", "kor", "kpp", "kpv", "krm", "ktr", "kua", "kur",
"kvs", "kwq", "kxe", "kxl", "kzh", "kzj", "kzt", "lak", "lao", "lat",
"lav", "lbk", "leg", "lii", "lim", "lin", "lit", "llo", "lmm", "ltz",
"lub", "lug", "lvs", "mac", "mah", "mal", "mao", "mar", "may", "meg",
"mgx", "mhr", "mkd", "mlg", "mlt", "mnk", "mnt", "mof", "mol", "mon",
"mri", "msa", "mst", "mup", "mwd", "mwj", "mya", "myd", "myt", "nad",
"nau", "nav", "nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", "nld",
"nln", "nlr", "nno", "nns", "nnx", "nob", "nom", "noo", "nor", "npi",
"nts", "nxu", "nya", "oci", "ojg", "oji", "ori", "orm", "ory", "oss",
"oun", "pan", "pat", "pbu", "pcr", "per", "pes", "pli", "plt", "pmc",
"pmk", "pmu", "pnb", "pol", "por", "ppa", "ppr", "prp", "pry", "pus",
"puz", "que", "quz", "rmr", "rmy", "roh", "ron", "rum", "run", "rus",
"sag", "san", "sap", "sca", "scc", "scr", "sgl", "sin", "skk", "slk",
"slo", "slv", "smd", "sme", "smo", "sna", "snb", "snd", "som", "sot",
"spa", "spy", "sqi", "src", "srd", "srp", "ssw", "sul", "sum", "sun",
"swa", "swe", "swh", "szd", "tah", "tam", "tat", "tdu", "tel", "tgg",
"tgk", "tgl", "tha", "thc", "thw", "thx", "tib", "tid", "tie", "tir",
"tkk", "tlw", "tmk", "tmp", "tne", "ton", "tpw", "tsf", "tsn", "tso",
"ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp", "uok", "urd",
"uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit", "wiw", "wln",
"wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj", "xsl", "xss",
"ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", "yos", "yuu",
"zai", "zha", "zho", "zir", "zkb", "zsm", "zul", "zyb",
static const char languages[408][4] = {
"aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajt", "aju", "aka",
"alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", "aue",
"ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", "baq",
"baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih", "bis",
"bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul", "bur",
"bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", "chv",
"cjr", "cka", "cld", "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre",
"cwd", "cym", "cze", "daf", "dan", "dap", "deu", "dgo", "dhd", "dik",
"diq", "dit", "div", "djl", "dkl", "drh", "drr", "dud", "duj", "dut",
"dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", "epo", "esk", "est",
"eus", "ewe", "fao", "fas", "fat", "fij", "fin", "fra", "fre", "fry",
"fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", "ger", "gfx", "ggn",
"ggo", "ggr", "gio", "gla", "gle", "glg", "gli", "glv", "gno", "gre",
"grn", "gti", "gug", "guj", "guv", "gya", "hat", "hau", "hdn", "hea",
"heb", "her", "him", "hin", "hmo", "hrr", "hrv", "hun", "hye", "ibi",
"ibo", "ice", "ido", "iii", "ike", "iku", "ile", "ill", "ilw", "ina",
"ind", "ipk", "isl", "ita", "izi", "jar", "jav", "jeg", "jpn", "kal",
"kan", "kas", "kat", "kau", "kaz", "kdv", "kgc", "kgd", "kgh", "khk",
"khm", "kik", "kin", "kir", "kmr", "knc", "kng", "knn", "koj", "kom",
"kon", "kor", "kpp", "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq",
"kxe", "kxl", "kzh", "kzj", "kzt", "lak", "lao", "lat", "lav", "lbk",
"leg", "lii", "lim", "lin", "lit", "llo", "lmm", "ltz", "lub", "lug",
"lvs", "mac", "mah", "mal", "mao", "mar", "may", "meg", "mgx", "mhr",
"mkd", "mlg", "mlt", "mnk", "mnt", "mof", "mol", "mon", "mri", "msa",
"mst", "mup", "mwd", "mwj", "mya", "myd", "myt", "nad", "nau", "nav",
"nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", "nld", "nln", "nlr",
"nno", "nns", "nnx", "nob", "noo", "nor", "npi", "nts", "nxu", "nya",
"oci", "ojg", "oji", "ori", "orm", "ory", "oss", "oun", "pan", "pat",
"pbu", "pcr", "per", "pes", "pli", "plt", "pmc", "pmu", "pnb", "pol",
"por", "ppa", "ppr", "pry", "pus", "puz", "que", "quz", "rmr", "rmy",
"roh", "ron", "rum", "run", "rus", "sag", "san", "sap", "sca", "scc",
"scr", "sgl", "sin", "skk", "slk", "slo", "slv", "smd", "sme", "smo",
"sna", "snb", "snd", "som", "sot", "spa", "spy", "sqi", "src", "srd",
"srp", "ssw", "sul", "sum", "sun", "swa", "swe", "swh", "tah", "tam",
"tat", "tdu", "tel", "tgg", "tgk", "tgl", "tha", "thc", "thw", "thx",
"tib", "tid", "tie", "tir", "tkk", "tlw", "tmp", "tne", "ton", "tsf",
"tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp",
"uok", "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit",
"wiw", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj",
"xsl", "ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", "yos",
"yuu", "zai", "zha", "zho", "zir", "zsm", "zul", "zyb",
};
static const char* aliases[418] = {
"aas", "aa", "ab", "dz", "af", "apf", "ami", "apc", "aeb", "jrb",
"ak", "sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as",
"ktz", "av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm",
"eu", "nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho",
"bi", "drl", "bzc", "ebk", "iba", "bo", "bs", "br", "beb", "bg",
"my", "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu",
"cv", "mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh",
"cr", "cr", "cy", "cs", "dnj", "da", "njz", "de", "doi", "mwr",
"din", "zza", "dif", "dv", "dze", "aqd", "mn", "kzk", "uth", "dwu",
"nl", "dbt", "dz", "et", "el", "amq", "man", "en", "eo", "ik",
"et", "eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr",
"fy", "ff", "ff", "dev", "om", "wny", "grb", "ka", "de", "vaj",
"gvr", "esg", "gtu", "aou", "gd", "ga", "gl", "kzk", "gv", "gon",
"kok", "el", "gn", "nyc", "gn", "gu", "duz", "gba", "ht", "ha",
"hai", "hmn", "he", "hz", "srx", "hi", "ho", "jal", "hr", "hu",
"hy", "opa", "ig", "is", "io", "ii", "iu", "iu", "ie", "ilm",
"gal", "ia", "id", "ik", "is", "it", "eza", "jgk", "jv", "oyb",
"ja", "kl", "kn", "ks", "ka", "kr", "kk", "zkd", "tdf", "ncq",
"kml", "plu", "mn", "km", "ki", "rw", "ky", "ku", "kr", "kg",
"kwv", "kv", "kg", "ko", "jkm", "kv", "bmf", "dtp", "kj", "ku",
"gdj", "yam", "tvd", "kru", "dgl", "dtp", "dtp", "ksp", "lo", "la",
"lv", "bnc", "enl", "raq", "li", "ln", "lt", "ngt", "rmx", "lb",
"lu", "lg", "lv", "mk", "mh", "ml", "mi", "mr", "ms", "cir",
"jbk", "chm", "mk", "mg", "mt", "man", "wnn", "xnt", "ro", "mn",
"mi", "ms", "mry", "raj", "dmw", "vaj", "my", "aog", "mry", "xny",
"na", "nv", "nru", "nr", "ekc", "kdz", "nd", "ng", "ne", "nl",
"azd", "nrk", "nn", "nbr", "ngv", "nb", "cbr", "dtd", "no", "ne",
"pij", "bpp", "ny", "oc", "oj", "oj", "or", "om", "or", "os",
"vaj", "pa", "kxr", "ps", "adx", "fa", "fa", "pi", "mg", "huw",
"crr", "phr", "lah", "pl", "pt", "bfy", "lcq", "gu", "prt", "ps",
"pub", "qu", "qu", "emx", "rom", "rm", "ro", "ro", "rn", "ru",
"sg", "sa", "aqt", "hle", "sr", "hr", "isk", "si", "oyb", "sk",
"sk", "sl", "kmb", "se", "sm", "sn", "iba", "sd", "so", "st",
"es", "kln", "sq", "sc", "sc", "sr", "ss", "sgd", "ulw", "su",
"sw", "sv", "sw", "umi", "ty", "ta", "tt", "dtp", "te", "bjp",
"tg", "fil", "th", "tpo", "ola", "oyb", "bo", "itd", "ras", "ti",
"twm", "weo", "tdg", "tyj", "kak", "to", "tpn", "taj", "tn", "ts",
"tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro", "ema", "ur",
"uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol", "nwo", "wa",
"wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj", "den", "zko",
"rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", "zom", "yug",
"zap", "za", "zh", "scv", "kjh", "ms", "zu", "za",
static const char* aliases[408] = {
"aas", "aa", "ab", "dz", "af", "apf", "ami", "aeb", "jrb", "ak",
"sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as", "ktz",
"av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", "eu",
"nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho", "bi",
"drl", "bzc", "ebk", "iba", "bo", "bs", "br", "beb", "bg", "my",
"luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", "cv",
"mom", "cmr", "syr", "xch", "zh", "kw", "co", "pij", "quh", "cr",
"cr", "cy", "cs", "dnj", "da", "njz", "de", "doi", "mwr", "din",
"zza", "dif", "dv", "dze", "aqd", "mn", "kzk", "uth", "dwu", "nl",
"dbt", "dz", "et", "el", "amq", "man", "en", "eo", "ik", "et",
"eu", "ee", "fo", "fa", "ak", "fj", "fi", "fr", "fr", "fy",
"ff", "ff", "dev", "om", "wny", "grb", "ka", "de", "vaj", "gvr",
"esg", "gtu", "aou", "gd", "ga", "gl", "kzk", "gv", "gon", "el",
"gn", "nyc", "gn", "gu", "duz", "gba", "ht", "ha", "hai", "hmn",
"he", "hz", "srx", "hi", "ho", "jal", "hr", "hu", "hy", "opa",
"ig", "is", "io", "ii", "iu", "iu", "ie", "ilm", "gal", "ia",
"id", "ik", "is", "it", "eza", "jgk", "jv", "oyb", "ja", "kl",
"kn", "ks", "ka", "kr", "kk", "zkd", "tdf", "ncq", "kml", "mn",
"km", "ki", "rw", "ky", "ku", "kr", "kg", "kok", "kwv", "kv",
"kg", "ko", "jkm", "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam",
"tvd", "kru", "dgl", "dtp", "dtp", "ksp", "lo", "la", "lv", "bnc",
"enl", "raq", "li", "ln", "lt", "ngt", "rmx", "lb", "lu", "lg",
"lv", "mk", "mh", "ml", "mi", "mr", "ms", "cir", "jbk", "chm",
"mk", "mg", "mt", "man", "wnn", "xnt", "ro", "mn", "mi", "ms",
"mry", "raj", "dmw", "vaj", "my", "aog", "mry", "xny", "na", "nv",
"nru", "nr", "ekc", "kdz", "nd", "ng", "ne", "nl", "azd", "nrk",
"nn", "nbr", "ngv", "nb", "dtd", "no", "ne", "pij", "bpp", "ny",
"oc", "oj", "oj", "or", "om", "or", "os", "vaj", "pa", "kxr",
"ps", "adx", "fa", "fa", "pi", "mg", "huw", "phr", "lah", "pl",
"pt", "bfy", "lcq", "prt", "ps", "pub", "qu", "qu", "emx", "rom",
"rm", "ro", "ro", "rn", "ru", "sg", "sa", "aqt", "hle", "sr",
"hr", "isk", "si", "oyb", "sk", "sk", "sl", "kmb", "se", "sm",
"sn", "iba", "sd", "so", "st", "es", "kln", "sq", "sc", "sc",
"sr", "ss", "sgd", "ulw", "su", "sw", "sv", "sw", "ty", "ta",
"tt", "dtp", "te", "bjp", "tg", "fil", "th", "tpo", "ola", "oyb",
"bo", "itd", "ras", "ti", "twm", "weo", "tyj", "kak", "to", "taj",
"tn", "ts", "tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro",
"ema", "ur", "uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol",
"nwo", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj",
"den", "rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", "zom",
"yug", "zap", "za", "zh", "scv", "ms", "zu", "za",
};
if (const char* replacement = SearchReplacement(languages, aliases, language)) {
@@ -221,8 +219,8 @@ bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@@ -243,8 +241,8 @@ bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& languag
}
// Mappings from script subtags to preferred values.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span()));
MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span()));
@@ -259,8 +257,8 @@ bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
}
// Mappings from region subtags to preferred values.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@@ -285,69 +283,69 @@ bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
}
{
static const char regions[300][4] = {
static const char regions[299][4] = {
"004", "008", "010", "012", "016", "020", "024", "028", "031", "032",
"036", "040", "044", "048", "050", "051", "052", "056", "060", "062",
"064", "068", "070", "072", "074", "076", "084", "086", "090", "092",
"096", "100", "104", "108", "112", "116", "120", "124", "132", "136",
"140", "144", "148", "152", "156", "158", "162", "166", "170", "174",
"175", "178", "180", "184", "188", "191", "192", "196", "203", "204",
"208", "212", "214", "218", "222", "226", "230", "231", "232", "233",
"234", "238", "239", "242", "246", "248", "249", "250", "254", "258",
"260", "262", "266", "268", "270", "275", "276", "278", "280", "288",
"292", "296", "300", "304", "308", "312", "316", "320", "324", "328",
"332", "334", "336", "340", "344", "348", "352", "356", "360", "364",
"368", "372", "376", "380", "384", "388", "392", "398", "400", "404",
"408", "410", "414", "417", "418", "422", "426", "428", "430", "434",
"438", "440", "442", "446", "450", "454", "458", "462", "466", "470",
"474", "478", "480", "484", "492", "496", "498", "499", "500", "504",
"508", "512", "516", "520", "524", "528", "531", "533", "534", "535",
"540", "548", "554", "558", "562", "566", "570", "574", "578", "580",
"581", "583", "584", "585", "586", "591", "598", "600", "604", "608",
"612", "616", "620", "624", "626", "630", "634", "638", "642", "643",
"646", "652", "654", "659", "660", "662", "663", "666", "670", "674",
"678", "682", "686", "688", "690", "694", "702", "703", "704", "705",
"706", "710", "716", "720", "724", "728", "729", "732", "736", "740",
"744", "748", "752", "756", "760", "762", "764", "768", "772", "776",
"780", "784", "788", "792", "795", "796", "798", "800", "804", "807",
"818", "826", "830", "831", "832", "833", "834", "840", "850", "854",
"858", "860", "862", "876", "882", "886", "887", "891", "894", "958",
"959", "960", "962", "963", "964", "965", "966", "967", "968", "969",
"970", "971", "972", "973", "974", "975", "976", "977", "978", "979",
"980", "981", "982", "983", "984", "985", "986", "987", "988", "989",
"990", "991", "992", "993", "994", "995", "996", "997", "998", "999",
"036", "040", "044", "048", "050", "051", "052", "056", "060", "064",
"068", "070", "072", "074", "076", "084", "086", "090", "092", "096",
"100", "104", "108", "112", "116", "120", "124", "132", "136", "140",
"144", "148", "152", "156", "158", "162", "166", "170", "174", "175",
"178", "180", "184", "188", "191", "192", "196", "203", "204", "208",
"212", "214", "218", "222", "226", "230", "231", "232", "233", "234",
"238", "239", "242", "246", "248", "249", "250", "254", "258", "260",
"262", "266", "268", "270", "275", "276", "278", "280", "288", "292",
"296", "300", "304", "308", "312", "316", "320", "324", "328", "332",
"334", "336", "340", "344", "348", "352", "356", "360", "364", "368",
"372", "376", "380", "384", "388", "392", "398", "400", "404", "408",
"410", "414", "417", "418", "422", "426", "428", "430", "434", "438",
"440", "442", "446", "450", "454", "458", "462", "466", "470", "474",
"478", "480", "484", "492", "496", "498", "499", "500", "504", "508",
"512", "516", "520", "524", "528", "531", "533", "534", "535", "540",
"548", "554", "558", "562", "566", "570", "574", "578", "580", "581",
"583", "584", "585", "586", "591", "598", "600", "604", "608", "612",
"616", "620", "624", "626", "630", "634", "638", "642", "643", "646",
"652", "654", "659", "660", "662", "663", "666", "670", "674", "678",
"682", "686", "688", "690", "694", "702", "703", "704", "705", "706",
"710", "716", "720", "724", "728", "729", "732", "736", "740", "744",
"748", "752", "756", "760", "762", "764", "768", "772", "776", "780",
"784", "788", "792", "795", "796", "798", "800", "804", "807", "818",
"826", "830", "831", "832", "833", "834", "840", "850", "854", "858",
"860", "862", "876", "882", "886", "887", "891", "894", "958", "959",
"960", "962", "963", "964", "965", "966", "967", "968", "969", "970",
"971", "972", "973", "974", "975", "976", "977", "978", "979", "980",
"981", "982", "983", "984", "985", "986", "987", "988", "989", "990",
"991", "992", "993", "994", "995", "996", "997", "998", "999",
};
static const char* aliases[300] = {
static const char* aliases[299] = {
"AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR",
"AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "034",
"BT", "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG",
"BN", "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY",
"CF", "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM",
"YT", "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ",
"DK", "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE",
"FO", "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF",
"TF", "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH",
"GI", "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY",
"HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR",
"IQ", "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE",
"KP", "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY",
"LI", "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT",
"MQ", "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA",
"MZ", "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ",
"NC", "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP",
"UM", "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH",
"PN", "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU",
"RW", "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM",
"ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI",
"SO", "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR",
"SJ", "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO",
"TT", "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK",
"EG", "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF",
"UY", "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA",
"QM", "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW",
"QX", "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG",
"XH", "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ",
"XR", "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ",
"AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "BT",
"BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", "BN",
"BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", "CF",
"LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM", "YT",
"CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", "DK",
"DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", "FO",
"FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", "TF",
"DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", "GI",
"KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", "HT",
"HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ",
"IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", "KP",
"KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", "LI",
"LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", "MQ",
"MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", "MZ",
"OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", "NC",
"VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", "UM",
"FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", "PN",
"PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", "RW",
"BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", "ST",
"SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", "SO",
"ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", "SJ",
"SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", "TT",
"AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", "EG",
"GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", "UY",
"UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", "QM",
"QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", "QX",
"QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH",
"XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", "XR",
"XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ",
};
if (const char* replacement = SearchReplacement(regions, aliases, region)) {
@@ -359,8 +357,8 @@ bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@@ -373,8 +371,8 @@ bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
}
{
static const char regions[8][4] = {
"172", "200", "530", "532", "536", "582", "810", "890",
static const char regions[9][4] = {
"062", "172", "200", "530", "532", "536", "582", "810", "890",
};
return HasReplacement(regions, region);
@@ -382,8 +380,8 @@ bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
void mozilla::intl::Locale::PerformComplexLanguageMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
@@ -418,15 +416,24 @@ void mozilla::intl::Locale::PerformComplexLanguageMappings() {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
void mozilla::intl::Locale::PerformComplexRegionMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span()));
if (Region().EqualTo("172")) {
if (Region().EqualTo("062")) {
if (Language().EqualTo("oui") ||
(Language().EqualTo("und") && Script().EqualTo("Ougr"))) {
SetRegion("143");
}
else {
SetRegion("034");
}
}
else if (Region().EqualTo("172")) {
if (Language().EqualTo("axm") ||
Language().EqualTo("hy") ||
Language().EqualTo("hyw") ||
@@ -435,17 +442,20 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
SetRegion("AM");
}
else if (Language().EqualTo("az") ||
(Language().EqualTo("azb") && Script().EqualTo("Cyrl")) ||
(Language().EqualTo("azb") && Script().EqualTo("Latn")) ||
Language().EqualTo("bdk") ||
(Language().EqualTo("jdt") && Script().EqualTo("Latn")) ||
Language().EqualTo("kjj") ||
Language().EqualTo("kry") ||
(Language().EqualTo("rut") && Script().EqualTo("Latn")) ||
Language().EqualTo("tkr") ||
Language().EqualTo("tly") ||
Language().EqualTo("ttt") ||
(Language().EqualTo("und") && Script().EqualTo("Aghb")) ||
Language().EqualTo("xag")) {
Language().EqualTo("ttt")) {
SetRegion("AZ");
}
else if (Language().EqualTo("be")) {
else if (Language().EqualTo("be") ||
(Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) {
SetRegion("BY");
}
else if (Language().EqualTo("ab") ||
@@ -467,7 +477,8 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
Language().EqualTo("ky")) {
SetRegion("KG");
}
else if (Language().EqualTo("kk") ||
else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("kk") ||
(Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
SetRegion("KZ");
}
@@ -475,10 +486,11 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
SetRegion("MD");
}
else if (Language().EqualTo("abh") ||
(Language().EqualTo("isk") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("paq") ||
Language().EqualTo("sgh") ||
Language().EqualTo("tg") ||
Language().EqualTo("yah") ||
(Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("yai")) {
SetRegion("TJ");
}
@@ -490,10 +502,10 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
Language().EqualTo("got") ||
Language().EqualTo("jct") ||
Language().EqualTo("ji") ||
(Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("rue") ||
Language().EqualTo("uk") ||
(Language().EqualTo("und") && Script().EqualTo("Goth")) ||
Language().EqualTo("yi")) {
(Language().EqualTo("und") && Script().EqualTo("Goth"))) {
SetRegion("UA");
}
else if (Language().EqualTo("auz") ||
@@ -532,9 +544,7 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
else if (Region().EqualTo("536") ||
Region().EqualTo("NT")) {
if (Language().EqualTo("acm") ||
Language().EqualTo("aii") ||
Language().EqualTo("akk") ||
(Language().EqualTo("arc") && Script().EqualTo("Hatr")) ||
Language().EqualTo("ayp") ||
Language().EqualTo("bjm") ||
Language().EqualTo("ckb") ||
@@ -544,7 +554,6 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
Language().EqualTo("sdb") ||
Language().EqualTo("sdf") ||
Language().EqualTo("syr") ||
(Language().EqualTo("und") && Script().EqualTo("Hatr")) ||
(Language().EqualTo("und") && Script().EqualTo("Syrc")) ||
(Language().EqualTo("und") && Script().EqualTo("Xsux"))) {
SetRegion("IQ");
@@ -581,21 +590,23 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
SetRegion("AM");
}
else if (Language().EqualTo("az") ||
(Language().EqualTo("azb") && Script().EqualTo("Cyrl")) ||
(Language().EqualTo("azb") && Script().EqualTo("Latn")) ||
Language().EqualTo("bdk") ||
(Language().EqualTo("jdt") && Script().EqualTo("Latn")) ||
Language().EqualTo("kjj") ||
Language().EqualTo("kry") ||
(Language().EqualTo("rut") && Script().EqualTo("Latn")) ||
Language().EqualTo("tkr") ||
Language().EqualTo("tly") ||
Language().EqualTo("ttt") ||
(Language().EqualTo("und") && Script().EqualTo("Aghb")) ||
Language().EqualTo("xag")) {
Language().EqualTo("ttt")) {
SetRegion("AZ");
}
else if (Language().EqualTo("be")) {
else if (Language().EqualTo("be") ||
(Language().EqualTo("rml") && Script().EqualTo("Cyrl"))) {
SetRegion("BY");
}
else if (Language().EqualTo("et") ||
Language().EqualTo("ie") ||
Language().EqualTo("vro")) {
SetRegion("EE");
}
@@ -618,7 +629,8 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
Language().EqualTo("ky")) {
SetRegion("KG");
}
else if (Language().EqualTo("kk") ||
else if ((Language().EqualTo("ili") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("kk") ||
(Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) {
SetRegion("KZ");
}
@@ -637,10 +649,11 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
SetRegion("MD");
}
else if (Language().EqualTo("abh") ||
(Language().EqualTo("isk") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("paq") ||
Language().EqualTo("sgh") ||
Language().EqualTo("tg") ||
Language().EqualTo("yah") ||
(Language().EqualTo("wbl") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("yai")) {
SetRegion("TJ");
}
@@ -652,10 +665,10 @@ void mozilla::intl::Locale::PerformComplexRegionMappings() {
Language().EqualTo("got") ||
Language().EqualTo("jct") ||
Language().EqualTo("ji") ||
(Language().EqualTo("kdr") && Script().EqualTo("Cyrl")) ||
Language().EqualTo("rue") ||
Language().EqualTo("uk") ||
(Language().EqualTo("und") && Script().EqualTo("Goth")) ||
Language().EqualTo("yi")) {
(Language().EqualTo("und") && Script().EqualTo("Goth"))) {
SetRegion("UA");
}
else if (Language().EqualTo("auz") ||
@@ -709,8 +722,8 @@ static bool IsLessThan(const T& a, const U& b) {
}
// Mappings from variant subtags to preferred values.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::PerformVariantMappings() {
// The variant subtags need to be sorted for binary search.
MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(),
@@ -773,8 +786,8 @@ bool mozilla::intl::Locale::PerformVariantMappings() {
}
// Canonicalize legacy locale identifiers.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::UpdateLegacyMappings() {
// We're mapping legacy tags to non-legacy form here.
// Other tags remain unchanged.
@@ -931,8 +944,8 @@ bool mozilla::intl::Locale::UpdateLegacyMappings() {
}
// Mappings from legacy sign languages.
// Derived from CLDR Supplemental Data, version 46.
// https://unicode.org/Public/cldr/46/cldr-common-46.0.zip
// Derived from CLDR Supplemental Data, version 43.
// https://unicode.org/Public/cldr/43/cldr-common-43.0.zip
bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language,
const RegionSubtag& region) {
MOZ_ASSERT(language.EqualTo("sgn"));
@@ -1074,7 +1087,7 @@ const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType(
}
else if (IsUnicodeKey(key, "rg") ||
IsUnicodeKey(key, "sd")) {
static const char* types[147] = {
static const char* types[144] = {
"cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" ,
"cn23" , "cn31" , "cn32" , "cn33" , "cn34" , "cn35" , "cn36" ,
"cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" ,
@@ -1086,18 +1099,18 @@ const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType(
"czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" ,
"czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" ,
"frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" ,
"frgua" , "frh" , "fri" , "frj" , "frk" , "frl" , "frlre" ,
"frm" , "frmay" , "frmf" , "frmq" , "frn" , "frnc" , "fro" ,
"frp" , "frpf" , "frpm" , "frq" , "frr" , "frre" , "frs" ,
"frt" , "frtf" , "fru" , "frv" , "frwf" , "fryt" , "laxn" ,
"lud" , "lug" , "lul" , "mrnkc" , "nlaw" , "nlcw" , "nlsx" ,
"no23" , "nzn" , "nzs" , "omba" , "omsh" , "plds" , "plkp" ,
"pllb" , "plld" , "pllu" , "plma" , "plmz" , "plop" , "plpd" ,
"plpk" , "plpm" , "plsk" , "plsl" , "plwn" , "plwp" , "plzp" ,
"shta" , "tteto" , "ttrcm" , "ttwto" , "twkhq" , "twtnq" , "twtpq" ,
"twtxq" , "usas" , "usgu" , "usmp" , "uspr" , "usum" , "usvi" ,
"frh" , "fri" , "frj" , "frk" , "frl" , "frm" , "frmf" ,
"frmq" , "frn" , "frnc" , "fro" , "frp" , "frpf" , "frpm" ,
"frq" , "frr" , "frre" , "frs" , "frt" , "frtf" , "fru" ,
"frv" , "frwf" , "fryt" , "laxn" , "lud" , "lug" , "lul" ,
"mrnkc" , "nlaw" , "nlcw" , "nlsx" , "no23" , "nzn" , "nzs" ,
"omba" , "omsh" , "plds" , "plkp" , "pllb" , "plld" , "pllu" ,
"plma" , "plmz" , "plop" , "plpd" , "plpk" , "plpm" , "plsk" ,
"plsl" , "plwn" , "plwp" , "plzp" , "shta" , "tteto" , "ttrcm" ,
"ttwto" , "twkhq" , "twtnq" , "twtpq" , "twtxq" , "usas" , "usgu" ,
"usmp" , "uspr" , "usum" , "usvi" ,
};
static const char* aliases[147] = {
static const char* aliases[144] = {
"cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" ,
"cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" ,
"cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" ,
@@ -1109,41 +1122,33 @@ const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType(
"cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" ,
"cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" ,
"cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , "gfzzzz", "gpzzzz",
"gpzzzz", "frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "rezzzz",
"frges" , "ytzzzz", "mfzzzz", "mqzzzz", "frocc" , "nczzzz", "frhdf" ,
"frnor" , "pfzzzz", "pmzzzz", "frnor" , "frpdl" , "rezzzz", "frhdf" ,
"frnaq" , "tfzzzz", "frpac" , "frara" , "wfzzzz", "ytzzzz", "laxs" ,
"lucl" , "luec" , "luca" , "mr13" , "awzzzz", "cwzzzz", "sxzzzz",
"no50" , "nzauk" , "nzcan" , "ombj" , "omsj" , "pl02" , "pl04" ,
"pl08" , "pl10" , "pl06" , "pl12" , "pl14" , "pl16" , "pl20" ,
"pl18" , "pl22" , "pl26" , "pl24" , "pl28" , "pl30" , "pl32" ,
"tazzzz", "tttob" , "ttmrc" , "tttob" , "twkhh" , "twtnn" , "twnwt" ,
"twtxg" , "aszzzz", "guzzzz", "mpzzzz", "przzzz", "umzzzz", "vizzzz",
"frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "frges" , "mfzzzz",
"mqzzzz", "frocc" , "nczzzz", "frhdf" , "frnor" , "pfzzzz", "pmzzzz",
"frnor" , "frpdl" , "rezzzz", "frhdf" , "frnaq" , "tfzzzz", "frpac" ,
"frara" , "wfzzzz", "ytzzzz", "laxs" , "lucl" , "luec" , "luca" ,
"mr13" , "awzzzz", "cwzzzz", "sxzzzz", "no50" , "nzauk" , "nzcan" ,
"ombj" , "omsj" , "pl02" , "pl04" , "pl08" , "pl10" , "pl06" ,
"pl12" , "pl14" , "pl16" , "pl20" , "pl18" , "pl22" , "pl26" ,
"pl24" , "pl28" , "pl30" , "pl32" , "tazzzz", "tttob" , "ttmrc" ,
"tttob" , "twkhh" , "twtnn" , "twnwt" , "twtxg" , "aszzzz", "guzzzz",
"mpzzzz", "przzzz", "umzzzz", "vizzzz",
};
return SearchUnicodeReplacement(types, aliases, type);
}
else if (IsUnicodeKey(key, "tz")) {
static const char* types[50] = {
"aqams" , "aukns" , "caffs" , "camtr" , "canpg" , "capnt" ,
"cathu" , "cayzf" , "cet" , "cnckg" , "cnhrb" , "cnkhg" ,
"cst6cdt" , "cuba" , "eet" , "egypt" , "eire" , "est" ,
"est5edt" , "factory" , "gaza" , "gmt0" , "hongkong", "hst" ,
"iceland" , "iran" , "israel" , "jamaica" , "japan" , "libya" ,
"met" , "mncoq" , "mst" , "mst7mdt" , "mxstis" , "navajo" ,
"poland" , "portugal", "prc" , "pst8pdt" , "roc" , "rok" ,
"turkey" , "uaozh" , "uauzh" , "uct" , "umjon" , "usnavajo",
"wet" , "zulu" ,
static const char* types[30] = {
"aqams" , "camtr" , "cnckg" , "cnhrb" , "cnkhg" , "cuba" ,
"egypt" , "eire" , "est" , "gaza" , "gmt0" , "hongkong",
"hst" , "iceland" , "iran" , "israel" , "jamaica" , "japan" ,
"libya" , "mst" , "navajo" , "poland" , "portugal", "prc" ,
"roc" , "rok" , "turkey" , "uct" , "usnavajo", "zulu" ,
};
static const char* aliases[50] = {
"nzakl" , "auhba" , "cawnp" , "cator" , "cator" , "caiql" ,
"cator" , "caedm" , "bebru" , "cnsha" , "cnsha" , "cnurc" ,
"uschi" , "cuhav" , "grath" , "egcai" , "iedub" , "papty" ,
"usnyc" , "unk" , "gazastrp", "gmt" , "hkhkg" , "ushnl" ,
"isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" ,
"bebru" , "mnuln" , "usphx" , "usden" , "mxtij" , "usden" ,
"plwaw" , "ptlis" , "cnsha" , "uslax" , "twtpe" , "krsel" ,
"trist" , "uaiev" , "uaiev" , "utc" , "ushnl" , "usden" ,
"ptlis" , "utc" ,
static const char* aliases[30] = {
"nzakl" , "cator" , "cnsha" , "cnsha" , "cnurc" , "cuhav" ,
"egcai" , "iedub" , "utcw05" , "gazastrp", "gmt" , "hkhkg" ,
"utcw10" , "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" ,
"lytip" , "utcw07" , "usden" , "plwaw" , "ptlis" , "cnsha" ,
"twtpe" , "krsel" , "trist" , "utc" , "usden" , "utc" ,
};
return SearchUnicodeReplacement(types, aliases, type);
}

View File

@@ -9,32 +9,32 @@
*/
/*
* Created on Mon Oct 28 17:12:38 2024 from UCD data files with version info:
* Created on Wed Sep 25 08:09:45 2024 from UCD data files with version info:
*
# Unicode Character Database
# Date: 2024-08-25
# © 2024 Unicode®, Inc.
# Date: 2022-09-02
# © 2022 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# For documentation, see the following:
# NamesList.html
# UAX #38, "Unicode Han Database (Unihan)"
# UAX #44, "Unicode Character Database"
# UTS #51, "Unicode Emoji"
# UAX #57, "Unicode Egyptian Hieroglyph Database"
#
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode16.0.0/
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.0.0/
This directory contains final data files
for the Unicode Character Database, for Version 16.0.0 of the Unicode Standard.
This directory contains the final data files
for the Unicode Character Database, for Version 15.0.0 of the Unicode Standard.
# IdentifierStatus.txt
# Date: 2024-05-04, 21:31:06 GMT
# Date: 2022-08-26, 16:49:09 GMT
#
# Unihan_Variants.txt
# Date: 2024-07-31 00:00:00 GMT [KL]
# Date: 2022-08-01 16:36:07 GMT [JHJ]
*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
@@ -245,16 +245,8 @@ enum class Script : int16_t {
VITHKUQI = 197,
KAWI = 198,
NAG_MUNDARI = 199,
ARABIC_NASTALIQ = 200,
GARAY = 201,
GURUNG_KHEMA = 202,
KIRAT_RAI = 203,
OL_ONAL = 204,
SUNUWAR = 205,
TODHRI = 206,
TULU_TIGALARI = 207,
NUM_SCRIPT_CODES = 208,
NUM_SCRIPT_CODES = 200,
INVALID = -1
};

View File

@@ -1,7 +1,7 @@
diff --git a/intl/icu/source/Makefile.in b/intl/icu/source/Makefile.in
--- a/intl/icu/source/Makefile.in
+++ b/intl/icu/source/Makefile.in
@@ -140,32 +140,36 @@ endif
@@ -134,32 +134,36 @@ endif
LOCAL_SUBDIRS = $(SUBDIRS)
CLEAN_FIRST_SUBDIRS = $(TOOLS)

View File

@@ -1,7 +1,7 @@
diff --git a/intl/icu/source/data/locales/dsb.txt b/intl/icu/source/data/locales/dsb.txt
--- a/intl/icu/source/data/locales/dsb.txt
+++ b/intl/icu/source/data/locales/dsb.txt
@@ -480,21 +480,25 @@ dsb{
@@ -547,23 +547,27 @@ dsb{
other{"W. 'tyźeń' MMMM"}
two{"W. 'tyźeń' MMMM"}
}
@@ -14,6 +14,8 @@ diff --git a/intl/icu/source/data/locales/dsb.txt b/intl/icu/source/data/locales
h{"ha"}
hm{"h:mma"}
hms{"h:mm:ssa"}
ms{"mm:ss"}
y{"y"}
yM{"M.y"}
yMEd{"E, d.M.y"}
+ yMM{"MM y"}
@@ -30,7 +32,7 @@ diff --git a/intl/icu/source/data/locales/dsb.txt b/intl/icu/source/data/locales
diff --git a/intl/icu/source/data/locales/hsb.txt b/intl/icu/source/data/locales/hsb.txt
--- a/intl/icu/source/data/locales/hsb.txt
+++ b/intl/icu/source/data/locales/hsb.txt
@@ -480,21 +480,25 @@ hsb{
@@ -546,23 +546,27 @@ hsb{
other{"W. 'tydźeń' MMMM"}
two{"W. 'tydźeń' MMMM"}
}
@@ -43,6 +45,8 @@ diff --git a/intl/icu/source/data/locales/hsb.txt b/intl/icu/source/data/locales
h{"ha"}
hm{"h:mma"}
hms{"h:mm:ssa"}
ms{"mm:ss"}
y{"y"}
yM{"M.y"}
yMEd{"E, d.M.y"}
+ yMM{"MM y"}

View File

@@ -6,7 +6,7 @@
diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locales/root.txt
--- a/intl/icu/source/data/locales/root.txt
+++ b/intl/icu/source/data/locales/root.txt
@@ -1283,17 +1283,19 @@ root{
@@ -213,17 +213,19 @@ root{
}
NoonMarker:alias{"/LOCALE/calendar/gregorian/NoonMarker"}
NoonMarkerNarrow:alias{"/LOCALE/calendar/gregorian/NoonMarkerNarrow"}
@@ -26,7 +26,7 @@ diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locale
Year{"{1} {0}"}
}
availableFormats{
@@ -1833,17 +1835,19 @@ root{
@@ -749,17 +751,19 @@ root{
}
NoonMarker:alias{"/LOCALE/calendar/gregorian/NoonMarker"}
NoonMarkerNarrow:alias{"/LOCALE/calendar/gregorian/NoonMarkerNarrow"}
@@ -46,9 +46,9 @@ diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locale
Year{"{1} {0}"}
}
availableFormats{
@@ -2113,17 +2117,19 @@ root{
"yMMMd",
"yMMdd",
@@ -1018,17 +1022,19 @@ root{
"{1} {0}",
"{1} {0}",
}
appendItems{
Day{"{0} ({2}: {1})"}
@@ -66,30 +66,10 @@ diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locale
Year{"{1} {0}"}
}
availableFormats{
@@ -2694,17 +2700,19 @@ root{
}
NoonMarker:alias{"/LOCALE/calendar/gregorian/NoonMarker"}
NoonMarkerNarrow:alias{"/LOCALE/calendar/gregorian/NoonMarkerNarrow"}
appendItems{
Day{"{0} ({2}: {1})"}
+ DayPeriod{"{0} ({2}: {1})"}
Day-Of-Week{"{0} {1}"}
Era{"{1} {0}"}
Hour{"{0} ({2}: {1})"}
Minute{"{0} ({2}: {1})"}
Month{"{0} ({2}: {1})"}
Quarter{"{0} ({2}: {1})"}
Second{"{0} ({2}: {1})"}
+ FractionalSecond{"{0} ({2}: {1})"}
Timezone{"{0} {1}"}
Week{"{0} ({2}: {1})"}
Year{"{1} {0}"}
}
availableFormats{
diff --git a/intl/icu/source/i18n/dtptngen.cpp b/intl/icu/source/i18n/dtptngen.cpp
--- a/intl/icu/source/i18n/dtptngen.cpp
+++ b/intl/icu/source/i18n/dtptngen.cpp
@@ -259,12 +259,12 @@ static const dtTypeElem dtTypes[] = {
@@ -257,12 +257,12 @@ static const dtTypeElem dtTypes[] = {
{0, UDATPG_FIELD_COUNT, 0, 0, 0} , // last row of dtTypes[]
};

View File

@@ -5,7 +5,7 @@
diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locales/root.txt
--- a/intl/icu/source/data/locales/root.txt
+++ b/intl/icu/source/data/locales/root.txt
@@ -3866,10 +3866,15 @@ root{
@@ -2527,10 +2527,15 @@ root{
zone{
dn{"Zone"}
}
@@ -24,7 +24,7 @@ diff --git a/intl/icu/source/data/locales/root.txt b/intl/icu/source/data/locale
diff --git a/intl/icu/source/i18n/dtptngen.cpp b/intl/icu/source/i18n/dtptngen.cpp
--- a/intl/icu/source/i18n/dtptngen.cpp
+++ b/intl/icu/source/i18n/dtptngen.cpp
@@ -266,11 +266,11 @@ static const char* const CLDR_FIELD_APPEND[] = {
@@ -264,11 +264,11 @@ static const char* const CLDR_FIELD_APPE
};
static const char* const CLDR_FIELD_NAME[UDATPG_FIELD_COUNT] = {

View File

@@ -6,22 +6,16 @@ diff --git a/intl/icu/source/common/putilimp.h b/intl/icu/source/common/putilimp
index 5b95a68..7097232 100644
--- a/intl/icu/source/common/putilimp.h
+++ b/intl/icu/source/common/putilimp.h
@@ -105,10 +105,12 @@ typedef size_t uintptr_t;
@@ -103,6 +103,8 @@ typedef size_t uintptr_t;
#endif
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#elif U_PLATFORM == U_PF_HAIKU
/* not defined */
+#elif defined(__wasi__)
+ /* not defined */
#else
# define U_TZSET tzset
#endif
#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
@@ -130,10 +132,12 @@ typedef size_t uintptr_t;
/* not defined */
#elif U_PLATFORM == U_PF_OS400
@@ -128,6 +130,8 @@ typedef size_t uintptr_t;
/* not defined */
#elif U_PLATFORM == U_PF_IPHONE
/* not defined */
@@ -30,28 +24,20 @@ index 5b95a68..7097232 100644
#else
# define U_TIMEZONE timezone
#endif
#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
@@ -145,10 +149,12 @@ typedef size_t uintptr_t;
@@ -141,6 +145,8 @@ typedef size_t uintptr_t;
#endif
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#elif U_PLATFORM == U_PF_HAIKU
/* not defined, (well it is but a loop back to icu) */
+#elif defined(__wasi__)
+ /* not defined */
#else
# define U_TZNAME tzname
#endif
#ifdef U_HAVE_MMAP
diff --git a/intl/icu/source/common/umapfile.h b/intl/icu/source/common/umapfile.h
index 92bd567..4ed1112 100644
--- a/intl/icu/source/common/umapfile.h
+++ b/intl/icu/source/common/umapfile.h
@@ -38,10 +38,12 @@ U_CFUNC void uprv_unmapFile(UDataMemory *pData);
#define MAP_POSIX 2
#define MAP_STDIO 3
@@ -41,6 +41,8 @@ U_CFUNC void uprv_unmapFile(UDataMemory *pData);
#if UCONFIG_NO_FILE_IO
# define MAP_IMPLEMENTATION MAP_NONE
@@ -60,8 +46,6 @@ index 92bd567..4ed1112 100644
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define MAP_IMPLEMENTATION MAP_WIN32
#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
# define MAP_IMPLEMENTATION MAP_POSIX
#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
diff --git a/intl/icu/source/common/umutex.cpp b/intl/icu/source/common/umutex.cpp
index ccbee99..6c3452c 100644
--- a/intl/icu/source/common/umutex.cpp
@@ -222,28 +206,23 @@ index 8d76b3f..c1a58db 100644
U_NAMESPACE_BEGIN
@@ -68,10 +68,12 @@ U_NAMESPACE_BEGIN
*
* Low Level Atomic Operations, ICU wrappers for.
@@ -70,6 +76,8 @@ U_NAMESPACE_BEGIN
*
****************************************************************************/
+#ifndef __wasi__
+
typedef std::atomic<int32_t> u_atomic_int32_t;
#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
return var.load(std::memory_order_acquire);
}
@@ -86,10 +88,31 @@ inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
@@ -89,6 +97,28 @@ inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
return var->fetch_sub(1) - 1;
}
+#else
+
+typedef int32_t u_atomic_int32_t;
+#define ATOMIC_INT32_T_INITIALIZER(val) val
+
+inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
+ return var;
@@ -265,11 +244,7 @@ index 8d76b3f..c1a58db 100644
/*************************************************************************************************
*
* UInitOnce Definitions.
*
@@ -225,21 +248,29 @@ class U_COMMON_API UMutex {
UMutex &operator =(const UMutex &other) = delete;
void *operator new(size_t) = delete;
@@ -231,17 +261,25 @@ class U_COMMON_API UMutex {
// requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
void lock() {
@@ -296,11 +271,7 @@ index 8d76b3f..c1a58db 100644
/** All initialized UMutexes are kept in a linked list, so that they can be found,
* and the underlying std::mutex destructed, by u_cleanup().
*/
UMutex *fListLink { nullptr };
@@ -247,11 +278,13 @@ class U_COMMON_API UMutex {
/** Out-of-line function to lazily initialize a UMutex on first use.
@@ -253,7 +291,9 @@ class U_COMMON_API UMutex {
* Initial fast check is inline, in lock(). The returned value may never
* be nullptr.
*/
@@ -310,9 +281,6 @@ index 8d76b3f..c1a58db 100644
};
/* Lock a mutex.
* @param mutex The given mutex to be locked. Pass NULL to specify
diff --git a/intl/icu/source/common/unifiedcache.cpp b/intl/icu/source/common/unifiedcache.cpp
--- a/intl/icu/source/common/unifiedcache.cpp
+++ b/intl/icu/source/common/unifiedcache.cpp
@@ -660,7 +628,7 @@ index 9ecd776..d094289 100644
diff --git a/intl/icu/source/i18n/numrange_fluent.cpp b/intl/icu/source/i18n/numrange_fluent.cpp
--- a/intl/icu/source/i18n/numrange_fluent.cpp
+++ b/intl/icu/source/i18n/numrange_fluent.cpp
@@ -246,33 +246,53 @@ LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(LocalizedNumberRang
@@ -238,33 +238,53 @@ LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(LocalizedNumberRang
LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(NFS<LNF>&& src) noexcept
: NFS<LNF>(std::move(src)) {
@@ -714,7 +682,7 @@ diff --git a/intl/icu/source/i18n/numrange_fluent.cpp b/intl/icu/source/i18n/num
LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(const RangeMacroProps& macros, const Locale& locale) {
fMacros = macros;
fMacros.locale = locale;
@@ -363,11 +383,15 @@ LocalizedNumberRangeFormatter::getFormatter(UErrorCode& status) const {
@@ -344,11 +364,15 @@ LocalizedNumberRangeFormatter::getFormatter(UErrorCode& status) const {
if (U_FAILURE(status)) {
return nullptr;
}
@@ -730,23 +698,24 @@ diff --git a/intl/icu/source/i18n/numrange_fluent.cpp b/intl/icu/source/i18n/num
}
// Try computing the formatter on our own
@@ -378,17 +378,22 @@ LocalizedNumberRangeFormatter::getFormatter(UErrorCode& status) const {
@@ -364,18 +388,23 @@ LocalizedNumberRangeFormatter::getFormatter(UErrorCode& status) const {
// Note: ptr starts as nullptr; during compare_exchange,
// it is set to what is actually stored in the atomic
// if another thread beat us to computing the formatter object.
auto* nonConstThis = const_cast<LocalizedNumberRangeFormatter*>(this);
+#ifndef __wasi__
if (!nonConstThis->fAtomicFormatter.compare_exchange_strong(ptr, temp.getAlias())) {
if (!nonConstThis->fAtomicFormatter.compare_exchange_strong(ptr, temp)) {
// Another thread beat us to computing the formatter
delete temp;
return ptr;
} else {
// Our copy of the formatter got stored in the atomic
return temp.orphan();
return temp;
}
+#else
+ nonConstThis->fAtomicFormatter = temp.getAlias();
+ return temp.orphan();
+ nonConstThis->fAtomicFormatter = temp;
+ return temp;
+#endif
}

View File

@@ -0,0 +1,201 @@
# Support relative date formatting with UCONFIG_NO_BREAK_ITERATION
#
# ICU bug: https://unicode-org.atlassian.net/browse/ICU-22260
diff --git a/intl/icu/source/i18n/reldatefmt.cpp b/intl/icu/source/i18n/reldatefmt.cpp
index 24d22a4b4b..6a0c9e65ef 100644
--- a/intl/icu/source/i18n/reldatefmt.cpp
+++ b/intl/icu/source/i18n/reldatefmt.cpp
@@ -12,7 +12,7 @@
#include "unicode/reldatefmt.h"
-#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
+#if !UCONFIG_NO_FORMATTING
#include <cmath>
#include <functional>
@@ -761,6 +761,7 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter(UErrorCode& status) :
fStyle(UDAT_STYLE_LONG),
fContext(UDISPCTX_CAPITALIZATION_NONE),
fOptBreakIterator(nullptr) {
+ (void)fOptBreakIterator; // suppress unused field warning
init(nullptr, nullptr, status);
}
@@ -809,11 +810,16 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter(
return;
}
if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) {
+#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);
if (U_FAILURE(status)) {
return;
}
init(nfToAdopt, bi, status);
+#else
+ status = U_UNSUPPORTED_ERROR;
+ return;
+#endif // !UCONFIG_NO_BREAK_ITERATION
} else {
init(nfToAdopt, nullptr, status);
}
@@ -832,9 +838,11 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter(
fCache->addRef();
fNumberFormat->addRef();
fPluralRules->addRef();
+#if !UCONFIG_NO_BREAK_ITERATION
if (fOptBreakIterator != nullptr) {
fOptBreakIterator->addRef();
}
+#endif // !UCONFIG_NO_BREAK_ITERATION
}
RelativeDateTimeFormatter& RelativeDateTimeFormatter::operator=(
@@ -843,7 +851,9 @@ RelativeDateTimeFormatter& RelativeDateTimeFormatter::operator=(
SharedObject::copyPtr(other.fCache, fCache);
SharedObject::copyPtr(other.fNumberFormat, fNumberFormat);
SharedObject::copyPtr(other.fPluralRules, fPluralRules);
+#if !UCONFIG_NO_BREAK_ITERATION
SharedObject::copyPtr(other.fOptBreakIterator, fOptBreakIterator);
+#endif // !UCONFIG_NO_BREAK_ITERATION
fStyle = other.fStyle;
fContext = other.fContext;
fLocale = other.fLocale;
@@ -861,9 +871,11 @@ RelativeDateTimeFormatter::~RelativeDateTimeFormatter() {
if (fPluralRules != nullptr) {
fPluralRules->removeRef();
}
+#if !UCONFIG_NO_BREAK_ITERATION
if (fOptBreakIterator != nullptr) {
fOptBreakIterator->removeRef();
}
+#endif // !UCONFIG_NO_BREAK_ITERATION
}
const NumberFormat& RelativeDateTimeFormatter::getNumberFormat() const {
@@ -1191,6 +1203,7 @@ UnicodeString& RelativeDateTimeFormatter::combineDateAndTime(
}
UnicodeString& RelativeDateTimeFormatter::adjustForContext(UnicodeString &str) const {
+#if !UCONFIG_NO_BREAK_ITERATION
if (fOptBreakIterator == nullptr
|| str.length() == 0 || !u_islower(str.char32At(0))) {
return str;
@@ -1204,25 +1217,36 @@ UnicodeString& RelativeDateTimeFormatter::adjustForContext(UnicodeString &str) c
fOptBreakIterator->get(),
fLocale,
U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
+#endif // !UCONFIG_NO_BREAK_ITERATION
return str;
}
UBool RelativeDateTimeFormatter::checkNoAdjustForContext(UErrorCode& status) const {
+#if !UCONFIG_NO_BREAK_ITERATION
// This is unsupported because it's hard to keep fields in sync with title
// casing. The code could be written and tested if there is demand.
if (fOptBreakIterator != nullptr) {
status = U_UNSUPPORTED_ERROR;
return false;
}
+#else
+ (void)status; // suppress unused argument warning
+#endif // !UCONFIG_NO_BREAK_ITERATION
return true;
}
void RelativeDateTimeFormatter::init(
NumberFormat *nfToAdopt,
+#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator *biToAdopt,
+#else
+ std::nullptr_t,
+#endif // !UCONFIG_NO_BREAK_ITERATION
UErrorCode &status) {
LocalPointer<NumberFormat> nf(nfToAdopt);
+#if !UCONFIG_NO_BREAK_ITERATION
LocalPointer<BreakIterator> bi(biToAdopt);
+#endif // !UCONFIG_NO_BREAK_ITERATION
UnifiedCache::getByLocale(fLocale, fCache, status);
if (U_FAILURE(status)) {
return;
@@ -1251,6 +1275,7 @@ void RelativeDateTimeFormatter::init(
nf.orphan();
SharedObject::copyPtr(shared, fNumberFormat);
}
+#if !UCONFIG_NO_BREAK_ITERATION
if (bi.isNull()) {
SharedObject::clearPtr(fOptBreakIterator);
} else {
@@ -1262,6 +1287,7 @@ void RelativeDateTimeFormatter::init(
bi.orphan();
SharedObject::copyPtr(shared, fOptBreakIterator);
}
+#endif // !UCONFIG_NO_BREAK_ITERATION
}
U_NAMESPACE_END
diff --git a/intl/icu/source/i18n/unicode/reldatefmt.h b/intl/icu/source/i18n/unicode/reldatefmt.h
index 4123468c65..5dc4905b12 100644
--- a/intl/icu/source/i18n/unicode/reldatefmt.h
+++ b/intl/icu/source/i18n/unicode/reldatefmt.h
@@ -248,8 +248,6 @@ typedef enum UDateDirection {
#endif // U_HIDE_DEPRECATED_API
} UDateDirection;
-#if !UCONFIG_NO_BREAK_ITERATION
-
U_NAMESPACE_BEGIN
class BreakIterator;
@@ -696,11 +694,19 @@ class U_I18N_API RelativeDateTimeFormatter : public UObject {
const SharedPluralRules *fPluralRules;
UDateRelativeDateTimeFormatterStyle fStyle;
UDisplayContext fContext;
+#if !UCONFIG_NO_BREAK_ITERATION
const SharedBreakIterator *fOptBreakIterator;
+#else
+ std::nullptr_t fOptBreakIterator = nullptr;
+#endif // !UCONFIG_NO_BREAK_ITERATION
Locale fLocale;
void init(
NumberFormat *nfToAdopt,
+#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator *brkIter,
+#else
+ std::nullptr_t,
+#endif // !UCONFIG_NO_BREAK_ITERATION
UErrorCode &status);
UnicodeString& adjustForContext(UnicodeString &) const;
UBool checkNoAdjustForContext(UErrorCode& status) const;
@@ -743,7 +749,6 @@ class U_I18N_API RelativeDateTimeFormatter : public UObject {
U_NAMESPACE_END
-#endif /* !UCONFIG_NO_BREAK_ITERATION */
#endif /* !UCONFIG_NO_FORMATTING */
#endif /* U_SHOW_CPLUSPLUS_API */
diff --git a/intl/icu/source/i18n/unicode/ureldatefmt.h b/intl/icu/source/i18n/unicode/ureldatefmt.h
index 3c44890043..0882360d14 100644
--- a/intl/icu/source/i18n/unicode/ureldatefmt.h
+++ b/intl/icu/source/i18n/unicode/ureldatefmt.h
@@ -12,7 +12,7 @@
#include "unicode/utypes.h"
-#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
+#if !UCONFIG_NO_FORMATTING
#include "unicode/unum.h"
#include "unicode/udisplaycontext.h"
@@ -505,6 +505,6 @@ ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt,
int32_t resultCapacity,
UErrorCode* status );
-#endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION */
+#endif /* !UCONFIG_NO_FORMATTING */
#endif
--
2.34.1

View File

@@ -0,0 +1,47 @@
# Allow setting the Gregorian change date for ISO8601 calendars.
#
# ICU bug: https://unicode-org.atlassian.net/browse/ICU-22412
diff --git a/intl/icu/source/i18n/ucal.cpp b/intl/icu/source/i18n/ucal.cpp
--- a/intl/icu/source/i18n/ucal.cpp
+++ b/intl/icu/source/i18n/ucal.cpp
@@ -22,10 +22,11 @@
#include "unicode/ustring.h"
#include "unicode/strenum.h"
#include "unicode/localpointer.h"
#include "cmemory.h"
#include "cstring.h"
+#include "iso8601cal.h"
#include "ustrenum.h"
#include "uenumimp.h"
#include "ulist.h"
#include "ulocimp.h"
@@ -305,11 +306,12 @@ ucal_setGregorianChange(UCalendar *cal,
// We normally don't check "this" pointers for nullptr, but this here avoids
// compiler-generated exception-throwing code in case cal == nullptr.
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- if(typeid(*cpp_cal) != typeid(GregorianCalendar)) {
+ if(typeid(*cpp_cal) != typeid(GregorianCalendar) &&
+ typeid(*cpp_cal) != typeid(ISO8601Calendar)) {
*pErrorCode = U_UNSUPPORTED_ERROR;
return;
}
gregocal->setGregorianChange(date, *pErrorCode);
}
@@ -327,11 +329,12 @@ ucal_getGregorianChange(const UCalendar
// We normally don't check "this" pointers for nullptr, but this here avoids
// compiler-generated exception-throwing code in case cal == nullptr.
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return (UDate)0;
}
- if(typeid(*cpp_cal) != typeid(GregorianCalendar)) {
+ if(typeid(*cpp_cal) != typeid(GregorianCalendar) &&
+ typeid(*cpp_cal) != typeid(ISO8601Calendar)) {
*pErrorCode = U_UNSUPPORTED_ERROR;
return (UDate)0;
}
return gregocal->getGregorianChange();
}

View File

@@ -0,0 +1,27 @@
# Increase the minimum limit for time zone offsets to -24 hours to support
# time zone offset strings like "-23:59".
#
# ICU bug: https://unicode-org.atlassian.net/browse/ICU-22526
diff --git a/intl/icu/source/i18n/calendar.cpp b/intl/icu/source/i18n/calendar.cpp
--- a/intl/icu/source/i18n/calendar.cpp
+++ b/intl/icu/source/i18n/calendar.cpp
@@ -649,17 +649,17 @@ static const int32_t kCalendarLimits[UCA
{ 1, 1, 7, 7 }, // DAY_OF_WEEK
{/*N/A*/-1, /*N/A*/-1, /*N/A*/-1, /*N/A*/-1}, // DAY_OF_WEEK_IN_MONTH
{ 0, 0, 1, 1 }, // AM_PM
{ 0, 0, 11, 11 }, // HOUR
{ 0, 0, 23, 23 }, // HOUR_OF_DAY
{ 0, 0, 59, 59 }, // MINUTE
{ 0, 0, 59, 59 }, // SECOND
{ 0, 0, 999, 999 }, // MILLISECOND
- {-16*kOneHour, -16*kOneHour, 12*kOneHour, 30*kOneHour }, // ZONE_OFFSET
+ {-24*kOneHour, -16*kOneHour, 12*kOneHour, 30*kOneHour }, // ZONE_OFFSET
{ -1*kOneHour, -1*kOneHour, 2*kOneHour, 2*kOneHour }, // DST_OFFSET
{/*N/A*/-1, /*N/A*/-1, /*N/A*/-1, /*N/A*/-1}, // YEAR_WOY
{ 1, 1, 7, 7 }, // DOW_LOCAL
{/*N/A*/-1, /*N/A*/-1, /*N/A*/-1, /*N/A*/-1}, // EXTENDED_YEAR
{ -0x7F000000, -0x7F000000, 0x7F000000, 0x7F000000 }, // JULIAN_DAY
{ 0, 0, 24*kOneHour-1, 24*kOneHour-1 }, // MILLISECONDS_IN_DAY
{ 0, 0, 1, 1 }, // IS_LEAP_MONTH
{ 0, 0, 11, 11 } // ORDINAL_MONTH

View File

@@ -0,0 +1,39 @@
# Fix MacOS 14 default timezone issue
#
# ICU bug: https://unicode-org.atlassian.net/browse/ICU-22541
diff --git a/intl/icu/source/common/putil.cpp b/intl/icu/source/common/putil.cpp
--- a/intl/icu/source/common/putil.cpp
+++ b/intl/icu/source/common/putil.cpp
@@ -1170,16 +1170,31 @@ uprv_tzname(int n)
This is a trick to look at the name of the link to get the Olson ID
because the tzfile contents is underspecified.
This isn't guaranteed to work because it may not be a symlink.
*/
char *ret = realpath(TZDEFAULT, gTimeZoneBuffer);
if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
+ // MacOS14 has the realpath as something like
+ // /usr/share/zoneinfo.default/Australia/Melbourne
+ // which will not have "/zoneinfo/" in the path.
+ // Therefore if we fail, we fall back to read the link which is
+ // /var/db/timezone/zoneinfo/Australia/Melbourne
+ // We also fall back to reading the link if the realpath leads to something like
+ // /usr/share/zoneinfo/posixrules
+ if (tzZoneInfoTailPtr == nullptr ||
+ uprv_strcmp(tzZoneInfoTailPtr + tzZoneInfoTailLen, "posixrules") == 0) {
+ ssize_t size = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
+ if (size > 0) {
+ gTimeZoneBuffer[size] = 0;
+ tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
+ }
+ }
if (tzZoneInfoTailPtr != nullptr) {
tzZoneInfoTailPtr += tzZoneInfoTailLen;
skipZoneIDPrefix(&tzZoneInfoTailPtr);
if (isValidOlsonID(tzZoneInfoTailPtr)) {
return (gTimeZoneBufferPtr = tzZoneInfoTailPtr);
}
}
} else {

View File

@@ -1,14 +1,14 @@
diff --git a/intl/icu/source/acinclude.m4 b/intl/icu/source/acinclude.m4
--- a/intl/icu/source/acinclude.m4
+++ b/intl/icu/source/acinclude.m4
@@ -459,30 +459,36 @@ AC_DEFUN([AC_CHECK_STRICT_COMPILE],
], [ac_use_strict_options=yes])
AC_MSG_RESULT($ac_use_strict_options)
if test "$ac_use_strict_options" = yes
then
if test "$GCC" = yes
then
@@ -469,30 +469,36 @@ AC_DEFUN([AC_CHECK_STRICT_COMPILE],
*)
# Do not use -ansi. It limits us to C90, and it breaks some platforms.
# We use -std=c11 to disable the gnu99 defaults and its associated warnings
CFLAGS="$CFLAGS -std=c11"
;;
esac
CFLAGS="$CFLAGS -Wall -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings"
+
+ # Suppress clang C warnings:
@@ -41,14 +41,14 @@ diff --git a/intl/icu/source/acinclude.m4 b/intl/icu/source/acinclude.m4
diff --git a/intl/icu/source/configure b/intl/icu/source/configure
--- a/intl/icu/source/configure
+++ b/intl/icu/source/configure
@@ -5227,30 +5227,36 @@ fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_use_strict_options" >&5
printf "%s\n" "$ac_use_strict_options" >&6; }
@@ -4359,30 +4359,36 @@ fi
*)
# Do not use -ansi. It limits us to C90, and it breaks some platforms.
# We use -std=c11 to disable the gnu99 defaults and its associated warnings
CFLAGS="$CFLAGS -std=c11"
;;
esac
if test "$ac_use_strict_options" = yes
then
if test "$GCC" = yes
then
CFLAGS="$CFLAGS -Wall -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings"
+
+ # Suppress clang C warnings:

View File

@@ -1,5 +1,5 @@
commit 8eca245c7484ac6cc179e3e5f7c1ea7680810f39
Author: Rahul Pandey <rp9.next@gmail.com>
Date: Mon Oct 21 16:21:38 2024 +0530
commit 5861e1fd52f1d7673eee38bc3c965aa18b336062
Author: Peter Edberg <pedberg@unicode.org>
Date: Tue Apr 11 10:32:35 2023 -0700
ICU-22724 BRS_76_GA Update version number to 76.1 (No more -SNAPSHOT)
ICU-22221 update cldr-icu instructions, and ICU tag for integration

View File

@@ -148,7 +148,7 @@ INLINE_INHERITED_MEMB = NO
# shortest path that makes the file name unique will be used
# The default value is: YES.
FULL_PATH_NAMES = YES
FULL_PATH_NAMES = NO
# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
# Stripping is only done if one of the specified strings matches the left-hand
@@ -160,7 +160,7 @@ FULL_PATH_NAMES = YES
# will be relative from the directory where doxygen is started.
# This tag requires that the tag FULL_PATH_NAMES is set to YES.
STRIP_FROM_PATH = @srcdir@
STRIP_FROM_PATH =
# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
@@ -2084,7 +2084,7 @@ PREDEFINED = U_EXPORT2= \
U_DEFINE_LOCAL_OPEN_POINTER()= \
U_IN_DOXYGEN=1 \
U_CHAR16_IS_TYPEDEF=0 \
U_CPLUSPLUS_VERSION=17 \
U_CPLUSPLUS_VERSION=11 \
U_WCHAR_IS_UTF16
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this

View File

@@ -77,7 +77,7 @@ EXTRA_DATA =
## List of phony targets
.PHONY : all all-local all-recursive install install-local install-udata install-udata-files install-udata-dlls \
install-recursive install-manx clean clean-local clean-recursive distclean \
install-recursive clean clean-local clean-recursive distclean \
distclean-local distclean-recursive doc dist dist-local dist-recursive \
check check-local check-recursive clean-recursive-with-twist install-icu \
doc install-doc tests icu4j-data icu4j-data-install update-windows-makefiles xcheck-local xcheck-recursive xperf xcheck xperf-recursive \
@@ -88,9 +88,9 @@ check-exhaustive check-exhaustive-local check-exhaustive-recursive releaseDist
## List of standard targets
all: all-local all-recursive
install: install-recursive
install: install-recursive install-local
clean: clean-recursive-with-twist clean-local
distclean : distclean-recursive
distclean : distclean-recursive distclean-local
dist: dist-recursive
check: all check-recursive
check-recursive: all
@@ -279,36 +279,24 @@ config/icu-uc.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Common and Data libraries" >> $@
@echo "Name: $(PACKAGE)-uc" >> $@
ifeq ($(ENABLE_SHARED),)
@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" "${ICULIBS_DT}" '$${baselibs}' >> $@
else
@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" >> $@
@echo "Libs.private:" "${ICULIBS_DT}" '$${baselibs}' >> $@
endif
@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" "${ICULIBS_DT}" >> $@
@echo "Libs.private:" '$${baselibs}' >> $@
@echo $@ updated.
config/icu-i18n.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Internationalization library" >> $@
@echo "Name: $(PACKAGE)-i18n" >> $@
ifeq ($(ENABLE_SHARED),)
@echo "Requires: icu-uc" >> $@
else
@echo "Requires.private: icu-uc" >> $@
endif
@echo "Libs:" '-L$${libdir}' "${ICULIBS_I18N}" >> $@
@echo "Libs:" "${ICULIBS_I18N}" >> $@
@echo $@ updated.
config/icu-io.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Stream and I/O Library" >> $@
@echo "Name: $(PACKAGE)-io" >> $@
ifeq ($(ENABLE_SHARED),)
@echo "Requires: icu-i18n" >> $@
else
@echo "Requires.private: icu-i18n" >> $@
endif
@echo "Libs:" '-L$${libdir}' "${ICULIBS_IO}" >> $@
@echo "Libs:" "${ICULIBS_IO}" >> $@
@echo $@ updated.
ICULEHB_LIBS=@ICULEHB_LIBS@
@@ -322,20 +310,12 @@ config/icu-lx.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Paragraph Layout library $(USING_HB)" >> $@
@echo "Name: $(PACKAGE)-lx" >> $@
ifeq ($(ENABLE_SHARED),)
ifneq ($(ICULEHB_LIBS),)
@echo "Requires: icu-le-hb icu-uc" >> $@
else
@echo "Requires: icu-le" >> $@
endif
else
ifneq ($(ICULEHB_LIBS),)
@echo "Requires.private: icu-le-hb icu-uc" >> $@
else
@echo "Requires.private: icu-le" >> $@
endif
endif
@echo "Libs:" '-L$${libdir}' "${ICULIBS_LX}" >> $@
@echo "Libs:" "${ICULIBS_LX}" >> $@
@echo $@ updated.
@@ -381,7 +361,7 @@ config.status: $(srcdir)/configure $(srcdir)/common/unicode/uvernum.h
install-manx: $(MANX_FILES)
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
ifneq ($(MANX_FILES),)
$(INSTALL_DATA) $^ $(DESTDIR)$(mandir)/man$(SECTION)
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
endif
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in

View File

@@ -463,6 +463,17 @@ AC_DEFUN([AC_CHECK_STRICT_COMPILE],
then
if test "$GCC" = yes
then
case "${host}" in
*-*-solaris*)
# Don't use -std=c11 on Solaris because of timezone check fails
;;
*)
# Do not use -ansi. It limits us to C90, and it breaks some platforms.
# We use -std=c11 to disable the gnu99 defaults and its associated warnings
CFLAGS="$CFLAGS -std=c11"
;;
esac
CFLAGS="$CFLAGS -Wall -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings"
# Suppress clang C warnings:

View File

@@ -1,6 +1,6 @@
# generated automatically by aclocal 1.16.5 -*- Autoconf -*-
# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
# Copyright (C) 1996-2021 Free Software Foundation, Inc.
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -12,9 +12,63 @@
# PARTICULAR PURPOSE.
m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*-
# serial 12 (pkg-config-0.29.2)
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 6
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_VAR_IF(CACHEVAR,yes,
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAGS
dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
dnl serial 11 (pkg-config-0.29.1)
dnl
dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
dnl
@@ -55,7 +109,7 @@ dnl
dnl See the "Since" comment for each macro you use to see what version
dnl of the macros you require.
m4_defun([PKG_PREREQ],
[m4_define([PKG_MACROS_VERSION], [0.29.2])
[m4_define([PKG_MACROS_VERSION], [0.29.1])
m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
[m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
])dnl PKG_PREREQ
@@ -100,7 +154,7 @@ dnl Check to see whether a particular set of modules exists. Similar to
dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
dnl
dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
dnl only at the first occurrence in configure.ac, so if the first place
dnl only at the first occurence in configure.ac, so if the first place
dnl it's called might be skipped (such as if it is within an "if", you
dnl have to call PKG_CHECK_EXISTS manually
AC_DEFUN([PKG_CHECK_EXISTS],
@@ -156,7 +210,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
pkg_failed=no
AC_MSG_CHECKING([for $2])
AC_MSG_CHECKING([for $1])
_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])
@@ -166,17 +220,17 @@ and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])
if test $pkg_failed = yes; then
AC_MSG_RESULT([no])
AC_MSG_RESULT([no])
_PKG_SHORT_ERRORS_SUPPORTED
if test $_pkg_short_errors_supported = yes; then
$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
else
$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
else
$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
# Put the nasty error message in config.log where it belongs
echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
m4_default([$4], [AC_MSG_ERROR(
m4_default([$4], [AC_MSG_ERROR(
[Package requirements ($2) were not met:
$$1_PKG_ERRORS
@@ -187,8 +241,8 @@ installed software in a non-standard prefix.
_PKG_TEXT])[]dnl
])
elif test $pkg_failed = untried; then
AC_MSG_RESULT([no])
m4_default([$4], [AC_MSG_FAILURE(
AC_MSG_RESULT([no])
m4_default([$4], [AC_MSG_FAILURE(
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.
@@ -198,10 +252,10 @@ _PKG_TEXT
To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
])
else
$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
AC_MSG_RESULT([yes])
$3
$3
fi[]dnl
])dnl PKG_CHECK_MODULES
@@ -288,73 +342,5 @@ AS_VAR_COPY([$1], [pkg_cv_][$1])
AS_VAR_IF([$1], [""], [$5], [$4])dnl
])dnl PKG_CHECK_VAR
dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND],
dnl [DESCRIPTION], [DEFAULT])
dnl ------------------------------------------
dnl
dnl Prepare a "--with-" configure option using the lowercase
dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and
dnl PKG_CHECK_MODULES in a single macro.
AC_DEFUN([PKG_WITH_MODULES],
[
m4_pushdef([with_arg], m4_tolower([$1]))
m4_pushdef([description],
[m4_default([$5], [build with ]with_arg[ support])])
m4_pushdef([def_arg], [m4_default([$6], [auto])])
m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes])
m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no])
m4_case(def_arg,
[yes],[m4_pushdef([with_without], [--without-]with_arg)],
[m4_pushdef([with_without],[--with-]with_arg)])
AC_ARG_WITH(with_arg,
AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),,
[AS_TR_SH([with_]with_arg)=def_arg])
AS_CASE([$AS_TR_SH([with_]with_arg)],
[yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)],
[auto],[PKG_CHECK_MODULES([$1],[$2],
[m4_n([def_action_if_found]) $3],
[m4_n([def_action_if_not_found]) $4])])
m4_popdef([with_arg])
m4_popdef([description])
m4_popdef([def_arg])
])dnl PKG_WITH_MODULES
dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [DESCRIPTION], [DEFAULT])
dnl -----------------------------------------------
dnl
dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES
dnl check._[VARIABLE-PREFIX] is exported as make variable.
AC_DEFUN([PKG_HAVE_WITH_MODULES],
[
PKG_WITH_MODULES([$1],[$2],,,[$3],[$4])
AM_CONDITIONAL([HAVE_][$1],
[test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"])
])dnl PKG_HAVE_WITH_MODULES
dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [DESCRIPTION], [DEFAULT])
dnl ------------------------------------------------------
dnl
dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after
dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make
dnl and preprocessor variable.
AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES],
[
PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4])
AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
[AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
])dnl PKG_HAVE_DEFINE_WITH_MODULES
m4_include([config/m4/icu-conditional.m4])
m4_include([acinclude.m4])

View File

@@ -3,6 +3,6 @@
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
<PropertyGroup>
<IcuMajorVersion>76</IcuMajorVersion>
<IcuMajorVersion>73</IcuMajorVersion>
</PropertyGroup>
</Project>

View File

@@ -63,11 +63,11 @@
Note: This version must match the version below in the ARM64 section for AdditionalLibraryDirectories
-->
<PropertyGroup Condition="'$(Platform)'=='ARM'">
<WindowsTargetPlatformVersion>10.0.22621.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<WindowsSDKDesktopARMSupport>true</WindowsSDKDesktopARMSupport>
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='ARM64'">
<WindowsTargetPlatformVersion>10.0.22621.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<WindowsSDKDesktopARM64Support>true</WindowsSDKDesktopARM64Support>
</PropertyGroup>
<PropertyGroup>
@@ -96,9 +96,6 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<!-- Enable parallel compilation for faster builds. -->
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<!-- Set the C/C++ versions supported. -->
<LanguageStandard>stdcpp17</LanguageStandard>
<LanguageStandard_C>stdc11</LanguageStandard_C>
</ClCompile>
<ResourceCompile>
<Culture>0x0409</Culture>
@@ -183,7 +180,7 @@
<!-- The ARM64 Desktop SDK doesn't include this by default -->
<AdditionalDependencies>kernel32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<!-- Note: This needs to match the same version as WindowsTargetPlatformVersion for ARM -->
<AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.16299.0\um\arm</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all ARM 64-bit configurations for *all* projects. -->
@@ -199,7 +196,7 @@
<!-- The ARM64 Desktop SDK doesn't include this by default -->
<AdditionalDependencies>kernel32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<!-- Note: This needs to match the same version as WindowsTargetPlatformVersion for ARM64 -->
<AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.16299.0\um\arm64</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
</Project>

View File

@@ -45,9 +45,6 @@
%(PreprocessorDefinitions);
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
<!-- Set the C/C++ versions supported. -->
<LanguageStandard>stdcpp17</LanguageStandard>
<LanguageStandard_C>stdc11</LanguageStandard_C>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>

View File

@@ -2,6 +2,12 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.27130.2036
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
@@ -21,6 +27,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcxproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcxproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
ProjectSection(ProjectDependencies) = postProject
{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
@@ -200,6 +212,18 @@ Global
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|ARM.ActiveCfg = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|ARM64.ActiveCfg = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.ActiveCfg = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.Build.0 = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.ActiveCfg = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.Build.0 = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|ARM.ActiveCfg = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|ARM64.ActiveCfg = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.ActiveCfg = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.Build.0 = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.ActiveCfg = Release|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.Build.0 = Release|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM.ActiveCfg = Debug|ARM
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM.Build.0 = Debug|ARM
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM64.ActiveCfg = Debug|ARM64
@@ -248,6 +272,18 @@ Global
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.Build.0 = Release|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.ActiveCfg = Release|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.Build.0 = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|ARM.ActiveCfg = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|ARM64.ActiveCfg = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.ActiveCfg = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.Build.0 = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.ActiveCfg = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.Build.0 = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|ARM.ActiveCfg = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|ARM64.ActiveCfg = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.ActiveCfg = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.Build.0 = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.ActiveCfg = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.Build.0 = Release|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM.ActiveCfg = Debug|ARM
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM.Build.0 = Debug|ARM
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM64.ActiveCfg = Debug|ARM64

View File

@@ -603,16 +603,12 @@ cc_library(
"locbased.cpp",
"locid.cpp",
"loclikely.cpp",
"loclikelysubtags.cpp",
"locmap.cpp",
"lsr.cpp",
"resbund.cpp",
"resource.cpp",
"uloc.cpp",
"uloc_tag.cpp",
"uloc_keytype.cpp",
"ulocale.cpp",
"ulocbuilder.cpp",
"uresbund.cpp",
"uresdata.cpp",
"wintz.cpp",

View File

@@ -44,10 +44,10 @@ ifeq ($(ENABLE_SO_VERSION_DATA),1)
SO_VERSION_DATA = common.res
endif
ifeq ($(BUILD_HOST_ICU),OS390)
ifeq ($(OS390BATCH),1)
BATCH_TARGET = $(BATCH_COMMON_TARGET)
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
endif
endif # OS390BATCH
endif # ENABLE_SHARED
@@ -58,6 +58,10 @@ DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
ifeq ($(OS390BATCH),1)
CFLAGS += -WI
CXXFLAGS += -WI
endif
CPPFLAGS += -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
# we want DEFS here
@@ -187,10 +191,10 @@ ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
endif
endif
ifeq ($(BUILD_HOST_ICU),OS390)
ifeq ($(OS390BATCH),1)
$(BATCH_TARGET):$(OBJECTS)
$(SHLIB.cc) $(LD_SONAME) -Wl,-x$@.x $(OUTOPT)$@ $^ $(BATCH_LIBS)
endif
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
endif # OS390BATCH
endif # ENABLE_SHARED
ifeq (,$(MAKECMDGOALS))

View File

@@ -25,7 +25,7 @@ Appendable::~Appendable() {}
UBool
Appendable::appendCodePoint(UChar32 c) {
if(c<=0xffff) {
return appendCodeUnit(static_cast<char16_t>(c));
return appendCodeUnit((char16_t)c);
} else {
return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
}

View File

@@ -75,7 +75,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
// Set one bit indicating an all-one block.
uint32_t bits = static_cast<uint32_t>(1) << lead;
uint32_t bits=(uint32_t)1<<lead;
if((start+1)==limit) { // Single-character shortcut.
table[trail]|=bits;
return;
@@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
++lead;
}
if(lead<limitLead) {
bits = ~((static_cast<unsigned>(1) << lead) - 1);
bits=~(((unsigned)1<<lead)-1);
if(limitLead<0x20) {
bits &= (static_cast<unsigned>(1) << limitLead) - 1;
bits&=((unsigned)1<<limitLead)-1;
}
for(trail=0; trail<64; ++trail) {
table[trail]|=bits;
@@ -111,7 +111,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
// limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
// In that case, bits=1<<limitLead is undefined but the bits value
// is not used because trail<limitTrail is already false.
bits = static_cast<uint32_t>(1) << ((limitLead == 0x20) ? (limitLead - 1) : limitLead);
bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
for(trail=0; trail<limitTrail; ++trail) {
table[trail]|=bits;
}
@@ -290,22 +290,22 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
UBool
BMPSet::contains(UChar32 c) const {
if (static_cast<uint32_t>(c) <= 0xff) {
return latin1Contains[c];
} else if (static_cast<uint32_t>(c) <= 0x7ff) {
return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
} else if (static_cast<uint32_t>(c) < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
if((uint32_t)c<=0xff) {
return (UBool)latin1Contains[c];
} else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
return twoBits;
return (UBool)twoBits;
} else {
// Look up the code point in its 4k block of code points.
return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
}
} else if (static_cast<uint32_t>(c) <= 0x10ffff) {
} else if((uint32_t)c<=0x10ffff) {
// surrogate or supplementary code point
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
} else {
@@ -332,7 +332,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
break;
}
} else if(c<=0x7ff) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
@@ -372,7 +372,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
break;
}
} else if(c<=0x7ff) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
@@ -421,7 +421,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
break;
}
} else if(c<=0x7ff) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
@@ -464,7 +464,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
break;
}
} else if(c<=0x7ff) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
@@ -527,7 +527,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
b=*s;
} while(U8_IS_SINGLE(b));
}
length = static_cast<int32_t>(limit - s);
length=(int32_t)(limit-s);
}
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
@@ -547,7 +547,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
* the truncated sequence.
*/
b=*(limit-1);
if (static_cast<int8_t>(b) < 0) {
if((int8_t)b<0) {
// b>=0x80: lead or trail byte
if(b<0xc0) {
// single trail byte, check for preceding 3- or 4-byte lead byte
@@ -602,15 +602,15 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
if(b>=0xe0) {
if(b<0xf0) {
if( /* handle U+0000..U+FFFF inline */
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
) {
b&=0xf;
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
if(twoBits<=1) {
// All 64 code points with this lead byte and middle trail byte
// are either in the set or not.
if (twoBits != static_cast<uint32_t>(spanCondition)) {
if(twoBits!=(uint32_t)spanCondition) {
return s-1;
}
} else {
@@ -624,12 +624,12 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
continue;
}
} else if( /* handle U+10000..U+10FFFF inline */
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
(t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
) {
// Give an illegal sequence the same value as the result of contains(FFFD).
UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
containsFFFD
@@ -643,9 +643,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
} else {
if( /* handle U+0000..U+07FF inline */
b>=0xc0 &&
(t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
(t1=(uint8_t)(*s-0x80)) <= 0x3f
) {
if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
return s-1;
}
++s;
@@ -711,7 +711,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
// c is a valid code point, not ASCII, not a surrogate
if(c<=0x7ff) {
if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
return prev+1;
}
} else if(c<=0xffff) {
@@ -720,7 +720,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if (twoBits != static_cast<uint32_t>(spanCondition)) {
if(twoBits!=(uint32_t)spanCondition) {
return prev+1;
}
} else {

View File

@@ -156,7 +156,7 @@ private:
};
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
return findCodePoint(c, lo, hi) & 1;
return (UBool)(findCodePoint(c, lo, hi) & 1);
}
U_NAMESPACE_END

View File

@@ -21,7 +21,6 @@
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "unicode/rbbi.h"
#include "brkeng.h"
#include "cmemory.h"
@@ -71,22 +70,20 @@ UnhandledEngine::~UnhandledEngine() {
}
UBool
UnhandledEngine::handles(UChar32 c, const char* locale) const {
(void)locale; // Unused
UnhandledEngine::handles(UChar32 c) const {
return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t startPos,
int32_t /* startPos */,
int32_t endPos,
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
utext_setNativeIndex(text, startPos);
UChar32 c = utext_current32(text);
while (static_cast<int32_t>(utext_getNativeIndex(text)) < endPos && fHandled->contains(c)) {
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
@@ -114,46 +111,50 @@ UnhandledEngine::handleCharacter(UChar32 c) {
*/
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
fEngines = nullptr;
fEngines = 0;
}
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
delete fEngines;
}
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status);
if (U_SUCCESS(status)) {
fEngines = engines.orphan();
}
if (fEngines != 0) {
delete fEngines;
}
}
U_NAMESPACE_END
U_CDECL_BEGIN
static void U_CALLCONV _deleteEngine(void *obj) {
delete (const icu::LanguageBreakEngine *) obj;
}
U_CDECL_END
U_NAMESPACE_BEGIN
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
const LanguageBreakEngine *lbe = nullptr;
UErrorCode status = U_ZERO_ERROR;
ensureEngines(status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = static_cast<const LanguageBreakEngine*>(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c, locale)) {
return lbe;
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}
fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c)) {
return lbe;
}
}
}
// We didn't find an engine. Create one.
lbe = loadEngineFor(c, locale);
lbe = loadEngineFor(c);
if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
@@ -161,7 +162,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
}
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
@@ -259,7 +260,7 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
CharString ext;
const char16_t *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
if (extStart != nullptr) {
int32_t len = static_cast<int32_t>(extStart - dictfname);
int32_t len = (int32_t)(extStart - dictfname);
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
dictnlength = len;
}
@@ -269,18 +270,18 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
if (U_SUCCESS(status)) {
// build trie
const uint8_t* data = static_cast<const uint8_t*>(udata_getMemory(file));
const int32_t* indexes = reinterpret_cast<const int32_t*>(data);
const uint8_t *data = (const uint8_t *)udata_getMemory(file);
const int32_t *indexes = (const int32_t *)data;
const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
DictionaryMatcher *m = nullptr;
if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
const char* characters = reinterpret_cast<const char*>(data + offset);
const char *characters = (const char *)(data + offset);
m = new BytesDictionaryMatcher(characters, transform, file);
}
else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
const char16_t* characters = reinterpret_cast<const char16_t*>(data + offset);
const char16_t *characters = (const char16_t *)(data + offset);
m = new UCharsDictionaryMatcher(characters, file);
}
if (m == nullptr) {
@@ -298,70 +299,6 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
return nullptr;
}
void ICULanguageBreakFactory::addExternalEngine(
ExternalBreakEngine* external, UErrorCode& status) {
LocalPointer<ExternalBreakEngine> engine(external, status);
ensureEngines(status);
LocalPointer<BreakEngineWrapper> wrapper(
new BreakEngineWrapper(engine.orphan(), status), status);
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
fEngines->push(wrapper.getAlias(), status);
wrapper.orphan();
}
BreakEngineWrapper::BreakEngineWrapper(
ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
}
BreakEngineWrapper::~BreakEngineWrapper() {
}
UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
return delegate->isFor(c, locale);
}
int32_t BreakEngineWrapper::findBreaks(
UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
int32_t result = 0;
// Find the span of characters included in the set.
// The span to break begins at the current position in the text, and
// extends towards the start or end of the text, depending on 'reverse'.
utext_setNativeIndex(text, startPos);
int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && delegate->handles(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
int32_t beforeSize = foundBreaks.size();
int32_t additionalCapacity = rangeEnd - rangeStart + 1;
// enlarge to contains (rangeEnd-rangeStart+1) more items
foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity);
result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize,
additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + result);
utext_setNativeIndex(text, current);
return result;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View File

@@ -10,7 +10,6 @@
#ifndef BRKENG_H
#define BRKENG_H
#include "unicode/umisc.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
@@ -22,7 +21,6 @@ class UnicodeSet;
class UStack;
class UVector32;
class DictionaryMatcher;
class ExternalBreakEngine;
/*******************************************************************
* LanguageBreakEngine
@@ -37,7 +35,7 @@ class ExternalBreakEngine;
* <p>LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.</p>
*/
class LanguageBreakEngine : public UObject {
class LanguageBreakEngine : public UMemory {
public:
/**
@@ -56,11 +54,10 @@ class LanguageBreakEngine : public UObject {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param locale The locale.
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, const char* locale) const = 0;
virtual UBool handles(UChar32 c) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@@ -83,35 +80,6 @@ class LanguageBreakEngine : public UObject {
};
/*******************************************************************
* BreakEngineWrapper
*/
/**
* <p>BreakEngineWrapper implement LanguageBreakEngine by
* a thin wrapper that delegate the task to ExternalBreakEngine
* </p>
*/
class BreakEngineWrapper : public LanguageBreakEngine {
public:
BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);
virtual ~BreakEngineWrapper();
virtual UBool handles(UChar32 c, const char* locale) const override;
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const override;
private:
LocalPointer<ExternalBreakEngine> delegate;
};
/*******************************************************************
* LanguageBreakFactory
*/
@@ -157,10 +125,9 @@ class LanguageBreakFactory : public UMemory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
};
@@ -207,11 +174,10 @@ class UnhandledEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param locale The locale.
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, const char* locale) const override;
virtual UBool handles(UChar32 c) const override;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@@ -281,18 +247,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;
/**
* Add and adopt the engine and return an URegistryKey.
* @param engine The ExternalBreakEngine to be added and adopt. The caller
* pass the ownership and should not release the memory after this.
* @param status the error code.
*/
virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
protected:
/**
@@ -301,10 +258,9 @@ protected:
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
@@ -313,9 +269,6 @@ protected:
* @return A DictionaryMatcher with the desired characteristics, or nullptr.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
private:
void ensureEngines(UErrorCode& status);
};
U_NAMESPACE_END

View File

@@ -27,7 +27,6 @@
#include "unicode/rbbi.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/filteredbrk.h"
@@ -85,7 +84,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// Get the actual string
brkfname = ures_getString(brkName, &size, &status);
U_ASSERT((size_t)size<sizeof(fnbuff));
if (static_cast<size_t>(size) >= sizeof(fnbuff)) {
if ((size_t)size>=sizeof(fnbuff)) {
size=0;
if (U_SUCCESS(status)) {
status = U_BUFFER_OVERFLOW_ERROR;
@@ -99,7 +98,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
char16_t* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
if (extStart != nullptr){
len = static_cast<int>(extStart - brkfname);
len = (int)(extStart-brkfname);
u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
u_UCharsToChars(brkfname, fnbuff, len);
}
@@ -122,11 +121,8 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// If there is a result, set the valid locale and actual locale, and the kind
if (U_SUCCESS(status) && result != nullptr) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY);
result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}
ures_close(b);
@@ -206,20 +202,18 @@ BreakIterator::getAvailableLocales(int32_t& count)
BreakIterator::BreakIterator()
{
*validLocale = *actualLocale = *requestLocale = 0;
*validLocale = *actualLocale = 0;
}
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
}
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
if (this != &other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
}
return *this;
}
@@ -438,14 +432,17 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lb_lw, "line");
UErrorCode kvStatus = U_ZERO_ERROR;
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
CharString value;
CharStringByteSink valueSink(&value);
loc.getKeywordValue("lb", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
// lw=phrase is only supported in Japanese and Korean
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
value = loc.getKeywordValue<CharString>("lw", kvStatus);
value.clear();
loc.getKeywordValue("lw", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && value == "phrase") {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
@@ -496,18 +493,12 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return {requestLocale};
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
}
const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale;
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocaleID(type, status);
}

View File

@@ -64,7 +64,7 @@ ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return false;
}
return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
}
void
@@ -81,15 +81,15 @@ ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *
namespace {
// See unicode/utf8.h U8_APPEND_UNSAFE().
inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
} // namespace
void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
sink.Append(s8, 2);
}
@@ -114,7 +114,7 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return false;
}
int32_t length = static_cast<int32_t>(limit - s);
int32_t length = (int32_t)(limit - s);
if (length > 0) {
appendNonEmptyUnchanged(s, length, sink, options, edits);
}

View File

@@ -7,21 +7,61 @@
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H
#include <type_traits>
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
class ByteSink;
class CharString;
class Edits;
class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
/** (length) bytes were mapped to valid (s16, s16Length). */
static UBool appendChange(int32_t length,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
static UBool appendChange(const uint8_t *s, const uint8_t *limit,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/** (length) bytes were mapped/changed to valid code point c. */
static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
/** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
ByteSink &sink, Edits *edits = nullptr) {
appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
}
/** Append the two-byte character (U+0080..U+07FF). */
static void appendTwoBytes(UChar32 c, ByteSink &sink);
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return false; }
if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
return true;
}
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};
class U_COMMON_API CharStringByteSink : public ByteSink {
public:
CharStringByteSink(CharString* dest);
@@ -43,114 +83,6 @@ private:
CharString& dest_;
};
// CharString doesn't provide the public API that StringByteSink requires a
// string class to have so this template specialization replaces the default
// implementation of StringByteSink<CharString> with CharStringByteSink.
template<>
class StringByteSink<CharString> : public CharStringByteSink {
public:
StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
};
class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
/** (length) bytes were mapped to valid (s16, s16Length). */
static UBool appendChange(int32_t length,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
static UBool appendChange(const uint8_t *s, const uint8_t *limit,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/** (length) bytes were mapped/changed to valid code point c. */
static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
/** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
ByteSink &sink, Edits *edits = nullptr) {
appendCodePoint(static_cast<int32_t>(nextSrc - src), c, sink, edits);
}
/** Append the two-byte character (U+0080..U+07FF). */
static void appendTwoBytes(UChar32 c, ByteSink &sink);
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return false; }
if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
return true;
}
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
/**
* Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
* and then returns through u_terminateChars(), in order to implement
* the classic ICU4C C API writing to a fix sized buffer on top of a
* contemporary C++ API.
*
* @param buffer receiving buffer
* @param capacity capacity of receiving buffer
* @param lambda that gets called with the sink as an argument
* @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
* @return number of bytes written, or needed (in case of overflow)
* @internal
*/
template <typename F,
typename = std::enable_if_t<
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
F&& lambda,
UErrorCode& status) {
if (U_FAILURE(status)) { return 0; }
CheckedArrayByteSink sink(buffer, capacity);
lambda(sink, status);
if (U_FAILURE(status)) { return 0; }
int32_t reslen = sink.NumberOfBytesAppended();
if (sink.Overflowed()) {
status = U_BUFFER_OVERFLOW_ERROR;
return reslen;
}
return u_terminateChars(buffer, capacity, reslen, &status);
}
/**
* Calls a lambda that writes to a ByteSink with a CharStringByteSink and
* then returns a CharString, in order to implement a contemporary C++ API
* on top of a C/C++ compatibility ByteSink API.
*
* @param lambda that gets called with the sink as an argument
* @param status to check and report
* @return the resulting string, or an empty string (in case of error)
* @internal
*/
template <typename F,
typename = std::enable_if_t<
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString result;
CharStringByteSink sink(&result);
lambda(sink, status);
return result;
}
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};
U_NAMESPACE_END
#endif //BYTESINKUTIL_H

View File

@@ -327,7 +327,7 @@ BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
++pos; // ignore a comparison byte
// handle its value
int32_t node=*pos++;
UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
if(isFinal) {
@@ -366,7 +366,7 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
// linear-match node
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
} else {
UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
if(haveUniqueValue) {
if(value!=uniqueValue) {
@@ -434,7 +434,7 @@ BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out)
void
BytesTrie::append(ByteSink &out, int c) {
char ch = static_cast<char>(c);
char ch=(char)c;
out.Append(&ch, 1);
}

View File

@@ -43,10 +43,10 @@ public:
int32_t offset=stringOffset;
int32_t length;
if(offset>=0) {
length = static_cast<uint8_t>(strings[offset++]);
length=(uint8_t)strings[offset++];
} else {
offset=~offset;
length = (static_cast<int32_t>(static_cast<uint8_t>(strings[offset])) << 8) | static_cast<uint8_t>(strings[offset + 1]);
length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
offset+=2;
}
return StringPiece(strings.data()+offset, length);
@@ -54,10 +54,10 @@ public:
int32_t getStringLength(const CharString &strings) const {
int32_t offset=stringOffset;
if(offset>=0) {
return static_cast<uint8_t>(strings[offset]);
return (uint8_t)strings[offset];
} else {
offset=~offset;
return (static_cast<int32_t>(static_cast<uint8_t>(strings[offset])) << 8) | static_cast<uint8_t>(strings[offset + 1]);
return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
}
}
@@ -102,9 +102,9 @@ BytesTrieElement::setTo(StringPiece s, int32_t val,
int32_t offset=strings.length();
if(length>0xff) {
offset=~offset;
strings.append(static_cast<char>(length >> 8), errorCode);
strings.append((char)(length>>8), errorCode);
}
strings.append(static_cast<char>(length), errorCode);
strings.append((char)length, errorCode);
stringOffset=offset;
value=val;
strings.append(s, errorCode);
@@ -229,7 +229,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
uprv_sortArray(elements, elementsLength, static_cast<int32_t>(sizeof(BytesTrieElement)),
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
compareElementStrings, strings,
false, // need not be a stable sort
&errorCode);
@@ -284,7 +284,7 @@ BytesTrieBuilder::getElementStringLength(int32_t i) const {
char16_t
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
return static_cast<uint8_t>(elements[i].charAt(byteIndex, *strings));
return (uint8_t)elements[i].charAt(byteIndex, *strings);
}
int32_t
@@ -330,7 +330,7 @@ BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t
int32_t
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const {
char b = static_cast<char>(byte);
char b=(char)byte;
while(b==elements[i].charAt(byteIndex, *strings)) {
++i;
}
@@ -404,7 +404,7 @@ BytesTrieBuilder::write(int32_t byte) {
int32_t newLength=bytesLength+1;
if(ensureCapacity(newLength)) {
bytesLength=newLength;
bytes[bytesCapacity - bytesLength] = static_cast<char>(byte);
bytes[bytesCapacity-bytesLength]=(char)byte;
}
return bytesLength;
}
@@ -432,30 +432,30 @@ BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
char intBytes[5];
int32_t length=1;
if(i<0 || i>0xffffff) {
intBytes[0] = static_cast<char>(BytesTrie::kFiveByteValueLead);
intBytes[1] = static_cast<char>(static_cast<uint32_t>(i) >> 24);
intBytes[2] = static_cast<char>(static_cast<uint32_t>(i) >> 16);
intBytes[3] = static_cast<char>(static_cast<uint32_t>(i) >> 8);
intBytes[4] = static_cast<char>(i);
intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
intBytes[1]=(char)((uint32_t)i>>24);
intBytes[2]=(char)((uint32_t)i>>16);
intBytes[3]=(char)((uint32_t)i>>8);
intBytes[4]=(char)i;
length=5;
// } else if(i<=BytesTrie::kMaxOneByteValue) {
// intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
} else {
if(i<=BytesTrie::kMaxTwoByteValue) {
intBytes[0] = static_cast<char>(BytesTrie::kMinTwoByteValueLead + (i >> 8));
intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
} else {
if(i<=BytesTrie::kMaxThreeByteValue) {
intBytes[0] = static_cast<char>(BytesTrie::kMinThreeByteValueLead + (i >> 16));
intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
} else {
intBytes[0] = static_cast<char>(BytesTrie::kFourByteValueLead);
intBytes[1] = static_cast<char>(i >> 16);
intBytes[0]=(char)BytesTrie::kFourByteValueLead;
intBytes[1]=(char)(i>>16);
length=2;
}
intBytes[length++] = static_cast<char>(i >> 8);
intBytes[length++]=(char)(i>>8);
}
intBytes[length++] = static_cast<char>(i);
intBytes[length++]=(char)i;
}
intBytes[0] = static_cast<char>((intBytes[0] << 1) | isFinal);
intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
return write(intBytes, length);
}
@@ -484,28 +484,28 @@ int32_t
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
intBytes[0] = static_cast<char>(i);
intBytes[0]=(char)i;
return 1;
}
int32_t length=1;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0] = static_cast<char>(BytesTrie::kMinTwoByteDeltaLead + (i >> 8));
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0] = static_cast<char>(BytesTrie::kMinThreeByteDeltaLead + (i >> 16));
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
} else {
if(i<=0xffffff) {
intBytes[0] = static_cast<char>(BytesTrie::kFourByteDeltaLead);
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
} else {
intBytes[0] = static_cast<char>(BytesTrie::kFiveByteDeltaLead);
intBytes[1] = static_cast<char>(i >> 24);
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=2;
}
intBytes[length++] = static_cast<char>(i >> 16);
intBytes[length++]=(char)(i>>16);
}
intBytes[length++] = static_cast<char>(i >> 8);
intBytes[length++]=(char)(i>>8);
}
intBytes[length++] = static_cast<char>(i);
intBytes[length++]=(char)i;
return length;
}

View File

@@ -115,14 +115,14 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
pos=bytes_+stack_->elementAti(stackSize-2);
stack_->setSize(stackSize-2);
str_->truncate(length&0xffff);
length = static_cast<int32_t>(static_cast<uint32_t>(length) >> 16);
length=(int32_t)((uint32_t)length>>16);
if(length>1) {
pos=branchNext(pos, length, errorCode);
if(pos==nullptr) {
return true; // Reached a final value.
}
} else {
str_->append(static_cast<char>(*pos++), errorCode);
str_->append((char)*pos++, errorCode);
}
}
if(remainingMatchLength_>=0) {
@@ -134,7 +134,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
int32_t node=*pos++;
if(node>=kMinValueLead) {
// Deliver value for the byte sequence so far.
UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
UBool isFinal=(UBool)(node&kValueIsFinal);
value_=readValue(pos, node>>1);
if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
pos_=nullptr;
@@ -186,7 +186,7 @@ BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
// Push state for the greater-or-equal edge.
stack_->addElement(static_cast<int32_t>(skipDelta(pos) - bytes_), errorCode);
stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
// Follow the less-than edge.
length>>=1;
@@ -196,12 +196,12 @@ BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &
// Read the first (key, value) pair.
uint8_t trieByte=*pos++;
int32_t node=*pos++;
UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
stack_->addElement(static_cast<int32_t>(pos - bytes_), errorCode);
stack_->addElement((int32_t)(pos-bytes_), errorCode);
stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
str_->append(static_cast<char>(trieByte), errorCode);
str_->append((char)trieByte, errorCode);
if(isFinal) {
pos_=nullptr;
value_=value;

View File

@@ -64,7 +64,6 @@ U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
/**
*@param source string to get results for
*/
@@ -74,10 +73,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
pieces_lengths(nullptr),
current(nullptr),
current_length(0),
nfd(Normalizer2::getNFDInstance(status)),
nfcImpl(Normalizer2Factory::getNFCImpl(status))
nfd(*Normalizer2::getNFDInstance(status)),
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
{
if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
setSource(sourceStr, status);
}
}
@@ -173,7 +172,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
int32_t i = 0;
UnicodeString *list = nullptr;
nfd->normalize(newSource, source, status);
nfd.normalize(newSource, source, status);
if(U_FAILURE(status)) {
return;
}
@@ -183,10 +182,10 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
// catch degenerate case
if (newSource.length() == 0) {
pieces = static_cast<UnicodeString**>(uprv_malloc(sizeof(UnicodeString*)));
pieces_lengths = static_cast<int32_t*>(uprv_malloc(1 * sizeof(int32_t)));
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
pieces_length = 1;
current = static_cast<int32_t*>(uprv_malloc(1 * sizeof(int32_t)));
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
current_length = 1;
if (pieces == nullptr || pieces_lengths == nullptr || current == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
@@ -195,7 +194,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
current[0] = 0;
pieces[0] = new UnicodeString[1];
pieces_lengths[0] = 1;
if (pieces[0] == nullptr) {
if (pieces[0] == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
@@ -204,7 +203,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
list = new UnicodeString[source.length()];
if (list == nullptr) {
if (list == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
@@ -220,7 +219,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
// on the NFD form - see above).
for (; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
if (nfcImpl->isCanonSegmentStarter(cp)) {
if (nfcImpl.isCanonSegmentStarter(cp)) {
source.extract(start, i-start, list[list_length++]); // add up to i
start = i;
}
@@ -229,10 +228,10 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
// allocate the arrays, and find the strings that are CE to each segment
pieces = static_cast<UnicodeString**>(uprv_malloc(list_length * sizeof(UnicodeString*)));
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
pieces_length = list_length;
pieces_lengths = static_cast<int32_t*>(uprv_malloc(list_length * sizeof(int32_t)));
current = static_cast<int32_t*>(uprv_malloc(list_length * sizeof(int32_t)));
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current_length = list_length;
if (pieces == nullptr || pieces_lengths == nullptr || current == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
@@ -253,7 +252,9 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
delete[] list;
if (list != nullptr) {
delete[] list;
}
cleanPieces();
}
@@ -263,19 +264,10 @@ CleanPartialInitialization:
* @param source the string to find permutations for
* @return the results in a set.
*/
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
// To avoid an infinite loop caused by permute, we limit the depth of recursive
// calls to permute and return U_UNSUPPORTED_ERROR when the limit is exceeded.
// We know that some unit tests need a depth of at least 4. Set the limit to 8
// to leave room for unforeseen use cases.
constexpr int32_t kPermuteDepthLimit = 8;
if (depth > kPermuteDepthLimit) {
status = U_UNSUPPORTED_ERROR;
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;
@@ -285,7 +277,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
if (source.length() <= 2 && source.countChar32() <= 1) {
UnicodeString *toPut = new UnicodeString(source);
/* test for nullptr */
if (toPut == nullptr) {
if (toPut == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
@@ -319,7 +311,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
@@ -330,7 +322,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
// prefix this character to all of them
ne = subpermute.nextElement(el);
while (ne != nullptr) {
UnicodeString* permRes = static_cast<UnicodeString*>(ne->value.pointer);
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
UnicodeString *chStr = new UnicodeString(cp);
//test for nullptr
if (chStr == nullptr) {
@@ -354,7 +346,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
Hashtable permutations(status);
Hashtable basic(status);
if (U_FAILURE(status)) {
return nullptr;
return 0;
}
result.setValueDeleter(uprv_deleteUObject);
permutations.setValueDeleter(uprv_deleteUObject);
@@ -363,9 +355,6 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
char16_t USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
getEquivalents2(&basic, USeg, segLen, status);
if (U_FAILURE(status)) {
return nullptr;
}
// now get all the permutations
// add only the ones that are canonically equivalent
@@ -378,7 +367,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
//while (it.hasNext())
while (ne != nullptr) {
//String item = (String) it.next();
UnicodeString item = *static_cast<UnicodeString*>(ne->value.pointer);
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
permutations.removeAll();
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
@@ -390,9 +379,9 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
while (ne2 != nullptr) {
//String possible = (String) it2.next();
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString possible(*static_cast<UnicodeString*>(ne2->value.pointer));
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString attempt;
nfd->normalize(possible, attempt, status);
nfd.normalize(possible, attempt, status);
// TODO: check if operator == is semantically the same as attempt.equals(segment)
if (attempt==segment) {
@@ -410,7 +399,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return nullptr;
return 0;
}
// convert into a String[] to clean up storage
//String[] finalResult = new String[result.size()];
@@ -418,7 +407,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
int32_t resultCount;
if((resultCount = result.count()) != 0) {
finalResult = new UnicodeString[resultCount];
if (finalResult == nullptr) {
if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
@@ -432,7 +421,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
el = UHASH_FIRST;
ne = result.nextElement(el);
while(ne != nullptr) {
finalResult[result_len++] = *static_cast<UnicodeString*>(ne->value.pointer);
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
ne = result.nextElement(el);
}
@@ -459,7 +448,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
// see if any character is at the start of some decomposition
U16_GET(segment, 0, i, segLen, cp);
if (!nfcImpl->getCanonStartSet(cp, starts)) {
if (!nfcImpl.getCanonStartSet(cp, starts)) {
continue;
}
// if so, see which decompositions match
@@ -469,9 +458,6 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
Hashtable remainder(status);
remainder.setValueDeleter(uprv_deleteUObject);
if (extract(&remainder, cp2, segment, segLen, i, status) == nullptr) {
if (U_FAILURE(status)) {
return nullptr;
}
continue;
}
@@ -482,10 +468,10 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
int32_t el = UHASH_FIRST;
const UHashElement *ne = remainder.nextElement(el);
while (ne != nullptr) {
UnicodeString item = *static_cast<UnicodeString*>(ne->value.pointer);
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
UnicodeString *toAdd = new UnicodeString(prefix);
/* test for nullptr */
if (toAdd == nullptr) {
if (toAdd == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
@@ -496,13 +482,6 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
ne = remainder.nextElement(el);
}
// ICU-22642 Guards against strings that have so many permutations
// that they would otherwise hang the function.
constexpr int32_t kResultLimit = 4096;
if (fillinResult->count() > kResultLimit) {
status = U_UNSUPPORTED_ERROR;
return nullptr;
}
}
}
@@ -530,7 +509,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
UnicodeString temp(comp);
int32_t inputLen=temp.length();
UnicodeString decompString;
nfd->normalize(temp, decompString, status);
nfd.normalize(temp, decompString, status);
if (U_FAILURE(status)) {
return nullptr;
}
@@ -594,7 +573,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
// brute force approach
// check to make sure result is canonically equivalent
UnicodeString trial;
nfd->normalize(temp, trial, status);
nfd.normalize(temp, trial, status);
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
return nullptr;
}

View File

@@ -58,17 +58,17 @@ icu::UMutex cpMutex;
// Does not use uset.h to reduce code dependencies
void U_CALLCONV
_set_add(USet *set, UChar32 c) {
reinterpret_cast<UnicodeSet*>(set)->add(c);
((UnicodeSet *)set)->add(c);
}
void U_CALLCONV
_set_addRange(USet *set, UChar32 start, UChar32 end) {
reinterpret_cast<UnicodeSet*>(set)->add(start, end);
((UnicodeSet *)set)->add(start, end);
}
void U_CALLCONV
_set_addString(USet *set, const char16_t *str, int32_t length) {
reinterpret_cast<UnicodeSet*>(set)->add(icu::UnicodeString(static_cast<UBool>(length < 0), str, length));
((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
}
UBool U_CALLCONV characterproperties_cleanup() {
@@ -103,7 +103,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
return;
}
USetAdder sa = {
reinterpret_cast<USet*>(incl.getAlias()),
(USet *)incl.getAlias(),
_set_add,
_set_addRange,
_set_addString,
@@ -169,7 +169,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
case UPROPS_SRC_INPC:
case UPROPS_SRC_INSC:
case UPROPS_SRC_VO:
uprops_addPropertyStarts(src, &sa, &errorCode);
uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
break;
case UPROPS_SRC_EMOJI: {
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
@@ -178,18 +178,6 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
}
break;
}
case UPROPS_SRC_IDSU:
// New in Unicode 15.1 for just two characters.
sa.add(sa.set, 0x2FFE);
sa.add(sa.set, 0x2FFF + 1);
break;
case UPROPS_SRC_ID_COMPAT_MATH:
case UPROPS_SRC_MCM:
uprops_addPropertyStarts(src, &sa, &errorCode);
break;
case UPROPS_SRC_BLOCK:
ublock_addPropertyStarts(&sa, errorCode);
break;
default:
errorCode = U_INTERNAL_PROGRAM_ERROR;
break;
@@ -293,7 +281,7 @@ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
if (U_FAILURE(errorCode)) { return nullptr; }
USetAdder sa = {
reinterpret_cast<USet*>(set.getAlias()),
(USet *)set.getAlias(),
_set_add,
_set_addRange,
_set_addString,

View File

@@ -104,13 +104,6 @@ public:
*/
int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
bool operator==(const CharString& other) const {
return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
}
bool operator!=(const CharString& other) const {
return !operator==(other);
}
bool operator==(StringPiece other) const {
return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
}

View File

@@ -35,6 +35,7 @@
</Midl>
<ClCompile>
<PreprocessorDefinitions>U_ATTRIBUTE_DEPRECATED=;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableLanguageExtensions>false</DisableLanguageExtensions>
<WarningLevel>Level3</WarningLevel>
<CompileAs>Default</CompileAs>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
@@ -166,7 +167,6 @@
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="ulocbuilder.cpp" />
<ClCompile Include="localematcher.cpp" />
<ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
@@ -185,7 +185,6 @@
<ClCompile Include="ucat.cpp" />
<ClCompile Include="uloc.cpp" />
<ClCompile Include="uloc_tag.cpp" />
<ClCompile Include="ulocale.cpp" />
<ClCompile Include="ures_cnv.cpp" />
<ClCompile Include="uresbund.cpp" />
<ClCompile Include="uresdata.cpp" />

View File

@@ -322,9 +322,6 @@
<ClCompile Include="localebuilder.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="ulocbuilder.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="localematcher.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
@@ -376,9 +373,6 @@
<ClCompile Include="uloc_tag.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="ulocale.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="ures_cnv.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
@@ -1138,9 +1132,6 @@
<CustomBuild Include="unicode\localebuilder.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\ulocbuilder.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\localematcher.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
@@ -1156,9 +1147,6 @@
<CustomBuild Include="unicode\uloc.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\ulocale.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\ures.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>

View File

@@ -80,6 +80,7 @@
<ExceptionHandling>
</ExceptionHandling>
<FunctionLevelLinking>true</FunctionLevelLinking>
<DisableLanguageExtensions>false</DisableLanguageExtensions>
<TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
<WarningLevel>Level3</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
@@ -300,7 +301,6 @@
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="ulocbuilder.cpp" />
<ClCompile Include="localematcher.cpp" />
<ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
@@ -319,7 +319,6 @@
<ClCompile Include="ucat.cpp" />
<ClCompile Include="uloc.cpp" />
<ClCompile Include="uloc_tag.cpp" />
<ClCompile Include="ulocale.cpp" />
<ClCompile Include="ures_cnv.cpp" />
<ClCompile Include="uresbund.cpp" />
<ClCompile Include="uresdata.cpp" />

View File

@@ -126,7 +126,7 @@ T_CString_toLowerCase(char* str)
if (str) {
do
*str = uprv_tolower(*str);
*str = (char)uprv_tolower(*str);
while (*(str++));
}
@@ -140,7 +140,7 @@ T_CString_toUpperCase(char* str)
if (str) {
do
*str = uprv_toupper(*str);
*str = (char)uprv_toupper(*str);
while (*(str++));
}

View File

@@ -42,7 +42,7 @@ DictionaryBreakEngine::~DictionaryBreakEngine() {
}
UBool
DictionaryBreakEngine::handles(UChar32 c, const char*) const {
DictionaryBreakEngine::handles(UChar32 c) const {
return fSet.contains(c);
}
@@ -54,19 +54,19 @@ DictionaryBreakEngine::findBreaks( UText *text,
UBool isPhraseBreaking,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
(void)startPos; // TODO: remove this param?
int32_t result = 0;
// Find the span of characters included in the set.
// The span to break begins at the current position in the text, and
// extends towards the start or end of the text, depending on 'reverse'.
utext_setNativeIndex(text, startPos);
int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && fSet.contains(c)) {
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
@@ -137,7 +137,7 @@ public:
int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
// TODO: If getIndex is too slow, use offset < 0 and add discardAll()
int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t start = (int32_t)utext_getNativeIndex(text);
if (start != offset) {
offset = start;
count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, nullptr, &prefix);
@@ -253,7 +253,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cpWordLength = 0;
cuWordLength = 0;
@@ -269,7 +269,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
// If there was more than one, see which one can take us forward the most words
else if (candidates > 1) {
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
do {
@@ -278,7 +278,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
@@ -308,7 +308,7 @@ foundBest:
// The text iterator should now be positioned at the end of the word we found.
UChar32 uc = 0;
if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
@@ -320,9 +320,9 @@ foundBest:
UChar32 pc;
int32_t chars = 0;
for (;;) {
int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
pc = utext_next32(text);
int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
chars += pcSize;
remaining -= pcSize;
if (remaining <= 0) {
@@ -356,28 +356,28 @@ foundBest:
utext_setNativeIndex(text, current+cuWordLength);
}
}
// Never stop before a combining mark.
int32_t currPos;
while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
// resynch continues to function. For example, one of the suffix characters
// could be a typo in the middle of a word.
if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cuWordLength > 0) {
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) {
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& fSuffixSet.contains(uc = utext_current32(text))) {
if (uc == THAI_PAIYANNOI) {
if (!fSuffixSet.contains(utext_previous32(text))) {
// Skip over previous end and PAIYANNOI
utext_next32(text);
int32_t paiyannoiIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text);
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - paiyannoiIndex; // Add PAIYANNOI to word
cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex; // Add PAIYANNOI to word
uc = utext_current32(text); // Fetch next character
}
else {
@@ -389,9 +389,9 @@ foundBest:
if (utext_previous32(text) != THAI_MAIYAMOK) {
// Skip over previous end and MAIYAMOK
utext_next32(text);
int32_t maiyamokIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text);
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - maiyamokIndex; // Add MAIYAMOK to word
cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex; // Add MAIYAMOK to word
}
else {
// Restore prior position
@@ -489,7 +489,7 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cuWordLength = 0;
cpWordLength = 0;
@@ -514,7 +514,7 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
@@ -541,7 +541,7 @@ foundBest:
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
@@ -554,9 +554,9 @@ foundBest:
UChar32 uc;
int32_t chars = 0;
for (;;) {
int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
pc = utext_next32(text);
int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
chars += pcSize;
remaining -= pcSize;
if (remaining <= 0) {
@@ -590,9 +590,9 @@ foundBest:
// Never stop before a combining mark.
int32_t currPos;
while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
@@ -682,7 +682,7 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cuWordLength = 0;
cpWordLength = 0;
@@ -707,7 +707,7 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
@@ -734,7 +734,7 @@ foundBest:
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
@@ -747,9 +747,9 @@ foundBest:
UChar32 uc;
int32_t chars = 0;
for (;;) {
int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
pc = utext_next32(text);
int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
chars += pcSize;
remaining -= pcSize;
if (remaining <= 0) {
@@ -783,9 +783,9 @@ foundBest:
// Never stop before a combining mark.
int32_t currPos;
while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
@@ -888,7 +888,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cuWordLength = 0;
cpWordLength = 0;
@@ -905,7 +905,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
// If there was more than one, see which one can take us forward the most words
else if (candidates > 1) {
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
do {
@@ -914,7 +914,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
@@ -941,7 +941,7 @@ foundBest:
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
@@ -954,9 +954,9 @@ foundBest:
UChar32 uc;
int32_t chars = 0;
for (;;) {
int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
pc = utext_next32(text);
int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
chars += pcSize;
remaining -= pcSize;
if (remaining <= 0) {
@@ -989,9 +989,9 @@ foundBest:
// Never stop before a combining mark.
int32_t currPos;
while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
@@ -1120,7 +1120,7 @@ static inline bool isKatakana(UChar32 value) {
// Replicates an internal UText function.
static inline int32_t utext_i32_flag(int32_t bitIndex) {
return static_cast<int32_t>(1) << bitIndex;
return (int32_t)1 << bitIndex;
}
/*
@@ -1167,14 +1167,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t limit = rangeEnd;
U_ASSERT(limit <= utext_nativeLength(inText));
if (limit > utext_nativeLength(inText)) {
limit = static_cast<int32_t>(utext_nativeLength(inText));
limit = (int32_t)utext_nativeLength(inText);
}
inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
if (U_FAILURE(status)) {
return 0;
}
while (utext_getNativeIndex(inText) < limit) {
int32_t nativePosition = static_cast<int32_t>(utext_getNativeIndex(inText));
int32_t nativePosition = (int32_t)utext_getNativeIndex(inText);
UChar32 c = utext_next32(inText);
U_ASSERT(c != U_SENTINEL);
inString.append(c);
@@ -1304,7 +1304,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t ix = 0;
bool is_prev_katakana = false;
for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) {
if (static_cast<uint32_t>(bestSnlp.elementAti(i)) == kuint32max) {
if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
continue;
}
@@ -1327,9 +1327,9 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
for (int32_t j = 0; j < count; j++) {
uint32_t newSnlp = static_cast<uint32_t>(bestSnlp.elementAti(i)) + static_cast<uint32_t>(values.elementAti(j));
uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j);
int32_t ln_j_i = lengths.elementAti(j) + i;
if (newSnlp < static_cast<uint32_t>(bestSnlp.elementAti(ln_j_i))) {
if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) {
bestSnlp.setElementAt(newSnlp, ln_j_i);
prev.setElementAt(i, ln_j_i);
}
@@ -1353,7 +1353,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
if (katakanaRunLength < kMaxKatakanaGroupLength) {
uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
if (newSnlp < static_cast<uint32_t>(bestSnlp.elementAti(i + katakanaRunLength))) {
if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
}
@@ -1371,7 +1371,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t numBreaks = 0;
// No segmentation found, set boundary to end of range
if (static_cast<uint32_t>(bestSnlp.elementAti(numCodePts)) == kuint32max) {
if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
t_boundary.addElement(numCodePts, status);
numBreaks++;
} else if (isPhraseBreaking) {

View File

@@ -62,11 +62,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param locale The locale.
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, const char* locale) const override;
virtual UBool handles(UChar32 c) const override;
/**
* <p>Find any breaks within a run in the supplied text.</p>

View File

@@ -47,13 +47,13 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
int32_t *prefix) const {
UCharsTrie uct(characters);
int32_t startingTextIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
int32_t wordCount = 0;
int32_t codePointsMatched = 0;
for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
int32_t lengthMatched = static_cast<int32_t>(utext_getNativeIndex(text)) - startingTextIndex;
int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
if (USTRINGTRIE_HAS_VALUE(result)) {
if (wordCount < limit) {
@@ -101,7 +101,7 @@ UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
if (delta < 0 || 0xFD < delta) {
return U_SENTINEL;
}
return static_cast<UChar32>(delta);
return (UChar32)delta;
}
return c;
}
@@ -114,13 +114,13 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const {
BytesTrie bt(characters);
int32_t startingTextIndex = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
int32_t wordCount = 0;
int32_t codePointsMatched = 0;
for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
int32_t lengthMatched = static_cast<int32_t>(utext_getNativeIndex(text)) - startingTextIndex;
int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
codePointsMatched += 1;
if (USTRINGTRIE_HAS_VALUE(result)) {
if (wordCount < limit) {

View File

@@ -47,7 +47,7 @@ Edits &Edits::copyArray(const Edits &other) {
return *this;
}
if (length > capacity) {
uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(length) * 2));
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
if (newArray == nullptr) {
length = delta = numChanges = 0;
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
@@ -186,30 +186,30 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
head |= oldLength << 6;
} else if(oldLength <= 0x7fff) {
head |= LENGTH_IN_1TRAIL << 6;
array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
array[limit++] = (uint16_t)(0x8000 | oldLength);
} else {
head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
array[limit++] = static_cast<uint16_t>(0x8000 | (oldLength >> 15));
array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
array[limit++] = (uint16_t)(0x8000 | oldLength);
}
if(newLength < LENGTH_IN_1TRAIL) {
head |= newLength;
} else if(newLength <= 0x7fff) {
head |= LENGTH_IN_1TRAIL;
array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
array[limit++] = (uint16_t)(0x8000 | newLength);
} else {
head |= LENGTH_IN_2TRAIL + (newLength >> 30);
array[limit++] = static_cast<uint16_t>(0x8000 | (newLength >> 15));
array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
array[limit++] = (uint16_t)(0x8000 | newLength);
}
array[length] = static_cast<uint16_t>(head);
array[length] = (uint16_t)head;
length = limit;
}
}
void Edits::append(int32_t r) {
if(length < capacity || growArray()) {
array[length++] = static_cast<uint16_t>(r);
array[length++] = (uint16_t)r;
}
}
@@ -232,7 +232,7 @@ UBool Edits::growArray() {
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return false;
}
uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(newCapacity) * 2));
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return false;
@@ -415,7 +415,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
U_ASSERT(array[index] >= 0x8000);
U_ASSERT(array[index + 1] >= 0x8000);
int32_t len = ((head & 1) << 30) |
(static_cast<int32_t>(array[index] & 0x7fff) << 15) |
((int32_t)(array[index] & 0x7fff) << 15) |
(array[index + 1] & 0x7fff);
index += 2;
return len;

View File

@@ -83,8 +83,8 @@ void
EmojiProps::load(UErrorCode &errorCode) {
memory = udata_openChoice(nullptr, "icu", "uemoji", isAcceptable, this, &errorCode);
if (U_FAILURE(errorCode)) { return; }
const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
const uint8_t *inBytes = (const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes = (const int32_t *)inBytes;
int32_t indexesLength = inIndexes[IX_CPTRIE_OFFSET] / 4;
if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes.
@@ -104,7 +104,7 @@ EmojiProps::load(UErrorCode &errorCode) {
offset = inIndexes[i];
nextOffset = inIndexes[i + 1];
// Set/leave nullptr if there is no UCharsTrie.
const char16_t* p = nextOffset > offset ? reinterpret_cast<const char16_t*>(inBytes + offset) : nullptr;
const char16_t *p = nextOffset > offset ? (const char16_t *)(inBytes + offset) : nullptr;
stringTries[getStringTrieIndex(i)] = p;
}
}

View File

@@ -50,8 +50,8 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
* Used with sortedInsert()
*/
static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
const UnicodeString& a = *static_cast<const UnicodeString*>(t1.pointer);
const UnicodeString& b = *static_cast<const UnicodeString*>(t2.pointer);
const UnicodeString &a = *(const UnicodeString*)t1.pointer;
const UnicodeString &b = *(const UnicodeString*)t2.pointer;
return a.compare(b);
}
@@ -76,7 +76,7 @@ class UStringSet : public UVector {
* Return the ith UnicodeString alias
*/
inline const UnicodeString* getStringAt(int32_t i) const {
return static_cast<const UnicodeString*>(elementAt(i));
return (const UnicodeString*)elementAt(i);
}
/**
* Adopt the UnicodeString if not already contained.
@@ -147,7 +147,7 @@ public:
if(umtx_atomic_dec(&refcount) <= 0) {
delete this;
}
return nullptr;
return 0;
}
virtual ~SimpleFilteredSentenceBreakData();

View File

@@ -148,12 +148,12 @@ inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(nullptr) {
UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status);
}
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(nullptr)
: hash(0)
{
init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
: uhash_hashUnicodeString,
@@ -164,7 +164,7 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
}
inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
: hash(nullptr)
: hash(0)
{
initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
: uhash_hashUnicodeString,
@@ -175,13 +175,13 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& statu
}
inline Hashtable::Hashtable(UErrorCode& status)
: hash(nullptr)
: hash(0)
{
init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);
}
inline Hashtable::Hashtable()
: hash(nullptr)
: hash(0)
{
UErrorCode status = U_ZERO_ERROR;
init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);

View File

@@ -747,7 +747,7 @@ uplug_init(UErrorCode *status) {
FILE *f;
CharString pluginFile;
#ifdef ICU_PLUGINS_DD
#ifdef OS390BATCH
/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */

View File

@@ -63,7 +63,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==5
pInfo->formatVersion[0]==4
) {
// Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
@@ -82,8 +82,8 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
if(U_FAILURE(errorCode)) {
return;
}
const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_LCCC_CP) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
@@ -101,7 +101,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset);
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
// smallFCD: new in formatVersion 2
offset=nextOffset;
@@ -143,9 +143,6 @@ static icu::UInitOnce nfkcInitOnce {};
static Norm2AllModes *nfkc_cfSingleton;
static icu::UInitOnce nfkc_cfInitOnce {};
static Norm2AllModes *nfkc_scfSingleton;
static icu::UInitOnce nfkc_scfInitOnce {};
static UHashtable *cache=nullptr;
// UInitOnce singleton initialization function
@@ -159,8 +156,6 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
} else if (uprv_strcmp(what, "nfkc_scf") == 0) {
nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
} else {
UPRV_UNREACHABLE_EXIT; // Unknown singleton
}
@@ -188,10 +183,6 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
nfkc_cfSingleton = nullptr;
nfkc_cfInitOnce.reset();
delete nfkc_scfSingleton;
nfkc_scfSingleton = nullptr;
nfkc_scfInitOnce.reset();
uhash_close(cache);
cache=nullptr;
return true;
@@ -222,13 +213,6 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
return nfkc_cfSingleton;
}
const Norm2AllModes *
Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return nullptr; }
umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
return nfkc_scfSingleton;
}
#if !NORM2_HARDCODE_NFC_DATA
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
@@ -277,12 +261,6 @@ Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
return allModes!=nullptr ? &allModes->comp : nullptr;
}
const Normalizer2 *
Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
return allModes!=nullptr ? &allModes->comp : nullptr;
}
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
@@ -303,15 +281,13 @@ Normalizer2::getInstance(const char *packageName,
allModes=Norm2AllModes::getNFKCInstance(errorCode);
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
} else if(0==uprv_strcmp(name, "nfkc_scf")) {
allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
}
}
if(allModes==nullptr && U_SUCCESS(errorCode)) {
{
Mutex lock;
if(cache!=nullptr) {
allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name));
allModes=(Norm2AllModes *)uhash_get(cache, name);
}
}
if(allModes==nullptr) {
@@ -331,7 +307,7 @@ Normalizer2::getInstance(const char *packageName,
void *temp=uhash_get(cache, name);
if(temp==nullptr) {
int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
char* nameCopy = static_cast<char*>(uprv_malloc(keyLength));
char *nameCopy=(char *)uprv_malloc(keyLength);
if(nameCopy==nullptr) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return nullptr;
@@ -341,7 +317,7 @@ Normalizer2::getInstance(const char *packageName,
uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
} else {
// race condition
allModes = static_cast<Norm2AllModes*>(temp);
allModes=(Norm2AllModes *)temp;
}
}
}
@@ -417,11 +393,6 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
}
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
}
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
const char *name,

View File

@@ -1,25 +1,23 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <optional>
#include <string_view>
#include <utility>
#include "bytesinkutil.h" // StringByteSink<CharString>
#include "bytesinkutil.h" // CharStringByteSink
#include "charstr.h"
#include "cstring.h"
#include "ulocimp.h"
#include "unicode/localebuilder.h"
#include "unicode/locid.h"
namespace {
U_NAMESPACE_BEGIN
inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
constexpr const char* kAttributeKey = "attribute";
bool _isExtensionSubtags(char key, const char* s, int32_t len) {
static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
switch (uprv_tolower(key)) {
case 'u':
return ultag_isUnicodeExtensionSubtags(s, len);
@@ -32,10 +30,6 @@ bool _isExtensionSubtags(char key, const char* s, int32_t len) {
}
}
} // namespace
U_NAMESPACE_BEGIN
LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
script_(), region_(), variant_(nullptr), extensions_(nullptr)
{
@@ -74,10 +68,8 @@ LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
return *this;
}
namespace {
void setField(StringPiece input, char* dest, UErrorCode& errorCode,
bool (*test)(const char*, int32_t)) {
static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
UBool (*test)(const char*, int32_t)) {
if (U_FAILURE(errorCode)) { return; }
if (input.empty()) {
dest[0] = '\0';
@@ -89,8 +81,6 @@ void setField(StringPiece input, char* dest, UErrorCode& errorCode,
}
}
} // namespace
LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
{
setField(language, language_, status_, &ultag_isLanguageSubtag);
@@ -109,9 +99,7 @@ LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
return *this;
}
namespace {
void transform(char* data, int32_t len) {
static void transform(char* data, int32_t len) {
for (int32_t i = 0; i < len; i++, data++) {
if (*data == '_') {
*data = '-';
@@ -121,8 +109,6 @@ void transform(char* data, int32_t len) {
}
}
} // namespace
LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
{
if (U_FAILURE(status_)) { return *this; }
@@ -148,9 +134,7 @@ LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
return *this;
}
namespace {
bool
static bool
_isKeywordValue(const char* key, const char* value, int32_t value_len)
{
if (key[1] == '\0') {
@@ -164,18 +148,15 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
// otherwise: unicode extension value
// We need to convert from legacy key/value to unicode
// key/value
std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
return unicode_locale_key.has_value() &&
unicode_locale_type.has_value() &&
ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
static_cast<int32_t>(unicode_locale_key->size())) &&
ultag_isUnicodeLocaleType(unicode_locale_type->data(),
static_cast<int32_t>(unicode_locale_type->size()));
return unicode_locale_key && unicode_locale_type &&
ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
ultag_isUnicodeLocaleType(unicode_locale_type, -1);
}
void
static void
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
Locale& to, bool validate, UErrorCode& errorCode)
{
@@ -188,7 +169,9 @@ _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
}
const char* key;
while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
auto value = from.getKeywordValue<CharString>(key, errorCode);
CharString value;
CharStringByteSink sink(&value);
from.getKeywordValue(key, sink, errorCode);
if (U_FAILURE(errorCode)) { return; }
if (uprv_strcmp(key, kAttributeKey) == 0) {
transform(value.data(), value.length());
@@ -203,10 +186,9 @@ _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
}
}
void
void static
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
// Clear Unicode attributes
locale.setKeywordValue(kAttributeKey, "", errorCode);
@@ -219,10 +201,9 @@ _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
}
}
void
static void
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
// Add the unicode extensions to extensions_
CharString locale_str("und-u-", errorCode);
locale_str.append(value, errorCode);
@@ -231,8 +212,6 @@ _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& error
locale, false, errorCode);
}
} // namespace
LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
{
if (U_FAILURE(status_)) { return *this; }
@@ -310,8 +289,10 @@ LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
return *this;
}
CharString attributes;
CharStringByteSink sink(&attributes);
UErrorCode localErrorCode = U_ZERO_ERROR;
auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
if (U_FAILURE(localErrorCode)) {
CharString new_attributes(value_str.data(), status_);
// No attributes, set the attribute.
@@ -363,7 +344,9 @@ LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
}
if (extensions_ == nullptr) { return *this; }
UErrorCode localErrorCode = U_ZERO_ERROR;
auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
CharString attributes;
CharStringByteSink sink(&attributes);
extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
// get failure, just return
if (U_FAILURE(localErrorCode)) { return *this; }
// Do not have any attributes, just return.

File diff suppressed because it is too large Load Diff

View File

@@ -4,8 +4,6 @@
// localematcher.cpp
// created: 2019may08 Markus W. Scherer
#include <optional>
#include "unicode/utypes.h"
#include "unicode/localebuilder.h"
#include "unicode/localematcher.h"
@@ -304,12 +302,12 @@ LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
namespace {
LSR getMaximalLsrOrUnd(const LikelySubtags &likelySubtags, const Locale &locale,
LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
return UND_LSR;
} else {
return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
}
}
@@ -340,7 +338,7 @@ int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength
}
LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
likelySubtags(*LikelySubtags::getSingleton(errorCode)),
likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
localeDistance(*LocaleDistance::getSingleton(errorCode)),
thresholdDistance(builder.thresholdDistance_),
demotionPerDesiredLocale(0),
@@ -553,7 +551,7 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
class LocaleLsrIterator {
public:
LocaleLsrIterator(const LikelySubtags &likelySubtags, Locale::Iterator &locales,
LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
ULocMatchLifetime lifetime) :
likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
@@ -598,7 +596,7 @@ public:
}
private:
const LikelySubtags &likelySubtags;
const XLikelySubtags &likelySubtags;
Locale::Iterator &locales;
ULocMatchLifetime lifetime;
const Locale *current = nullptr, *remembered = nullptr;
@@ -607,11 +605,10 @@ private:
const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return nullptr; }
std::optional<int32_t> suppIndex = getBestSuppIndex(
int32_t suppIndex = getBestSuppIndex(
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
nullptr, errorCode);
return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
: defaultLocale;
return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
}
const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
@@ -621,14 +618,12 @@ const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
return defaultLocale;
}
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
: defaultLocale;
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
}
const Locale *LocaleMatcher::getBestMatchForListString(
StringPiece desiredLocaleList, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return nullptr; }
LocalePriorityList list(desiredLocaleList, errorCode);
LocalePriorityList::Iterator iter = list.iterator();
return getBestMatch(iter, errorCode);
@@ -639,13 +634,13 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
if (U_FAILURE(errorCode)) {
return Result(nullptr, defaultLocale, -1, -1, false);
}
std::optional<int32_t> suppIndex = getBestSuppIndex(
int32_t suppIndex = getBestSuppIndex(
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
nullptr, errorCode);
if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, -1, false);
} else {
return Result(&desiredLocale, supportedLocales[*suppIndex], 0, *suppIndex, false);
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, false);
}
}
@@ -655,19 +650,18 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
return Result(nullptr, defaultLocale, -1, -1, false);
}
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, -1, false);
} else {
return Result(lsrIter.orphanRemembered(), supportedLocales[*suppIndex],
lsrIter.getBestDesiredIndex(), *suppIndex, true);
return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
lsrIter.getBestDesiredIndex(), suppIndex, true);
}
}
std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
LocaleLsrIterator *remainingIter,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return std::nullopt; }
int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return -1; }
int32_t desiredIndex = 0;
int32_t bestSupportedLsrIndex = -1;
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
@@ -690,7 +684,7 @@ std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
if (remainingIter != nullptr) {
remainingIter->rememberCurrent(desiredIndex, errorCode);
if (U_FAILURE(errorCode)) { return std::nullopt; }
if (U_FAILURE(errorCode)) { return -1; }
}
bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
}
@@ -701,21 +695,20 @@ std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
break;
}
desiredLSR = remainingIter->next(errorCode);
if (U_FAILURE(errorCode)) { return std::nullopt; }
if (U_FAILURE(errorCode)) { return -1; }
++desiredIndex;
}
if (bestSupportedLsrIndex < 0) {
// no good match
return std::nullopt;
return -1;
}
return supportedIndexes[bestSupportedLsrIndex];
}
UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return false; }
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
if (U_FAILURE(errorCode)) { return false; }
if (U_FAILURE(errorCode)) { return 0; }
const LSR *pSuppLSR = &suppLSR;
int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
@@ -725,10 +718,9 @@ UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
}
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return 0.; }
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
if (U_FAILURE(errorCode)) { return 0.; }
if (U_FAILURE(errorCode)) { return 0; }
const LSR *pSuppLSR = &suppLSR;
int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
@@ -780,7 +772,7 @@ int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desired
ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
}
const char *bestStr = result.getSupportedLocale()->getName();
int32_t bestLength = static_cast<int32_t>(uprv_strlen(bestStr));
int32_t bestLength = (int32_t)uprv_strlen(bestStr);
if (bestLength <= capacity) {
uprv_memcpy(dest, bestStr, bestLength);
}

View File

@@ -21,13 +21,13 @@ U_NAMESPACE_BEGIN
namespace {
int32_t hashLocale(const UHashTok token) {
const auto* locale = static_cast<const Locale*>(token.pointer);
auto *locale = static_cast<const Locale *>(token.pointer);
return locale->hashCode();
}
UBool compareLocales(const UHashTok t1, const UHashTok t2) {
const auto* l1 = static_cast<const Locale*>(t1.pointer);
const auto* l2 = static_cast<const Locale*>(t2.pointer);
auto *l1 = static_cast<const Locale *>(t1.pointer);
auto *l2 = static_cast<const Locale *>(t2.pointer);
return *l1 == *l2;
}

View File

@@ -39,10 +39,14 @@ static icu::Locale* availableLocaleList = nullptr;
static int32_t availableLocaleListCount;
static icu::UInitOnce gInitOnceLocale {};
namespace {
U_NAMESPACE_END
UBool U_CALLCONV locale_available_cleanup()
U_CDECL_BEGIN
static UBool U_CALLCONV locale_available_cleanup()
{
U_NAMESPACE_USE
if (availableLocaleList) {
delete []availableLocaleList;
availableLocaleList = nullptr;
@@ -53,7 +57,9 @@ UBool U_CALLCONV locale_available_cleanup()
return true;
}
} // namespace
U_CDECL_END
U_NAMESPACE_BEGIN
void U_CALLCONV locale_available_init() {
// This function is a friend of class Locale.
@@ -101,9 +107,10 @@ icu::UInitOnce ginstalledLocalesInitOnce {};
class AvailableLocalesSink : public ResourceSink {
public:
void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
if (U_FAILURE(status)) { return; }
ResourceTable resIndexTable = value.getTable(status);
if (U_FAILURE(status)) { return; }
if (U_FAILURE(status)) {
return;
}
for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) {
ULocAvailableType type;
if (uprv_strcmp(key, "InstalledLocales") == 0) {
@@ -137,8 +144,7 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {
}
const char* next(int32_t *resultLength, UErrorCode &status) override {
if (U_FAILURE(status)) { return nullptr; }
const char* next(int32_t *resultLength, UErrorCode&) override {
ULocAvailableType actualType = fType;
int32_t actualIndex = fIndex++;
@@ -170,13 +176,11 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
return result;
}
void reset(UErrorCode &status) override {
if (U_FAILURE(status)) { return; }
void reset(UErrorCode&) override {
fIndex = 0;
}
int32_t count(UErrorCode &status) const override {
if (U_FAILURE(status)) { return 0; }
int32_t count(UErrorCode&) const override {
if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
+ gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES];
@@ -192,7 +196,7 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
/* ### Get available **************************************************/
UBool U_CALLCONV uloc_cleanup() {
static UBool U_CALLCONV uloc_cleanup() {
for (int32_t i = 0; i < UPRV_LENGTHOF(gAvailableLocaleNames); i++) {
uprv_free(gAvailableLocaleNames[i]);
gAvailableLocaleNames[i] = nullptr;
@@ -205,7 +209,7 @@ UBool U_CALLCONV uloc_cleanup() {
// Load Installed Locales. This function will be called exactly once
// via the initOnce mechanism.
void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
static void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
icu::LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "res_index", &status));
@@ -263,3 +267,4 @@ uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {
}
return uenum_openFromStringEnumeration(result.orphan(), status);
}

View File

@@ -17,7 +17,7 @@ U_NAMESPACE_BEGIN
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
const char* id = getLocaleID(type, status);
return Locale(id != nullptr ? id : "");
return Locale((id != 0) ? id : "");
}
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
@@ -37,11 +37,11 @@ const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != nullptr) {
if (validID != 0) {
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}
if (actualID != nullptr) {
if (actualID != 0) {
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}

View File

@@ -99,7 +99,7 @@ inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
inline LocaleBased::LocaleBased(const char* validAlias,
const char* actualAlias) :
// ugh: cast away const
valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) {
valid((char*)validAlias), actual((char*)actualAlias) {
}
U_NAMESPACE_END

View File

@@ -26,6 +26,7 @@
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
@@ -59,7 +60,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -71,7 +72,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -99,7 +100,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -111,7 +112,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -139,7 +140,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -151,7 +152,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -179,7 +180,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -191,7 +192,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -219,7 +220,7 @@ Locale::getDisplayName(const Locale &displayLocale,
int32_t length;
buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -231,7 +232,7 @@ Locale::getDisplayName(const Locale &displayLocale,
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
buffer=result.getBuffer(length);
if (buffer == nullptr) {
if(buffer==0) {
result.truncate(0);
return result;
}
@@ -245,7 +246,7 @@ Locale::getDisplayName(const Locale &displayLocale,
return result;
}
#if !UCONFIG_NO_BREAK_ITERATION
#if ! UCONFIG_NO_BREAK_ITERATION
// -------------------------------------
// Gets the objectLocale display name in the default locale language.
@@ -275,53 +276,50 @@ U_NAMESPACE_END
U_NAMESPACE_USE
namespace {
/* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
the locale ResourceBundle data file.*/
constexpr char _kLanguages[] = "Languages";
constexpr char _kScripts[] = "Scripts";
constexpr char _kScriptsStandAlone[] = "Scripts%stand-alone";
constexpr char _kCountries[] = "Countries";
constexpr char _kVariants[] = "Variants";
constexpr char _kKeys[] = "Keys";
constexpr char _kTypes[] = "Types";
//constexpr char _kRootName[] = "root";
constexpr char _kCurrency[] = "currency";
constexpr char _kCurrencies[] = "Currencies";
constexpr char _kLocaleDisplayPattern[] = "localeDisplayPattern";
constexpr char _kPattern[] = "pattern";
constexpr char _kSeparator[] = "separator";
static const char _kLanguages[] = "Languages";
static const char _kScripts[] = "Scripts";
static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
static const char _kCountries[] = "Countries";
static const char _kVariants[] = "Variants";
static const char _kKeys[] = "Keys";
static const char _kTypes[] = "Types";
//static const char _kRootName[] = "root";
static const char _kCurrency[] = "currency";
static const char _kCurrencies[] = "Currencies";
static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
static const char _kPattern[] = "pattern";
static const char _kSeparator[] = "separator";
/* ### Display name **************************************************/
int32_t
static int32_t
_getStringOrCopyKey(const char *path, const char *locale,
const char *tableKey,
const char* subTableKey,
const char *itemKey,
const char *substitute,
char16_t *dest, int32_t destCapacity,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return 0; }
UErrorCode *pErrorCode) {
const char16_t *s = nullptr;
int32_t length = 0;
if(itemKey==nullptr) {
/* top-level item: normal resource bundle access */
icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));
icu::LocalUResourceBundlePointer rb(ures_open(path, locale, pErrorCode));
if(U_SUCCESS(errorCode)) {
s=ures_getStringByKey(rb.getAlias(), tableKey, &length, &errorCode);
if(U_SUCCESS(*pErrorCode)) {
s=ures_getStringByKey(rb.getAlias(), tableKey, &length, pErrorCode);
/* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
}
} else {
bool isLanguageCode = (uprv_strncmp(tableKey, _kLanguages, 9) == 0);
/* Language code should not be a number. If it is, set the error code. */
if (isLanguageCode && uprv_strtol(itemKey, nullptr, 10)) {
errorCode = U_MISSING_RESOURCE_ERROR;
*pErrorCode = U_MISSING_RESOURCE_ERROR;
} else {
/* second-level item, use special fallback */
s=uloc_getTableStringWithFallback(path, locale,
@@ -329,87 +327,91 @@ _getStringOrCopyKey(const char *path, const char *locale,
subTableKey,
itemKey,
&length,
&errorCode);
if (U_FAILURE(errorCode) && isLanguageCode && itemKey != nullptr) {
pErrorCode);
if (U_FAILURE(*pErrorCode) && isLanguageCode && itemKey != nullptr) {
// convert itemKey locale code to canonical form and try again, ICU-20870
errorCode = U_ZERO_ERROR;
*pErrorCode = U_ZERO_ERROR;
Locale canonKey = Locale::createCanonical(itemKey);
s=uloc_getTableStringWithFallback(path, locale,
tableKey,
subTableKey,
canonKey.getName(),
&length,
&errorCode);
pErrorCode);
}
}
}
if(U_SUCCESS(errorCode)) {
if(U_SUCCESS(*pErrorCode)) {
int32_t copyLength=uprv_min(length, destCapacity);
if(copyLength>0 && s != nullptr) {
u_memcpy(dest, s, copyLength);
}
} else {
/* no string from a resource bundle: convert the substitute */
length = static_cast<int32_t>(uprv_strlen(substitute));
length=(int32_t)uprv_strlen(substitute);
u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
errorCode = U_USING_DEFAULT_WARNING;
*pErrorCode=U_USING_DEFAULT_WARNING;
}
return u_terminateUChars(dest, destCapacity, length, &errorCode);
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&);
typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
int32_t
static int32_t
_getDisplayNameForComponent(const char *locale,
const char *displayLocale,
char16_t *dest, int32_t destCapacity,
UDisplayNameGetter *getter,
const char *tag,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return 0; }
UErrorCode *pErrorCode) {
char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
int32_t length;
UErrorCode localStatus;
const char* root = nullptr;
/* argument checking */
if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
localStatus = U_ZERO_ERROR;
icu::CharString localeBuffer = (*getter)(locale, localStatus);
if (U_FAILURE(localStatus)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if (localeBuffer.isEmpty()) {
if(length==0) {
// For the display name, we treat this as unknown language (ICU-20273).
if (getter == ulocimp_getLanguage) {
localeBuffer.append("und", errorCode);
if (getter == uloc_getLanguage) {
uprv_strcpy(localeBuffer, "und");
} else {
return u_terminateUChars(dest, destCapacity, 0, &errorCode);
return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
}
}
root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;
return _getStringOrCopyKey(root, displayLocale,
tag, nullptr, localeBuffer.data(),
localeBuffer.data(),
tag, nullptr, localeBuffer,
localeBuffer,
dest, destCapacity,
errorCode);
pErrorCode);
}
} // namespace
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char *locale,
const char *displayLocale,
char16_t *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getLanguage, _kLanguages, *pErrorCode);
uloc_getLanguage, _kLanguages, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@@ -418,20 +420,19 @@ uloc_getDisplayScript(const char* locale,
char16_t *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
if (U_FAILURE(*pErrorCode)) { return 0; }
UErrorCode err = U_ZERO_ERROR;
int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getScript, _kScriptsStandAlone, err);
uloc_getScript, _kScriptsStandAlone, &err);
if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) {
// For preflight, return the max of the value and the fallback.
int32_t fallback_res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getScript, _kScripts, *pErrorCode);
uloc_getScript, _kScripts, pErrorCode);
return (fallback_res > res) ? fallback_res : res;
}
if ( err == U_USING_DEFAULT_WARNING ) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getScript, _kScripts, *pErrorCode);
uloc_getScript, _kScripts, pErrorCode);
} else {
*pErrorCode = err;
return res;
@@ -445,7 +446,7 @@ uloc_getDisplayScriptInContext(const char* locale,
UErrorCode *pErrorCode)
{
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getScript, _kScripts, *pErrorCode);
uloc_getScript, _kScripts, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@@ -454,7 +455,7 @@ uloc_getDisplayCountry(const char *locale,
char16_t *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getRegion, _kCountries, *pErrorCode);
uloc_getCountry, _kCountries, pErrorCode);
}
/*
@@ -468,7 +469,7 @@ uloc_getDisplayVariant(const char *locale,
char16_t *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
ulocimp_getVariant, _kVariants, *pErrorCode);
uloc_getVariant, _kVariants, pErrorCode);
}
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
@@ -808,7 +809,7 @@ uloc_getDisplayKeyword(const char* keyword,
keyword,
keyword,
dest, destCapacity,
*status);
status);
}
@@ -836,8 +837,9 @@ uloc_getDisplayKeywordValue( const char* locale,
/* get the keyword value */
CharString keywordValue;
if (keyword != nullptr && *keyword != '\0') {
keywordValue = ulocimp_getKeywordValue(locale, keyword, *status);
{
CharStringByteSink sink(&keywordValue);
ulocimp_getKeywordValue(locale, keyword, sink, status);
}
/*
@@ -895,6 +897,6 @@ uloc_getDisplayKeywordValue( const char* locale,
keywordValue.data(),
keywordValue.data(),
dest, destCapacity,
*status);
status);
}
}

View File

@@ -59,7 +59,7 @@ UBool U_CALLCONV cleanup() {
void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
U_ASSERT(gLocaleDistance == nullptr);
const LikelySubtags &likely = *LikelySubtags::getSingleton(errorCode);
const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode);
if (U_FAILURE(errorCode)) { return; }
const LocaleDistanceData &data = likely.getDistanceData();
if (data.distanceTrieBytes == nullptr ||
@@ -83,7 +83,7 @@ const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
return gLocaleDistance;
}
LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely) :
LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) :
likelySubtags(likely),
trie(data.distanceTrieBytes),
regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
@@ -119,7 +119,7 @@ int32_t LocaleDistance::getBestIndexAndDistance(
uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int32_t bestIndex = -1;
// Cached lookup info from LikelySubtags.compareLikely().
// Cached lookup info from XLikelySubtags.compareLikely().
int32_t bestLikelyInfo = -1;
for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
const LSR &supported = *supportedLSRs[slIndex];
@@ -399,7 +399,7 @@ int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue)
}
}
bool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
// Linear search for a very short list (length 6 as of 2019),
// because we look for equivalence not equality, and
// because it's easy.

View File

@@ -62,7 +62,7 @@ public:
ULocMatchFavorSubtag favorSubtag,
ULocMatchDirection direction) const;
bool isParadigmLSR(const LSR &lsr) const;
UBool isParadigmLSR(const LSR &lsr) const;
int32_t getDefaultScriptDistance() const {
return defaultScriptDistance;
@@ -83,14 +83,14 @@ private:
// tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely);
LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
LocaleDistance(const LocaleDistance &other) = delete;
LocaleDistance &operator=(const LocaleDistance &other) = delete;
static void initLocaleDistance(UErrorCode &errorCode);
bool isMatch(const LSR &desired, const LSR &supported,
int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
UBool isMatch(const LSR &desired, const LSR &supported,
int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
const LSR *pSupp = &supported;
return getBestIndexAndDistance(
desired, &pSupp, 1,
@@ -119,7 +119,7 @@ private:
return defaultRegionDistance;
}
const LikelySubtags &likelySubtags;
const XLikelySubtags &likelySubtags;
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.

View File

@@ -18,17 +18,45 @@
#include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h"
#include "unicode/ucurr.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "mutex.h"
#include "uassert.h"
#include "ulocimp.h"
#include "umutex.h"
#include "ureslocs.h"
#include "uresimp.h"
#include <stdarg.h>
/**
 * Concatenate a number of null-terminated strings into buffer, leaving a
 * null-terminated string. The variadic arguments are C strings; the last
 * argument must be a null pointer, which terminates the list.
 *
 * At most buflen - 1 characters are copied; anything that does not fit is
 * silently dropped, so the result may be a truncated concatenation.
 *
 * @param buffer destination array of at least buflen chars
 * @param buflen capacity of buffer, including room for the trailing null
 * @return the length of the string in the buffer, not counting the trailing
 *         null, or -1 if there is an error (buffer is null, or buflen < 1).
 */
static int32_t ncat(char *buffer, uint32_t buflen, ...) {
    // Validate first: forming buffer + buflen - 1 from a null buffer or a
    // zero capacity would be undefined behaviour.
    if (buffer == nullptr || buflen < 1) {
        return -1;
    }

    char *p = buffer;
    // One slot is always kept free for the terminating null.
    const char *const e = buffer + buflen - 1;

    va_list args;
    va_start(args, buflen);
    const char *str;
    while ((str = va_arg(args, const char *)) != nullptr) {
        char c;
        // Once p reaches e the remaining arguments are still consumed (so the
        // terminating null pointer is found) but nothing more is copied.
        while (p != e && (c = *str++) != 0) {
            *p++ = c;
        }
    }
    va_end(args);

    *p = 0;
    return static_cast<int32_t>(p - buffer);
}
U_NAMESPACE_BEGIN
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -36,13 +64,12 @@ U_NAMESPACE_BEGIN
// Access resource data for locale components.
// Wrap code in uloc.c for now.
class ICUDataTable {
const char* const path;
const char* path;
Locale locale;
public:
// Note: path should be a pointer to a statically allocated string.
ICUDataTable(const char* path, const Locale& locale);
~ICUDataTable() = default;
~ICUDataTable();
const Locale& getLocale();
@@ -68,9 +95,23 @@ ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeSt
}
ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
: path(path), locale(locale)
: path(nullptr), locale(Locale::getRoot())
{
U_ASSERT(path != nullptr);
if (path) {
int32_t len = static_cast<int32_t>(uprv_strlen(path));
this->path = (const char*) uprv_malloc(len + 1);
if (this->path) {
uprv_strcpy((char *)this->path, path);
this->locale = locale;
}
}
}
// Frees the path string held by this table (allocated with uprv_malloc by the
// constructor when a path was supplied) and clears the pointer.
ICUDataTable::~ICUDataTable() {
    if (path == nullptr) {
        return;  // nothing was allocated
    }
    // path is stored as const char*; drop the const only to hand it to the deallocator.
    uprv_free(const_cast<char*>(path));
    path = nullptr;
}
const Locale&
@@ -264,7 +305,7 @@ class LocaleDisplayNamesImpl : public LocaleDisplayNames {
};
// Capitalization transforms. For each usage type, indicates whether to titlecase for
// the context specified in capitalizationContext (which we know at construction time)
bool fCapitalization[kCapContextUsageCount];
UBool fCapitalization[kCapContextUsageCount];
public:
// constructor
@@ -300,12 +341,12 @@ private:
UnicodeString& result, bool substitute) const;
UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, bool skipAdjust) const;
UnicodeString& regionDisplayName(const char* region, UnicodeString& result, bool skipAdjust) const;
UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, bool skipAdjust) const;
UnicodeString& keyDisplayName(const char* key, UnicodeString& result, bool skipAdjust) const;
UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const;
UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
UnicodeString& keyValueDisplayName(const char* key, const char* value,
UnicodeString& result, bool skipAdjust) const;
UnicodeString& result, UBool skipAdjust) const;
void initialize();
struct CapitalizationContextSink;
@@ -336,11 +377,10 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
{
while (length-- > 0) {
UDisplayContext value = *contexts++;
UDisplayContextType selector =
static_cast<UDisplayContextType>(static_cast<uint32_t>(value) >> 8);
UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8);
switch (selector) {
case UDISPCTX_TYPE_DIALECT_HANDLING:
dialectHandling = static_cast<UDialectHandling>(value);
dialectHandling = (UDialectHandling)value;
break;
case UDISPCTX_TYPE_CAPITALIZATION:
capitalizationContext = value;
@@ -359,7 +399,7 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
}
struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
bool hasCapitalizationUsage;
UBool hasCapitalizationUsage;
LocaleDisplayNamesImpl& parent;
CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
@@ -408,7 +448,7 @@ LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink()
void
LocaleDisplayNamesImpl::initialize() {
LocaleDisplayNamesImpl* nonConstThis = this;
LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this;
nonConstThis->locale = langData.getLocale() == Locale::getRoot()
? regionData.getLocale()
: langData.getLocale();
@@ -427,16 +467,16 @@ LocaleDisplayNamesImpl::initialize() {
pattern = UnicodeString("{0} ({1})", -1, US_INV);
}
format.applyPatternMinMaxArguments(pattern, 2, 2, status);
if (pattern.indexOf(static_cast<char16_t>(0xFF08)) >= 0) {
formatOpenParen.setTo(static_cast<char16_t>(0xFF08)); // fullwidth (
formatReplaceOpenParen.setTo(static_cast<char16_t>(0xFF3B)); // fullwidth [
formatCloseParen.setTo(static_cast<char16_t>(0xFF09)); // fullwidth )
formatReplaceCloseParen.setTo(static_cast<char16_t>(0xFF3D)); // fullwidth ]
if (pattern.indexOf((char16_t)0xFF08) >= 0) {
formatOpenParen.setTo((char16_t)0xFF08); // fullwidth (
formatReplaceOpenParen.setTo((char16_t)0xFF3B); // fullwidth [
formatCloseParen.setTo((char16_t)0xFF09); // fullwidth )
formatReplaceCloseParen.setTo((char16_t)0xFF3D); // fullwidth ]
} else {
formatOpenParen.setTo(static_cast<char16_t>(0x0028)); // (
formatReplaceOpenParen.setTo(static_cast<char16_t>(0x005B)); // [
formatCloseParen.setTo(static_cast<char16_t>(0x0029)); // )
formatReplaceCloseParen.setTo(static_cast<char16_t>(0x005D)); // ]
formatOpenParen.setTo((char16_t)0x0028); // (
formatReplaceOpenParen.setTo((char16_t)0x005B); // [
formatCloseParen.setTo((char16_t)0x0029); // )
formatReplaceCloseParen.setTo((char16_t)0x005D); // ]
}
UnicodeString ktPattern;
@@ -450,7 +490,7 @@ LocaleDisplayNamesImpl::initialize() {
#if !UCONFIG_NO_BREAK_ITERATION
// Only get the context data if we need it! This is a const object so we know now...
// Also check whether we will need a break iterator (depends on the data)
bool needBrkIter = false;
UBool needBrkIter = false;
if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
LocalUResourceBundlePointer resource(ures_open(nullptr, locale.getName(), &status));
if (U_FAILURE(status)) { return; }
@@ -496,7 +536,7 @@ UDisplayContext
LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
switch (type) {
case UDISPCTX_TYPE_DIALECT_HANDLING:
return static_cast<UDisplayContext>(dialectHandling);
return (UDisplayContext)dialectHandling;
case UDISPCTX_TYPE_CAPITALIZATION:
return capitalizationContext;
case UDISPCTX_TYPE_DISPLAY_LENGTH:
@@ -506,7 +546,7 @@ LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
default:
break;
}
return static_cast<UDisplayContext>(0);
return (UDisplayContext)0;
}
UnicodeString&
@@ -542,51 +582,36 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
const char* country = loc.getCountry();
const char* variant = loc.getVariant();
bool hasScript = uprv_strlen(script) > 0;
bool hasCountry = uprv_strlen(country) > 0;
bool hasVariant = uprv_strlen(variant) > 0;
UBool hasScript = uprv_strlen(script) > 0;
UBool hasCountry = uprv_strlen(country) > 0;
UBool hasVariant = uprv_strlen(variant) > 0;
if (dialectHandling == ULDN_DIALECT_NAMES) {
UErrorCode status = U_ZERO_ERROR;
CharString buffer;
char buffer[ULOC_FULLNAME_CAPACITY];
do { // loop construct is so we can break early out of search
if (hasScript && hasCountry) {
buffer.append(lang, status)
.append('_', status)
.append(script, status)
.append('_', status)
.append(country, status);
if (U_SUCCESS(status)) {
localeIdName(buffer.data(), resultName, false);
if (!resultName.isBogus()) {
hasScript = false;
hasCountry = false;
break;
}
ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
localeIdName(buffer, resultName, false);
if (!resultName.isBogus()) {
hasScript = false;
hasCountry = false;
break;
}
}
if (hasScript) {
buffer.append(lang, status)
.append('_', status)
.append(script, status);
if (U_SUCCESS(status)) {
localeIdName(buffer.data(), resultName, false);
if (!resultName.isBogus()) {
hasScript = false;
break;
}
ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
localeIdName(buffer, resultName, false);
if (!resultName.isBogus()) {
hasScript = false;
break;
}
}
if (hasCountry) {
buffer.append(lang, status)
.append('_', status)
.append(country, status);
if (U_SUCCESS(status)) {
localeIdName(buffer.data(), resultName, false);
if (!resultName.isBogus()) {
hasCountry = false;
break;
}
ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
localeIdName(buffer, resultName, false);
if (!resultName.isBogus()) {
hasCountry = false;
break;
}
}
} while (false);
@@ -633,19 +658,21 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
LocalPointer<StringEnumeration> e(loc.createKeywords(status));
if (e.isValid() && U_SUCCESS(status)) {
UnicodeString temp2;
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
const char* key;
while ((key = e->next((int32_t*)nullptr, status)) != nullptr) {
auto value = loc.getKeywordValue<CharString>(key, status);
if (U_FAILURE(status)) {
return result;
while ((key = e->next((int32_t *)0, status)) != nullptr) {
value[0] = 0;
loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
return result;
}
keyDisplayName(key, temp, true);
temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
keyValueDisplayName(key, value.data(), temp2, true);
keyValueDisplayName(key, value, temp2, true);
temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
if (temp2 != UnicodeString(value.data(), -1, US_INV)) {
if (temp2 != UnicodeString(value, -1, US_INV)) {
appendWithSep(resultRemainder, temp2);
} else if (temp != UnicodeString(key, -1, US_INV)) {
UnicodeString temp3;
@@ -653,7 +680,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
appendWithSep(resultRemainder, temp3);
} else {
appendWithSep(resultRemainder, temp)
.append(static_cast<char16_t>(0x3d) /* = */)
.append((char16_t)0x3d /* = */)
.append(temp2);
}
}
@@ -752,7 +779,7 @@ LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
UnicodeString&
LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
UnicodeString& result,
bool skipAdjust) const {
UBool skipAdjust) const {
if (nameLength == UDISPCTX_LENGTH_SHORT) {
langData.getNoFallback("Scripts%short", script, result);
if (!result.isBogus()) {
@@ -782,7 +809,7 @@ LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
UnicodeString&
LocaleDisplayNamesImpl::regionDisplayName(const char* region,
UnicodeString& result,
bool skipAdjust) const {
UBool skipAdjust) const {
if (nameLength == UDISPCTX_LENGTH_SHORT) {
regionData.getNoFallback("Countries%short", region, result);
if (!result.isBogus()) {
@@ -807,7 +834,7 @@ LocaleDisplayNamesImpl::regionDisplayName(const char* region,
UnicodeString&
LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
UnicodeString& result,
bool skipAdjust) const {
UBool skipAdjust) const {
// don't have a resource for short variant names
if (substitute == UDISPCTX_SUBSTITUTE) {
langData.get("Variants", variant, result);
@@ -826,7 +853,7 @@ LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
UnicodeString&
LocaleDisplayNamesImpl::keyDisplayName(const char* key,
UnicodeString& result,
bool skipAdjust) const {
UBool skipAdjust) const {
// don't have a resource for short key names
if (substitute == UDISPCTX_SUBSTITUTE) {
langData.get("Keys", key, result);
@@ -846,7 +873,7 @@ UnicodeString&
LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
const char* value,
UnicodeString& result,
bool skipAdjust) const {
UBool skipAdjust) const {
if (uprv_strcmp(key, "currency") == 0) {
// ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
UErrorCode sts = U_ZERO_ERROR;
@@ -912,7 +939,7 @@ uldn_open(const char * locale,
UDialectHandling dialectHandling,
UErrorCode *pErrorCode) {
if (U_FAILURE(*pErrorCode)) {
return nullptr;
return 0;
}
if (locale == nullptr) {
locale = uloc_getDefault();
@@ -925,7 +952,7 @@ uldn_openForContext(const char * locale,
UDisplayContext *contexts, int32_t length,
UErrorCode *pErrorCode) {
if (U_FAILURE(*pErrorCode)) {
return nullptr;
return 0;
}
if (locale == nullptr) {
locale = uloc_getDefault();

View File

@@ -31,8 +31,6 @@
******************************************************************************
*/
#include <optional>
#include <string_view>
#include <utility>
#include "unicode/bytestream.h"
@@ -59,6 +57,10 @@
#include "ustr_imp.h"
#include "uvector.h"
U_CDECL_BEGIN
static UBool U_CALLCONV locale_cleanup();
U_CDECL_END
U_NAMESPACE_BEGIN
static Locale *gLocaleCache = nullptr;
@@ -104,17 +106,16 @@ typedef enum ELocalePos {
eMAX_LOCALES
} ELocalePos;
namespace {
U_CDECL_BEGIN
//
// Deleter function for Locales owned by the default Locale hash table/
//
void U_CALLCONV
static void U_CALLCONV
deleteLocale(void *obj) {
delete static_cast<icu::Locale*>(obj);
delete (icu::Locale *) obj;
}
UBool U_CALLCONV locale_cleanup()
static UBool U_CALLCONV locale_cleanup()
{
U_NAMESPACE_USE
@@ -130,11 +131,12 @@ UBool U_CALLCONV locale_cleanup()
return true;
}
void U_CALLCONV locale_init(UErrorCode &status) {
static void U_CALLCONV locale_init(UErrorCode &status) {
U_NAMESPACE_USE
U_ASSERT(gLocaleCache == nullptr);
gLocaleCache = new Locale[static_cast<int>(eMAX_LOCALES)];
gLocaleCache = new Locale[(int)eMAX_LOCALES];
if (gLocaleCache == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
@@ -161,7 +163,7 @@ void U_CALLCONV locale_init(UErrorCode &status) {
gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
}
} // namespace
U_CDECL_END
U_NAMESPACE_BEGIN
@@ -180,8 +182,15 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
canonicalize = true; // always canonicalize host ID
}
CharString localeNameBuf =
canonicalize ? ulocimp_canonicalize(id, status) : ulocimp_getName(id, status);
CharString localeNameBuf;
{
CharStringByteSink sink(&localeNameBuf);
if (canonicalize) {
ulocimp_canonicalize(id, sink, &status);
} else {
ulocimp_getName(id, sink, &status);
}
}
if (U_FAILURE(status)) {
return gDefaultLocale;
@@ -196,7 +205,7 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
}
Locale* newDefault = static_cast<Locale*>(uhash_get(gDefaultLocalesHashT, localeNameBuf.data()));
Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
if (newDefault == nullptr) {
newDefault = new Locale(Locale::eBOGUS);
if (newDefault == nullptr) {
@@ -204,7 +213,7 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
return gDefaultLocale;
}
newDefault->init(localeNameBuf.data(), false);
uhash_put(gDefaultLocalesHashT, const_cast<char*>(newDefault->getName()), newDefault, &status);
uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
if (U_FAILURE(status)) {
return gDefaultLocale;
}
@@ -298,7 +307,7 @@ Locale::Locale( const char * newLanguage,
// Language
if ( newLanguage != nullptr )
{
lsize = static_cast<int32_t>(uprv_strlen(newLanguage));
lsize = (int32_t)uprv_strlen(newLanguage);
if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
@@ -310,7 +319,7 @@ Locale::Locale( const char * newLanguage,
// _Country
if ( newCountry != nullptr )
{
csize = static_cast<int32_t>(uprv_strlen(newCountry));
csize = (int32_t)uprv_strlen(newCountry);
if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
@@ -327,7 +336,7 @@ Locale::Locale( const char * newLanguage,
}
// remove trailing _'s
vsize = static_cast<int32_t>(uprv_strlen(newVariant));
vsize = (int32_t)uprv_strlen(newVariant);
if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
setToBogus();
return;
@@ -340,7 +349,7 @@ Locale::Locale( const char * newLanguage,
if ( newKeywords != nullptr)
{
ksize = static_cast<int32_t>(uprv_strlen(newKeywords));
ksize = (int32_t)uprv_strlen(newKeywords);
if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
setToBogus();
return;
@@ -485,7 +494,7 @@ namespace {
UInitOnce gKnownCanonicalizedInitOnce {};
UHashtable *gKnownCanonicalized = nullptr;
constexpr const char* KNOWN_CANONICALIZED[] = {
static const char* const KNOWN_CANONICALIZED[] = {
"c",
// Commonly used locales known are already canonicalized
"af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
@@ -509,13 +518,13 @@ constexpr const char* KNOWN_CANONICALIZED[] = {
"zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
};
UBool U_CALLCONV cleanupKnownCanonicalized() {
static UBool U_CALLCONV cleanupKnownCanonicalized() {
gKnownCanonicalizedInitOnce.reset();
if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
return true;
}
void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
cleanupKnownCanonicalized);
LocalUHashtablePointer newKnownCanonicalizedMap(
@@ -554,7 +563,7 @@ private:
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UChar* replacement),
void (*checkReplacement)(const UnicodeString& replacement),
UErrorCode &status);
// Read the languageAlias data from alias to
@@ -691,7 +700,7 @@ AliasDataBuilder::readAlias(
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UChar* replacement),
void (*checkReplacement)(const UnicodeString& replacement),
UErrorCode &status) {
if (U_FAILURE(status)) {
return;
@@ -711,8 +720,8 @@ AliasDataBuilder::readAlias(
LocalUResourceBundlePointer res(
ures_getNextResource(alias, nullptr, &status));
const char* aliasFrom = ures_getKey(res.getAlias());
const UChar* aliasTo =
ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status);
UnicodeString aliasTo =
ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
if (U_FAILURE(status)) return;
checkType(aliasFrom);
@@ -757,7 +766,7 @@ AliasDataBuilder::readLanguageAlias(
#else
[](const char*) {},
#endif
[](const UChar*) {}, status);
[](const UnicodeString&) {}, status);
}
/**
@@ -781,12 +790,12 @@ AliasDataBuilder::readScriptAlias(
[](const char* type) {
U_ASSERT(uprv_strlen(type) == 4);
},
[](const UChar* replacement) {
U_ASSERT(u_strlen(replacement) == 4);
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() == 4);
},
#else
[](const char*) {},
[](const UChar*) { },
[](const UnicodeString&) { },
#endif
status);
}
@@ -815,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias(
#else
[](const char*) {},
#endif
[](const UChar*) { },
[](const UnicodeString&) { },
status);
}
@@ -842,16 +851,15 @@ AliasDataBuilder::readVariantAlias(
U_ASSERT(uprv_strlen(type) != 4 ||
(type[0] >= '0' && type[0] <= '9'));
},
[](const UChar* replacement) {
int32_t len = u_strlen(replacement);
U_ASSERT(len >= 4 && len <= 8);
U_ASSERT(len != 4 ||
(*replacement >= u'0' &&
*replacement <= u'9'));
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
U_ASSERT(replacement.length() != 4 ||
(replacement.charAt(0) >= u'0' &&
replacement.charAt(0) <= u'9'));
},
#else
[](const char*) {},
[](const UChar*) { },
[](const UnicodeString&) { },
#endif
status);
}
@@ -880,7 +888,7 @@ AliasDataBuilder::readSubdivisionAlias(
#else
[](const char*) {},
#endif
[](const UChar*) { },
[](const UnicodeString&) { },
status);
}
@@ -911,8 +919,6 @@ AliasData::loadData(UErrorCode &status)
*/
AliasData*
AliasDataBuilder::build(UErrorCode &status) {
if (U_FAILURE(status)) { return nullptr; }
LocalUResourceBundlePointer metadata(
ures_openDirect(nullptr, "metadata", &status));
LocalUResourceBundlePointer metadataAlias(
@@ -1058,15 +1064,9 @@ AliasDataBuilder::build(UErrorCode &status) {
*/
class AliasReplacer {
public:
AliasReplacer(UErrorCode& status) :
AliasReplacer(UErrorCode status) :
language(nullptr), script(nullptr), region(nullptr),
extensions(nullptr),
// store value in variants only once
variants(nullptr,
([](UElement e1, UElement e2) -> UBool {
return 0==uprv_strcmp((const char*)e1.pointer,
(const char*)e2.pointer);}),
status),
extensions(nullptr), variants(status),
data(nullptr) {
}
~AliasReplacer() {
@@ -1123,12 +1123,12 @@ private:
}
// Gather fields and generate locale ID into out.
CharString& outputToString(CharString& out, UErrorCode& status);
CharString& outputToString(CharString& out, UErrorCode status);
// Generate the lookup key.
CharString& generateKey(const char* language, const char* region,
const char* variant, CharString& out,
UErrorCode& status);
UErrorCode status);
void parseLanguageReplacement(const char* replacement,
const char*& replaceLanguage,
@@ -1165,9 +1165,8 @@ private:
CharString&
AliasReplacer::generateKey(
const char* language, const char* region, const char* variant,
CharString& out, UErrorCode& status)
CharString& out, UErrorCode status)
{
if (U_FAILURE(status)) { return out; }
out.append(language, status);
if (notEmpty(region)) {
out.append(SEP_CHAR, status)
@@ -1202,8 +1201,8 @@ AliasReplacer::parseLanguageReplacement(
return;
}
// We have multiple field so we have to allocate and parse
CharString* str =
new CharString(replacement, static_cast<int32_t>(uprv_strlen(replacement)), status);
CharString* str = new CharString(
replacement, (int32_t)uprv_strlen(replacement), status);
LocalPointer<CharString> lpStr(str, status);
toBeFreed.adoptElement(lpStr.orphan(), status);
if (U_FAILURE(status)) {
@@ -1215,7 +1214,7 @@ AliasReplacer::parseLanguageReplacement(
*endOfField = '\0'; // null terminiate it.
endOfField++;
const char* start = endOfField;
endOfField = const_cast<char*>(uprv_strchr(start, '_'));
endOfField = (char*) uprv_strchr(start, '_');
size_t len = 0;
if (endOfField == nullptr) {
len = uprv_strlen(start);
@@ -1230,7 +1229,7 @@ AliasReplacer::parseLanguageReplacement(
return;
}
start = endOfField++;
endOfField = const_cast<char*>(uprv_strchr(start, '_'));
endOfField = (char*)uprv_strchr(start, '_');
if (endOfField == nullptr) {
len = uprv_strlen(start);
} else {
@@ -1245,7 +1244,7 @@ AliasReplacer::parseLanguageReplacement(
return;
}
start = endOfField++;
endOfField = const_cast<char*>(uprv_strchr(start, '_'));
endOfField = (char*)uprv_strchr(start, '_');
if (endOfField == nullptr) {
len = uprv_strlen(start);
} else {
@@ -1287,7 +1286,7 @@ AliasReplacer::replaceLanguage(
variant_index++) {
if (checkVariants) {
U_ASSERT(variant_index < variant_size);
searchVariant = static_cast<const char*>(variants.elementAt(variant_index));
searchVariant = (const char*)(variants.elementAt(variant_index));
}
if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
@@ -1408,13 +1407,13 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
U_ASSERT(foundInReplacement[len] == ' ' ||
foundInReplacement[len] == '\0');
item.adoptInsteadAndCheckErrorCode(
new CharString(foundInReplacement, static_cast<int32_t>(len), status), status);
new CharString(foundInReplacement, (int32_t)len, status), status);
}
}
if (item.isNull() && U_SUCCESS(status)) {
item.adoptInsteadAndCheckErrorCode(
new CharString(replacement,
static_cast<int32_t>(firstSpace - replacement), status), status);
(int32_t)(firstSpace - replacement), status), status);
}
if (U_FAILURE(status)) { return false; }
replacedRegion = item->data();
@@ -1456,7 +1455,7 @@ AliasReplacer::replaceVariant(UErrorCode& status)
}
// Since we may have more than one variant, we need to loop through them.
for (int32_t i = 0; i < variants.size(); i++) {
const char* variant = static_cast<const char*>(variants.elementAt(i));
const char *variant = (const char*)(variants.elementAt(i));
const char *replacement = data->variantMap().get(variant);
if (replacement == nullptr) {
// Found no replacement data for this variant.
@@ -1498,7 +1497,7 @@ AliasReplacer::replaceSubdivision(
size_t len = (firstSpace != nullptr) ?
(firstSpace - replacement) : uprv_strlen(replacement);
if (2 <= len && len <= 8) {
output.append(replacement, static_cast<int32_t>(len), status);
output.append(replacement, (int32_t)len, status);
if (2 == len) {
// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
output.append("zzzz", 4, status);
@@ -1523,12 +1522,13 @@ AliasReplacer::replaceTransformedExtensions(
const char* tkey = ultag_getTKeyStart(str);
int32_t tlangLen = (tkey == str) ? 0 :
((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
CharStringByteSink sink(&output);
if (tlangLen > 0) {
Locale tlang = LocaleBuilder()
.setLanguageTag(StringPiece(str, tlangLen))
.build(status);
tlang.canonicalize(status);
output = tlang.toLanguageTag<CharString>(status);
tlang.toLanguageTag(sink, status);
if (U_FAILURE(status)) {
return false;
}
@@ -1548,7 +1548,7 @@ AliasReplacer::replaceTransformedExtensions(
}
const char* nextTKey = ultag_getTKeyStart(tvalue);
if (nextTKey != nullptr) {
*const_cast<char*>(nextTKey - 1) = '\0'; // NUL terminate tvalue
*((char*)(nextTKey-1)) = '\0'; // NUL terminate tvalue
}
tfields.insertElementAt((void*)tkey, tfields.size(), status);
if (U_FAILURE(status)) {
@@ -1563,17 +1563,17 @@ AliasReplacer::replaceTransformedExtensions(
if (output.length() > 0) {
output.append('-', status);
}
const char* tfield = static_cast<const char*>(tfields.elementAt(i));
const char* tfield = (const char*) tfields.elementAt(i);
const char* tvalue = uprv_strchr(tfield, '-');
if (tvalue == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return false;
}
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
*const_cast<char*>(tvalue++) = '\0'; // NUL terminate tkey
*((char*)tvalue++) = '\0'; // NUL terminate tkey
output.append(tfield, status).append('-', status);
std::optional<std::string_view> bcpTValue = ulocimp_toBcpType(tfield, tvalue);
output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
}
}
if (U_FAILURE(status)) {
@@ -1584,9 +1584,8 @@ AliasReplacer::replaceTransformedExtensions(
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode& status)
CharString& out, UErrorCode status)
{
if (U_FAILURE(status)) { return out; }
out.append(language, status);
if (notEmpty(script)) {
out.append(SEP_CHAR, status)
@@ -1606,7 +1605,7 @@ AliasReplacer::outputToString(
int32_t variantsStart = out.length();
for (int32_t i = 0; i < variants.size(); i++) {
out.append(SEP_CHAR, status)
.append(static_cast<const char*>(variants.elementAt(i)),
.append((const char*)(variants.elementAt(i)),
status);
}
T_CString_toUpperCase(out.data() + variantsStart);
@@ -1653,16 +1652,10 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
U_SUCCESS(status)) {
*end = NULL_CHAR; // null terminate inside variantsBuff
// do not add "" or duplicate data to variants
if (*start && !variants.contains(start)) {
variants.addElement(start, status);
}
variants.addElement(start, status);
start = end + 1;
}
// do not add "" or duplicate data to variants
if (*start && !variants.contains(start)) {
variants.addElement(start, status);
}
variants.addElement(start, status);
}
if (U_FAILURE(status)) { return false; }
@@ -1675,7 +1668,7 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
int changed = 0;
// A UVector to to hold CharString allocated by the replace* method
// and freed when out of scope from his function.
UVector stringsToBeFreed([](void *obj) { delete static_cast<CharString*>(obj); },
UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
nullptr, 10, status);
while (U_SUCCESS(status)) {
// Something wrong with the data cause looping here more than 10 times
@@ -1729,7 +1722,9 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
while ((key = iter->next(nullptr, status)) != nullptr) {
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
uprv_strcmp("t", key) == 0) {
auto value = locale.getKeywordValue<CharString>(key, status);
CharString value;
CharStringByteSink valueSink(&value);
locale.getKeywordValue(key, valueSink, status);
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
@@ -1774,7 +1769,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
bool
canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
{
if (U_FAILURE(status)) { return false; }
AliasReplacer replacer(status);
return replacer.replace(locale, out, status);
}
@@ -1784,8 +1778,6 @@ canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
bool
isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
{
if (U_FAILURE(status)) { return false; }
if ( uprv_strcmp(locale, "c") == 0 ||
uprv_strcmp(locale, "en") == 0 ||
uprv_strcmp(locale, "en_US") == 0) {
@@ -1804,30 +1796,24 @@ isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
} // namespace
U_NAMESPACE_END
// Function for testing.
U_EXPORT const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length)
U_CAPI const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
{
U_NAMESPACE_USE
length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
*length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
return KNOWN_CANONICALIZED;
}
// Function for testing.
U_EXPORT bool
U_CAPI bool
ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
{
U_NAMESPACE_USE
Locale l(localeName);
UErrorCode status = U_ZERO_ERROR;
CharString temp;
return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
}
U_NAMESPACE_BEGIN
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(const char* localeID, UBool canonicalize)
{
@@ -1847,7 +1833,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
// without goto and without another function
do {
char *separator;
char *field[5] = {nullptr};
char *field[5] = {0};
int32_t fieldLen[5] = {0};
int32_t fieldIdx;
int32_t variantField;
@@ -1868,14 +1854,14 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
U_ASSERT(baseName == nullptr);
/*Go to heap for the fullName if necessary*/
char* newFullName = static_cast<char*>(uprv_malloc(sizeof(char) * (length + 1)));
if (newFullName == nullptr) {
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
if(fullName == 0) {
fullName = fullNameBuffer;
break; // error: out of memory
}
fullName = newFullName;
err = U_ZERO_ERROR;
length = canonicalize ?
uloc_canonicalize(localeID, fullName, length+1, &err) :
@@ -1893,11 +1879,11 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
separator = field[0] = fullName;
fieldIdx = 1;
char* at = uprv_strchr(fullName, '@');
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != nullptr &&
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
fieldIdx < UPRV_LENGTHOF(field)-1 &&
(at == nullptr || separator < at)) {
field[fieldIdx] = separator + 1;
fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
fieldIdx++;
}
// variant may contain @foo or .foo POSIX cruft; remove it
@@ -1907,12 +1893,12 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
if (separator==nullptr || (sep2!=nullptr && separator > sep2)) {
separator = sep2;
}
fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
} else {
fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
}
if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
if (fieldLen[0] >= (int32_t)(sizeof(language)))
{
break; // error: the language field is too long
}
@@ -1943,7 +1929,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
if (fieldLen[variantField] > 0) {
/* We have a variant */
variantBegin = static_cast<int32_t>(field[variantField] - fullName);
variantBegin = (int32_t)(field[variantField] - fullName);
}
err = U_ZERO_ERROR;
@@ -1993,13 +1979,12 @@ Locale::initBaseName(UErrorCode &status) {
const char *eqPtr = uprv_strchr(fullName, '=');
if (atPtr && eqPtr && atPtr < eqPtr) {
// Key words exist.
int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
char* newBaseName = static_cast<char*>(uprv_malloc(baseNameLength + 1));
if (newBaseName == nullptr) {
int32_t baseNameLength = (int32_t)(atPtr - fullName);
baseName = (char *)uprv_malloc(baseNameLength + 1);
if (baseName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
baseName = newBaseName;
uprv_strncpy(baseName, fullName, baseNameLength);
baseName[baseNameLength] = 0;
@@ -2076,7 +2061,11 @@ Locale::addLikelySubtags(UErrorCode& status) {
return;
}
CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
CharString maximizedLocaleID;
{
CharStringByteSink sink(&maximizedLocaleID);
ulocimp_addLikelySubtags(fullName, sink, &status);
}
if (U_FAILURE(status)) {
return;
@@ -2090,15 +2079,15 @@ Locale::addLikelySubtags(UErrorCode& status) {
void
Locale::minimizeSubtags(UErrorCode& status) {
Locale::minimizeSubtags(false, status);
}
void
Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
CharString minimizedLocaleID;
{
CharStringByteSink sink(&minimizedLocaleID);
ulocimp_minimizeSubtags(fullName, sink, &status);
}
if (U_FAILURE(status)) {
return;
@@ -2149,12 +2138,17 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
// and then Locale::init(), instead of just calling the normal constructor.
CharString localeID;
int32_t parsedLength;
CharString localeID = ulocimp_forLanguageTag(
tag.data(),
tag.length(),
&parsedLength,
status);
{
CharStringByteSink sink(&localeID);
ulocimp_forLanguageTag(
tag.data(),
tag.length(),
sink,
&parsedLength,
&status);
}
if (U_FAILURE(status)) {
return result;
@@ -2184,7 +2178,7 @@ Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
return;
}
ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status);
ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, &status);
}
Locale U_EXPORT2
@@ -2408,24 +2402,33 @@ Locale::getLocaleCache()
}
class KeywordEnumeration : public StringEnumeration {
protected:
CharString keywords;
private:
const char *current;
static const char fgClassID;
char *keywords;
char *current;
int32_t length;
UnicodeString currUSKey;
static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
public:
static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
public:
KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
: keywords(), current(keywords.data()) {
: keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
if(U_SUCCESS(status) && keywordLen != 0) {
if(keys == nullptr || keywordLen < 0) {
status = U_ILLEGAL_ARGUMENT_ERROR;
} else {
keywords.append(keys, keywordLen, status);
current = keywords.data() + currentIndex;
keywords = (char *)uprv_malloc(keywordLen+1);
if (keywords == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
else {
uprv_memcpy(keywords, keys, keywordLen);
keywords[keywordLen] = 0;
current = keywords + currentIndex;
length = keywordLen;
}
}
}
}
@@ -2435,14 +2438,11 @@ public:
virtual StringEnumeration * clone() const override
{
UErrorCode status = U_ZERO_ERROR;
return new KeywordEnumeration(
keywords.data(), keywords.length(),
static_cast<int32_t>(current - keywords.data()), status);
return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
}
virtual int32_t count(UErrorCode& status) const override {
if (U_FAILURE(status)) { return 0; }
const char *kw = keywords.data();
virtual int32_t count(UErrorCode &/*status*/) const override {
char *kw = keywords;
int32_t result = 0;
while(*kw) {
result++;
@@ -2456,7 +2456,7 @@ public:
int32_t len;
if(U_SUCCESS(status) && *current != 0) {
result = current;
len = static_cast<int32_t>(uprv_strlen(current));
len = (int32_t)uprv_strlen(current);
current += len+1;
if(resultLength != nullptr) {
*resultLength = len;
@@ -2471,22 +2471,21 @@ public:
}
virtual const UnicodeString* snext(UErrorCode& status) override {
if (U_FAILURE(status)) { return nullptr; }
int32_t resultLength = 0;
const char *s = next(&resultLength, status);
return setChars(s, resultLength, status);
}
virtual void reset(UErrorCode& status) override {
if (U_FAILURE(status)) { return; }
current = keywords.data();
virtual void reset(UErrorCode& /*status*/) override {
current = keywords;
}
};
const char KeywordEnumeration::fgClassID = '\0';
// Out-of-line virtual destructor to serve as the "key function".
KeywordEnumeration::~KeywordEnumeration() = default;
KeywordEnumeration::~KeywordEnumeration() {
uprv_free(keywords);
}
// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
// the next() method for each keyword before returning it.
@@ -2511,18 +2510,6 @@ public:
if (resultLength != nullptr) *resultLength = 0;
return nullptr;
}
virtual int32_t count(UErrorCode& status) const override {
if (U_FAILURE(status)) { return 0; }
const char *kw = keywords.data();
int32_t result = 0;
while(*kw) {
if (uloc_toUnicodeLocaleKey(kw) != nullptr) {
result++;
}
kw += uprv_strlen(kw)+1;
}
return result;
}
};
// Out-of-line virtual destructor to serve as the "key function".
@@ -2541,7 +2528,9 @@ Locale::createKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
CharString keywords;
CharStringByteSink sink(&keywords);
ulocimp_getKeywords(variantStart+1, '@', sink, false, &status);
if (U_SUCCESS(status) && !keywords.isEmpty()) {
result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
if (!result) {
@@ -2568,7 +2557,9 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
CharString keywords;
CharStringByteSink sink(&keywords);
ulocimp_getKeywords(variantStart+1, '@', sink, false, &status);
if (U_SUCCESS(status) && !keywords.isEmpty()) {
result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
if (!result) {
@@ -2599,127 +2590,137 @@ Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& sta
return;
}
ulocimp_getKeywordValue(fullName, keywordName, sink, status);
// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
if (U_FAILURE(status)) {
return;
}
ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
}
void
Locale::getUnicodeKeywordValue(StringPiece keywordName,
ByteSink& sink,
UErrorCode& status) const {
// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
if (U_FAILURE(status)) {
return;
}
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
if (!legacy_key.has_value()) {
const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);
CharString legacy_value;
{
CharStringByteSink sink(&legacy_value);
getKeywordValue(legacy_key, sink, status);
}
if (U_FAILURE(status)) {
return;
}
std::optional<std::string_view> unicode_value =
ulocimp_toBcpTypeWithFallback(keywordName, legacy_value.toStringPiece());
if (!unicode_value.has_value()) {
const char* unicode_value = uloc_toUnicodeLocaleType(
keywordName_nul.data(), legacy_value.data());
if (unicode_value == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
}
void
Locale::setKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
if (keywordName.empty()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR;
}
int32_t length = static_cast<int32_t>(uprv_strlen(fullName));
int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1;
const char* start = locale_getKeywordsStart(fullName);
int32_t offset = start == nullptr ? length : start - fullName;
for (;;) {
// Remove -1 from the capacity so that this function can guarantee NUL termination.
CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
int32_t reslen = ulocimp_setKeywordValue(
{fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
keywordName,
keywordValue,
sink,
status);
if (status == U_BUFFER_OVERFLOW_ERROR) {
capacity = reslen + offset + 1;
char* newFullName = static_cast<char*>(uprv_malloc(capacity));
if (newFullName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(newFullName, fullName, length + 1);
if (fullName != fullNameBuffer) {
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.
}
// if fullName is already on the heap, need to free it.
uprv_free(fullName);
}
fullName = newFullName;
status = U_ZERO_ERROR;
continue;
int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
bufferLength, &status) + 1;
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
/* Handle the case the current buffer is not enough to hold the new id */
if (status == U_BUFFER_OVERFLOW_ERROR) {
U_ASSERT(newLength > bufferLength);
char* newFullName = (char *)uprv_malloc(newLength);
if (newFullName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
if (U_FAILURE(status)) { return; }
u_terminateChars(fullName, capacity, reslen + offset, &status);
break;
uprv_strcpy(newFullName, fullName);
if (fullName != fullNameBuffer) {
// if full Name is already on the heap, need to free it.
uprv_free(fullName);
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.
}
}
fullName = newFullName;
status = U_ZERO_ERROR;
uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
} else {
U_ASSERT(newLength <= bufferLength);
}
if (baseName == fullName) {
if (U_SUCCESS(status) && baseName == fullName) {
// May have added the first keyword, meaning that the fullName is no longer also the baseName.
initBaseName(status);
}
}
void
Locale::setKeywordValue(StringPiece keywordName,
                        StringPiece keywordValue,
                        UErrorCode& status) {
    // StringPiece convenience overload: the const char* overload needs
    // NUL-terminated input, so both pieces are copied into CharStrings first.
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString nameCopy(keywordName, status);
    const CharString valueCopy(keywordValue, status);
    // Any failure recorded in status while copying is passed on to the callee.
    setKeywordValue(nameCopy.data(), valueCopy.data(), status);
}
void
Locale::setUnicodeKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status) {
// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
const CharString keywordValue_nul(keywordValue, status);
if (U_FAILURE(status)) {
return;
}
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
if (!legacy_key.has_value()) {
const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
std::string_view value;
const char* legacy_value = nullptr;
if (!keywordValue.empty()) {
std::optional<std::string_view> legacy_value =
ulocimp_toLegacyTypeWithFallback(keywordName, keywordValue);
if (!legacy_value.has_value()) {
if (!keywordValue_nul.isEmpty()) {
legacy_value =
uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
if (legacy_value == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
value = *legacy_value;
}
setKeywordValue(*legacy_key, value, status);
setKeywordValue(legacy_key, legacy_value, status);
}
const char *

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,6 @@
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "unicode/uscript.h"
#include "charstr.h"
#include "cstring.h"
#include "loclikelysubtags.h"
@@ -24,7 +23,6 @@
#include "uniquecharstr.h"
#include "uresdata.h"
#include "uresimp.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
@@ -51,7 +49,8 @@ LocaleDistanceData::~LocaleDistanceData() {
delete[] paradigms;
}
struct LikelySubtagsData {
// TODO(ICU-20777): Rename to just LikelySubtagsData.
struct XLikelySubtagsData {
UResourceBundle *langInfoBundle = nullptr;
UniqueCharStrings strings;
CharStringMap languageAliases;
@@ -62,15 +61,14 @@ struct LikelySubtagsData {
LocaleDistanceData distanceData;
LikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
~LikelySubtagsData() {
~XLikelySubtagsData() {
ures_close(langInfoBundle);
delete[] lsrs;
}
void load(UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
if (U_FAILURE(errorCode)) { return; }
StackUResourceBundle stackTempBundle;
@@ -83,18 +81,11 @@ struct LikelySubtagsData {
// Read all strings in the resource bundle and convert them to invariant char *.
LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
ResourceArray m49Array;
if (likelyTable.findValue("m49", value)) {
m49Array = value.getArray(errorCode);
} else {
errorCode = U_MISSING_RESOURCE_ERROR;
return;
}
if (!readStrings(likelyTable, "languageAliases", value,
languageIndexes, languagesLength, errorCode) ||
!readStrings(likelyTable, "regionAliases", value,
regionIndexes, regionsLength, errorCode) ||
!readLSREncodedStrings(likelyTable, "lsrnum", value, m49Array,
!readStrings(likelyTable, "lsrs", value,
lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
return;
}
@@ -145,7 +136,7 @@ struct LikelySubtagsData {
if (!readStrings(matchTable, "partitions", value,
partitionIndexes, partitionsLength, errorCode) ||
!readLSREncodedStrings(matchTable, "paradigmnum", value, m49Array,
!readStrings(matchTable, "paradigms", value,
paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
return;
}
@@ -231,7 +222,6 @@ struct LikelySubtagsData {
private:
bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return false; }
if (table.findValue(key, value)) {
ResourceArray stringArray = value.getArray(errorCode);
if (U_FAILURE(errorCode)) { return false; }
@@ -243,97 +233,10 @@ private:
return false;
}
for (int i = 0; i < length; ++i) {
if (stringArray.getValue(i, value)) { // returns true because i < length
int32_t strLength = 0;
rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
}
}
return true;
}
/**
 * Decodes the language subtag from a packed LSR integer.
 * The values 0 and 1 are sentinels ("" and "skip"); otherwise the low 24 bits,
 * reduced modulo 27^3, hold two or three base-27 digits where digit 1 maps to 'a'.
 */
UnicodeString toLanguage(int encoded) {
    switch (encoded) {
    case 0:
        return UNICODE_STRING_SIMPLE("");
    case 1:
        return UNICODE_STRING_SIMPLE("skip");
    default:
        break;
    }
    const int packed = (encoded & 0x00ffffff) % (27 * 27 * 27);
    const int thirdDigit = packed / (27 * 27);
    char letters[3];
    letters[0] = 'a' + ((packed % 27) - 1);
    letters[1] = 'a' + (((packed / 27) % 27) - 1);
    if (thirdDigit == 0) {
        // No third digit: two-letter language code.
        return UnicodeString(letters, 2, US_INV);
    }
    letters[2] = 'a' + (thirdDigit - 1);
    return UnicodeString(letters, 3, US_INV);
}
/**
 * Decodes the script subtag from a packed LSR integer.
 * The values 0 and 1 are sentinels ("" and "script"); otherwise bits 24..31
 * are interpreted as a UScriptCode whose short name is returned.
 */
UnicodeString toScript(int encoded) {
    switch (encoded) {
    case 0:
        return UNICODE_STRING_SIMPLE("");
    case 1:
        return UNICODE_STRING_SIMPLE("script");
    default:
        break;
    }
    const int scriptBits = (encoded >> 24) & 0x000000ff;
    const char* shortName = uscript_getShortName(static_cast<UScriptCode>(scriptBits));
    if (shortName != nullptr) {
        U_ASSERT(uprv_strlen(shortName) == 4);
        return UnicodeString(shortName, 4, US_INV);
    }
    // No short name for this code: fall back to the empty subtag.
    return UNICODE_STRING_SIMPLE("");
}
/**
 * Looks up entry `index` of the "m49" resource array and returns it as a string.
 * Sets U_MISSING_RESOURCE_ERROR and returns "" when the array has no such entry.
 * `value` is scratch storage that is overwritten by the lookup.
 */
UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return UNICODE_STRING_SIMPLE("");
    }
    if (!m49Array.getValue(index, value)) {
        // "m49" does not include the index.
        errorCode = U_MISSING_RESOURCE_ERROR;
        return UNICODE_STRING_SIMPLE("");
    }
    return value.getUnicodeString(errorCode);
}
/**
 * Decodes the region subtag from a packed LSR integer.
 * The values 0 and 1 (and a prior failure) yield "". Otherwise the region field is
 * the low 24 bits divided by 27^3, modulo 27^2: values below 27 are indexes into
 * the "m49" array, larger values are two base-27 digits where digit 1 maps to 'A'.
 */
UnicodeString toRegion(const ResourceArray& m49Array, ResourceValue &value, int encoded, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return UNICODE_STRING_SIMPLE("");
    }
    if (encoded == 0 || encoded == 1) {
        return UNICODE_STRING_SIMPLE("");
    }
    const int regionField = ((encoded & 0x00ffffff) / (27 * 27 * 27)) % (27 * 27);
    if (regionField < 27) {
        // Selected M49 code index, find the code from "m49" resource.
        return m49IndexToCode(m49Array, value, regionField, errorCode);
    }
    char letters[2];
    letters[0] = 'A' + ((regionField % 27) - 1);
    letters[1] = 'A' + (((regionField / 27) % 27) - 1);
    return UnicodeString(letters, 2, US_INV);
}
bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,
LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return false; }
if (table.findValue(key, value)) {
const int32_t* vectors = value.getIntVector(length, errorCode);
if (U_FAILURE(errorCode)) { return false; }
if (length == 0) { return true; }
int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length * 3);
if (rawIndexes == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return false;
}
for (int i = 0; i < length; ++i) {
rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode);
rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode);
rawIndexes[i*3+2] = strings.addByValue(
toRegion(m49Array, value, vectors[i], errorCode), errorCode);
stringArray.getValue(i, value); // returns true because i < length
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
length *= 3;
}
return true;
}
@@ -341,143 +244,39 @@ private:
namespace {
LikelySubtags *gLikelySubtags = nullptr;
UVector *gMacroregions = nullptr;
XLikelySubtags *gLikelySubtags = nullptr;
UInitOnce gInitOnce {};
/**
 * Library cleanup callback: releases the singleton and the macroregion list,
 * then resets the init-once guard so a later call can initialize again.
 */
UBool U_CALLCONV cleanup() {
    delete gLikelySubtags;
    delete gMacroregions;
    // Null both pointers so nothing is left referring to the freed objects.
    gLikelySubtags = nullptr;
    gMacroregions = nullptr;
    gInitOnce.reset();
    return true;
}
// Hard-coded list of macroregion codes, consumed by getStaticMacroregions().
// An entry is either a single code ("005", "EU") or a compact range written
// as "<code>~<end>": processMacroregionRange() emits <code> and then keeps
// incrementing its last character until it reaches <end>, so "001~3" stands
// for "001", "002" and "003".
constexpr const char16_t* MACROREGION_HARDCODE[] = {
u"001~3",
u"005",
u"009",
u"011",
u"013~5",
u"017~9",
u"021",
u"029",
u"030",
u"034~5",
u"039",
u"053~4",
u"057",
u"061",
u"142~3",
u"145",
u"150~1",
u"154~5",
u"202",
u"419",
u"EU",
u"EZ",
u"QO",
u"UN",
};
// Separator between the first code of a range and its final character.
constexpr char16_t RANGE_MARKER = 0x7E; /* '~' */
void processMacroregionRange(const UnicodeString& regionName, UVector* newMacroRegions, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
char16_t buf[6];
regionName.extract(buf,6,status);
if ( rangeMarkerLocation > 0 ) {
char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
buf[rangeMarkerLocation] = 0;
while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
buf[rangeMarkerLocation-1]++;
}
} else {
LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
}
}
#if U_DEBUG
/**
 * Debug-only: builds the macroregion list from resource data
 * (supplementalData/idValidity/region/macroregion).
 * Returns a new vector owned by the caller, or nullptr on failure.
 */
UVector* loadMacroregions(UErrorCode &status) {
    if (U_FAILURE(status)) { return nullptr; }
    LocalPointer<UVector> regions(
        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    // Walk down the resource tree; a failure at any level is carried in status
    // and checked once after the last lookup.
    LocalUResourceBundlePointer supplemental(ures_openDirect(nullptr, "supplementalData", &status));
    LocalUResourceBundlePointer validity(
        ures_getByKey(supplemental.getAlias(), "idValidity", nullptr, &status));
    LocalUResourceBundlePointer regionTable(
        ures_getByKey(validity.getAlias(), "region", nullptr, &status));
    LocalUResourceBundlePointer macroList(
        ures_getByKey(regionTable.getAlias(), "macroregion", nullptr, &status));
    if (U_FAILURE(status)) {
        return nullptr;
    }
    for (UResourceBundle *cursor = macroList.getAlias(); ures_hasNext(cursor); ) {
        UnicodeString entry = ures_getNextUnicodeString(cursor, nullptr, &status);
        processMacroregionRange(entry, regions.getAlias(), status);
        if (U_FAILURE(status)) {
            return nullptr;
        }
    }
    return regions.orphan();
}
#endif // U_DEBUG
/**
 * Builds the macroregion list from the MACROREGION_HARDCODE table.
 * Returns a new vector owned by the caller, or nullptr on failure.
 */
UVector* getStaticMacroregions(UErrorCode &status) {
    if (U_FAILURE(status)) { return nullptr; }
    LocalPointer<UVector> regions(
        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    if (U_FAILURE(status)) {
        return nullptr;
    }
    for (const char16_t *entry : MACROREGION_HARDCODE) {
        UnicodeString entryName(entry);
        // Each entry may expand to several codes (range notation).
        processMacroregionRange(entryName, regions.getAlias(), status);
        if (U_FAILURE(status)) {
            return nullptr;
        }
    }
    return regions.orphan();
}
} // namespace
void U_CALLCONV LikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
U_ASSERT(gLikelySubtags == nullptr);
LikelySubtagsData data(errorCode);
XLikelySubtagsData data(errorCode);
data.load(errorCode);
if (U_FAILURE(errorCode)) { return; }
gLikelySubtags = new LikelySubtags(data);
gMacroregions = getStaticMacroregions(errorCode);
#if U_DEBUG
auto macroregionsFromData = loadMacroregions(errorCode);
U_ASSERT((*gMacroregions) == (*macroregionsFromData));
delete macroregionsFromData;
#endif
if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
delete gLikelySubtags;
delete gMacroregions;
gLikelySubtags = new XLikelySubtags(data);
if (gLikelySubtags == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
}
const LikelySubtags *LikelySubtags::getSingleton(UErrorCode &errorCode) {
const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);
umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
return gLikelySubtags;
}
LikelySubtags::LikelySubtags(LikelySubtagsData &data) :
XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
langInfoBundle(data.langInfoBundle),
strings(data.strings.orphanCharStrings()),
languageAliases(std::move(data.languageAliases)),
@@ -486,7 +285,7 @@ LikelySubtags::LikelySubtags(LikelySubtagsData &data) :
lsrs(data.lsrs),
#if U_DEBUG
lsrsLength(data.lsrsLength),
#endif // U_DEBUG
#endif
distanceData(std::move(data.distanceData)) {
data.langInfoBundle = nullptr;
data.lsrs = nullptr;
@@ -512,39 +311,21 @@ LikelySubtags::LikelySubtags(LikelySubtagsData &data) :
}
}
LikelySubtags::~LikelySubtags() {
XLikelySubtags::~XLikelySubtags() {
ures_close(langInfoBundle);
delete strings;
delete[] lsrs;
}
LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return {}; }
if (locale.isBogus()) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return {};
}
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
// Private use language tag x-subtag-subtag... which CLDR changes to
// und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant(), returnInputIfUnmatch, errorCode);
if (uprv_strlen(max.language) == 0 &&
uprv_strlen(max.script) == 0 &&
uprv_strlen(max.region) == 0) {
// No match. The ICU API mandates that
// if the provided ULocale instance is already in the maximal form, or
// there is no data available for maximization, it will be
// returned.
return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode);
}
return max;
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant(), errorCode);
}
namespace {
@@ -556,136 +337,105 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) {
} // namespace
LSR LikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
const char *variant,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return {}; }
LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
const char *variant, UErrorCode &errorCode) const {
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
// They should match only themselves,
// not other locales with what looks like the same language and script subtags.
if (!returnInputIfUnmatch) {
char c1;
if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
switch (c1) {
case 'A':
return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
case 'B':
return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
case 'C':
return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
default: // normal locale
break;
}
char c1;
if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
switch (c1) {
case 'A':
return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
case 'B':
return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
case 'C':
return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
LSR::EXPLICIT_LSR, errorCode);
default: // normal locale
break;
}
}
if (variant[0] == 'P' && variant[1] == 'S') {
int32_t lsrFlags = *region == 0 ?
LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
if (uprv_strcmp(variant, "PSACCENT") == 0) {
return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
*region == 0 ? "XA" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSBIDI") == 0) {
return LSR(PSEUDO_BIDI_PREFIX, language, script,
*region == 0 ? "XB" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSCRACK") == 0) {
return LSR(PSEUDO_CRACKED_PREFIX, language, script,
*region == 0 ? "XC" : region, lsrFlags, errorCode);
}
// else normal locale
if (variant[0] == 'P' && variant[1] == 'S') {
int32_t lsrFlags = *region == 0 ?
LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
if (uprv_strcmp(variant, "PSACCENT") == 0) {
return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
*region == 0 ? "XA" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSBIDI") == 0) {
return LSR(PSEUDO_BIDI_PREFIX, language, script,
*region == 0 ? "XB" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSCRACK") == 0) {
return LSR(PSEUDO_CRACKED_PREFIX, language, script,
*region == 0 ? "XC" : region, lsrFlags, errorCode);
}
} // end of if (!returnInputIfUnmatch)
// else normal locale
}
language = getCanonical(languageAliases, language);
// (We have no script mappings.)
region = getCanonical(regionAliases, region);
return maximize(language, script, region, returnInputIfUnmatch, errorCode);
return maximize(language, script, region);
}
/**
 * C-string convenience overload: wraps the three NUL-terminated subtags in
 * StringPieces of their exact length and forwards to the StringPiece overload.
 */
LSR LikelySubtags::maximize(const char *language, const char *script, const char *region,
                            bool returnInputIfUnmatch,
                            UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return {}; }
    const StringPiece languagePiece(language, static_cast<int32_t>(uprv_strlen(language)));
    const StringPiece scriptPiece(script, static_cast<int32_t>(uprv_strlen(script)));
    const StringPiece regionPiece(region, static_cast<int32_t>(uprv_strlen(region)));
    return maximize(languagePiece, scriptPiece, regionPiece, returnInputIfUnmatch, errorCode);
}
/**
 * Returns true if `region` is one of the codes in gMacroregions.
 * Triggers the one-time initialization first; returns false on any failure.
 */
bool LikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) { return false; }
    // In Java, we use the Region class. In C++, since Region is under i18n,
    // we read the same data used by Region into gMacroregions to avoid a
    // dependency from common to i18n/region.cpp.
    umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);
    if (U_FAILURE(errorCode)) { return false; }
    UnicodeString key(UnicodeString::fromUTF8(region));
    return gMacroregions->contains(static_cast<void *>(&key));
}
LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return {}; }
if (language.compare("und") == 0) {
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
if (uprv_strcmp(language, "und") == 0) {
language = "";
}
if (script.compare("Zzzz") == 0) {
if (uprv_strcmp(script, "Zzzz") == 0) {
script = "";
}
if (region.compare("ZZ") == 0) {
if (uprv_strcmp(region, "ZZ") == 0) {
region = "";
}
if (!script.empty() && !region.empty() && !language.empty()) {
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); // already maximized
if (*script != 0 && *region != 0 && *language != 0) {
return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
}
bool retainLanguage = false;
bool retainScript = false;
bool retainRegion = false;
uint32_t retainOldMask = 0;
BytesTrie iter(trie);
uint64_t state;
int32_t value;
// Small optimization: Array lookup for first language letter.
int32_t c0;
if (0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 &&
language.length() >= 2 &&
if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
language[1] != 0 && // language.length() >= 2
(state = trieFirstLetterStates[c0]) != 0) {
value = trieNext(iter.resetToState64(state), language, 1);
} else {
value = trieNext(iter, language, 0);
}
bool matchLanguage = (value >= 0);
bool matchScript = false;
if (value >= 0) {
retainLanguage = !language.empty();
if (*language != 0) {
retainOldMask |= 4;
}
state = iter.getState64();
} else {
retainLanguage = true;
retainOldMask |= 4;
iter.resetToState64(trieUndState); // "und" ("*")
state = 0;
}
if (value >= 0 && !script.empty()) {
matchScript = true;
}
if (value > 0) {
// Intermediate or final value from just language.
if (value == SKIP_SCRIPT) {
value = 0;
}
retainScript = !script.empty();
if (*script != 0) {
retainOldMask |= 2;
}
} else {
value = trieNext(iter, script, 0);
if (value >= 0) {
retainScript = !script.empty();
if (*script != 0) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
retainScript = true;
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
@@ -697,19 +447,19 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
}
}
bool matchRegion = false;
if (value > 0) {
// Final value from just language or language+script.
retainRegion = !region.empty();
if (*region != 0) {
retainOldMask |= 1;
}
} else {
value = trieNext(iter, region, 0);
if (value >= 0) {
if (!region.empty() && !isMacroregion(region, errorCode)) {
retainRegion = true;
matchRegion = true;
if (*region != 0) {
retainOldMask |= 1;
}
} else {
retainRegion = true;
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
} else {
@@ -720,36 +470,31 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
}
}
U_ASSERT(value < lsrsLength);
const LSR &matched = lsrs[value];
const LSR &result = lsrs[value];
if (returnInputIfUnmatch &&
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
}
if (language.empty()) {
language = StringPiece("und");
if (*language == 0) {
language = "und";
}
if (!(retainLanguage || retainScript || retainRegion)) {
if (retainOldMask == 0) {
// Quickly return a copy of the lookup-result LSR
// without new allocation of the subtags.
return LSR(matched.language, matched.script, matched.region, matched.flags);
return LSR(result.language, result.script, result.region, result.flags);
}
if (!retainLanguage) {
language = matched.language;
if ((retainOldMask & 4) == 0) {
language = result.language;
}
if (!retainScript) {
script = matched.script;
if ((retainOldMask & 2) == 0) {
script = result.script;
}
if (!retainRegion) {
region = matched.region;
if ((retainOldMask & 1) == 0) {
region = result.region;
}
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
// retainOldMask flags = LSR explicit-subtag flags
return LSR(language, script, region, retainMask, errorCode);
return LSR(language, script, region, retainOldMask);
}
int32_t LikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
// If likelyInfo >= 0:
// likelyInfo bit 1 is set if the previous comparison with lsr
// was for equal language and script.
@@ -791,7 +536,7 @@ int32_t LikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t l
}
// Subset of maximize().
int32_t LikelySubtags::getLikelyIndex(const char *language, const char *script) const {
int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
if (uprv_strcmp(language, "und") == 0) {
language = "";
}
@@ -849,7 +594,7 @@ int32_t LikelySubtags::getLikelyIndex(const char *language, const char *script)
return value;
}
int32_t LikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
UStringTrieResult result;
uint8_t c;
if ((c = s[i]) == 0) {
@@ -882,88 +627,57 @@ int32_t LikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
default: return -1;
}
}
/**
 * Advances `iter` over the subtag s[i..], or over the wildcard '*' when nothing
 * remains at position i. The last character of a subtag is fed with its high
 * bit set (c | 0x80).
 *
 * @return -1 for no match, 0 for a match without a value, SKIP_SCRIPT for an
 *         intermediate value, or the trie's final value.
 */
int32_t LikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
    UStringTrieResult outcome;
    if (i == s.length()) {
        outcome = iter.next(u'*');
    } else {
        const char *bytes = s.data();
        // All characters before the last one are fed unchanged.
        for (; i + 1 != s.length(); ++i) {
            uint8_t c = bytes[i];
            c = uprv_invCharToAscii(c);
            // EBCDIC: If s[i] is not an invariant character,
            // then c is now 0 and will simply not match anything, which is harmless.
            if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
                return -1;
            }
        }
        // last character of this subtag
        uint8_t lastChar = bytes[i];
        lastChar = uprv_invCharToAscii(lastChar);
        outcome = iter.next(lastChar | 0x80);
    }
    if (outcome == USTRINGTRIE_NO_VALUE) {
        return 0;
    }
    if (outcome == USTRINGTRIE_INTERMEDIATE_VALUE) {
        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
        return SKIP_SCRIPT;
    }
    if (outcome == USTRINGTRIE_FINAL_VALUE) {
        return iter.getValue();
    }
    // USTRINGTRIE_NO_MATCH and any unexpected result.
    return -1;
}
LSR LikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
StringPiece region,
bool favorScript,
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
// in loclikely.cpp to this new code, including activating this
// minimizeSubtags() function. The LocaleMatcher does not minimize.
#if 0
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
const char *regionIn, ULocale.Minimize fieldToFavor,
UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return {}; }
LSR max = maximize(language, script, region, true, errorCode);
if (U_FAILURE(errorCode)) { return {}; }
// If no match, return it.
if (uprv_strlen(max.language) == 0 &&
uprv_strlen(max.script) == 0 &&
uprv_strlen(max.region) == 0) {
// No match. The ICU API mandates that
// "If this Locale is already in the minimal form, or not valid, or
// there is no data available for minimization, the Locale will be
// unchanged."
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
LSR result = maximize(languageIn, scriptIn, regionIn);
// We could try just a series of checks, like:
// LSR result2 = addLikelySubtags(languageIn, "", "");
// if result.equals(result2) return result2;
// However, we can optimize 2 of the cases:
// (languageIn, "", "")
// (languageIn, "", regionIn)
// value00 = lookup(result.language, "", "")
BytesTrie iter = new BytesTrie(trie);
int value = trieNext(iter, result.language, 0);
U_ASSERT(value >= 0);
if (value == 0) {
value = trieNext(iter, "", 0);
U_ASSERT(value >= 0);
if (value == 0) {
value = trieNext(iter, "", 0);
}
}
// try language
LSR test = maximize(max.language, "", "", true, errorCode);
if (U_FAILURE(errorCode)) { return {}; }
if (test.isEquivalentTo(max)) {
return LSR(max.language, "", "", LSR::DONT_CARE_FLAGS, errorCode);
U_ASSERT(value > 0);
LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
if (result.region.equals(value00.region)) {
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
} else {
favorRegionOk = true;
}
}
if (!favorScript) {
// favor Region
// try language and region
test = maximize(max.language, "", max.region, true, errorCode);
if (U_FAILURE(errorCode)) { return {}; }
if (test.isEquivalentTo(max)) {
return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
}
// The last case is not as easy to optimize.
// Maybe do later, but for now use the straightforward code.
LSR result2 = maximize(languageIn, scriptIn, "");
if (result2.equals(result)) {
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
} else if (favorRegionOk) {
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
}
// try language and script
test = maximize(max.language, max.script, "", true, errorCode);
if (U_FAILURE(errorCode)) { return {}; }
if (test.isEquivalentTo(max)) {
return LSR(max.language, max.script, "", LSR::DONT_CARE_FLAGS, errorCode);
}
if (favorScript) {
// try language and region
test = maximize(max.language, "", max.region, true, errorCode);
if (U_FAILURE(errorCode)) { return {}; }
if (test.isEquivalentTo(max)) {
return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
}
}
return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode);
return result;
}
#endif
U_NAMESPACE_END

View File

@@ -11,7 +11,6 @@
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "charstrmap.h"
@@ -19,7 +18,7 @@
U_NAMESPACE_BEGIN
struct LikelySubtagsData;
struct XLikelySubtagsData;
struct LocaleDistanceData {
LocaleDistanceData() = default;
@@ -37,19 +36,18 @@ private:
LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
};
class LikelySubtags final : public UMemory {
// TODO(ICU-20777): Rename to just LikelySubtags.
class XLikelySubtags final : public UMemory {
public:
~LikelySubtags();
~XLikelySubtags();
static constexpr int32_t SKIP_SCRIPT = 1;
// VisibleForTesting
static const LikelySubtags *getSingleton(UErrorCode &errorCode);
static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
// VisibleForTesting
LSR makeMaximizedLsrFrom(const Locale &locale,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const;
LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
/**
* Tests whether lsr is "more likely" than other.
@@ -63,40 +61,35 @@ public:
*/
int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region,
bool favorScript,
UErrorCode &errorCode) const;
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
// in loclikely.cpp to this new code, including activating this
// minimizeSubtags() function. The LocaleMatcher does not minimize.
#if 0
LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
#endif
// visible for LocaleDistance
const LocaleDistanceData &getDistanceData() const { return distanceData; }
private:
LikelySubtags(LikelySubtagsData &data);
LikelySubtags(const LikelySubtags &other) = delete;
LikelySubtags &operator=(const LikelySubtags &other) = delete;
XLikelySubtags(XLikelySubtagsData &data);
XLikelySubtags(const XLikelySubtags &other) = delete;
XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
static void initLikelySubtags(UErrorCode &errorCode);
LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
const char *variant,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const;
const char *variant, UErrorCode &errorCode) const;
/**
* Raw access to addLikelySubtags. Input must be in canonical format, e.g. "en", not "eng" or "EN".
*/
LSR maximize(const char *language, const char *script, const char *region,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const;
LSR maximize(StringPiece language, StringPiece script, StringPiece region,
bool returnInputIfUnmatch,
UErrorCode &errorCode) const;
LSR maximize(const char *language, const char *script, const char *region) const;
int32_t getLikelyIndex(const char *language, const char *script) const;
bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const;
static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i);
UResourceBundle *langInfoBundle;
// We could store the strings by value, except that if there were few enough strings,
@@ -119,7 +112,7 @@ private:
int32_t lsrsLength;
#endif
// distance/matcher data: see comment in LikelySubtagsData::load()
// distance/matcher data: see comment in XLikelySubtagsData::load()
LocaleDistanceData distanceData;
};

View File

@@ -28,6 +28,7 @@
*/
#include "locmap.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "cmemory.h"
@@ -48,8 +49,6 @@
* [MS-LCID] Windows Language Code Identifier (LCID) Reference
*/
namespace {
/*
////////////////////////////////////////////////
//
@@ -88,7 +87,7 @@ typedef struct ILcidPosixMap
* @param posixID posix ID of the language_TERRITORY such as 'de_CH'
*/
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
constexpr ILcidPosixElement locmap_ ## languageID [] = { \
static const ILcidPosixElement locmap_ ## languageID [] = { \
{LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
{hostID, #posixID}, \
};
@@ -98,7 +97,7 @@ constexpr ILcidPosixElement locmap_ ## languageID [] = { \
* @param id the POSIX ID, either a language or language_TERRITORY
*/
#define ILCID_POSIX_SUBTABLE(id) \
constexpr ILcidPosixElement locmap_ ## id [] =
static const ILcidPosixElement locmap_ ## id [] =
/**
@@ -797,7 +796,7 @@ ILCID_POSIX_SUBTABLE(zh) {
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
/* This must be static and grouped by LCID. */
constexpr ILcidPosixMap gPosixIDmap[] = {
static const ILcidPosixMap gPosixIDmap[] = {
ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
@@ -946,14 +945,14 @@ constexpr ILcidPosixMap gPosixIDmap[] = {
ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
};
constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
/**
* Do not call this function. It is called by hostID.
* The function is not private because this struct must stay as a C struct,
* and this is an internal class.
*/
int32_t
static int32_t
idCmp(const char* id1, const char* id2)
{
int32_t diffIdx = 0;
@@ -973,13 +972,12 @@ idCmp(const char* id1, const char* id2)
* no equivalent Windows LCID.
* @return the LCID
*/
uint32_t
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
static uint32_t
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
{
if (U_FAILURE(status)) { return locmap_root->hostID; }
int32_t bestIdx = 0;
int32_t bestIdxDiff = 0;
int32_t posixIDlen = static_cast<int32_t>(uprv_strlen(posixID));
int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
uint32_t idx;
for (idx = 0; idx < this_0->numRegions; idx++ ) {
@@ -998,16 +996,16 @@ getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
&& this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
{
status = U_USING_FALLBACK_WARNING;
*status = U_USING_FALLBACK_WARNING;
return this_0->regionMaps[bestIdx].hostID;
}
/*no match found */
status = U_ILLEGAL_ARGUMENT_ERROR;
return locmap_root->hostID;
*status = U_ILLEGAL_ARGUMENT_ERROR;
return this_0->regionMaps->hostID;
}
const char*
static const char*
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
{
uint32_t i;
@@ -1037,20 +1035,18 @@ getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
* quz -> qu
* prs -> fa
*/
void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) {
if (len >= 3) {
if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {
buffer[2] = 0;
uprv_strcat(buffer, buffer+3);
} else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {
buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0;
uprv_strcat(buffer, buffer+3);
}
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
if (len >= 3) { \
if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
buffer[2] = 0; \
uprv_strcat(buffer, buffer+3); \
} else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
uprv_strcat(buffer, buffer+3); \
} \
}
}
#endif
} // namespace
#endif
U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
@@ -1151,7 +1147,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
/* no match found */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
return -1;
}
/*
@@ -1174,13 +1170,17 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
// conversion functionality when available.
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
int32_t len;
icu::CharString baseName;
char baseName[ULOC_FULLNAME_CAPACITY] = {};
const char * mylocaleID = localeID;
// Check for any keywords.
if (uprv_strchr(localeID, '@'))
{
icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
icu::CharString collVal;
{
icu::CharStringByteSink sink(&collVal);
ulocimp_getKeywordValue(localeID, "collation", sink, status);
}
if (U_SUCCESS(*status) && !collVal.isEmpty())
{
// If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
@@ -1189,16 +1189,19 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
else
{
// If the locale ID contains keywords other than collation, just use the base name.
baseName = ulocimp_getBaseName(localeID, *status);
if (U_SUCCESS(*status) && !baseName.isEmpty())
len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
if (U_SUCCESS(*status) && len > 0)
{
mylocaleID = baseName.data();
baseName[len] = 0;
mylocaleID = baseName;
}
}
}
char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);
(void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, status);
if (U_SUCCESS(*status))
{
@@ -1246,14 +1249,6 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{
if (U_FAILURE(*status) ||
langID == nullptr ||
posixID == nullptr ||
uprv_strlen(langID) < 2 ||
uprv_strlen(posixID) < 2) {
return locmap_root->hostID;
}
// This function does the table lookup when native platform name->lcid conversion isn't available,
// or for locales that don't follow patterns the platform expects.
uint32_t low = 0;
@@ -1267,6 +1262,11 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
UErrorCode myStatus;
uint32_t idx;
/* Check for incomplete id. */
if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
return 0;
}
/*Binary search for the map entry for normal cases */
while (high > low) /*binary search*/{
@@ -1284,7 +1284,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
low = mid;
}
else /*we found it*/{
return getHostID(&gPosixIDmap[mid], posixID, *status);
return getHostID(&gPosixIDmap[mid], posixID, status);
}
oldmid = mid;
}
@@ -1295,7 +1295,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
*/
for (idx = 0; idx < gLocaleCount; idx++ ) {
myStatus = U_ZERO_ERROR;
value = getHostID(&gPosixIDmap[idx], posixID, myStatus);
value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
if (myStatus == U_ZERO_ERROR) {
return value;
}
@@ -1311,5 +1311,5 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
/* no match found */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return locmap_root->hostID; /* return international (root) */
return 0; /* return international (root) */
}

View File

@@ -24,7 +24,6 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "charstr.h"
#include "cstring.h"
#include "ulocimp.h"
#include "uresimp.h"
@@ -49,10 +48,10 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
int32_t *pLength,
UErrorCode *pErrorCode)
{
if (U_FAILURE(*pErrorCode)) { return nullptr; }
/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
const char16_t *item=nullptr;
UErrorCode errorCode;
char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
/*
* open the bundle for the current locale
@@ -127,16 +126,15 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
*pErrorCode = errorCode;
break;
}
icu::CharString explicitFallbackName;
explicitFallbackName.appendInvariantChars(fallbackLocale, len, errorCode);
u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
/* guard against recursive fallback */
if (explicitFallbackName == locale) {
if(uprv_strcmp(explicitFallbackName, locale)==0){
*pErrorCode = U_INTERNAL_PROGRAM_ERROR;
break;
}
rb.adoptInstead(ures_open(path, explicitFallbackName.data(), &errorCode));
rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode));
if(U_FAILURE(errorCode)){
*pErrorCode = errorCode;
break;
@@ -150,65 +148,61 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
return item;
}
namespace {
ULayoutType
static ULayoutType
_uloc_getOrientationHelper(const char* localeId,
const char* key,
UErrorCode& status)
UErrorCode *status)
{
ULayoutType result = ULOC_LAYOUT_UNKNOWN;
if (U_FAILURE(status)) { return result; }
if (!U_FAILURE(*status)) {
int32_t length = 0;
char localeBuffer[ULOC_FULLNAME_CAPACITY];
icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);
uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
if (U_FAILURE(status)) { return result; }
if (!U_FAILURE(*status)) {
const char16_t* const value =
uloc_getTableStringWithFallback(
nullptr,
localeBuffer,
"layout",
nullptr,
key,
&length,
status);
int32_t length = 0;
const char16_t* const value =
uloc_getTableStringWithFallback(
nullptr,
localeBuffer.data(),
"layout",
nullptr,
key,
&length,
&status);
if (U_FAILURE(status)) { return result; }
if (length != 0) {
switch(value[0])
{
case 0x0062: /* 'b' */
result = ULOC_LAYOUT_BTT;
break;
case 0x006C: /* 'l' */
result = ULOC_LAYOUT_LTR;
break;
case 0x0072: /* 'r' */
result = ULOC_LAYOUT_RTL;
break;
case 0x0074: /* 't' */
result = ULOC_LAYOUT_TTB;
break;
default:
status = U_INTERNAL_PROGRAM_ERROR;
break;
if (!U_FAILURE(*status) && length != 0) {
switch(value[0])
{
case 0x0062: /* 'b' */
result = ULOC_LAYOUT_BTT;
break;
case 0x006C: /* 'l' */
result = ULOC_LAYOUT_LTR;
break;
case 0x0072: /* 'r' */
result = ULOC_LAYOUT_RTL;
break;
case 0x0074: /* 't' */
result = ULOC_LAYOUT_TTB;
break;
default:
*status = U_INTERNAL_PROGRAM_ERROR;
break;
}
}
}
}
return result;
}
} // namespace
U_CAPI ULayoutType U_EXPORT2
uloc_getCharacterOrientation(const char* localeId,
UErrorCode *status)
{
return _uloc_getOrientationHelper(localeId, "characters", *status);
return _uloc_getOrientationHelper(localeId, "characters", status);
}
/**
@@ -222,5 +216,5 @@ U_CAPI ULayoutType U_EXPORT2
uloc_getLineOrientation(const char* localeId,
UErrorCode *status)
{
return _uloc_getOrientationHelper(localeId, "lines", *status);
return _uloc_getOrientationHelper(localeId, "lines", status);
}

View File

@@ -145,7 +145,9 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
if (id.isBogus()) {
enum { BUFLEN = 128 }; // larger than ever needed
if (id.isBogus() || id.length() >= BUFLEN) {
result.setToBogus();
} else {
/*
@@ -166,29 +168,24 @@ LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
*
* There should be only at most one '@' in a locale ID.
*/
CharString buffer;
char buffer[BUFLEN];
int32_t prev, i;
prev = 0;
UErrorCode status = U_ZERO_ERROR;
do {
i = id.indexOf(static_cast<char16_t>(0x40), prev);
for(;;) {
i = id.indexOf((char16_t)0x40, prev);
if(i < 0) {
// no @ between prev and the rest of the string
buffer.appendInvariantChars(id.tempSubString(prev), status);
id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
break; // done
} else {
// normal invariant-character conversion for text between @s
buffer.appendInvariantChars(id.tempSubString(prev, i - prev), status);
id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
// manually "convert" U+0040 at id[i] into '@' at buffer[i]
buffer.append('@', status);
buffer[i] = '@';
prev = i + 1;
}
} while (U_SUCCESS(status));
if (U_FAILURE(status)) {
result.setToBogus();
} else {
result = Locale::createFromName(buffer.data());
}
result = Locale::createFromName(buffer);
}
return result;
}
@@ -224,7 +221,7 @@ LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
Hashtable* htp;
umtx_lock(nullptr);
htp = static_cast<Hashtable*>(cache->get(bundleID));
htp = (Hashtable*) cache->get(bundleID);
umtx_unlock(nullptr);
if (htp == nullptr) {
@@ -262,7 +259,7 @@ LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
return htp;
}
bool
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
return child.indexOf(root) == 0 &&
@@ -274,3 +271,5 @@ U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

View File

@@ -28,7 +28,7 @@ public:
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
static bool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
};
U_NAMESPACE_END

View File

@@ -31,26 +31,6 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t
}
}
/**
 * Constructs an LSR that owns a private copy of its three subtags.
 *
 * The subtags are packed into a single buffer laid out as
 * language, NUL, script, NUL, region; `owned` takes the cloned buffer and
 * `language`, `script` and `region` point into it.
 *
 * @param lang      language subtag to copy
 * @param scr       script subtag to copy
 * @param r         region subtag to copy; also used to compute regionIndex
 * @param f         value stored in flags
 * @param errorCode in/out error code; if it already indicates failure, or
 *                  if building/cloning the buffer fails, the three subtag
 *                  pointers are left as nullptr
 *
 * NOTE(review): regionIndex is computed from r.data() without r's length --
 * presumably indexForRegion() expects a NUL-terminated string; confirm that
 * all callers pass such a StringPiece.
 */
LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
         UErrorCode &errorCode) :
        language(nullptr), script(nullptr), region(nullptr),
        regionIndex(indexForRegion(r.data())), flags(f) {
    if (U_FAILURE(errorCode)) {
        return;
    }

    // Pack all three subtags into one buffer, remembering where the
    // script and the region begin.
    CharString packed;
    packed.append(lang, errorCode);
    packed.append('\0', errorCode);
    int32_t scriptStart = packed.length();
    packed.append(scr, errorCode);
    packed.append('\0', errorCode);
    int32_t regionStart = packed.length();
    packed.append(r, errorCode);

    // Take ownership of a copy of the packed buffer.
    owned = packed.cloneData(errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    language = owned;
    script = owned + scriptStart;
    region = owned + regionStart;
}
LSR::LSR(LSR &&other) noexcept :
language(other.language), script(other.script), region(other.region), owned(other.owned),
regionIndex(other.regionIndex), flags(other.flags),

View File

@@ -7,7 +7,6 @@
#ifndef __LSR_H__
#define __LSR_H__
#include "unicode/stringpiece.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "cstring.h"
@@ -46,8 +45,6 @@ struct LSR final : public UMemory {
*/
LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
UErrorCode &errorCode);
LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
UErrorCode &errorCode);
LSR(LSR &&other) noexcept;
LSR(const LSR &other) = delete;
inline ~LSR() {

View File

@@ -164,7 +164,7 @@ public:
Array1D() : memory_(nullptr), data_(nullptr), d1_(0) {}
Array1D(int32_t d1, UErrorCode &status)
: memory_(uprv_malloc(d1 * sizeof(float))),
data_(static_cast<float*>(memory_)), d1_(d1) {
data_((float*)memory_), d1_(d1) {
if (U_SUCCESS(status)) {
if (memory_ == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
@@ -301,7 +301,7 @@ public:
Array2D() : memory_(nullptr), data_(nullptr), d1_(0), d2_(0) {}
Array2D(int32_t d1, int32_t d2, UErrorCode &status)
: memory_(uprv_malloc(d1 * d2 * sizeof(float))),
data_(static_cast<float*>(memory_)), d1_(d1), d2_(d2) {
data_((float*)memory_), d1_(d1), d2_(d2) {
if (U_SUCCESS(status)) {
if (memory_ == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
@@ -526,11 +526,11 @@ void CodePointsVectorizer::vectorize(
int32_t current;
char16_t str[2] = {0, 0};
while (U_SUCCESS(status) &&
(current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos) {
(current = (int32_t)utext_getNativeIndex(text)) < endPos) {
// Since the LSTMBreakEngine currently only accepts chars in the BMP,
// we can ignore the possibility of hitting a supplementary code
// point.
str[0] = static_cast<char16_t>(utext_next32(text));
str[0] = (char16_t) utext_next32(text);
U_ASSERT(!U_IS_SURROGATE(str[0]));
offsets.addElement(current, status);
indices.addElement(stringToIndex(str), status);
@@ -733,7 +733,7 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text,
#endif // LSTM_DEBUG
// current = argmax(logp)
LSTMClass current = static_cast<LSTMClass>(logp.maxIndex());
LSTMClass current = (LSTMClass)logp.maxIndex();
// BIES logic.
if (current == BEGIN || current == SINGLE) {
if (i != 0) {

View File

@@ -351,7 +351,7 @@ MessagePattern::autoQuoteApostropheDeep() const {
for(int32_t i=count; i>0;) {
const Part &part=getPart(--i);
if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
modified.insert(part.index, static_cast<char16_t>(part.value));
modified.insert(part.index, (char16_t)part.value);
}
}
return modified;
@@ -437,7 +437,7 @@ MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
if(U_FAILURE(errorCode)) {
return 0;
}
if(nestingLevel>Part::MAX_NESTED_LEVELS) {
if(nestingLevel>Part::MAX_VALUE) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
@@ -628,7 +628,7 @@ MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingL
}
}
// change the ARG_START type from NONE to argType
partsList->a[argStart].value = static_cast<int16_t>(argType);
partsList->a[argStart].value=(int16_t)argType;
if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
}
@@ -980,13 +980,13 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
}
// Let Double.parseDouble() throw a NumberFormatException.
char numberChars[128];
int32_t capacity = static_cast<int32_t>(sizeof(numberChars));
int32_t capacity=(int32_t)sizeof(numberChars);
int32_t length=limit-start;
if(length>=capacity) {
break; // number too long
}
msg.extract(start, length, numberChars, capacity, US_INV);
if (static_cast<int32_t>(uprv_strlen(numberChars)) < length) {
if((int32_t)uprv_strlen(numberChars)<length) {
break; // contains non-invariant character that was turned into NUL
}
char *end;
@@ -999,6 +999,7 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
}
setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value.
errorCode=U_PATTERN_SYNTAX_ERROR;
return;
}
int32_t
@@ -1006,7 +1007,7 @@ MessagePattern::skipWhiteSpace(int32_t index) {
const char16_t *s=msg.getBuffer();
int32_t msgLength=msg.length();
const char16_t *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
return static_cast<int32_t>(t - s);
return (int32_t)(t-s);
}
int32_t
@@ -1014,7 +1015,7 @@ MessagePattern::skipIdentifier(int32_t index) {
const char16_t *s=msg.getBuffer();
int32_t msgLength=msg.length();
const char16_t *t=PatternProps::skipIdentifier(s+index, msgLength-index);
return static_cast<int32_t>(t - s);
return (int32_t)(t-s);
}
int32_t
@@ -1105,8 +1106,8 @@ MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t len
Part &part=partsList->a[partsLength++];
part.type=type;
part.index=index;
part.length = static_cast<uint16_t>(length);
part.value = static_cast<int16_t>(value);
part.length=(uint16_t)length;
part.value=(int16_t)value;
part.limitPartIndex=0;
}
}

View File

@@ -57,12 +57,12 @@ int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t
// moving forward, finally the last six values in the indexList are
// [length-4, length-3, length-2, length-1, -1, -1]. The "+4" here means four extra "-1".
int32_t indexSize = codePointLength + 4;
LocalMemory<int32_t> indexList(static_cast<int32_t*>(uprv_malloc(indexSize * sizeof(int32_t))));
if (indexList.isNull()) {
int32_t *indexList = (int32_t *)uprv_malloc(indexSize * sizeof(int32_t));
if (indexList == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
int32_t numCodeUnits = initIndexList(inString, indexList.getAlias(), status);
int32_t numCodeUnits = initIndexList(inString, indexList, status);
// Add a break for the start.
boundary.addElement(0, status);
@@ -71,12 +71,13 @@ int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t
for (int32_t idx = 0; idx + 1 < codePointLength && U_SUCCESS(status); idx++) {
numBreaks =
evaluateBreakpoint(inString, indexList.getAlias(), idx, numCodeUnits, numBreaks, boundary, status);
evaluateBreakpoint(inString, indexList, idx, numCodeUnits, numBreaks, boundary, status);
if (idx + 4 < codePointLength) {
indexList[idx + 6] = numCodeUnits;
numCodeUnits += U16_LENGTH(inString.char32At(indexList[idx + 6]));
}
}
uprv_free(indexList);
if (U_FAILURE(status)) return 0;

File diff suppressed because it is too large Load Diff

View File

@@ -174,7 +174,7 @@ public:
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
return static_cast<int32_t>(spanQuickCheckYes(sArray, sArray + s.length(), errorCode) - sArray);
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
}
virtual const char16_t *
spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
@@ -391,7 +391,6 @@ struct Norm2AllModes : public UMemory {
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKC_SCFInstance(UErrorCode &errorCode);
Normalizer2Impl *impl;
ComposeNormalizer2 comp;

View File

@@ -380,8 +380,8 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2,
firstLength=firstString.length(); // In case it was -1.
// secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(nullptr, nullptr, buffer, ...) would crash.
if(secondLength!=0) {
const Normalizer2* n2 = reinterpret_cast<const Normalizer2*>(norm2);
const Normalizer2WithImpl* n2wi = dynamic_cast<const Normalizer2WithImpl*>(n2);
const Normalizer2 *n2=(const Normalizer2 *)norm2;
const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
if(n2wi!=nullptr) {
// Avoid duplicate argument checking and support NUL-terminated src.
UnicodeString safeMiddle;

View File

@@ -53,9 +53,9 @@ namespace {
*/
inline uint8_t leadByteForCP(UChar32 c) {
if (c <= 0x7f) {
return static_cast<uint8_t>(c);
return (uint8_t)c;
} else if (c <= 0x7ff) {
return static_cast<uint8_t>(0xc0 + (c >> 6));
return (uint8_t)(0xc0+(c>>6));
} else {
// Should not occur because ccc(U+0300)!=0.
return 0xe0;
@@ -82,7 +82,7 @@ UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
return ((c&0x1f)<<6) | (cpStart[1]&0x3f);
case 3:
// no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (char16_t)
return static_cast<char16_t>((c << 12) | ((cpStart[1] & 0x3f) << 6) | (cpStart[2] & 0x3f));
return (char16_t)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f));
case 4:
return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f);
default:
@@ -100,8 +100,8 @@ UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) {
uint8_t l = *p;
uint8_t t1, t2;
if (0xe1 <= l && l <= 0xed &&
(t1 = static_cast<uint8_t>(p[1] - 0x80)) <= 0x3f &&
(t2 = static_cast<uint8_t>(p[2] - 0x80)) <= 0x3f &&
(t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f &&
(t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&
(l < 0xed || t1 <= 0x1f)) {
return ((l & 0xf) << 12) | (t1 << 6) | t2;
}
@@ -125,7 +125,7 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
}
} else if (src[1] == 0x87) {
uint8_t t = src[2];
if (static_cast<int8_t>(t) <= static_cast<int8_t>(0x82u)) {
if ((int8_t)t <= (int8_t)0x82u) {
return t - (0xa7 - 0x40);
}
}
@@ -138,10 +138,10 @@ appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t del
ByteSink &sink, Edits *edits) {
char buffer[U8_MAX_LENGTH];
int32_t length;
int32_t cpLength = static_cast<int32_t>(cpLimit - cpStart);
int32_t cpLength = (int32_t)(cpLimit - cpStart);
if (cpLength == 1) {
// The builder makes ASCII map to ASCII.
buffer[0] = static_cast<uint8_t>(*cpStart + delta);
buffer[0] = (uint8_t)(*cpStart + delta);
length = 1;
} else {
int32_t trail = *(cpLimit-1) + delta;
@@ -150,7 +150,7 @@ appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t del
--cpLimit;
length = 0;
do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit);
buffer[length++] = static_cast<uint8_t>(trail);
buffer[length++] = (uint8_t)trail;
} else {
// Decode the code point, add the delta, re-encode.
UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta;
@@ -205,16 +205,16 @@ UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
}
UBool ReorderingBuffer::equals(const char16_t *otherStart, const char16_t *otherLimit) const {
int32_t length = static_cast<int32_t>(limit - start);
int32_t length=(int32_t)(limit-start);
return
length == static_cast<int32_t>(otherLimit - otherStart) &&
length==(int32_t)(otherLimit-otherStart) &&
0==u_memcmp(start, otherStart, length);
}
UBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const {
U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller
int32_t length = static_cast<int32_t>(limit - start);
int32_t otherLength = static_cast<int32_t>(otherLimit - otherStart);
int32_t length = (int32_t)(limit - start);
int32_t otherLength = (int32_t)(otherLimit - otherStart);
// For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.
if (otherLength < length || (otherLength / 3) > length) {
return false;
@@ -284,7 +284,7 @@ UBool ReorderingBuffer::append(const char16_t *s, int32_t length, UBool isNFD,
U16_NEXT(s, i, length, c);
if(i<length) {
if (isNFD) {
leadCC = Normalizer2Impl::getCCFromYesOrMaybeYes(impl.getRawNorm16(c));
leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
} else {
leadCC = impl.getCC(impl.getNorm16(c));
}
@@ -304,7 +304,7 @@ UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
}
remainingCapacity-=cpLength;
if(cpLength==1) {
*limit++ = static_cast<char16_t>(c);
*limit++=(char16_t)c;
} else {
limit[0]=U16_LEAD(c);
limit[1]=U16_TRAIL(c);
@@ -319,7 +319,7 @@ UBool ReorderingBuffer::appendZeroCC(const char16_t *s, const char16_t *sLimit,
if(s==sLimit) {
return true;
}
int32_t length = static_cast<int32_t>(sLimit - s);
int32_t length=(int32_t)(sLimit-s);
if(remainingCapacity<length && !resize(length, errorCode)) {
return false;
}
@@ -350,8 +350,8 @@ void ReorderingBuffer::removeSuffix(int32_t suffixLength) {
}
UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
int32_t reorderStartIndex = static_cast<int32_t>(reorderStart - start);
int32_t length = static_cast<int32_t>(limit - start);
int32_t reorderStartIndex=(int32_t)(reorderStart-start);
int32_t length=(int32_t)(limit-start);
str.releaseBuffer(length);
int32_t newCapacity=length+appendLength;
int32_t doubleCapacity=2*str.getCapacity();
@@ -392,7 +392,7 @@ uint8_t ReorderingBuffer::previousCC() {
--codePointStart;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
return impl.getCCFromYesOrMaybeYesCP(c);
return impl.getCCFromYesOrMaybeCP(c);
}
// Inserts c somewhere before the last character.
@@ -440,14 +440,15 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
minMaybeNo = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_NO]);
minMaybeNoCombinesFwd = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_NO_COMBINES_FWD]);
minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
U_ASSERT((minMaybeNo & 7) == 0); // 8-aligned for noNoDelta bit fields
centerNoNoDelta = (minMaybeNo >> DELTA_SHIFT) - MAX_DELTA - 1;
U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields
centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
normTrie=inTrie;
extraData=inExtraData;
maybeYesCompositions=inExtraData;
extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
smallFCD=inSmallFCD;
}
@@ -485,7 +486,7 @@ Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
nullptr, nullptr, &value)) >= 0) {
sa->add(sa->set, start);
if (start != end && isAlgorithmicNoNo(static_cast<uint16_t>(value)) &&
if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
// Range of code points with same-norm16-value algorithmic decompositions.
// They might have different non-zero FCD16 values.
@@ -569,7 +570,7 @@ Normalizer2Impl::decompose(const char16_t *src, const char16_t *limit,
int32_t destLengthEstimate,
UErrorCode &errorCode) const {
if(destLengthEstimate<0 && limit!=nullptr) {
destLengthEstimate = static_cast<int32_t>(limit - src);
destLengthEstimate=(int32_t)(limit-src);
}
dest.remove();
ReorderingBuffer buffer(*this, dest);
@@ -649,7 +650,7 @@ Normalizer2Impl::decompose(const char16_t *src, const char16_t *limit,
}
} else {
if(isDecompYes(norm16)) {
uint8_t cc=getCCFromYesOrMaybeYes(norm16);
uint8_t cc=getCCFromYesOrMaybe(norm16);
if(prevCC<=cc || cc==0) {
prevCC=cc;
if(cc<=1) {
@@ -701,13 +702,12 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
UErrorCode &errorCode) const {
// get the decomposition and the lead and trail cc's
if (norm16 >= limitNoNo) {
if (isMaybeYesOrNonZeroCC(norm16)) {
return buffer.append(c, getCCFromYesOrMaybeYes(norm16), errorCode);
} else if (norm16 < minMaybeNo) {
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getRawNorm16(c);
if (isMaybeOrNonZeroCC(norm16)) {
return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
}
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getRawNorm16(c);
}
if (norm16 < minYesNo) {
// c does not decompose
@@ -718,17 +718,17 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getData(norm16);
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
int32_t length=firstUnit&MAPPING_LENGTH_MASK;
uint8_t leadCC, trailCC;
trailCC = static_cast<uint8_t>(firstUnit >> 8);
trailCC=(uint8_t)(firstUnit>>8);
if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
leadCC = static_cast<uint8_t>(*(mapping - 1) >> 8);
leadCC=(uint8_t)(*(mapping-1)>>8);
} else {
leadCC=0;
}
return buffer.append(reinterpret_cast<const char16_t*>(mapping) + 1, length, true, leadCC, trailCC, errorCode);
return buffer.append((const char16_t *)mapping+1, length, true, leadCC, trailCC, errorCode);
}
// Dual functionality:
@@ -787,9 +787,9 @@ Normalizer2Impl::decomposeUTF8(uint32_t options,
}
// Medium-fast path: Quick check.
if (isMaybeYesOrNonZeroCC(norm16)) {
if (isMaybeOrNonZeroCC(norm16)) {
// Does not decompose.
uint8_t cc = getCCFromYesOrMaybeYes(norm16);
uint8_t cc = getCCFromYesOrMaybe(norm16);
if (prevCC <= cc || cc == 0) {
prevCC = cc;
if (cc <= 1) {
@@ -836,7 +836,7 @@ Normalizer2Impl::decomposeUTF8(uint32_t options,
}
// We already know there was a change if the original character decomposed;
// otherwise compare.
if (isMaybeYesOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
*sink, options, edits, errorCode)) {
break;
@@ -867,9 +867,9 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
// Get the decomposition and the lead and trail cc's.
UChar32 c = U_SENTINEL;
if (norm16 >= limitNoNo) {
if (isMaybeYesOrNonZeroCC(norm16)) {
if (isMaybeOrNonZeroCC(norm16)) {
// No comp boundaries around this character.
uint8_t cc = getCCFromYesOrMaybeYes(norm16);
uint8_t cc = getCCFromYesOrMaybe(norm16);
if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
return prevSrc;
}
@@ -881,15 +881,14 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
return src;
}
continue;
} else if (norm16 < minMaybeNo) {
// Maps to an isCompYesAndZeroCC.
if (stopAt != STOP_AT_LIMIT) {
return prevSrc;
}
c = codePointFromValidUTF8(prevSrc, src);
c = mapAlgorithmic(c, norm16);
norm16 = getRawNorm16(c);
}
// Maps to an isCompYesAndZeroCC.
if (stopAt != STOP_AT_LIMIT) {
return prevSrc;
}
c = codePointFromValidUTF8(prevSrc, src);
c = mapAlgorithmic(c, norm16);
norm16 = getRawNorm16(c);
} else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
return prevSrc;
}
@@ -919,20 +918,20 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
}
} else {
// The character decomposes, get everything from the variable-length extra data.
const uint16_t *mapping = getData(norm16);
const uint16_t *mapping = getMapping(norm16);
uint16_t firstUnit = *mapping;
int32_t length = firstUnit & MAPPING_LENGTH_MASK;
uint8_t trailCC = static_cast<uint8_t>(firstUnit >> 8);
uint8_t trailCC = (uint8_t)(firstUnit >> 8);
uint8_t leadCC;
if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) {
leadCC = static_cast<uint8_t>(*(mapping - 1) >> 8);
leadCC = (uint8_t)(*(mapping-1) >> 8);
} else {
leadCC = 0;
}
if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
return prevSrc;
}
if (!buffer.append(reinterpret_cast<const char16_t*>(mapping) + 1, length, true, leadCC, trailCC, errorCode)) {
if (!buffer.append((const char16_t *)mapping+1, length, true, leadCC, trailCC, errorCode)) {
return nullptr;
}
}
@@ -947,7 +946,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
const char16_t *
Normalizer2Impl::getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length) const {
uint16_t norm16;
if(c<minDecompNoCP || isMaybeYesOrNonZeroCC(norm16=getNorm16(c))) {
if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
// c does not decompose
return nullptr;
}
@@ -969,9 +968,9 @@ Normalizer2Impl::getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length
return buffer;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getData(norm16);
const uint16_t *mapping=getMapping(norm16);
length=*mapping&MAPPING_LENGTH_MASK;
return reinterpret_cast<const char16_t*>(mapping) + 1;
return (const char16_t *)mapping+1;
}
// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
@@ -996,7 +995,7 @@ Normalizer2Impl::getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &le
return buffer;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getData(norm16);
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping
if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
@@ -1006,17 +1005,17 @@ Normalizer2Impl::getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &le
uint16_t rm0=*rawMapping;
if(rm0<=MAPPING_LENGTH_MASK) {
length=rm0;
return reinterpret_cast<const char16_t*>(rawMapping) - rm0;
return (const char16_t *)rawMapping-rm0;
} else {
// Copy the normal mapping and replace its first two code units with rm0.
buffer[0] = static_cast<char16_t>(rm0);
u_memcpy(buffer + 1, reinterpret_cast<const char16_t*>(mapping) + 1 + 2, mLength - 2);
buffer[0]=(char16_t)rm0;
u_memcpy(buffer+1, (const char16_t *)mapping+1+2, mLength-2);
length=mLength-1;
return buffer;
}
} else {
length=mLength;
return reinterpret_cast<const char16_t*>(mapping) + 1;
return (const char16_t *)mapping+1;
}
}
@@ -1053,7 +1052,7 @@ void Normalizer2Impl::decomposeAndAppend(const char16_t *src, const char16_t *li
limit=u_strchr(p, 0);
}
if (buffer.append(src, static_cast<int32_t>(p - src), false, firstCC, prevCC, errorCode)) {
if (buffer.append(src, (int32_t)(p - src), false, firstCC, prevCC, errorCode)) {
buffer.appendZeroCC(p, limit, errorCode);
}
}
@@ -1071,7 +1070,7 @@ UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const {
return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getDataForYesOrNo(norm16);
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
// true if leadCC==0 (hasFCDBoundaryBefore())
return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
@@ -1092,15 +1091,14 @@ UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const {
return true;
}
if (norm16 >= limitNoNo) {
if (isMaybeYesOrNonZeroCC(norm16)) {
if (isMaybeOrNonZeroCC(norm16)) {
return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
} else if (norm16 < minMaybeNo) {
// Maps to an isCompYesAndZeroCC.
return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
}
// Maps to an isCompYesAndZeroCC.
return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getData(norm16);
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
// decomp after-boundary: same as hasFCDBoundaryAfter(),
// fcd16<=1 || trailCC==0
@@ -1142,13 +1140,13 @@ int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
if(trail<COMP_1_TRAIL_LIMIT) {
// trail character is 0..33FF
// result entry may have 2 or 3 units
key1 = static_cast<uint16_t>(trail << 1);
key1=(uint16_t)(trail<<1);
while(key1>(firstUnit=*list)) {
list+=2+(firstUnit&COMP_1_TRIPLE);
}
if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
if(firstUnit&COMP_1_TRIPLE) {
return (static_cast<int32_t>(list[1]) << 16) | list[2];
return ((int32_t)list[1]<<16)|list[2];
} else {
return list[1];
}
@@ -1156,10 +1154,10 @@ int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
} else {
// trail character is 3400..10FFFF
// result entry has 3 units
key1 = static_cast<uint16_t>(COMP_1_TRAIL_LIMIT +
key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
(((trail>>COMP_1_TRAIL_SHIFT))&
~COMP_1_TRIPLE));
uint16_t key2 = static_cast<uint16_t>(trail << COMP_2_TRAIL_SHIFT);
uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
uint16_t secondUnit;
for(;;) {
if(key1>(firstUnit=*list)) {
@@ -1172,7 +1170,7 @@ int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
list+=3;
}
} else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
return (static_cast<int32_t>(secondUnit & ~COMP_2_TRAIL_MASK) << 16) | list[2];
return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
} else {
break;
}
@@ -1197,7 +1195,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const
compositeAndFwd=list[1];
list+=2;
} else {
compositeAndFwd = ((static_cast<int32_t>(list[1]) & ~COMP_2_TRAIL_MASK) << 16) | list[2];
compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
list+=3;
}
UChar32 composite=compositeAndFwd>>1;
@@ -1242,7 +1240,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
for(;;) {
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
cc=getCCFromYesOrMaybeYes(norm16);
cc=getCCFromYesOrMaybe(norm16);
if( // this character combines backward and
isMaybe(norm16) &&
// we have seen a starter that combines forward and
@@ -1254,15 +1252,15 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
// c is a Jamo V/T, see if we can compose it with the previous character.
if(c<Hangul::JAMO_T_BASE) {
// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
char16_t prev = static_cast<char16_t>(*starter - Hangul::JAMO_L_BASE);
char16_t prev=(char16_t)(*starter-Hangul::JAMO_L_BASE);
if(prev<Hangul::JAMO_L_COUNT) {
pRemove=p-1;
char16_t syllable = static_cast<char16_t>(
Hangul::HANGUL_BASE +
char16_t syllable=(char16_t)
(Hangul::HANGUL_BASE+
(prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
Hangul::JAMO_T_COUNT);
char16_t t;
if (p != limit && (t = static_cast<char16_t>(*p - Hangul::JAMO_T_BASE)) < Hangul::JAMO_T_COUNT) {
if(p!=limit && (t=(char16_t)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
++p;
syllable+=t; // The next character was a Jamo T.
}
@@ -1300,7 +1298,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
starter[0]=U16_LEAD(composite);
starter[1]=U16_TRAIL(composite);
} else {
*starter = static_cast<char16_t>(composite);
*starter=(char16_t)composite;
// The composite is shorter than the starter,
// move the intermediate characters forward one.
starterIsSupplementary=false;
@@ -1325,7 +1323,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
*--starter=U16_LEAD(composite); // undo the temporary increment
} else {
// both are on the BMP
*starter = static_cast<char16_t>(composite);
*starter=(char16_t)composite;
}
/* remove the combining mark by moving the following text over it */
@@ -1392,11 +1390,8 @@ Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
} else if(norm16<minYesNoMappingsOnly) {
// a combines forward.
if(isJamoL(norm16)) {
if (b < Hangul::JAMO_V_BASE) {
return U_SENTINEL;
}
b-=Hangul::JAMO_V_BASE;
if(b<Hangul::JAMO_V_COUNT) {
if(0<=b && b<Hangul::JAMO_V_COUNT) {
return
(Hangul::HANGUL_BASE+
((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
@@ -1405,33 +1400,25 @@ Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
return U_SENTINEL;
}
} else if(isHangulLV(norm16)) {
if (b <= Hangul::JAMO_T_BASE) {
return U_SENTINEL;
}
b-=Hangul::JAMO_T_BASE;
if(b<Hangul::JAMO_T_COUNT) { // not b==0!
if(0<b && b<Hangul::JAMO_T_COUNT) { // not b==0!
return a+b;
} else {
return U_SENTINEL;
}
} else {
// 'a' has a compositions list in extraData
list=getDataForYesOrNo(norm16);
list=getMapping(norm16);
if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list
list+= // mapping pointer
1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length
}
}
} else if(norm16<minMaybeNoCombinesFwd || MIN_NORMAL_MAYBE_YES<=norm16) {
} else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
return U_SENTINEL;
} else {
list=getDataForMaybe(norm16);
if(norm16<minMaybeYes) { // composite 'a' has both mapping & compositions list
list+= // mapping pointer
1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length
}
list=getCompositionsListForMaybe(norm16);
}
if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b
return U_SENTINEL;
@@ -1509,12 +1496,12 @@ Normalizer2Impl::compose(const char16_t *src, const char16_t *limit,
}
// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
// The current character is either a "noNo" (has a mapping)
// or a "maybeYes" / "maybeNo" (combines backward)
// or a "maybeYes" (combines backward)
// or a "yesYes" with ccc!=0.
// It is not a Hangul syllable or Jamo L because those have "yes" properties.
// Medium-fast path: Handle cases that do not require full decomposition and recomposition.
if (norm16 < minMaybeNo) { // minNoNo <= norm16 < minMaybeNo
if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
if (!doCompose) {
return false;
}
@@ -1541,7 +1528,7 @@ Normalizer2Impl::compose(const char16_t *src, const char16_t *limit,
if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
break;
}
const char16_t *mapping = reinterpret_cast<const char16_t *>(getDataForYesOrNo(norm16));
const char16_t *mapping = reinterpret_cast<const char16_t *>(getMapping(norm16));
int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) {
break;
@@ -1569,14 +1556,14 @@ Normalizer2Impl::compose(const char16_t *src, const char16_t *limit,
if(c<Hangul::JAMO_T_BASE) {
// The current character is a Jamo Vowel,
// compose with previous Jamo L and following Jamo T.
char16_t l = static_cast<char16_t>(prev - Hangul::JAMO_L_BASE);
char16_t l = (char16_t)(prev-Hangul::JAMO_L_BASE);
if(l<Hangul::JAMO_L_COUNT) {
if (!doCompose) {
return false;
}
int32_t t;
if (src != limit &&
0 < (t = (static_cast<int32_t>(*src) - Hangul::JAMO_T_BASE)) &&
0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&
t < Hangul::JAMO_T_COUNT) {
// The next character is a Jamo T.
++src;
@@ -1594,7 +1581,7 @@ Normalizer2Impl::compose(const char16_t *src, const char16_t *limit,
if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
break;
}
if (!buffer.appendBMP(static_cast<char16_t>(syllable), 0, errorCode)) {
if(!buffer.appendBMP((char16_t)syllable, 0, errorCode)) {
break;
}
prevBoundary = src;
@@ -1619,7 +1606,7 @@ Normalizer2Impl::compose(const char16_t *src, const char16_t *limit,
if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
break;
}
if (!buffer.appendBMP(static_cast<char16_t>(syllable), 0, errorCode)) {
if(!buffer.appendBMP((char16_t)syllable, 0, errorCode)) {
break;
}
prevBoundary = src;
@@ -1770,7 +1757,7 @@ Normalizer2Impl::composeQuickCheck(const char16_t *src, const char16_t *limit,
}
// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
// The current character is either a "noNo" (has a mapping)
// or a "maybeYes" / "maybeNo" (combines backward)
// or a "maybeYes" (combines backward)
// or a "yesYes" with ccc!=0.
// It is not a Hangul syllable or Jamo L because those have "yes" properties.
@@ -1791,9 +1778,8 @@ Normalizer2Impl::composeQuickCheck(const char16_t *src, const char16_t *limit,
}
}
if (norm16 >= minMaybeNo) {
uint16_t fcd16 = getFCD16FromMaybeOrNonZeroCC(norm16);
uint8_t cc = fcd16 >> 8;
if(isMaybeOrNonZeroCC(norm16)) {
uint8_t cc=getCCFromYesOrMaybe(norm16);
if (onlyContiguous /* FCC */ && cc != 0 &&
getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
// The [prevBoundary..prevSrc[ character
@@ -1814,12 +1800,11 @@ Normalizer2Impl::composeQuickCheck(const char16_t *src, const char16_t *limit,
if (src == limit) {
return src;
}
uint8_t prevCC = fcd16;
uint8_t prevCC = cc;
nextSrc = src;
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
if (norm16 >= minMaybeNo) {
fcd16 = getFCD16FromMaybeOrNonZeroCC(norm16);
cc = fcd16 >> 8;
if (isMaybeOrNonZeroCC(norm16)) {
cc = getCCFromYesOrMaybe(norm16);
if (!(prevCC <= cc || cc == 0)) {
break;
}
@@ -1854,11 +1839,11 @@ void Normalizer2Impl::composeAndAppend(const char16_t *src, const char16_t *limi
if(src!=firstStarterInSrc) {
const char16_t *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
buffer.getLimit(), onlyContiguous);
int32_t destSuffixLength = static_cast<int32_t>(buffer.getLimit() - lastStarterInDest);
int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
UnicodeString middle(lastStarterInDest, destSuffixLength);
buffer.removeSuffix(destSuffixLength);
safeMiddle=middle;
middle.append(src, static_cast<int32_t>(firstStarterInSrc - src));
middle.append(src, (int32_t)(firstStarterInSrc-src));
const char16_t *middleStart=middle.getBuffer();
compose(middleStart, middleStart+middle.length(), onlyContiguous,
true, buffer, errorCode);
@@ -1912,12 +1897,12 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
}
// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
// The current character is either a "noNo" (has a mapping)
// or a "maybeYes" / "maybeNo" (combines backward)
// or a "maybeYes" (combines backward)
// or a "yesYes" with ccc!=0.
// It is not a Hangul syllable or Jamo L because those have "yes" properties.
// Medium-fast path: Handle cases that do not require full decomposition and recomposition.
if (norm16 < minMaybeNo) { // minNoNo <= norm16 < minMaybeNo
if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
if (sink == nullptr) {
return false;
}
@@ -1946,9 +1931,9 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
*sink, options, edits, errorCode)) {
break;
}
const uint16_t *mapping = getDataForYesOrNo(norm16);
const uint16_t *mapping = getMapping(norm16);
int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
if (!ByteSinkUtil::appendChange(prevSrc, src, reinterpret_cast<const char16_t*>(mapping), length,
if (!ByteSinkUtil::appendChange(prevSrc, src, (const char16_t *)mapping, length,
*sink, edits, errorCode)) {
break;
}
@@ -1967,7 +1952,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
break;
}
if (edits != nullptr) {
edits->addReplace(static_cast<int32_t>(src - prevSrc), 0);
edits->addReplace((int32_t)(src - prevSrc), 0);
}
prevBoundary = src;
continue;
@@ -1985,7 +1970,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
// The current character is a Jamo Vowel,
// compose with previous Jamo L and following Jamo T.
UChar32 l = prev - Hangul::JAMO_L_BASE;
if (static_cast<uint32_t>(l) < Hangul::JAMO_L_COUNT) {
if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
if (sink == nullptr) {
return false;
}
@@ -2213,20 +2198,20 @@ uint8_t Normalizer2Impl::getPreviousTrailCC(const char16_t *start, const char16_
if (start == p) {
return 0;
}
int32_t i = static_cast<int32_t>(p - start);
int32_t i = (int32_t)(p - start);
UChar32 c;
U16_PREV(start, 0, i, c);
return static_cast<uint8_t>(getFCD16(c));
return (uint8_t)getFCD16(c);
}
uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const {
if (start == p) {
return 0;
}
int32_t i = static_cast<int32_t>(p - start);
int32_t i = (int32_t)(p - start);
UChar32 c;
U8_PREV(start, 0, i, c);
return static_cast<uint8_t>(getFCD16(c));
return (uint8_t)getFCD16(c);
}
// Note: normalizer2impl.cpp r30982 (2011-nov-27)
@@ -2254,7 +2239,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
return norm16|(norm16<<8);
} else if(norm16>=minMaybeYes) {
return 0;
} else if(norm16<minMaybeNo) { // isDecompNoAlgorithmic(norm16)
} else { // isDecompNoAlgorithmic(norm16)
uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK;
if (deltaTrailCC <= DELTA_TCCC_1) {
return deltaTrailCC >> OFFSET_SHIFT;
@@ -2269,7 +2254,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
return 0;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getData(norm16);
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
norm16=firstUnit>>8; // tccc
if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
@@ -2281,23 +2266,6 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
#pragma optimize( "", on )
#endif
/**
 * Derives a 16-bit FCD value directly from a norm16 value, without looking up a code point.
 * In the returned value the upper byte is the lead combining class and
 * the lower byte is the trail combining class.
 *
 * Precondition (asserted): norm16 >= minMaybeNo.
 *
 * @param norm16 the norm16 value to evaluate
 * @return cc|(cc<<8) when norm16 >= MIN_NORMAL_MAYBE_YES;
 *         0 when minMaybeYes <= norm16 < MIN_NORMAL_MAYBE_YES;
 *         otherwise the upper byte of the first data unit (tccc), with a zero upper byte
 */
uint16_t Normalizer2Impl::getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const {
U_ASSERT(norm16 >= minMaybeNo);
if (norm16 >= MIN_NORMAL_MAYBE_YES) {
// combining mark
// The same cc is used for both the lead (upper) and trail (lower) byte.
norm16 = getCCFromNormalYesOrMaybe(norm16);
return norm16 | (norm16<<8);
} else if (norm16 >= minMaybeYes) {
// Between minMaybeYes and MIN_NORMAL_MAYBE_YES: both bytes are zero.
return 0;
}
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping = getDataForMaybe(norm16);
uint16_t firstUnit = *mapping;
// maybeNo has lccc = 0
// The assertion checks exactly that: when the ccc/lccc word is flagged as present
// (it sits just before the first unit), its upper byte must be zero.
U_ASSERT((firstUnit & MAPPING_HAS_CCC_LCCC_WORD) == 0 || (*(mapping - 1) & 0xff00) == 0);
return firstUnit >> 8; // tccc
}
// Dual functionality:
// buffer!=nullptr: normalize
// buffer==nullptr: isNormalized/quickCheck/spanQuickCheckYes
@@ -2421,7 +2389,7 @@ Normalizer2Impl::makeFCD(const char16_t *src, const char16_t *limit,
* already but is now going to be decomposed.
* prevSrc is set to after what was copied/appended.
*/
buffer->removeSuffix(static_cast<int32_t>(prevSrc - prevBoundary));
buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
/*
* Find the part of the source that needs to be decomposed,
* up to the next safe boundary.
@@ -2452,11 +2420,11 @@ void Normalizer2Impl::makeFCDAndAppend(const char16_t *src, const char16_t *limi
if(src!=firstBoundaryInSrc) {
const char16_t *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
buffer.getLimit());
int32_t destSuffixLength = static_cast<int32_t>(buffer.getLimit() - lastBoundaryInDest);
int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
UnicodeString middle(lastBoundaryInDest, destSuffixLength);
buffer.removeSuffix(destSuffixLength);
safeMiddle=middle;
middle.append(src, static_cast<int32_t>(firstBoundaryInSrc - src));
middle.append(src, (int32_t)(firstBoundaryInSrc-src));
const char16_t *middleStart=middle.getBuffer();
makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
if(U_FAILURE(errorCode)) {
@@ -2533,8 +2501,8 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
if(U_FAILURE(errorCode)) {
return;
}
UChar32 firstOrigin = static_cast<UChar32>(canonValue & CANON_VALUE_MASK);
canonValue = (canonValue & ~CANON_VALUE_MASK) | CANON_HAS_SET | static_cast<uint32_t>(canonStartSets.size());
UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
canonStartSets.adoptElement(lpSet.orphan(), errorCode);
if (U_FAILURE(errorCode)) {
@@ -2544,7 +2512,7 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
set->add(firstOrigin);
}
} else {
set = static_cast<UnicodeSet*>(canonStartSets[static_cast<int32_t>(canonValue & CANON_VALUE_MASK)]);
set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
}
set->add(origin);
}
@@ -2601,11 +2569,9 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
CanonIterData &newData,
UErrorCode &errorCode) const {
if(isInert(norm16) ||
(minYesNo<=norm16 && norm16<minNoNo) ||
(minMaybeNo<=norm16 && norm16<minMaybeYes)) {
if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
// Inert, or 2-way mapping (including Hangul syllable).
// We do not write a canonStartSet for any yesNo/maybeNo character.
// We do not write a canonStartSet for any yesNo character.
// Composites from 2-way mappings are added at runtime from the
// starter's compositions list, and the other characters in
// 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
@@ -2615,7 +2581,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
for(UChar32 c=start; c<=end; ++c) {
uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
uint32_t newValue=oldValue;
if(isMaybeYesOrNonZeroCC(norm16)) {
if(isMaybeOrNonZeroCC(norm16)) {
// not a segment starter if it occurs in a decomposition or has cc!=0
newValue|=CANON_NOT_SEGMENT_STARTER;
if(norm16<MIN_NORMAL_MAYBE_YES) {
@@ -2637,7 +2603,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
}
if (norm16_2 > minYesNo) {
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getDataForYesOrNo(norm16_2);
const uint16_t *mapping=getMapping(norm16_2);
uint16_t firstUnit=*mapping;
int32_t length=firstUnit&MAPPING_LENGTH_MASK;
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
@@ -2685,11 +2651,11 @@ UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
}
int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
return static_cast<int32_t>(ucptrie_get(fCanonIterData->trie, c));
return (int32_t)ucptrie_get(fCanonIterData->trie, c);
}
const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
return *static_cast<const UnicodeSet*>(fCanonIterData->canonStartSets[n]);
return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];
}
UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
@@ -2712,7 +2678,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
uint16_t norm16=getRawNorm16(c);
if(norm16==JAMO_L) {
UChar32 syllable=
static_cast<UChar32>(Hangul::HANGUL_BASE + (c - Hangul::JAMO_L_BASE) * Hangul::JAMO_VT_COUNT);
(UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
} else {
addComposites(getCompositionsList(norm16), set);
@@ -2756,7 +2722,7 @@ unorm2_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
(1<=formatVersion0 && formatVersion0<=5)
(1<=formatVersion0 && formatVersion0<=4)
)) {
udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
@@ -2775,10 +2741,8 @@ unorm2_swap(const UDataSwapper *ds,
minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
} else if(formatVersion0==2) {
minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
} else if(formatVersion0<=4) {
minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
} else {
minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_NO_COMBINES_FWD+1;
minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
}
if(length>=0) {

View File

@@ -81,10 +81,10 @@ public:
return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
static inline UBool isJamoL(UChar32 c) {
return static_cast<uint32_t>(c - JAMO_L_BASE) < JAMO_L_COUNT;
return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
}
static inline UBool isJamoV(UChar32 c) {
return static_cast<uint32_t>(c - JAMO_V_BASE) < JAMO_V_COUNT;
return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
}
static inline UBool isJamoT(UChar32 c) {
int32_t t=c-JAMO_T_BASE;
@@ -103,12 +103,12 @@ public:
c-=HANGUL_BASE;
UChar32 c2=c%JAMO_T_COUNT;
c/=JAMO_T_COUNT;
buffer[0] = static_cast<char16_t>(JAMO_L_BASE + c / JAMO_V_COUNT);
buffer[1] = static_cast<char16_t>(JAMO_V_BASE + c % JAMO_V_COUNT);
buffer[0]=(char16_t)(JAMO_L_BASE+c/JAMO_V_COUNT);
buffer[1]=(char16_t)(JAMO_V_BASE+c%JAMO_V_COUNT);
if(c2==0) {
return 2;
} else {
buffer[2] = static_cast<char16_t>(JAMO_T_BASE + c2);
buffer[2]=(char16_t)(JAMO_T_BASE+c2);
return 3;
}
}
@@ -123,11 +123,11 @@ public:
UChar32 c2=c%JAMO_T_COUNT;
if(c2==0) {
c/=JAMO_T_COUNT;
buffer[0] = static_cast<char16_t>(JAMO_L_BASE + c / JAMO_V_COUNT);
buffer[1] = static_cast<char16_t>(JAMO_V_BASE + c % JAMO_V_COUNT);
buffer[0]=(char16_t)(JAMO_L_BASE+c/JAMO_V_COUNT);
buffer[1]=(char16_t)(JAMO_V_BASE+c%JAMO_V_COUNT);
} else {
buffer[0] = static_cast<char16_t>(orig - c2); // LV syllable
buffer[1] = static_cast<char16_t>(JAMO_T_BASE + c2);
buffer[0]=(char16_t)(orig-c2); // LV syllable
buffer[1]=(char16_t)(JAMO_T_BASE+c2);
}
}
private:
@@ -141,19 +141,19 @@ public:
/** Constructs only; init() should be called. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
impl(ni), str(dest),
start(nullptr), reorderStart(nullptr), limit(nullptr),
start(NULL), reorderStart(NULL), limit(NULL),
remainingCapacity(0), lastCC(0) {}
/** Constructs, removes the string contents, and initializes for a small initial capacity. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
~ReorderingBuffer() {
if (start != nullptr) {
str.releaseBuffer(static_cast<int32_t>(limit - start));
if(start!=NULL) {
str.releaseBuffer((int32_t)(limit-start));
}
}
UBool init(int32_t destCapacity, UErrorCode &errorCode);
/** Returns true when the start and limit pointers coincide, that is, when length() is 0. */
UBool isEmpty() const { return start==limit; }
int32_t length() const { return static_cast<int32_t>(limit - start); }
int32_t length() const { return (int32_t)(limit-start); }
char16_t *getStart() { return start; }
char16_t *getLimit() { return limit; }
uint8_t getLastCC() const { return lastCC; }
@@ -163,7 +163,7 @@ public:
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ?
appendBMP(static_cast<char16_t>(c), cc, errorCode) :
appendBMP((char16_t)c, cc, errorCode) :
appendSupplementary(c, cc, errorCode);
}
UBool append(const char16_t *s, int32_t length, UBool isNFD,
@@ -190,12 +190,12 @@ public:
void remove();
void removeSuffix(int32_t suffixLength);
void setReorderingLimit(char16_t *newLimit) {
remainingCapacity += static_cast<int32_t>(limit - newLimit);
remainingCapacity+=(int32_t)(limit-newLimit);
reorderStart=limit=newLimit;
lastCC=0;
}
void copyReorderableSuffixTo(UnicodeString &s) const {
s.setTo(ConstChar16Ptr(reorderStart), static_cast<int32_t>(limit - reorderStart));
s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart));
}
private:
/*
@@ -215,7 +215,7 @@ private:
void insert(UChar32 c, uint8_t cc);
static void writeCodePoint(char16_t *p, UChar32 c) {
if(c<=0xffff) {
*p = static_cast<char16_t>(c);
*p=(char16_t)c;
} else {
p[0]=U16_LEAD(c);
p[1]=U16_TRAIL(c);
@@ -241,11 +241,11 @@ private:
* Low-level implementation of the Unicode Normalization Algorithm.
* For the data structure and details see the documentation at the end of
* this normalizer2impl.h and in the design doc at
* https://unicode-org.github.io/icu/design/normalization/custom.html
* https://icu.unicode.org/design/normalization/custom
*/
class U_COMMON_API Normalizer2Impl : public UObject {
public:
Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { }
virtual ~Normalizer2Impl();
void init(const int32_t *inIndexes, const UCPTrie *inTrie,
@@ -271,14 +271,14 @@ public:
UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
return UNORM_YES;
} else if(minMaybeNo<=norm16) {
} else if(minMaybeYes<=norm16) {
return UNORM_MAYBE;
} else {
return UNORM_NO;
}
}
UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeNo; }
UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeNo; }
UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }
UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
/** Returns true when norm16 is below minYesNo or at/above minMaybeYes. */
UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
uint8_t getCC(uint16_t norm16) const {
@@ -291,14 +291,14 @@ public:
return getCCFromNoNo(norm16);
}
static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
return static_cast<uint8_t>(norm16 >> OFFSET_SHIFT);
return (uint8_t)(norm16 >> OFFSET_SHIFT);
}
static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) {
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const {
uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
if (c < minCompNoMaybeCP) { return 0; }
return getCCFromYesOrMaybeYes(getNorm16(c));
return getCCFromYesOrMaybe(getNorm16(c));
}
/**
@@ -364,13 +364,11 @@ public:
// 0<=lead<=0xffff
uint8_t bits=smallFCD[lead>>8];
if(bits==0) { return false; }
return (bits >> ((lead >> 5) & 7)) & 1;
return (UBool)((bits>>((lead>>5)&7))&1);
}
/** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const;
uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const;
/**
* Gets the decomposition for one code point.
* @param c code point
@@ -452,13 +450,7 @@ public:
IX_MIN_LCCC_CP,
IX_RESERVED19,
/** Two-way mappings; each starts with a character that combines backward. */
IX_MIN_MAYBE_NO, // 20
/** Two-way mappings & compositions. */
IX_MIN_MAYBE_NO_COMBINES_FWD,
IX_COUNT // 22
IX_COUNT
};
enum {
@@ -549,8 +541,7 @@ public:
uint16_t norm16=getNorm16(c);
return isCompYesAndZeroCC(norm16) &&
(norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isInert(norm16) || *getDataForYesOrNo(norm16) <= 0x1ff);
// The last check fetches the mapping's first unit and checks tccc<=1.
(!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
}
UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
@@ -560,8 +551,8 @@ private:
friend class InitCanonIterData;
friend class LcccContext;
UBool isMaybe(uint16_t norm16) const { return minMaybeNo<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeYesOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
static UBool isInert(uint16_t norm16) { return norm16==INERT; }
static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
@@ -575,7 +566,7 @@ private:
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
// }
// UBool isCompYesOrMaybe(uint16_t norm16) const {
// return norm16<minNoNo || minMaybeNo<=norm16;
// return norm16<minNoNo || minMaybeYes<=norm16;
// }
// UBool hasZeroCCFromDecompYes(uint16_t norm16) const {
// return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
@@ -588,12 +579,12 @@ private:
/**
* A little faster and simpler than isDecompYesAndZeroCC() but does not include
* the MaybeYes which combine-forward and have ccc=0.
* (Standard Unicode 10 normalization does not have such characters.)
*/
UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
}
/** Since formatVersion 5: same as isAlgorithmicNoNo() */
UBool isDecompNoAlgorithmic(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeNo; }
UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; }
// For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
@@ -601,9 +592,9 @@ private:
// return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
// }
uint8_t getCCFromNoNo(uint16_t norm16) const {
const uint16_t *mapping=getDataForYesOrNo(norm16);
const uint16_t *mapping=getMapping(norm16);
if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) {
return static_cast<uint8_t>(*(mapping - 1));
return (uint8_t)*(mapping-1);
} else {
return 0;
}
@@ -614,7 +605,7 @@ private:
return 0; // yesYes and Hangul LV have ccc=tccc=0
} else {
// For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
return static_cast<uint8_t>(*getDataForYesOrNo(norm16) >> 8); // tccc from yesNo
return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo
}
}
uint8_t getPreviousTrailCC(const char16_t *start, const char16_t *p) const;
@@ -628,33 +619,28 @@ private:
return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
}
const uint16_t *getDataForYesOrNo(uint16_t norm16) const {
return extraData+(norm16>>OFFSET_SHIFT);
}
const uint16_t *getDataForMaybe(uint16_t norm16) const {
return extraData+((norm16-minMaybeNo+limitNoNo)>>OFFSET_SHIFT);
}
const uint16_t *getData(uint16_t norm16) const {
if(norm16>=minMaybeNo) {
norm16=norm16-minMaybeNo+limitNoNo;
}
return extraData+(norm16>>OFFSET_SHIFT);
}
// Requires minYesNo<norm16<limitNoNo.
const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
return nullptr;
return NULL;
} else if(norm16<minMaybeYes) {
return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list
} else {
// if yesYes: if Jamo L: harmless empty list
return getData(norm16);
return maybeYesCompositions+norm16-minMaybeYes;
}
}
const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
// A composite has both mapping & compositions list.
const uint16_t *list=getData(norm16);
const uint16_t *list=getMapping(norm16);
return list+ // mapping pointer
1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length
}
const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
// minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
}
/**
* @param c code point must have compositions
* @return compositions list pointer
@@ -706,13 +692,11 @@ private:
/** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
(norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getDataForYesOrNo(norm16) <= 0x1ff);
(norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff);
}
const char16_t *findPreviousCompBoundary(const char16_t *start, const char16_t *p,
UBool onlyContiguous) const;
const char16_t *findNextCompBoundary(const char16_t *p, const char16_t *limit,
UBool onlyContiguous) const;
const char16_t *findPreviousCompBoundary(const char16_t *start, const char16_t *p, UBool onlyContiguous) const;
const char16_t *findNextCompBoundary(const char16_t *p, const char16_t *limit, UBool onlyContiguous) const;
const char16_t *findPreviousFCDBoundary(const char16_t *start, const char16_t *p) const;
const char16_t *findNextFCDBoundary(const char16_t *p, const char16_t *limit) const;
@@ -739,12 +723,11 @@ private:
uint16_t minNoNoEmpty;
uint16_t limitNoNo;
uint16_t centerNoNoDelta;
uint16_t minMaybeNo;
uint16_t minMaybeNoCombinesFwd;
uint16_t minMaybeYes;
const UCPTrie *normTrie;
const uint16_t *extraData; // mappings and/or compositions
const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
UInitOnce fCanonIterDataInitOnce {};
@@ -802,12 +785,11 @@ unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.
* Format version 5.0.
* Format version 4.0.
*
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm),
* NFKC_Casefold (nfkc_cf.nrm) and NFKC_Simple_Casefold (nfkc_scf.nrm).
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
* Custom (application-specific) data can be built into additional .nrm files
* with the gennorm2 build tool.
* ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
@@ -824,7 +806,7 @@ unorm_getFCD16(UChar32 c);
* Constants are defined as enum values of the Normalizer2Impl class.
*
* Many details of the data structures are described in the design doc
* which is at https://unicode-org.github.io/icu/design/normalization/custom.html
* which is at https://icu.unicode.org/design/normalization/custom
*
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
*
@@ -846,9 +828,7 @@ unorm_getFCD16(UChar32 c);
*
* The next eight indexes are thresholds of 16-bit trie values for ranges of
* values indicating multiple normalization properties.
* Format version 5 adds the two minMaybeNo* threshold indexes.
* The thresholds are listed here in threshold order,
* not in the order they are stored in the indexes.
* They are listed here in threshold order, not in the order they are stored in the indexes.
* minYesNo=indexes[IX_MIN_YES_NO];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* minNoNo=indexes[IX_MIN_NO_NO];
@@ -856,8 +836,6 @@ unorm_getFCD16(UChar32 c);
* minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
* minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
* limitNoNo=indexes[IX_LIMIT_NO_NO];
* minMaybeNo=indexes[IX_MIN_MAYBE_NO];
* minMaybeNoCombinesFwd=indexes[IX_MIN_MAYBE_NO_COMBINES_FWD];
* minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* See the normTrie description below and the design doc for details.
*
@@ -866,14 +844,13 @@ unorm_getFCD16(UChar32 c);
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits),
* information is extracted primarily via range checks.
* Except, format version 3+ uses bit 0 for hasCompBoundaryAfter().
* Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
* For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
* means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
* which means it has a two-way (round-trip) decomposition mapping.
* Values in the ranges 2<=norm16<limitNoNo and minMaybeNo<=norm16<minMaybeYes
* are also directly indexes into the extraData
* Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
* pointing to mappings, compositions lists, or both.
* Value norm16==INERT (0 in versions 1 & 2, 1 in version 3+)
* Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
* means that the character is normalization-inert, that is,
* it does not have a mapping, does not participate in composition, has a zero
* canonical combining class, and forms a boundary where text before it and after it
@@ -892,38 +869,33 @@ unorm_getFCD16(UChar32 c);
* When the lead surrogate unit's value exceeds the quick check minimum during processing,
* the properties for the full supplementary code point need to be looked up.
*
* uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes];
* uint16_t extraData[];
*
* The extraData array contains many per-character data sections.
* Each section contains mappings and/or composition lists.
* The norm16 value of each character that has such data is directly an index to
* a section of the extraData array.
* There is only one byte offset for the end of these two arrays.
* The split between them is given by the constant and variable mentioned above.
* In version 3, the difference must be shifted right by OFFSET_SHIFT.
*
* In version 3+, the norm16 values must be shifted right by OFFSET_SHIFT
* The maybeYesCompositions array contains compositions lists for characters that
* combine both forward (as starters in composition pairs)
* and backward (as trailing characters in composition pairs).
* Such characters do not occur in Unicode 5.2 but are allowed by
* the Unicode Normalization algorithms.
* If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES
* and the maybeYesCompositions array is empty.
* If there are such characters, then minMaybeYes is subtracted from their norm16 values
* to get the index into this array.
*
* The extraData array contains compositions lists for "YesYes" characters,
* followed by mappings and optional compositions lists for "YesNo" characters,
* followed by only mappings for "NoNo" characters.
* (Referring to pairs of NFC/NFD quick check values.)
* The norm16 values of those characters are directly indexes into the extraData array.
* In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
* for accessing extraData.
*
* The data structures for compositions lists and mappings are described in the design doc.
*
* In version 4 and below, the composition lists for MaybeYes characters were stored before
* the data for other characters.
* This sub-array had a length of MIN_NORMAL_MAYBE_YES-minMaybeYes.
* In version 3 & 4, the difference must be shifted right by OFFSET_SHIFT.
*
* In version 5, the data for MaybeNo and MaybeYes characters is stored after
* the data for other characters.
*
* If there are no MaybeNo and no MaybeYes characters,
* then minMaybeYes==minMaybeNo==MIN_NORMAL_MAYBE_YES.
* If there are such characters, then minMaybeNo is subtracted from their norm16 values
* to get the index into the extraData.
* In version 4 and below, the data index for Yes* and No* characters needs to be
* offset by the length of the MaybeYes data.
* In version 5, the data index for Maybe* characters needs to be offset by limitNoNo.
*
* Version 5 is the first to support MaybeNo characters, and
* adds the minMaybeNo and minMaybeNoCombinesFwd thresholds and
* the corresponding sections of the extraData.
*
* uint8_t smallFCD[0x100]; -- new in format version 2
*
* This is a bit set to help speed up FCD value lookups in the absence of a full
@@ -963,7 +935,7 @@ unorm_getFCD16(UChar32 c);
* to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
* See DELTA_TCCC_MASK etc.
* This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
* minMaybeNo is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
* minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
*
* - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
* and ASCII characters are mapped algorithmically only to other ASCII characters.
@@ -1009,23 +981,6 @@ unorm_getFCD16(UChar32 c);
* gennorm2 now has to reject mappings for surrogate code points.
* UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
* custom normalization data file.
*
* Changes from format version 4 to format version 5 (ICU 76) ------------------
*
* Unicode 16 adds the first MaybeYes characters which combine both backward and forward,
* taking this formerly theoretical data structure into reality.
*
* Unicode 16 also adds the first characters that have two-way mappings whose first characters
* combine backward. In order for normalization and the quick check to work properly,
* these composite characters also must be marked as NFC_QC=Maybe,
* corresponding to "combines back", although the composites themselves do not combine backward.
* Format version 5 adds two new ranges between "algorithmic NoNo" and MaybeYes,
* with thresholds minMaybeNo and minMaybeNoCombinesFwd,
* and indexes[IX_MIN_MAYBE_NO] and indexes[IX_MIN_MAYBE_NO_COMBINES_FWD],
* and corresponding mappings and composition lists in the extraData.
*
* Format version 5 moves the data for Maybe* characters from the start of the extraData array
* to its end.
*/
#endif /* !UCONFIG_NO_NORMALIZATION */

View File

@@ -120,12 +120,12 @@ PatternProps::isSyntax(UChar32 c) {
if(c<0) {
return false;
} else if(c<=0xff) {
return (latin1[c] >> 1) & 1;
return (UBool)(latin1[c]>>1)&1;
} else if(c<0x2010) {
return false;
} else if(c<=0x3030) {
uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]];
return (bits >> (c & 0x1f)) & 1;
return (UBool)((bits>>(c&0x1f))&1);
} else if(0xfd3e<=c && c<=0xfe46) {
return c<=0xfd3f || 0xfe45<=c;
} else {
@@ -138,12 +138,12 @@ PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
if(c<0) {
return false;
} else if(c<=0xff) {
return latin1[c] & 1;
return (UBool)(latin1[c]&1);
} else if(c<0x200e) {
return false;
} else if(c<=0x3030) {
uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
return (bits >> (c & 0x1f)) & 1;
return (UBool)((bits>>(c&0x1f))&1);
} else if(0xfd3e<=c && c<=0xfe46) {
return c<=0xfd3f || 0xfe45<=c;
} else {
@@ -156,7 +156,7 @@ PatternProps::isWhiteSpace(UChar32 c) {
if(c<0) {
return false;
} else if(c<=0xff) {
return (latin1[c] >> 2) & 1;
return (UBool)(latin1[c]>>2)&1;
} else if(0x200e<=c && c<=0x2029) {
return c<=0x200f || 0x2028<=c;
} else {

View File

@@ -43,7 +43,7 @@ getASCIIPropertyNameChar(const char *name) {
) {}
if(c!=0) {
return (i << 8) | static_cast<uint8_t>(uprv_asciitolower(c));
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
} else {
return i<<8;
}
@@ -66,7 +66,7 @@ getEBCDICPropertyNameChar(const char *name) {
) {}
if(c!=0) {
return (i << 8) | static_cast<uint8_t>(uprv_ebcdictolower(c));
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
} else {
return i<<8;
}
@@ -231,7 +231,7 @@ UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
if(!USTRINGTRIE_HAS_NEXT(result)) {
return false;
}
result = trie.next(static_cast<uint8_t>(c));
result=trie.next((uint8_t)c);
}
return USTRINGTRIE_HAS_VALUE(result);
}

File diff suppressed because it is too large Load Diff

View File

@@ -102,29 +102,29 @@ _findRow(UPropsVectors *pv, UChar32 rangeStart) {
/* check the vicinity of the last-seen row (start searching with an unrolled loop) */
row=pv->v+prevRow*columns;
if (rangeStart >= static_cast<UChar32>(row[0])) {
if (rangeStart < static_cast<UChar32>(row[1])) {
if(rangeStart>=(UChar32)row[0]) {
if(rangeStart<(UChar32)row[1]) {
/* same row as last seen */
return row;
} else if (rangeStart < static_cast<UChar32>((row += columns)[1])) {
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* next row after the last one */
pv->prevRow=prevRow+1;
return row;
} else if (rangeStart < static_cast<UChar32>((row += columns)[1])) {
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* second row after the last one */
pv->prevRow=prevRow+2;
return row;
} else if ((rangeStart - static_cast<UChar32>(row[1])) < 10) {
} else if((rangeStart-(UChar32)row[1])<10) {
/* we are close, continue looping */
prevRow+=2;
do {
++prevRow;
row+=columns;
} while (rangeStart >= static_cast<UChar32>(row[1]));
} while(rangeStart>=(UChar32)row[1]);
pv->prevRow=prevRow;
return row;
}
} else if (rangeStart < static_cast<UChar32>(pv->v[1])) {
} else if(rangeStart<(UChar32)pv->v[1]) {
/* the very first row */
pv->prevRow=0;
return pv->v;
@@ -135,9 +135,9 @@ _findRow(UPropsVectors *pv, UChar32 rangeStart) {
while(start<limit-1) {
i=(start+limit)/2;
row=pv->v+i*columns;
if (rangeStart < static_cast<UChar32>(row[0])) {
if(rangeStart<(UChar32)row[0]) {
limit=i;
} else if (rangeStart < static_cast<UChar32>(row[1])) {
} else if(rangeStart<(UChar32)row[1]) {
pv->prevRow=i;
return row;
} else {
@@ -194,8 +194,8 @@ upvec_setValue(UPropsVectors *pv,
* input range (only possible for the first and last rows)
* and if their value differs from the input value.
*/
splitFirstRow = start != static_cast<UChar32>(firstRow[0]) && value != (firstRow[column] & mask);
splitLastRow = limit != static_cast<UChar32>(lastRow[1]) && value != (lastRow[column] & mask);
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
/* split first/last rows if necessary */
if(splitFirstRow || splitLastRow) {
@@ -312,8 +312,8 @@ upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
static int32_t U_CALLCONV
upvec_compareRows(const void *context, const void *l, const void *r) {
const uint32_t* left = static_cast<const uint32_t*>(l), *right = static_cast<const uint32_t*>(r);
const UPropsVectors* pv = static_cast<const UPropsVectors*>(context);
const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
const UPropsVectors *pv=(const UPropsVectors *)context;
int32_t i, count, columns;
count=columns=pv->columns; /* includes start/limit columns */

View File

@@ -97,12 +97,12 @@ digitToBasic(int32_t digit, UBool uppercase) {
/* 26..35 map to ASCII 0..9 */
if(digit<26) {
if(uppercase) {
return static_cast<char>(_CAPITAL_A + digit);
return (char)(_CAPITAL_A+digit);
} else {
return static_cast<char>(_SMALL_A + digit);
return (char)(_SMALL_A+digit);
}
} else {
return static_cast<char>((_ZERO_ - 26) + digit);
return (char)((_ZERO_-26)+digit);
}
}
@@ -353,10 +353,10 @@ u_strToPunycode(const char16_t *src, int32_t srcLength,
}
if(destLength<destCapacity) {
dest[destLength] = digitToBasic(q, cpBuffer[j] < 0);
dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
}
++destLength;
bias = adaptBias(delta, handledCPCount + 1, handledCPCount == basicLength);
bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
delta=0;
++handledCPCount;
}
@@ -421,7 +421,7 @@ u_strFromPunycode(const char16_t *src, int32_t srcLength,
}
if(j<destCapacity) {
dest[j] = b;
dest[j]=(char16_t)b;
if(caseFlags!=nullptr) {
caseFlags[j]=IS_BASIC_UPPERCASE(b);
@@ -500,7 +500,7 @@ u_strFromPunycode(const char16_t *src, int32_t srcLength,
* where needed instead of in for() loop tail.
*/
++destCPCount;
bias = adaptBias(i - oldi, destCPCount, oldi == 0);
bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));
/*
* i was supposed to wrap around from (incremented) destCPCount to 0,

View File

@@ -46,6 +46,11 @@
// First, the platform type. Need this for U_PLATFORM.
#include "unicode/platform.h"
#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
/* tzset isn't defined in strict ANSI on MinGW. */
#undef __STRICT_ANSI__
#endif
/*
* Cygwin with GCC requires inclusion of time.h after the above statement disabling strict ANSI mode.
*/
@@ -175,8 +180,8 @@ typedef union {
int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
double d64;
} BitPatternConversion;
static const BitPatternConversion gNan = {static_cast<int64_t>(INT64_C(0x7FF8000000000000))};
static const BitPatternConversion gInf = {static_cast<int64_t>(INT64_C(0x7FF0000000000000))};
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
/*---------------------------------------------------------------------------
Platform utilities
@@ -225,7 +230,7 @@ u_signBit(double d) {
#if U_IS_BIG_ENDIAN
hiByte = *(uint8_t *)&d;
#else
hiByte = *(reinterpret_cast<uint8_t*>(&d) + sizeof(double) - 1);
hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
#endif
return (hiByte & 0x80) != 0;
}
@@ -342,7 +347,7 @@ uprv_isNaN(double number)
BitPatternConversion convertedNumber;
convertedNumber.d64 = number;
/* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
return (convertedNumber.i64 & U_INT64_MAX) > gInf.i64;
return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
#elif U_PLATFORM == U_PF_OS390
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
@@ -368,7 +373,7 @@ uprv_isInfinite(double number)
BitPatternConversion convertedNumber;
convertedNumber.d64 = number;
/* Infinity is exactly 0x7FF0000000000000U. */
return (convertedNumber.i64 & U_INT64_MAX) == gInf.i64;
return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
#elif U_PLATFORM == U_PF_OS390
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
@@ -389,7 +394,7 @@ U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
return number > 0 && uprv_isInfinite(number);
return (UBool)(number > 0 && uprv_isInfinite(number));
#else
return uprv_isInfinite(number);
#endif
@@ -399,7 +404,7 @@ U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
return number < 0 && uprv_isInfinite(number);
return (UBool)(number < 0 && uprv_isInfinite(number));
#else
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
@@ -744,11 +749,11 @@ static UBool isValidOlsonID(const char *id) {
The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
"GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
The rest of the time it could be an Olson ID. George */
return id[idx] == 0
return (UBool)(id[idx] == 0
|| uprv_strcmp(id, "PST8PDT") == 0
|| uprv_strcmp(id, "MST7MDT") == 0
|| uprv_strcmp(id, "CST6CDT") == 0
|| uprv_strcmp(id, "EST5EDT") == 0;
|| uprv_strcmp(id, "EST5EDT") == 0);
}
/* On some Unix-like OS, 'posix' subdirectory in
@@ -927,7 +932,7 @@ static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFil
*/
if (tzInfo->defaultTZBuffer == nullptr) {
rewind(tzInfo->defaultTZFilePtr);
tzInfo->defaultTZBuffer = static_cast<char*>(uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize));
tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
}
rewind(file);
@@ -1493,6 +1498,7 @@ static void U_CALLCONV dataDirectoryInitFn() {
}
u_setDataDirectory(path);
return;
}
U_CAPI const char * U_EXPORT2
@@ -1616,7 +1622,7 @@ static const char *uprv_getPOSIXIDForCategory(int category)
* of nullptr, will modify the libc behavior.
*/
posixID = setlocale(category, nullptr);
if ((posixID == nullptr)
if ((posixID == 0)
|| (uprv_strcmp("C", posixID) == 0)
|| (uprv_strcmp("POSIX", posixID) == 0))
{
@@ -1630,16 +1636,16 @@ static const char *uprv_getPOSIXIDForCategory(int category)
posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
if ((posixID == 0) || (posixID[0] == '\0')) {
#else
if (posixID == nullptr) {
if (posixID == 0) {
posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
if (posixID == nullptr) {
if (posixID == 0) {
#endif
posixID = getenv("LANG");
}
}
}
}
if ((posixID == nullptr)
if ((posixID==0)
|| (uprv_strcmp("C", posixID) == 0)
|| (uprv_strcmp("POSIX", posixID) == 0))
{
@@ -1659,7 +1665,7 @@ static const char *uprv_getPOSIXIDForCategory(int category)
static const char *uprv_getPOSIXIDForDefaultLocale()
{
static const char* posixID = nullptr;
if (posixID == nullptr) {
if (posixID == 0) {
posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
}
return posixID;

View File

@@ -90,8 +90,6 @@ typedef size_t uintptr_t;
# define U_NL_LANGINFO_CODESET -1
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#elif U_PLATFORM == U_PF_HAIKU
/* not defined */
#else
# define U_NL_LANGINFO_CODESET CODESET
#endif
@@ -105,8 +103,6 @@ typedef size_t uintptr_t;
#endif
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#elif U_PLATFORM == U_PF_HAIKU
/* not defined */
#elif defined(__wasi__)
/* not defined */
#else
@@ -149,8 +145,6 @@ typedef size_t uintptr_t;
#endif
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#elif U_PLATFORM == U_PF_HAIKU
/* not defined, (well it is but a loop back to icu) */
#elif defined(__wasi__)
/* not defined */
#else
@@ -565,7 +559,7 @@ inline int32_t pinCapacity(T *dest, int32_t capacity) {
if (maxInt < destInt) {
// Less than 2GB to the end of the address space.
// Pin to that to prevent address overflow.
maxInt = static_cast<uintptr_t>(-1);
maxInt = (uintptr_t)-1;
}
# endif

View File

@@ -110,7 +110,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
const RBBIDataHeader* data = reinterpret_cast<const RBBIDataHeader*>(compiledRules);
const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules;
if (data->fLength > ruleLength) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
@@ -553,7 +553,7 @@ int32_t RuleBasedBreakIterator::first() {
* @return The text's past-the-end offset.
*/
int32_t RuleBasedBreakIterator::last() {
int32_t endPos = static_cast<int32_t>(utext_nativeLength(&fText));
int32_t endPos = (int32_t)utext_nativeLength(&fText);
UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
(void)endShouldBeBoundary;
U_ASSERT(endShouldBeBoundary);
@@ -625,7 +625,7 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
// Move requested offset to a code point start. It might be on a trail surrogate,
// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
utext_setNativeIndex(&fText, startPos);
startPos = static_cast<int32_t>(utext_getNativeIndex(&fText));
startPos = (int32_t)utext_getNativeIndex(&fText);
UErrorCode status = U_ZERO_ERROR;
fBreakCache->following(startPos, status);
@@ -881,7 +881,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
if (accepting == ACCEPTING_UNCONDITIONAL) {
// Match found, common case.
if (mode != RBBI_START) {
result = static_cast<int32_t>(UTEXT_GETNATIVEINDEX(&fText));
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
fRuleStatusIndex = row->fTagsIdx; // Remember the break status (tag) values.
} else if (accepting > ACCEPTING_UNCONDITIONAL) {
@@ -905,7 +905,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
U_ASSERT(rule == 0 || rule > ACCEPTING_UNCONDITIONAL);
U_ASSERT(rule == 0 || rule < fData->fForwardTable->fLookAheadResultsSize);
if (rule > ACCEPTING_UNCONDITIONAL) {
int32_t pos = static_cast<int32_t>(UTEXT_GETNATIVEINDEX(&fText));
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
fLookAheadMatches[rule] = pos;
}
@@ -937,7 +937,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
if (result == initialPosition) {
utext_setNativeIndex(&fText, initialPosition);
utext_next32(&fText);
result = static_cast<int32_t>(utext_getNativeIndex(&fText));
result = (int32_t)utext_getNativeIndex(&fText);
fRuleStatusIndex = 0;
}
@@ -1027,7 +1027,7 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
}
// The state machine is done. Check whether it found a match...
result = static_cast<int32_t>(UTEXT_GETNATIVEINDEX(&fText));
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPrintf("result = %d\n\n", result);
@@ -1091,7 +1091,7 @@ const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
length = 0;
if (fData != nullptr) {
retPtr = reinterpret_cast<const uint8_t*>(fData->fHeader);
retPtr = (const uint8_t *)fData->fHeader;
length = fData->fHeader->fLength;
}
return retPtr;
@@ -1125,7 +1125,6 @@ static icu::UStack *gLanguageBreakFactories = nullptr;
static const icu::UnicodeString *gEmptyString = nullptr;
static icu::UInitOnce gLanguageBreakFactoriesInitOnce {};
static icu::UInitOnce gRBBIInitOnce {};
static icu::ICULanguageBreakFactory *gICULanguageBreakFactory = nullptr;
/**
* Release all static memory held by breakiterator.
@@ -1154,41 +1153,37 @@ static void U_CALLCONV rbbiInit() {
ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
}
static void U_CALLCONV initLanguageFactories(UErrorCode& status) {
static void U_CALLCONV initLanguageFactories() {
UErrorCode status = U_ZERO_ERROR;
U_ASSERT(gLanguageBreakFactories == nullptr);
gLanguageBreakFactories = new UStack(_deleteFactory, nullptr, status);
if (gLanguageBreakFactories != nullptr && U_SUCCESS(status)) {
LocalPointer<ICULanguageBreakFactory> factory(new ICULanguageBreakFactory(status), status);
if (U_SUCCESS(status)) {
gICULanguageBreakFactory = factory.orphan();
gLanguageBreakFactories->push(gICULanguageBreakFactory, status);
ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
gLanguageBreakFactories->push(builtIn, status);
#ifdef U_LOCAL_SERVICE_HOOK
LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
if (extra != nullptr) {
gLanguageBreakFactories->push(extra, status);
}
#endif
LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
if (extra != nullptr) {
gLanguageBreakFactories->push(extra, status);
}
#endif
}
ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
}
void ensureLanguageFactories(UErrorCode& status) {
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories, status);
}
static const LanguageBreakEngine*
getLanguageBreakEngineFromFactory(UChar32 c, const char* locale)
getLanguageBreakEngineFromFactory(UChar32 c)
{
UErrorCode status = U_ZERO_ERROR;
ensureLanguageFactories(status);
if (U_FAILURE(status)) return nullptr;
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
if (gLanguageBreakFactories == nullptr) {
return nullptr;
}
int32_t i = gLanguageBreakFactories->size();
const LanguageBreakEngine *lbe = nullptr;
while (--i >= 0) {
LanguageBreakFactory* factory = static_cast<LanguageBreakFactory*>(gLanguageBreakFactories->elementAt(i));
lbe = factory->getEngineFor(c, locale);
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
lbe = factory->getEngineFor(c);
if (lbe != nullptr) {
break;
}
@@ -1204,7 +1199,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, const char* locale)
//
//-------------------------------------------------------------------------------
const LanguageBreakEngine *
RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
const LanguageBreakEngine *lbe = nullptr;
UErrorCode status = U_ZERO_ERROR;
@@ -1212,22 +1207,22 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
fLanguageBreakEngines = new UStack(status);
if (fLanguageBreakEngines == nullptr || U_FAILURE(status)) {
delete fLanguageBreakEngines;
fLanguageBreakEngines = nullptr;
fLanguageBreakEngines = 0;
return nullptr;
}
}
int32_t i = fLanguageBreakEngines->size();
while (--i >= 0) {
lbe = static_cast<const LanguageBreakEngine*>(fLanguageBreakEngines->elementAt(i));
if (lbe->handles(c, locale)) {
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
if (lbe->handles(c)) {
return lbe;
}
}
// No existing dictionary took the character. See if a factory wants to
// give us a new LanguageBreakEngine for this character.
lbe = getLanguageBreakEngineFromFactory(c, locale);
lbe = getLanguageBreakEngineFromFactory(c);
// If we got one, use it and push it on our stack.
if (lbe != nullptr) {
@@ -1252,7 +1247,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
U_ASSERT(!fLanguageBreakEngines->hasDeleter());
if (U_FAILURE(status)) {
delete fUnhandledBreakEngine;
fUnhandledBreakEngine = nullptr;
fUnhandledBreakEngine = 0;
return nullptr;
}
}
@@ -1264,18 +1259,6 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
return fUnhandledBreakEngine;
}
#ifndef U_HIDE_DRAFT_API
// Hands an externally supplied break engine to gICULanguageBreakFactory.
//
// toAdopt: engine pointer; it is wrapped in a LocalPointer on entry and only
//          released (orphan()) to addExternalEngine once every check passed.
// status:  in/out error code; the function returns early, without
//          registering anything, as soon as it reports a failure.
void U_EXPORT2 RuleBasedBreakIterator::registerExternalBreakEngine(
ExternalBreakEngine* toAdopt, UErrorCode& status) {
// NOTE(review): presumably the LocalPointer disposes of toAdopt on the
// early-return paths below - confirm against the LocalPointer documentation.
LocalPointer<ExternalBreakEngine> engine(toAdopt, status);
if (U_FAILURE(status)) return;
// Make sure the factory globals have been set up before using
// gICULanguageBreakFactory.
ensureLanguageFactories(status);
if (U_FAILURE(status)) return;
gICULanguageBreakFactory->addExternalEngine(engine.orphan(), status);
}
#endif /* U_HIDE_DRAFT_API */
// Forwards to fBreakCache->dumpCache(). fBreakCache is dereferenced without
// a null check here.
void RuleBasedBreakIterator::dumpCache() {
fBreakCache->dumpCache();
}

Some files were not shown because too many files have changed in this diff Show More