Bug 1954425 - Part 2: Update in-tree ICU to 77. r=platform-i18n-reviewers,nordzilla

Update ICU by running `./update-icu.sh https://github.com/unicode-org/icu.git maint/maint-77`. Differential Revision: https://phabricator.services.mozilla.com/D241784
2025-03-20 09:46:17 +00:00
parent 7be5a0750e
commit 40fab635a7
4632 changed files with 16753 additions and 13516 deletions
--- a/config/external/icu/data/icudt77l.dat
+++ b/config/external/icu/data/icudt77l.dat
--- a/intl/icu/GIT-INFO
+++ b/intl/icu/GIT-INFO
@@ -1,5 +1,7 @@
-commit 8eca245c7484ac6cc179e3e5f7c1ea7680810f39
-Author: Rahul Pandey <rp9.next@gmail.com>
-Date:   Mon Oct 21 16:21:38 2024 +0530
+commit f23af97cf467a3ae761e5943a6caf793415ac857
+Author: Markus Scherer <markus.icu@gmail.com>
+Date:   Wed Mar 12 16:40:58 2025 +0000

-    ICU-22724 BRS_76_GA Update version number to 76.1 (No more -SNAPSHOT)
+    ICU-22923 ICU 77 release page updates
+    
+    See #3432
--- a/intl/icu/source/Doxyfile.in
+++ b/intl/icu/source/Doxyfile.in
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.13
+# Doxyfile 1.9.1

 # Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
@@ -25,11 +25,11 @@
 # Project related configuration options
 #---------------------------------------------------------------------------

-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
 # The default value is: UTF-8.

 DOXYFILE_ENCODING      = UTF-8
@@ -101,6 +101,14 @@ ALLOW_UNICODE_NAMES    = NO

 OUTPUT_LANGUAGE        = English

+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
 # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
@@ -187,6 +195,16 @@ SHORT_NAMES            = NO

 JAVADOC_AUTOBRIEF      = YES

+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
 # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
 # line (until the first dot) of a Qt-style comment as the brief description. If
 # set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -207,6 +225,14 @@ QT_AUTOBRIEF           = NO

 MULTILINE_CPP_IS_BRIEF = NO

+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks is shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
 # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
 # documentation from any documented member that it re-implements.
 # The default value is: YES.
@@ -234,7 +260,12 @@ TAB_SIZE               = 8
 # will allow you to put the command \sideeffect (or @sideeffect) in the
 # documentation, which will result in a user-defined paragraph with heading
 # "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\), this can lead to conflicts with the
+# commands \{ and \} for these it is advised to use the version @{ and @} or use
+# a double escape (\\{ and \\})

 ALIASES                = "memo=\par Note:\n" \
                         "draft=\xrefitem draft \"Draft\" \"Draft List\"  This API may be changed in the future versions and was introduced in" \
@@ -244,12 +275,6 @@ ALIASES                = "memo=\par Note:\n" \
                         "system=\xrefitem system \"System\" \"System List\" \n Do not use unless you know what you are doing." \
                         "internal=\xrefitem internal \"Internal\"  \"Internal List\"  Do not use. This API is for internal use only."

-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
 # only. Doxygen will then generate output that is more tailored for C. For
 # instance, some of the names that are used will be different. The list of all
@@ -278,28 +303,40 @@ OPTIMIZE_FOR_FORTRAN   = NO

 OPTIMIZE_OUTPUT_VHDL   = NO

+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
 # Doxygen selects the parser to use depending on the extension of the files it
 # parses. With this tag you can assign which parser to use for a given
 # extension. Doxygen has a built-in mapping, but you can override or extend it
 # using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
-# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
-# Fortran. In the later case the parser tries to guess whether the code is fixed
-# or free formatted code, this is the default for Fortran type files), VHDL. For
-# instance to make doxygen treat .inc files as Fortran files (default is PHP),
-# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
 #
 # Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.

 EXTENSION_MAPPING      =

 # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
 # according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
 # The output of markdown processing is further processed by doxygen, so you can
 # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
 # case of backward compatibilities issues.
@@ -311,7 +348,7 @@ MARKDOWN_SUPPORT       = YES
 # to that level are automatically included in the table of contents, even if
 # they do not have an id attribute.
 # Note: This feature currently applies only to Markdown headings.
-# Minimum value: 0, maximum value: 99, default value: 0.
+# Minimum value: 0, maximum value: 99, default value: 5.
 # This tag requires that the tag MARKDOWN_SUPPORT is set to YES.

 TOC_INCLUDE_HEADINGS   = 0
@@ -341,7 +378,7 @@ BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO

 # Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
 # will parse them like normal C++ but will assume all classes use public instead
 # of private inheritance when no explicit protection keyword is present.
 # The default value is: NO.
@@ -427,6 +464,19 @@ TYPEDEF_HIDES_STRUCT   = NO

 LOOKUP_CACHE_SIZE      = 0

+# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will base this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@@ -447,6 +497,12 @@ EXTRACT_ALL            = NO

 EXTRACT_PRIVATE        = NO

+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
 # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
 # scope will be included in the documentation.
 # The default value is: NO.
@@ -484,6 +540,13 @@ EXTRACT_LOCAL_METHODS  = NO

 EXTRACT_ANON_NSPACES   = NO

+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
@@ -501,8 +564,8 @@ HIDE_UNDOC_MEMBERS     = NO
 HIDE_UNDOC_CLASSES     = NO

 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO, these declarations will be
-# included in the documentation.
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
 # The default value is: NO.

 HIDE_FRIEND_COMPOUNDS  = NO
@@ -521,11 +584,18 @@ HIDE_IN_BODY_DOCS      = NO

 INTERNAL_DOCS          = YES

-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
 # The default value is: system dependent.

 CASE_SENSE_NAMES       = YES
@@ -712,7 +782,7 @@ LAYOUT_FILE            =
 # The CITE_BIB_FILES tag can be used to specify one or more bib files containing
 # the reference definitions. This must be a list of .bib files. The .bib
 # extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
 # For LaTeX the style of the bibliography can be controlled using
 # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
 # search path. See also \cite for info how to create references.
@@ -757,13 +827,17 @@ WARN_IF_DOC_ERROR      = YES
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
 # value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation.
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
 # The default value is: NO.

 WARN_NO_PARAMDOC       = NO

 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
 # The default value is: NO.

 WARN_AS_ERROR          = NO
@@ -802,8 +876,8 @@ INPUT                  = @srcdir@/common/unicode \
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
 # The default value is: UTF-8.

 INPUT_ENCODING         = UTF-8
@@ -816,11 +890,15 @@ INPUT_ENCODING         = UTF-8
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # read by doxygen.
 #
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
 # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
 # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
 # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
-# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf.
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.

 FILE_PATTERNS          = *.h

@@ -979,7 +1057,7 @@ INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = YES

 # If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
+# entity all documented functions referencing it will be listed.
 # The default value is: NO.

 REFERENCED_BY_RELATION = YES
@@ -1011,12 +1089,12 @@ SOURCE_TOOLTIPS        = YES
 # If the USE_HTAGS tag is set to YES then the references to source code will
 # point to the HTML generated by the htags(1) tool instead of doxygen built-in
 # source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
+# (see https://www.gnu.org/software/global/global.html). You will need version
 # 4.8.6 or higher.
 #
 # To use it do the following:
 # - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
 # - Make sure the INPUT points to the root of the source tree
 # - Run doxygen as normal
 #
@@ -1039,16 +1117,22 @@ USE_HTAGS              = NO
 VERBATIM_HEADERS       = YES

 # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
-# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
-# cost of reduced performance. This can be particularly helpful with template
-# rich C++ code for which doxygen's built-in parser lacks the necessary type
-# information.
+# clang parser (see:
+# http://clang.llvm.org/) for more accurate parsing at the cost of reduced
+# performance. This can be particularly helpful with template rich C++ code for
+# which doxygen's built-in parser lacks the necessary type information.
 # Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse-libclang=ON option for CMake.
+# generated with the -Duse_libclang=ON option for CMake.
 # The default value is: NO.

 CLANG_ASSISTED_PARSING = NO

+# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to
+# YES then doxygen will add the directory of each input to the include path.
+# The default value is: YES.
+
+CLANG_ADD_INC_PATHS    = YES
+
 # If clang assisted parsing is enabled you can provide the compiler with command
 # line options that you would normally use when invoking the compiler. Note that
 # the include paths will already be set by doxygen for the files and directories
@@ -1057,6 +1141,19 @@ CLANG_ASSISTED_PARSING = NO

 CLANG_OPTIONS          =

+# If clang assisted parsing is enabled you can provide the clang parser with the
+# path to the directory containing a file called compile_commands.json. This
+# file is the compilation database (see:
+# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the
+# options used when the source files were built. This is equivalent to
+# specifying the -p option to a clang tool, such as clang-check. These options
+# will then be passed to the parser. Any options specified with CLANG_OPTIONS
+# will be added as well.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+
+CLANG_DATABASE_PATH    =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
@@ -1068,13 +1165,6 @@ CLANG_OPTIONS          =

 ALPHABETICAL_INDEX     = YES

-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
 # In case all classes in a project start with a common prefix, all classes will
 # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
 # can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -1175,7 +1265,7 @@ HTML_EXTRA_FILES       =
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
 # will adjust the colors in the style sheet and background images according to
 # this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
 # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
 # purple, and 360 is red again.
 # Minimum value: 0, maximum value: 359, default value: 220.
@@ -1211,6 +1301,17 @@ HTML_COLORSTYLE_GAMMA  = 80

 HTML_TIMESTAMP         = NO

+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
 # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 # documentation will contain sections that can be hidden and shown after the
 # page has loaded.
@@ -1234,13 +1335,14 @@ HTML_INDEX_NUM_ENTRIES = 100

 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
+# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
+# genXcode/_index.html for more information.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.

@@ -1279,8 +1381,8 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
 # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1310,7 +1412,7 @@ CHM_FILE               =
 HHC_LOCATION           =

 # The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the primary .chm file (NO).
+# (YES) or that it should be included in the main .chm file (NO).
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.

@@ -1355,7 +1457,8 @@ QCH_FILE               =

 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@@ -1363,8 +1466,8 @@ QHP_NAMESPACE          = org.doxygen.Project

 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@@ -1372,30 +1475,30 @@ QHP_VIRTUAL_FOLDER     = doc

 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_NAME   =

 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_ATTRS  =

 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
 # project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_SECT_FILTER_ATTRS  =

-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHG_LOCATION           =
@@ -1472,6 +1575,17 @@ TREEVIEW_WIDTH         = 250

 EXT_LINKS_IN_WINDOW    = NO

+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
 # Use this tag to change the font size of LaTeX formulas included as images in
 # the HTML documentation. When you change the font size after a successful
 # doxygen run you need to manually remove any form_*.png images from the HTML
@@ -1481,7 +1595,7 @@ EXT_LINKS_IN_WINDOW    = NO

 FORMULA_FONTSIZE       = 10

-# Use the FORMULA_TRANPARENT tag to determine whether or not the images
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
 # generated for formulas are transparent PNGs. Transparent PNGs are not
 # supported properly for IE 6.0, but are supported on all modern browsers.
 #
@@ -1492,8 +1606,14 @@ FORMULA_FONTSIZE       = 10

 FORMULA_TRANSPARENT    = YES

+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
 # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
 # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
 # installed or if you want to formulas look prettier in the HTML output. When
 # enabled you may also need to install MathJax separately and configure the path
@@ -1505,7 +1625,7 @@ USE_MATHJAX            = NO

 # When MathJax is enabled you can set the default output format to be used for
 # the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
 # Possible values are: HTML-CSS (which is slower, but has the best
 # compatibility), NativeMML (i.e. MathML) and SVG.
 # The default value is: HTML-CSS.
@@ -1520,8 +1640,8 @@ MATHJAX_FORMAT         = HTML-CSS
 # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
 # Content Delivery Network so you can quickly see the result without installing
 # MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
 # This tag requires that the tag USE_MATHJAX is set to YES.

 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
@@ -1535,7 +1655,8 @@ MATHJAX_EXTENSIONS     =

 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
 # example see the documentation.
 # This tag requires that the tag USE_MATHJAX is set to YES.

@@ -1563,7 +1684,7 @@ MATHJAX_CODEFILE       =
 SEARCHENGINE           = NO

 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
+# implemented using a web server instead of a web client using JavaScript. There
 # are two flavors of web server based searching depending on the EXTERNAL_SEARCH
 # setting. When disabled, doxygen will generate a PHP script for searching and
 # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
@@ -1582,7 +1703,8 @@ SERVER_BASED_SEARCH    = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@@ -1595,8 +1717,9 @@ EXTERNAL_SEARCH        = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
 # This tag requires that the tag SEARCHENGINE is set to YES.

 SEARCHENGINE_URL       =
@@ -1647,21 +1770,35 @@ LATEX_OUTPUT           = latex
 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
 # invoked.
 #
-# Note that when enabling USE_PDFLATEX this option is only used for generating
-# bitmaps for formulas in the HTML output, but not in the Makefile that is
-# written to the output directory.
-# The default file is: latex.
+# Note that when USE_PDFLATEX is not enabled the default is latex; when
+# USE_PDFLATEX is enabled the default is pdflatex, and if latex is chosen in
+# the latter case it is overwritten by pdflatex. For specific output languages
+# the default may have been set differently; this depends on the implementation
+# of the output language.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

 LATEX_CMD_NAME         = latex

 # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
 # index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
 # The default file is: makeindex.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

 MAKEINDEX_CMD_NAME     = makeindex

+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
 # If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
 # documents. This may be useful for small projects and may help to save some
 # trees in general.
@@ -1746,9 +1883,11 @@ LATEX_EXTRA_FILES      =

 PDF_HYPERLINKS         = NO

-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES, to get a
-# higher quality PDF documentation.
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

@@ -1782,7 +1921,7 @@ LATEX_SOURCE_CODE      = NO

 # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
 # bibliography, e.g. plainnat, or ieeetr. See
-# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
 # The default value is: plain.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

@@ -1796,6 +1935,14 @@ LATEX_BIB_STYLE        = plain

 LATEX_TIMESTAMP        = NO

+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the RTF output
 #---------------------------------------------------------------------------
@@ -1835,9 +1982,9 @@ COMPACT_RTF            = NO

 RTF_HYPERLINKS         = NO

-# Load stylesheet definitions from file. Syntax is similar to doxygen's config
-# file, i.e. a series of assignments. You only have to provide replacements,
-# missing definitions are set to their default value.
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
 #
 # See also section "Doxygen usage" for information on how to generate the
 # default style sheet that doxygen normally uses.
@@ -1846,8 +1993,8 @@ RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =

 # Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's config file. A template extensions file can be generated
-# using doxygen -e rtf extensionFile.
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
 # This tag requires that the tag GENERATE_RTF is set to YES.

 RTF_EXTENSIONS_FILE    =
@@ -1933,6 +2080,13 @@ XML_OUTPUT             = xml

 XML_PROGRAMLISTING     = YES

+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
 #---------------------------------------------------------------------------
 # Configuration options related to the DOCBOOK output
 #---------------------------------------------------------------------------
@@ -1965,9 +2119,9 @@ DOCBOOK_PROGRAMLISTING = NO
 #---------------------------------------------------------------------------

 # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
-# AutoGen Definitions (see http://autogen.sf.net) file that captures the
-# structure of the code including all documentation. Note that this feature is
-# still experimental and incomplete at the moment.
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
 # The default value is: NO.

 GENERATE_AUTOGEN_DEF   = NO
@@ -2079,7 +2233,7 @@ PREDEFINED             = U_EXPORT2= \
                         U_CDECL_BEGIN= \
                         U_CDECL_END= \
                         "U_NAMESPACE_BEGIN=namespace icu{" \
-                         "U_NAMESPACE_END=}" \
+                         U_NAMESPACE_END=} \
                         U_SHOW_CPLUSPLUS_API=1 \
                         U_DEFINE_LOCAL_OPEN_POINTER()= \
                         U_IN_DOXYGEN=1 \
@@ -2129,7 +2283,7 @@ TAGFILES               =
 # tag file that is based on the input files it reads. See section "Linking to
 # external documentation" for more information about the usage of tag files.

-GENERATE_TAGFILE       = "@builddir@/doc/html/icudocs.tag"
+GENERATE_TAGFILE       = @builddir@/doc/html/icudocs.tag

 # If the ALLEXTERNALS tag is set to YES, all external class will be listed in
 # the class index. If set to NO, only the inherited external classes will be
@@ -2152,12 +2306,6 @@ EXTERNAL_GROUPS        = YES

 EXTERNAL_PAGES         = YES

-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
@@ -2171,15 +2319,6 @@ PERL_PATH              = /usr/bin/perl

 CLASS_DIAGRAMS         = YES

-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
 # You can include diagrams made with dia in doxygen documentation. Doxygen will
 # then run dia to produce the diagram and insert it in the documentation. The
 # DIA_PATH tag allows you to specify the directory where the dia binary resides.
@@ -2277,10 +2416,32 @@ UML_LOOK               = NO
 # but if the number exceeds 15, the total amount of fields shown is limited to
 # 10.
 # Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.

 UML_LIMIT_NUM_FIELDS   = 10

+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are
+# applied to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
 # collaboration graphs will show the relations between templates and their
 # instances.
@@ -2472,9 +2633,11 @@ DOT_MULTI_TARGETS      = NO

 GENERATE_LEGEND        = YES

-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
 # files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.

 DOT_CLEANUP            = YES
--- a/intl/icu/source/Makefile.in
+++ b/intl/icu/source/Makefile.in
@@ -133,7 +133,7 @@ doc/html/index.html: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/c
 Doxyfile: $(srcdir)/Doxyfile.in
 	CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status

-$(DOCZIP): doc
+$(DOCZIP): doc-searchengine
 	-$(RMV) $(DOCZIP)
 	( cd doc/html ; zip -r ../../$(DOCZIP) * )
 endif
@@ -210,7 +210,7 @@ endif
 ifeq ($(DOXYGEN),)
 install-doc:
 else
-install-doc: doc
+install-doc: doc-searchengine
 	$(RM) -r $(DESTDIR)$(docdir)/$(docsubdir)
 	$(MKINSTALLDIRS) $(DESTDIR)$(docdir)/$(docsubdir)
 	$(INSTALL_DATA) $(docfiles) $(DESTDIR)$(docdir)/$(docsubdir)
--- a/intl/icu/source/acinclude.m4
+++ b/intl/icu/source/acinclude.m4
@@ -449,7 +449,7 @@ AC_DEFUN([AC_CHECK_64BIT_LIBS],
 AC_DEFUN([AC_CHECK_STRICT_COMPILE],
 [
    AC_MSG_CHECKING([whether strict compiling is on])
-    AC_ARG_ENABLE(strict,[  --enable-strict         compile with strict compiler options [default=yes]], [
+    AC_ARG_ENABLE(strict,[  --disable-strict        do not compile with strict compiler options], [
        if test "$enableval" = no
        then
            ac_use_strict_options=no
--- a/intl/icu/source/allinone/Build.Windows.IcuVersion.props
+++ b/intl/icu/source/allinone/Build.Windows.IcuVersion.props
@@ -3,6 +3,6 @@
 <Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
  <PropertyGroup>
-    <IcuMajorVersion>76</IcuMajorVersion>
+    <IcuMajorVersion>77</IcuMajorVersion>
  </PropertyGroup>
 </Project>
--- a/intl/icu/source/common/brkiter.cpp
+++ b/intl/icu/source/common/brkiter.cpp
@@ -59,7 +59,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
 {
    char fnbuff[256];
    char ext[4]={'\0'};
-    CharString actualLocale;
+    CharString actual;
    int32_t size;
    const char16_t* brkfname = nullptr;
    UResourceBundle brkRulesStack;
@@ -94,7 +94,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
-            actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
+            actual.append(ures_getLocaleInternal(brkName, &status), -1, status);

            char16_t* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
@@ -124,9 +124,8 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);

        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
-                              actualLocale.data());
-        uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY);
-        result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+                              actual.data(), status);
+        LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
    }

    ures_close(b);
@@ -206,26 +205,32 @@ BreakIterator::getAvailableLocales(int32_t& count)

 BreakIterator::BreakIterator()
 {
-    *validLocale = *actualLocale = *requestLocale = 0;
 }

 BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
-    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
-    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
-    uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
+    UErrorCode status = U_ZERO_ERROR;
+    U_LOCALE_BASED(locBased, *this);
+    locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
+    LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
+    U_ASSERT(U_SUCCESS(status));
 }

 BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this != &other) {
-        uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
-        uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
-        uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
+        UErrorCode status = U_ZERO_ERROR;
+        U_LOCALE_BASED(locBased, *this);
+        locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
+        LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
+        U_ASSERT(U_SUCCESS(status));
    }
    return *this;
 }

 BreakIterator::~BreakIterator()
 {
+    delete validLocale;
+    delete actualLocale;
+    delete requestLocale;
 }

 // ------------------------------------------
@@ -394,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
        // revisit this in ICU 3.0 and clean it up/fix it/remove it.
        if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
            U_LOCALE_BASED(locBased, *result);
-            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
+            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
        }
        return result;
    }
@@ -488,6 +493,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
    }

    if (U_FAILURE(status)) {
+        delete result;
        return nullptr;
    }

@@ -496,20 +502,25 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)

 Locale
 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
-    if (type == ULOC_REQUESTED_LOCALE) {
-        return {requestLocale};
+    if (U_FAILURE(status)) {
+        return Locale::getRoot();
    }
-    U_LOCALE_BASED(locBased, *this);
-    return locBased.getLocale(type, status);
+    if (type == ULOC_REQUESTED_LOCALE) {
+        return requestLocale == nullptr ?
+            Locale::getRoot() : Locale(requestLocale->data());
+    }
+    return LocaleBased::getLocale(validLocale, actualLocale, type, status);
 }

 const char *
 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
-    if (type == ULOC_REQUESTED_LOCALE) {
-        return requestLocale;
+    if (U_FAILURE(status)) {
+        return nullptr;
    }
-    U_LOCALE_BASED(locBased, *this);
-    return locBased.getLocaleID(type, status);
+    if (type == ULOC_REQUESTED_LOCALE) {
+        return requestLocale == nullptr ?  "" : requestLocale->data();
+    }
+    return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
 }


@@ -536,8 +547,10 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
 }

 BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
+  UErrorCode status = U_ZERO_ERROR;
  U_LOCALE_BASED(locBased, (*this));
-  locBased.setLocaleIDs(valid, actual);
+  locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
+  U_ASSERT(U_SUCCESS(status));
 }

 U_NAMESPACE_END
--- a/intl/icu/source/common/charstr.cpp
+++ b/intl/icu/source/common/charstr.cpp
@@ -70,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    return *this;
 }

+CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return *this;
+    }
+    len = 0;
+    append(s, errorCode);
+    return *this;
+}
+
 int32_t CharString::lastIndexOf(char c) const {
    for(int32_t i=len; i>0;) {
        if(buffer[--i]==c) {
@@ -143,7 +152,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
    return *this;
 }

-CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
+CharString &CharString::appendNumber(int64_t number, UErrorCode &status) {
    if (number < 0) {
        this->append('-', status);
        if (U_FAILURE(status)) {
--- a/intl/icu/source/common/charstr.h
+++ b/intl/icu/source/common/charstr.h
@@ -74,6 +74,7 @@ public:
     * use a UErrorCode where memory allocations might be needed.
     */
    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+    CharString &copyFrom(StringPiece s, UErrorCode &errorCode);

    UBool isEmpty() const { return len==0; }
    int32_t length() const { return len; }
@@ -135,7 +136,7 @@ public:
    }
    CharString &append(const char *s, int32_t sLength, UErrorCode &status);

-    CharString &appendNumber(int32_t number, UErrorCode &status);
+    CharString &appendNumber(int64_t number, UErrorCode &status);

    /**
     * Returns a writable buffer for appending and writes the buffer's capacity to
--- a/intl/icu/source/common/localefallback_data.h
+++ b/intl/icu/source/common/localefallback_data.h
--- a/intl/icu/source/common/locbased.cpp
+++ b/intl/icu/source/common/locbased.cpp
@@ -12,44 +12,84 @@
 */
 #include "locbased.h"
 #include "cstring.h"
+#include "charstr.h"

 U_NAMESPACE_BEGIN

-Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
-    const char* id = getLocaleID(type, status);
+Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual,
+                              ULocDataLocaleType type, UErrorCode& status) {
+    const char* id = getLocaleID(valid, actual, type, status);
    return Locale(id != nullptr ? id : "");
 }

-const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
+const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual,
+                                     ULocDataLocaleType type, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    switch(type) {
    case ULOC_VALID_LOCALE:
-        return valid;
+        return valid == nullptr ? "" : valid->data();
    case ULOC_ACTUAL_LOCALE:
-        return actual;
+        return actual == nullptr ? "" : actual->data();
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
 }

-void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
-    if (validID != nullptr) {
-      uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
-      valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
+    setValidLocaleID(validID, status);
+    setActualLocaleID(actualID,status);
+}
+void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
+    setValidLocaleID(validID, status);
+    setActualLocaleID(actualID,status);
+}
+
+void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+    if (id == nullptr || *id == 0) {
+        delete dest;
+        dest = nullptr;
+    } else {
+        if (dest == nullptr) {
+            dest = new CharString(id, status);
+            if (dest == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+        } else {
+            dest->copyFrom(id, status);
        }
-    if (actualID != nullptr) {
-      uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
-      actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
    }
 }

-void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
-  uprv_strcpy(valid, validID.getName());
-  uprv_strcpy(actual, actualID.getName());
+void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+    if (id == nullptr || id->isEmpty()) {
+        delete dest;
+        dest = nullptr;
+    } else {
+        if (dest == nullptr) {
+            dest = new CharString(*id, status);
+            if (dest == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+        } else {
+            dest->copyFrom(*id, status);
+        }
+    }
+}
+
+bool LocaleBased::equalIDs(const CharString* left, const CharString* right) {
+    // true if both are nullptr
+    if (left == nullptr && right == nullptr) return true;
+    // false if only one is nullptr
+    if (left == nullptr || right == nullptr) return false;
+    return *left == *right;
 }

 U_NAMESPACE_END
--- a/intl/icu/source/common/locbased.h
+++ b/intl/icu/source/common/locbased.h
@@ -19,13 +19,14 @@
 /**
 * Macro to declare a locale LocaleBased wrapper object for the given
 * object, which must have two members named `validLocale' and
- * `actualLocale' of size ULOC_FULLNAME_CAPACITY
+ * `actualLocale', both of which are pointers to an internal icu::CharString.
 */
 #define U_LOCALE_BASED(varname, objname) \
  LocaleBased varname((objname).validLocale, (objname).actualLocale)

 U_NAMESPACE_BEGIN

+class CharString;
 /**
 * A utility class that unifies the implementation of getLocale() by
 * various ICU services.  This class is likely to be removed in the
@@ -41,33 +42,35 @@ class U_COMMON_API LocaleBased : public UMemory {
     * Construct a LocaleBased wrapper around the two pointers.  These
     * will be aliased for the lifetime of this object.
     */
-    inline LocaleBased(char* validAlias, char* actualAlias);
-
-    /**
-     * Construct a LocaleBased wrapper around the two const pointers.
-     * These will be aliased for the lifetime of this object.
-     */
-    inline LocaleBased(const char* validAlias, const char* actualAlias);
+    inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);

    /**
     * Return locale meta-data for the service object wrapped by this
     * object.  Either the valid or the actual locale may be
     * retrieved.
+     * @param valid The valid locale.
+     * @param actual The actual locale.
     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale
     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+    static Locale getLocale(
+        const CharString* valid, const CharString* actual,
+        ULocDataLocaleType type, UErrorCode& status);

    /**
     * Return the locale ID for the service object wrapped by this
     * object.  Either the valid or the actual locale may be
     * retrieved.
+     * @param valid The valid locale.
+     * @param actual The actual locale.
     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale ID
     */
-    const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+    static const char* getLocaleID(
+        const CharString* valid, const CharString* actual,
+        ULocDataLocaleType type, UErrorCode& status);

    /**
     * Set the locale meta-data for the service object wrapped by this
@@ -75,31 +78,40 @@ class U_COMMON_API LocaleBased : public UMemory {
     * @param valid the ID of the valid locale
     * @param actual the ID of the actual locale
     */
-    void setLocaleIDs(const char* valid, const char* actual);
+    void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
+    void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);

-    /**
-     * Set the locale meta-data for the service object wrapped by this
-     * object.
-     * @param valid the ID of the valid locale
-     * @param actual the ID of the actual locale
-     */
-    void setLocaleIDs(const Locale& valid, const Locale& actual);
+    static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status);
+    static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status);
+
+    static bool equalIDs(const CharString* left, const CharString* right);

 private:

-    char* valid;
+    void setValidLocaleID(const CharString* id, UErrorCode& status);
+    void setActualLocaleID(const CharString* id, UErrorCode& status);
+    void setValidLocaleID(const char* id, UErrorCode& status);
+    void setActualLocaleID(const char* id, UErrorCode& status);

-    char* actual;
+    CharString*& valid;
+    CharString*& actual;
 };

-inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
+inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) :
    valid(validAlias), actual(actualAlias) {
 }

-inline LocaleBased::LocaleBased(const char* validAlias,
-                                const char* actualAlias) :
-    // ugh: cast away const
-    valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) {
+inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) {
+    setLocaleID(id, valid, status);
+}
+inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) {
+    setLocaleID(id, actual, status);
+}
+inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) {
+    setLocaleID(id, valid, status);
+}
+inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) {
+    setLocaleID(id, actual, status);
 }

 U_NAMESPACE_END
--- a/intl/icu/source/common/locdispnames.cpp
+++ b/intl/icu/source/common/locdispnames.cpp
@@ -19,6 +19,8 @@
 *   that then do not depend on resource bundle code and display name data.
 */

+#include <string_view>
+
 #include "unicode/utypes.h"
 #include "unicode/brkiter.h"
 #include "unicode/locid.h"
@@ -359,7 +361,7 @@ _getStringOrCopyKey(const char *path, const char *locale,
    return u_terminateUChars(dest, destCapacity, length, &errorCode);
 }

-using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&);
+using UDisplayNameGetter = icu::CharString(std::string_view, UErrorCode&);

 int32_t
 _getDisplayNameForComponent(const char *locale,
@@ -377,6 +379,10 @@ _getDisplayNameForComponent(const char *locale,
        return 0;
    }

+    if (locale == nullptr) {
+        locale = uloc_getDefault();
+    }
+
    localStatus = U_ZERO_ERROR;
    icu::CharString localeBuffer = (*getter)(locale, localStatus);
    if (U_FAILURE(localStatus)) {
--- a/intl/icu/source/common/locid.cpp
+++ b/intl/icu/source/common/locid.cpp
@@ -1828,8 +1828,13 @@ ulocimp_isCanonicalizedLocaleForTest(const char* localeName)

 U_NAMESPACE_BEGIN

-/*This function initializes a Locale from a C locale ID*/
 Locale& Locale::init(const char* localeID, UBool canonicalize)
+{
+    return localeID == nullptr ? *this = getDefault() : init(StringPiece{localeID}, canonicalize);
+}
+
+/*This function initializes a Locale from a C locale ID*/
+Locale& Locale::init(StringPiece localeID, UBool canonicalize)
 {
    fIsBogus = false;
    /* Free our current storage */
@@ -1854,19 +1859,28 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
        int32_t length;
        UErrorCode err;

-        if(localeID == nullptr) {
-            // not an error, just set the default locale
-            return *this = getDefault();
-        }
-
        /* preset all fields to empty */
        language[0] = script[0] = country[0] = 0;

+        const auto parse = [canonicalize](std::string_view localeID,
+                                          char* name,
+                                          int32_t nameCapacity,
+                                          UErrorCode& status) {
+            return ByteSinkUtil::viaByteSinkToTerminatedChars(
+                name, nameCapacity,
+                [&](ByteSink& sink, UErrorCode& status) {
+                    if (canonicalize) {
+                        ulocimp_canonicalize(localeID, sink, status);
+                    } else {
+                        ulocimp_getName(localeID, sink, status);
+                    }
+                },
+                status);
+        };
+
        // "canonicalize" the locale ID to ICU/Java format
        err = U_ZERO_ERROR;
-        length = canonicalize ?
-            uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
-            uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
+        length = parse(localeID, fullName, sizeof fullNameBuffer, err);

        if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
            U_ASSERT(baseName == nullptr);
@@ -1877,9 +1891,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
            }
            fullName = newFullName;
            err = U_ZERO_ERROR;
-            length = canonicalize ?
-                uloc_canonicalize(localeID, fullName, length+1, &err) :
-                uloc_getName(localeID, fullName, length+1, &err);
+            length = parse(localeID, fullName, length + 1, err);
        }
        if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
            /* should never occur */
@@ -2200,6 +2212,13 @@ Locale::createFromName (const char *name)
    }
 }

+Locale U_EXPORT2
+Locale::createFromName(StringPiece name) {
+    Locale loc("");
+    loc.init(name, false);
+    return loc;
+}
+
 Locale U_EXPORT2
 Locale::createCanonical(const char* name) {
    Locale loc("");
--- a/intl/icu/source/common/loclikely.cpp
+++ b/intl/icu/source/common/loclikely.cpp
@@ -300,6 +300,9 @@ ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
    _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
 }
@@ -334,6 +337,9 @@ ulocimp_minimizeSubtags(const char* localeID,
                        bool favorScript,
                        UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
    _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
 }
@@ -349,7 +355,9 @@ uloc_isRightToLeft(const char *locale) {
    UErrorCode errorCode = U_ZERO_ERROR;
    icu::CharString lang;
    icu::CharString script;
-    ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, errorCode);
+    ulocimp_getSubtags(
+        locale == nullptr ? uloc_getDefault() : locale,
+        &lang, &script, nullptr, nullptr, nullptr, errorCode);
    if (U_FAILURE(errorCode) || script.isEmpty()) {
        // Fastpath: We know the likely scripts and their writing direction
        // for some common languages.
@@ -369,7 +377,7 @@ uloc_isRightToLeft(const char *locale) {
        if (U_FAILURE(errorCode)) {
            return false;
        }
-        ulocimp_getSubtags(likely.data(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
+        ulocimp_getSubtags(likely.toStringPiece(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
        if (U_FAILURE(errorCode) || script.isEmpty()) {
            return false;
        }
@@ -430,7 +438,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
    icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status);
    if (U_SUCCESS(status) && rgBuf.isEmpty()) {
        // No valid rg keyword value, try for unicode_region_subtag
-        rgBuf = ulocimp_getRegion(localeID, status);
+        rgBuf = ulocimp_getRegion(localeID == nullptr ? uloc_getDefault() : localeID, status);
        if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) {
            // Second check for sd keyword value
            rgBuf = GetRegionFromKey(localeID, "sd", status);
@@ -439,7 +447,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
                UErrorCode rgStatus = U_ZERO_ERROR;
                icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus);
                if (U_SUCCESS(rgStatus)) {
-                    rgBuf = ulocimp_getRegion(locBuf.data(), status);
+                    rgBuf = ulocimp_getRegion(locBuf.toStringPiece(), status);
                }
            }
        }
--- a/intl/icu/source/common/loclikelysubtags.cpp
+++ b/intl/icu/source/common/loclikelysubtags.cpp
@@ -527,7 +527,7 @@ LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
        return {};
    }
    const char *name = locale.getName();
-    if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
+    if (!returnInputIfUnmatch && uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
        // Private use language tag x-subtag-subtag... which CLDR changes to
        // und-x-subtag-subtag...
        return LSR(name, "", "", LSR::EXPLICIT_LSR);
--- a/intl/icu/source/common/locresdata.cpp
+++ b/intl/icu/source/common/locresdata.cpp
@@ -161,6 +161,9 @@ _uloc_getOrientationHelper(const char* localeId,

    if (U_FAILURE(status)) { return result; }

+    if (localeId == nullptr) {
+        localeId = uloc_getDefault();
+    }
    icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);

    if (U_FAILURE(status)) { return result; }
--- a/intl/icu/source/common/punycode.cpp
+++ b/intl/icu/source/common/punycode.cpp
@@ -193,7 +193,7 @@ u_strToPunycode(const char16_t *src, int32_t srcLength,
        return 0;
    }

-    if(src==nullptr || srcLength<-1 || (dest==nullptr && destCapacity!=0)) {
+    if(src==nullptr || srcLength<-1 || destCapacity<0 || (dest==nullptr && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
--- a/intl/icu/source/common/putil.cpp
+++ b/intl/icu/source/common/putil.cpp
@@ -76,7 +76,7 @@
 #include <float.h>

 #ifndef U_COMMON_IMPLEMENTATION
-#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
+#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
 #endif


--- a/intl/icu/source/common/rbbinode.cpp
+++ b/intl/icu/source/common/rbbinode.cpp
@@ -47,7 +47,10 @@ static int  gLastSerial = 0;
 //    Constructor.   Just set the fields to reasonable default values.
 //
 //-------------------------------------------------------------------------
-RBBINode::RBBINode(NodeType t) : UMemory() {
+RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() {
+    if (U_FAILURE(status)) {
+        return;
+    }
 #ifdef RBBI_DEBUG
    fSerialNum    = ++gLastSerial;
 #endif
@@ -65,10 +68,13 @@ RBBINode::RBBINode(NodeType t) : UMemory() {
    fVal          = 0;
    fPrecedence   = precZero;

-    UErrorCode     status = U_ZERO_ERROR;
-    fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
+    fFirstPosSet  = new UVector(status);
    fLastPosSet   = new UVector(status);
    fFollowPos    = new UVector(status);
+    if (U_SUCCESS(status) &&
+        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
+        status =  U_MEMORY_ALLOCATION_ERROR;
+    }
    if      (t==opCat)    {fPrecedence = precOpCat;}
    else if (t==opOr)     {fPrecedence = precOpOr;}
    else if (t==opStart)  {fPrecedence = precStart;}
@@ -77,7 +83,10 @@ RBBINode::RBBINode(NodeType t) : UMemory() {
 }


-RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
+RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) {
+    if (U_FAILURE(status)) {
+        return;
+    }
 #ifdef RBBI_DEBUG
    fSerialNum   = ++gLastSerial;
 #endif
@@ -94,10 +103,13 @@ RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
    fVal         = other.fVal;
    fRuleRoot    = false;
    fChainIn     = other.fChainIn;
-    UErrorCode     status = U_ZERO_ERROR;
-    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
+    fFirstPosSet = new UVector(status);
    fLastPosSet  = new UVector(status);
    fFollowPos   = new UVector(status);
+    if (U_SUCCESS(status) &&
+        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
+        status =  U_MEMORY_ALLOCATION_ERROR;
+    }
 }


@@ -193,27 +205,54 @@ void RBBINode::NRDeleteNode(RBBINode *node) {
 //                  references in preparation for generating the DFA tables.
 //
 //-------------------------------------------------------------------------
-RBBINode *RBBINode::cloneTree() {
+constexpr int kRecursiveDepthLimit = 3500;
+RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) {
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
+    // to avoid stack overflow crash.
+    if (depth > kRecursiveDepthLimit) {
+        status = U_INPUT_TOO_LONG_ERROR;
+        return nullptr;
+    }
    RBBINode    *n;

    if (fType == RBBINode::varRef) {
        // If the current node is a variable reference, skip over it
        //   and clone the definition of the variable instead.
-        n = fLeftChild->cloneTree();
+        n = fLeftChild->cloneTree(status, depth+1);
+        if (U_FAILURE(status)) {
+            return nullptr;
+        }
    } else if (fType == RBBINode::uset) {
        n = this;
    } else {
-        n = new RBBINode(*this);
+        n = new RBBINode(*this, status);
+        if (U_FAILURE(status)) {
+            delete n;
+            return nullptr;
+        }
        // Check for null pointer.
-        if (n != nullptr) {
+        if (n == nullptr) {
+            status =  U_MEMORY_ALLOCATION_ERROR;
+            return nullptr;
+        }
        if (fLeftChild != nullptr) {
-                n->fLeftChild          = fLeftChild->cloneTree();
+            n->fLeftChild          = fLeftChild->cloneTree(status, depth+1);
+            if (U_FAILURE(status)) {
+                delete n;
+                return nullptr;
+            }
            n->fLeftChild->fParent = n;
        }
        if (fRightChild != nullptr) {
-                n->fRightChild          = fRightChild->cloneTree();
-                n->fRightChild->fParent = n;
+            n->fRightChild          = fRightChild->cloneTree(status, depth+1);
+            if (U_FAILURE(status)) {
+                delete n;
+                return nullptr;
            }
+            n->fRightChild->fParent = n;
        }
    }
    return n;
@@ -239,7 +278,6 @@ RBBINode *RBBINode::cloneTree() {
 //                      nested references are handled by cloneTree(), not here.
 //
 //-------------------------------------------------------------------------
-constexpr int kRecursiveDepthLimit = 3500;
 RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
    if (U_FAILURE(status)) {
        return this;
@@ -251,21 +289,34 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
        return this;
    }
    if (fType == varRef) {
-        RBBINode *retNode  = fLeftChild->cloneTree();
-        if (retNode != nullptr) {
+        RBBINode *retNode  = fLeftChild->cloneTree(status, depth+1);
+        if (U_FAILURE(status)) {
+            return this;
+        }
        retNode->fRuleRoot = this->fRuleRoot;
        retNode->fChainIn  = this->fChainIn;
-        }
        delete this;   // TODO: undefined behavior. Fix.
        return retNode;
    }

    if (fLeftChild != nullptr) {
        fLeftChild = fLeftChild->flattenVariables(status, depth+1);
+        if (fLeftChild == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        if (U_FAILURE(status)) {
+            return this;
+        }
        fLeftChild->fParent  = this;
    }
    if (fRightChild != nullptr) {
        fRightChild = fRightChild->flattenVariables(status, depth+1);
+        if (fRightChild == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        if (U_FAILURE(status)) {
+            return this;
+        }
        fRightChild->fParent = this;
    }
    return this;
@@ -280,7 +331,16 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
 //                 the left child of the uset node.
 //
 //-------------------------------------------------------------------------
-void RBBINode::flattenSets() {
+void RBBINode::flattenSets(UErrorCode &status, int depth) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
+    // to avoid stack overflow crash.
+    if (depth > kRecursiveDepthLimit) {
+        status = U_INPUT_TOO_LONG_ERROR;
+        return;
+    }
    U_ASSERT(fType != setRef);

    if (fLeftChild != nullptr) {
@@ -288,11 +348,15 @@ void RBBINode::flattenSets() {
            RBBINode *setRefNode = fLeftChild;
            RBBINode *usetNode   = setRefNode->fLeftChild;
            RBBINode *replTree   = usetNode->fLeftChild;
-            fLeftChild           = replTree->cloneTree();
+            fLeftChild           = replTree->cloneTree(status, depth+1);
+            if (U_FAILURE(status)) {
+                delete setRefNode;
+                return;
+            }
            fLeftChild->fParent  = this;
            delete setRefNode;
        } else {
-            fLeftChild->flattenSets();
+            fLeftChild->flattenSets(status, depth+1);
        }
    }

@@ -301,11 +365,15 @@ void RBBINode::flattenSets() {
            RBBINode *setRefNode = fRightChild;
            RBBINode *usetNode   = setRefNode->fLeftChild;
            RBBINode *replTree   = usetNode->fLeftChild;
-            fRightChild           = replTree->cloneTree();
+            fRightChild           = replTree->cloneTree(status, depth+1);
+            if (U_FAILURE(status)) {
+                delete setRefNode;
+                return;
+            }
            fRightChild->fParent  = this;
            delete setRefNode;
        } else {
-            fRightChild->flattenSets();
+            fRightChild->flattenSets(status, depth+1);
        }
    }
 }
--- a/intl/icu/source/common/rbbinode.h
+++ b/intl/icu/source/common/rbbinode.h
@@ -91,14 +91,14 @@ class RBBINode : public UMemory {
        UVector       *fFollowPos;


-        RBBINode(NodeType t);
-        RBBINode(const RBBINode &other);
+        RBBINode(NodeType t, UErrorCode& status);
+        RBBINode(const RBBINode &other, UErrorCode& status);
        ~RBBINode();
        static void  NRDeleteNode(RBBINode *node);
        
-        RBBINode    *cloneTree();
+        RBBINode    *cloneTree(UErrorCode &status, int depth=0);
        RBBINode    *flattenVariables(UErrorCode &status, int depth=0);
-        void         flattenSets();
+        void         flattenSets(UErrorCode &status, int depth=0);
        void         findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);

 #ifdef RBBI_DEBUG
--- a/intl/icu/source/common/rbbiscan.cpp
+++ b/intl/icu/source/common/rbbiscan.cpp
@@ -767,15 +767,24 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
            c = s.char32At(0);
            setToAdopt = new UnicodeSet(c, c);
        }
+        if (setToAdopt == nullptr) {
+            error(U_MEMORY_ALLOCATION_ERROR);
+            return;
+        }
    }

    //
    // Make a new uset node to refer to this UnicodeSet
    // This new uset node becomes the child of the caller's setReference node.
    //
-    RBBINode *usetNode    = new RBBINode(RBBINode::uset);
+    UErrorCode localStatus = U_ZERO_ERROR;
+    RBBINode *usetNode    = new RBBINode(RBBINode::uset, localStatus);
    if (usetNode == nullptr) {
-        error(U_MEMORY_ALLOCATION_ERROR);
+        localStatus = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(localStatus)) {
+        delete usetNode;
+        error(localStatus);
        delete setToAdopt;
        return;
    }
@@ -1191,7 +1200,7 @@ RBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {
        return nullptr;
    }
    fNodeStackPtr++;
-    fNodeStack[fNodeStackPtr] = new RBBINode(t);
+    fNodeStack[fNodeStackPtr] = new RBBINode(t, *fRB->fStatus);
    if (fNodeStack[fNodeStackPtr] == nullptr) {
        *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
    }
--- a/intl/icu/source/common/rbbisetb.cpp
+++ b/intl/icu/source/common/rbbisetb.cpp
@@ -375,7 +375,11 @@ void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
 }

 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
-    RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
+    RBBINode *leafNode = new RBBINode(RBBINode::leafChar, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        delete leafNode;
+        return;
+    }
    if (leafNode == nullptr) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
@@ -388,9 +392,13 @@ void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
        // There are already input symbols present for this set.
        // Set up an OR node, with the previous stuff as the left child
        //   and the new value as the right child.
-        RBBINode *orNode = new RBBINode(RBBINode::opOr);
+        RBBINode *orNode = new RBBINode(RBBINode::opOr, *fStatus);
        if (orNode == nullptr) {
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        }
+        if (U_FAILURE(*fStatus)) {
+            delete orNode;
+            delete leafNode;
            return;
        }
        orNode->fLeftChild  = usetNode->fLeftChild;
--- a/intl/icu/source/common/rbbitblb.cpp
+++ b/intl/icu/source/common/rbbitblb.cpp
@@ -99,13 +99,22 @@ void  RBBITableBuilder::buildForwardTable() {
    //   {bof} fake character.
    // 
    if (fRB->fSetBuilder->sawBOF()) {
-        RBBINode *bofTop    = new RBBINode(RBBINode::opCat);
-        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar);
-        // Delete and exit if memory allocation failed.
-        if (bofTop == nullptr || bofLeaf == nullptr) {
+        RBBINode *bofTop    = new RBBINode(RBBINode::opCat, *fStatus);
+        if (bofTop == nullptr) {
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        }
+        if (U_FAILURE(*fStatus)) {
            delete bofTop;
+            return;
+        }
+        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar, *fStatus);
+        // Delete and exit if memory allocation failed.
+        if (bofLeaf == nullptr) {
+            *fStatus = U_MEMORY_ALLOCATION_ERROR;
+        }
+        if (U_FAILURE(*fStatus)) {
            delete bofLeaf;
+            delete bofTop;
            return;
        }
        bofTop->fLeftChild  = bofLeaf;
@@ -120,18 +129,23 @@ void  RBBITableBuilder::buildForwardTable() {
    //   Appears as a cat-node, left child being the original tree,
    //   right child being the end marker.
    //
-    RBBINode *cn = new RBBINode(RBBINode::opCat);
+    RBBINode *cn = new RBBINode(RBBINode::opCat, *fStatus);
    // Exit if memory allocation failed.
    if (cn == nullptr) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*fStatus)) {
+        delete cn;
        return;
    }
    cn->fLeftChild = fTree;
    fTree->fParent = cn;
-    RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark);
+    RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark, *fStatus);
    // Delete and exit if memory allocation failed.
    if (cn->fRightChild == nullptr) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*fStatus)) {
        delete cn;
        return;
    }
@@ -142,7 +156,7 @@ void  RBBITableBuilder::buildForwardTable() {
    //  Replace all references to UnicodeSets with the tree for the equivalent
    //      expression.
    //
-    fTree->flattenSets();
+    fTree->flattenSets(*fStatus, 0);
 #ifdef RBBI_DEBUG
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
--- a/intl/icu/source/common/resbund.cpp
+++ b/intl/icu/source/common/resbund.cpp
@@ -388,7 +388,7 @@ const Locale &ResourceBundle::getLocale() const {
    return ncThis->fLocale != nullptr ? *ncThis->fLocale : Locale::getDefault();
 }

-const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
+Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
 {
  return ures_getLocaleByType(fResource, type, &status);
 }
--- a/intl/icu/source/common/ucnvmbcs.cpp
+++ b/intl/icu/source/common/ucnvmbcs.cpp
@@ -3146,11 +3146,8 @@ ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
    if(c<0) {
        if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
            /* incomplete character byte sequence */
-            uint8_t *bytes=cnv->toUBytes;
            cnv->toULength = static_cast<int8_t>(source - lastSource);
-            do {
-                *bytes++=*lastSource++;
-            } while(lastSource<source);
+            uprv_memcpy(cnv->toUBytes, lastSource, cnv->toULength);
            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
        } else if(U_FAILURE(*pErrorCode)) {
            /* callback(illegal) */
--- a/intl/icu/source/common/ucurr.cpp
+++ b/intl/icu/source/common/ucurr.cpp
@@ -372,12 +372,8 @@ struct CReg : public icu::UMemory {
    CReg(const char16_t* _iso, const char* _id)
        : next(nullptr)
    {
-        int32_t len = static_cast<int32_t>(uprv_strlen(_id));
-        if (len > static_cast<int32_t>(sizeof(id) - 1)) {
-            len = (sizeof(id)-1);
-        }
-        uprv_strncpy(id, _id, len);
-        id[len] = 0;
+        uprv_strncpy(id, _id, sizeof(id)-1);
+        id[sizeof(id)-1] = 0;
        u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH);
        iso[ISO_CURRENCY_CODE_LENGTH] = 0;
    }
@@ -682,6 +678,9 @@ ucurr_getName(const char16_t* currency,
    // this function.
    UErrorCode ec2 = U_ZERO_ERROR;

+    if (locale == nullptr) {
+        locale = uloc_getDefault();
+    }
    CharString loc = ulocimp_getName(locale, ec2);
    if (U_FAILURE(ec2)) {
        *ec = U_ILLEGAL_ARGUMENT_ERROR;
@@ -780,6 +779,9 @@ ucurr_getPluralName(const char16_t* currency,
    // this function.
    UErrorCode ec2 = U_ZERO_ERROR;

+    if (locale == nullptr) {
+        locale = uloc_getDefault();
+    }
    CharString loc = ulocimp_getName(locale, ec2);
    if (U_FAILURE(ec2)) {
        *ec = U_ILLEGAL_ARGUMENT_ERROR;
@@ -973,6 +975,9 @@ collectCurrencyNames(const char* locale,
    // Look up the Currencies resource for the given locale.
    UErrorCode ec2 = U_ZERO_ERROR;

+    if (locale == nullptr) {
+        locale = uloc_getDefault();
+    }
    CharString loc = ulocimp_getName(locale, ec2);
    if (U_FAILURE(ec2)) {
        ec = U_ILLEGAL_ARGUMENT_ERROR;
--- a/intl/icu/source/common/uloc.cpp
+++ b/intl/icu/source/common/uloc.cpp
@@ -482,8 +482,8 @@ constexpr CanonicalizationMap CANONICALIZE_MAP[] = {

 /* ### BCP47 Conversion *******************************************/
 /* Gets the size of the shortest subtag in the given localeID. */
-int32_t getShortestSubtagLength(const char *localeID) {
-    int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
+int32_t getShortestSubtagLength(std::string_view localeID) {
+    int32_t localeIDLength = static_cast<int32_t>(localeID.length());
    int32_t length = localeIDLength;
    int32_t tmpLength = 0;
    int32_t i;
@@ -507,8 +507,8 @@ int32_t getShortestSubtagLength(const char *localeID) {
    return length;
 }
 /* Test if the locale id has BCP47 u extension and does not have '@' */
-inline bool _hasBCP47Extension(const char *id) {
-    return id != nullptr && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(id) == 1;
+inline bool _hasBCP47Extension(std::string_view id) {
+    return id.find('@') == std::string_view::npos && getShortestSubtagLength(id) == 1;
 }

 /* ### Keywords **************************************************/
@@ -523,10 +523,9 @@ inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c
 #define ULOC_MAX_NO_KEYWORDS 25

 U_CAPI const char * U_EXPORT2
-locale_getKeywordsStart(const char *localeID) {
-    const char *result = nullptr;
-    if((result = uprv_strchr(localeID, '@')) != nullptr) {
-        return result;
+locale_getKeywordsStart(std::string_view localeID) {
+    if (size_t pos = localeID.find('@'); pos != std::string_view::npos) {
+        return localeID.data() + pos;
    }
 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    else {
@@ -536,8 +535,8 @@ locale_getKeywordsStart(const char *localeID) {
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *charToFind = ebcdicSigns;
        while(*charToFind) {
-            if((result = uprv_strchr(localeID, *charToFind)) != nullptr) {
-                return result;
+            if (size_t pos = localeID.find(*charToFind); pos != std::string_view::npos) {
+                return localeID.data() + pos;
            }
            charToFind++;
        }
@@ -590,7 +589,7 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri
 }  // namespace

 U_EXPORT CharString
-ulocimp_getKeywords(const char* localeID,
+ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    bool valuesToo,
                    UErrorCode& status)
@@ -607,7 +606,7 @@ ulocimp_getKeywords(const char* localeID,
 }

 U_EXPORT void
-ulocimp_getKeywords(const char* localeID,
+ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    ByteSink& sink,
                    bool valuesToo,
@@ -619,9 +618,8 @@ ulocimp_getKeywords(const char* localeID,

    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    int32_t numKeywords = 0;
-    const char* pos = localeID;
-    const char* equalSign = nullptr;
-    const char* semicolon = nullptr;
+    size_t equalSign = std::string_view::npos;
+    size_t semicolon = std::string_view::npos;
    int32_t i = 0, j, n;

    if(prev == '@') { /* start of keyword definition */
@@ -629,74 +627,72 @@ ulocimp_getKeywords(const char* localeID,
        do {
            bool duplicate = false;
            /* skip leading spaces */
-            while(*pos == ' ') {
-                pos++;
+            while (!localeID.empty() && localeID.front() == ' ') {  // front() on an empty view is UB
+                localeID.remove_prefix(1);
            }
-            if (!*pos) { /* handle trailing "; " */
+            if (localeID.empty()) { /* handle trailing "; " */
                break;
            }
            if(numKeywords == maxKeywords) {
                status = U_INTERNAL_PROGRAM_ERROR;
                return;
            }
-            equalSign = uprv_strchr(pos, '=');
-            semicolon = uprv_strchr(pos, ';');
+            equalSign = localeID.find('=');
+            semicolon = localeID.find(';');
            /* lack of '=' [foo@currency] is illegal */
            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
-            if(!equalSign || (semicolon && semicolon<equalSign)) {
+            if (equalSign == std::string_view::npos ||
+                (semicolon != std::string_view::npos && semicolon < equalSign)) {
+                status = U_INVALID_FORMAT_ERROR;
+                return;
+            }
+            /* zero-length keyword is an error. */
+            if (equalSign == 0) {
                status = U_INVALID_FORMAT_ERROR;
                return;
            }
            /* need to normalize both keyword and keyword name */
-            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
+            if (equalSign >= ULOC_KEYWORD_BUFFER_LEN) {
                /* keyword name too long for internal buffer */
                status = U_INTERNAL_PROGRAM_ERROR;
                return;
            }
-            for(i = 0, n = 0; i < equalSign - pos; ++i) {
-                if (pos[i] != ' ') {
-                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
+            for (i = 0, n = 0; static_cast<size_t>(i) < equalSign; ++i) {
+                if (localeID[i] != ' ') {
+                    keywordList[numKeywords].keyword[n++] = uprv_tolower(localeID[i]);
                }
            }

-            /* zero-length keyword is an error. */
-            if (n == 0) {
-                status = U_INVALID_FORMAT_ERROR;
-                return;
-            }
-
            keywordList[numKeywords].keyword[n] = 0;
            keywordList[numKeywords].keywordLen = n;
            /* now grab the value part. First we skip the '=' */
            equalSign++;
-            /* then we leading spaces */
+            /* then we skip leading spaces */
-            while(*equalSign == ' ') {
+            while (equalSign < localeID.length() && localeID[equalSign] == ' ') {
                equalSign++;
            }

            /* Premature end or zero-length value */
-            if (!*equalSign || equalSign == semicolon) {
+            if (equalSign == localeID.length() || equalSign == semicolon) {
                status = U_INVALID_FORMAT_ERROR;
                return;
            }

-            keywordList[numKeywords].valueStart = equalSign;
+            keywordList[numKeywords].valueStart = localeID.data() + equalSign;

-            pos = semicolon;
-            i = 0;
-            if(pos) {
-                while(*(pos - i - 1) == ' ') {
-                    i++;
-                }
-                keywordList[numKeywords].valueLen = static_cast<int32_t>(pos - equalSign - i);
-                pos++;
+            std::string_view value = localeID;
+            if (semicolon != std::string_view::npos) {
+                value.remove_suffix(value.length() - semicolon);
+                localeID.remove_prefix(semicolon + 1);
            } else {
-                i = static_cast<int32_t>(uprv_strlen(equalSign));
-                while(i && equalSign[i-1] == ' ') {
-                    i--;
+                localeID = {};
            }
-                keywordList[numKeywords].valueLen = i;
+            value.remove_prefix(equalSign);
+            if (size_t last = value.find_last_not_of(' '); last != std::string_view::npos) {
+                value.remove_suffix(value.length() - last - 1);
            }
+            keywordList[numKeywords].valueLen = static_cast<int32_t>(value.length());
+
            /* If this is a duplicate keyword, then ignore it */
            for (j=0; j<numKeywords; ++j) {
                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
@@ -707,7 +703,7 @@ ulocimp_getKeywords(const char* localeID,
            if (!duplicate) {
                ++numKeywords;
            }
-        } while(pos);
+        } while (!localeID.empty());

        /* now we have a list of keywords */
        /* we need to sort it */
@@ -784,7 +780,7 @@ ulocimp_getKeywordValue(const char* localeID,
      return;
    }

-    if (_hasBCP47Extension(localeID)) {
+    if (localeID != nullptr && _hasBCP47Extension(localeID)) {
        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
        tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
    } else {
@@ -889,7 +885,8 @@ uloc_setKeywordValue(const char* keywordName,
        return 0;
    }

-    char* keywords = const_cast<char*>(locale_getKeywordsStart(buffer));
+    char* keywords = const_cast<char*>(
+        locale_getKeywordsStart({buffer, static_cast<std::string_view::size_type>(bufLen)}));
    int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer;
    // Remove -1 from the capacity so that this function can guarantee NUL termination.
    CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
@@ -921,7 +918,7 @@ ulocimp_setKeywordValue(std::string_view keywordName,
 {
    if (U_FAILURE(status)) { return; }
    std::string_view keywords;
-    if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) {
+    if (const char* start = locale_getKeywordsStart(localeID.toStringPiece()); start != nullptr) {
        // This is safe because CharString::truncate() doesn't actually erase any
        // data, but simply sets the position for where new data will be written.
        int32_t size = start - localeID.data();
@@ -1138,15 +1135,18 @@ inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' ||

 /*returns true if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
-inline bool _isIDPrefix(const char *s) { return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); }
+inline bool _isIDPrefix(std::string_view s) {
+    return s.size() >= 2 && _isPrefixLetter(s[0]) && _isIDSeparator(s[1]);
+}

 /* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
-inline bool _isTerminator(char a) { return a == 0 || a == '.' || a == '@'; }
+inline bool _isTerminator(char a) { return a == '.' || a == '@'; }

-inline bool _isBCP47Extension(const char* p) {
-    return p[0] == '-' &&
+inline bool _isBCP47Extension(std::string_view p) {
+    return p.size() >= 3 &&
+           p[0] == '-' &&
           (p[1] == 't' || p[1] == 'T' ||
            p[1] == 'u' || p[1] == 'U' ||
            p[1] == 'x' || p[1] == 'X') &&
@@ -1202,49 +1202,44 @@ namespace {
 * TODO try to use this in Locale
 */

-void
-_getLanguage(const char* localeID,
-             ByteSink* sink,
-             const char** pEnd,
-             UErrorCode& status) {
-    U_ASSERT(pEnd != nullptr);
-    *pEnd = localeID;
-
-    if (uprv_stricmp(localeID, "root") == 0) {
-        localeID += 4;
-    } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
-               (localeID[3] == '\0' ||
+size_t _getLanguage(std::string_view localeID, ByteSink* sink, UErrorCode& status) {
+    size_t skip = 0;
+    if (localeID.size() == 4 && uprv_strnicmp(localeID.data(), "root", 4) == 0) {
+        skip = 4;
+        localeID.remove_prefix(skip);
+    } else if (localeID.size() >= 3 && uprv_strnicmp(localeID.data(), "und", 3) == 0 &&
+               (localeID.size() == 3 ||
                localeID[3] == '-' ||
                localeID[3] == '_' ||
                localeID[3] == '@')) {
-        localeID += 3;
+        skip = 3;
+        localeID.remove_prefix(skip);
    }

    constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1;  // Minus NUL.

    /* if it starts with i- or x- then copy that prefix */
-    int32_t len = _isIDPrefix(localeID) ? 2 : 0;
-    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+    size_t len = _isIDPrefix(localeID) ? 2 : 0;
+    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
        if (len == MAXLEN) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
-            return;
+            return 0;
        }
        len++;
    }

-    *pEnd = localeID + len;
-    if (sink == nullptr || len == 0) { return; }
+    if (sink == nullptr || len == 0) { return skip + len; }

-    int32_t minCapacity = uprv_max(len, 4);  // Minimum 3 letters plus NUL.
+    int32_t minCapacity = uprv_max(static_cast<int32_t>(len), 4);  // Minimum 3 letters plus NUL.
    char scratch[MAXLEN];
    int32_t capacity = 0;
    char* buffer = sink->GetAppendBuffer(
            minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);

-    for (int32_t i = 0; i < len; ++i) {
+    for (size_t i = 0; i < len; ++i) {
        buffer[i] = uprv_tolower(localeID[i]);
    }
-    if (_isIDSeparator(localeID[1])) {
+    if (localeID.size() >= 2 && _isIDSeparator(localeID[1])) {
        buffer[1] = '-';
    }

@@ -1256,32 +1251,26 @@ _getLanguage(const char* localeID,
        if (offset.has_value()) {
            const char* const alias = LANGUAGES[*offset];
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
-            return;
+            return skip + len;
        }
    }

-    sink->Append(buffer, len);
+    sink->Append(buffer, static_cast<int32_t>(len));
+    return skip + len;
 }

-void
-_getScript(const char* localeID,
-           ByteSink* sink,
-           const char** pEnd) {
-    U_ASSERT(pEnd != nullptr);
-    *pEnd = localeID;
-
+size_t _getScript(std::string_view localeID, ByteSink* sink) {
    constexpr int32_t LENGTH = 4;

-    int32_t len = 0;
-    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
+    size_t len = 0;
+    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
            uprv_isASCIILetter(localeID[len])) {
-        if (len == LENGTH) { return; }
+        if (len == LENGTH) { return 0; }
        len++;
    }
-    if (len != LENGTH) { return; }
+    if (len != LENGTH) { return 0; }

-    *pEnd = localeID + LENGTH;
-    if (sink == nullptr) { return; }
+    if (sink == nullptr) { return len; }

    char scratch[LENGTH];
    int32_t capacity = 0;
@@ -1294,27 +1283,21 @@ _getScript(const char* localeID,
    }

    sink->Append(buffer, LENGTH);
+    return len;
 }

-void
-_getRegion(const char* localeID,
-           ByteSink* sink,
-           const char** pEnd) {
-    U_ASSERT(pEnd != nullptr);
-    *pEnd = localeID;
-
+size_t _getRegion(std::string_view localeID, ByteSink* sink) {
    constexpr int32_t MINLEN = 2;
    constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1;  // Minus NUL.

-    int32_t len = 0;
-    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
-        if (len == MAXLEN) { return; }
+    size_t len = 0;
+    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+        if (len == MAXLEN) { return 0; }
        len++;
    }
-    if (len < MINLEN) { return; }
+    if (len < MINLEN) { return 0; }

-    *pEnd = localeID + len;
-    if (sink == nullptr) { return; }
+    if (sink == nullptr) { return len; }

    char scratch[ULOC_COUNTRY_CAPACITY];
    int32_t capacity = 0;
@@ -1325,7 +1308,7 @@ _getRegion(const char* localeID,
            UPRV_LENGTHOF(scratch),
            &capacity);

-    for (int32_t i = 0; i < len; ++i) {
+    for (size_t i = 0; i < len; ++i) {
        buffer[i] = uprv_toupper(localeID[i]);
    }

@@ -1337,26 +1320,25 @@ _getRegion(const char* localeID,
        if (offset.has_value()) {
            const char* const alias = COUNTRIES[*offset];
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
-            return;
+            return len;
        }
    }

-    sink->Append(buffer, len);
+    sink->Append(buffer, static_cast<int32_t>(len));
+    return len;
 }

 /**
 * @param needSeparator if true, then add leading '_' if any variants
 * are added to 'variant'
 */
-void
-_getVariant(const char* localeID,
+size_t
+_getVariant(std::string_view localeID,
            char prev,
            ByteSink* sink,
-            const char** pEnd,
            bool needSeparator,
            UErrorCode& status) {
-    if (U_FAILURE(status)) return;
-    if (pEnd != nullptr) { *pEnd = localeID; }
+    if (U_FAILURE(status) || localeID.empty()) return 0;

    // Reasonable upper limit for variants
    // There are no strict limitation of the syntax of variant in the legacy
@@ -1369,42 +1351,62 @@ _getVariant(const char* localeID,
    constexpr int32_t MAX_VARIANTS_LENGTH = 179;

    /* get one or more variant tags and separate them with '_' */
-    int32_t index = 0;
+    size_t index = 0;
    if (_isIDSeparator(prev)) {
        /* get a variant string after a '-' or '_' */
-        for (index=0; !_isTerminator(localeID[index]); index++) {
-            if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
+        for (std::string_view sub = localeID;;) {
+            size_t next = sub.find_first_of(".@_-");
+            // For historical reasons, a trailing separator is included in the variant.
+            bool finished = next == std::string_view::npos || next + 1 == sub.length();
+            size_t limit = finished ? sub.length() : next;
+            index += limit;
+            if (index > MAX_VARIANTS_LENGTH) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
-                return;
-            }
-            if (needSeparator) {
-                if (sink != nullptr) {
-                    sink->Append("_", 1);
-                }
-                needSeparator = false;
-            }
-            if (sink != nullptr) {
-                char c = uprv_toupper(localeID[index]);
-                if (c == '-') c = '_';
-                sink->Append(&c, 1);
-            }
-        }
-        if (pEnd != nullptr) { *pEnd = localeID+index; }
+                return 0;
            }

-    /* if there is no variant tag after a '-' or '_' then look for '@' */
-    if (index == 0) {
-        if (prev=='@') {
-            /* keep localeID */
-        } else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) {
-            ++localeID; /* point after the '@' */
+            if (sink != nullptr) {
+                if (needSeparator) {
+                    sink->Append("_", 1);
                } else {
-            return;
+                    needSeparator = true;
                }
-        for(; !_isTerminator(localeID[index]); index++) {
+
+                int32_t length = static_cast<int32_t>(limit);
+                int32_t minCapacity = uprv_min(length, MAX_VARIANTS_LENGTH);
+                char scratch[MAX_VARIANTS_LENGTH];
+                int32_t capacity = 0;
+                char* buffer = sink->GetAppendBuffer(
+                        minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
+
+                for (size_t i = 0; i < limit; ++i) {
+                    buffer[i] = uprv_toupper(sub[i]);
+                }
+                sink->Append(buffer, length);
+            }
+
+            if (finished) { return index; }
+            sub.remove_prefix(next);
+            if (_isTerminator(sub.front()) || _isBCP47Extension(sub)) { return index; }
+            sub.remove_prefix(1);
+            index++;
+        }
+    }
+
+    size_t skip = 0;
+    /* if there is no variant tag after a '-' or '_' then look for '@' */
+    if (prev == '@') {
+        /* keep localeID */
+    } else if (const char* p = locale_getKeywordsStart(localeID); p != nullptr) {
+        skip = 1 + p - localeID.data(); /* point after the '@' */
+        localeID.remove_prefix(skip);
+    } else {
+        return 0;
+    }
+    for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) {
        if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
            status = U_ILLEGAL_ARGUMENT_ERROR;
-                return;
+            return 0;
        }
        if (needSeparator) {
            if (sink != nullptr) {
@@ -1418,14 +1420,13 @@ _getVariant(const char* localeID,
            sink->Append(&c, 1);
        }
    }
-        if (pEnd != nullptr) { *pEnd = localeID + index; }
-    }
+    return skip + index;
 }

 }  // namespace

 U_EXPORT CharString
-ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
+ulocimp_getLanguage(std::string_view localeID, UErrorCode& status) {
    return ByteSinkUtil::viaByteSinkToCharString(
        [&](ByteSink& sink, UErrorCode& status) {
            ulocimp_getSubtags(
@@ -1441,7 +1442,7 @@ ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
 }

 U_EXPORT CharString
-ulocimp_getScript(const char* localeID, UErrorCode& status) {
+ulocimp_getScript(std::string_view localeID, UErrorCode& status) {
    return ByteSinkUtil::viaByteSinkToCharString(
        [&](ByteSink& sink, UErrorCode& status) {
            ulocimp_getSubtags(
@@ -1457,7 +1458,7 @@ ulocimp_getScript(const char* localeID, UErrorCode& status) {
 }

 U_EXPORT CharString
-ulocimp_getRegion(const char* localeID, UErrorCode& status) {
+ulocimp_getRegion(std::string_view localeID, UErrorCode& status) {
    return ByteSinkUtil::viaByteSinkToCharString(
        [&](ByteSink& sink, UErrorCode& status) {
            ulocimp_getSubtags(
@@ -1473,7 +1474,7 @@ ulocimp_getRegion(const char* localeID, UErrorCode& status) {
 }

 U_EXPORT CharString
-ulocimp_getVariant(const char* localeID, UErrorCode& status) {
+ulocimp_getVariant(std::string_view localeID, UErrorCode& status) {
    return ByteSinkUtil::viaByteSinkToCharString(
        [&](ByteSink& sink, UErrorCode& status) {
            ulocimp_getSubtags(
@@ -1490,7 +1491,7 @@ ulocimp_getVariant(const char* localeID, UErrorCode& status) {

 U_EXPORT void
 ulocimp_getSubtags(
-        const char* localeID,
+        std::string_view localeID,
        CharString* language,
        CharString* script,
        CharString* region,
@@ -1521,7 +1522,7 @@ ulocimp_getSubtags(

 U_EXPORT void
 ulocimp_getSubtags(
-        const char* localeID,
+        std::string_view localeID,
        ByteSink* language,
        ByteSink* script,
        ByteSink* region,
@@ -1531,7 +1532,7 @@ ulocimp_getSubtags(
    if (U_FAILURE(status)) { return; }

    if (pEnd != nullptr) {
-        *pEnd = localeID;
+        *pEnd = localeID.data();
    } else if (language == nullptr &&
               script == nullptr &&
               region == nullptr &&
@@ -1539,62 +1540,94 @@ ulocimp_getSubtags(
        return;
    }

+    if (localeID.empty()) { return; }
+
    bool hasRegion = false;

-    if (localeID == nullptr) {
-        localeID = uloc_getDefault();
+    {
+        size_t len = _getLanguage(localeID, language, status);
+        if (U_FAILURE(status)) { return; }
+        if (len > 0) {
+            localeID.remove_prefix(len);
+        }
    }

-    _getLanguage(localeID, language, &localeID, status);
-    if (U_FAILURE(status)) { return; }
-    U_ASSERT(localeID != nullptr);
-
    if (pEnd != nullptr) {
-        *pEnd = localeID;
+        *pEnd = localeID.data();
    } else if (script == nullptr &&
               region == nullptr &&
               variant == nullptr) {
        return;
    }

-    if (_isIDSeparator(*localeID)) {
-        const char* begin = localeID + 1;
-        const char* end = nullptr;
-        _getScript(begin, script, &end);
-        U_ASSERT(end != nullptr);
-        if (end != begin) {
-            localeID = end;
-            if (pEnd != nullptr) { *pEnd = localeID; }
+    if (localeID.empty()) { return; }
+
+    if (_isIDSeparator(localeID.front())) {
+        std::string_view sub = localeID;
+        sub.remove_prefix(1);
+        size_t len = _getScript(sub, script);
+        if (len > 0) {
+            localeID.remove_prefix(len + 1);
+            if (pEnd != nullptr) { *pEnd = localeID.data(); }
        }
    }

-    if (region == nullptr && variant == nullptr && pEnd == nullptr) { return; }
+    if ((region == nullptr && variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }

-    if (_isIDSeparator(*localeID)) {
-        const char* begin = localeID + 1;
-        const char* end = nullptr;
-        _getRegion(begin, region, &end);
-        U_ASSERT(end != nullptr);
-        if (end != begin) {
+    if (_isIDSeparator(localeID.front())) {
+        std::string_view sub = localeID;
+        sub.remove_prefix(1);
+        size_t len = _getRegion(sub, region);
+        if (len > 0) {
            hasRegion = true;
-            localeID = end;
-            if (pEnd != nullptr) { *pEnd = localeID; }
+            localeID.remove_prefix(len + 1);
+            if (pEnd != nullptr) { *pEnd = localeID.data(); }
        }
    }

-    if (variant == nullptr && pEnd == nullptr) { return; }
+    if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }

-    if (_isIDSeparator(*localeID) && !_isBCP47Extension(localeID)) {
+    bool hasVariant = false;
+
+    if (_isIDSeparator(localeID.front()) && !_isBCP47Extension(localeID)) {
+        std::string_view sub = localeID;
        /* If there was no country ID, skip a possible extra IDSeparator */
-        if (!hasRegion && _isIDSeparator(localeID[1])) {
-            localeID++;
-        }
-        const char* begin = localeID + 1;
-        const char* end = nullptr;
-        _getVariant(begin, *localeID, variant, &end, false, status);
+        size_t skip = !hasRegion && localeID.size() > 1 && _isIDSeparator(localeID[1]) ? 2 : 1;
+        sub.remove_prefix(skip);
+        size_t len = _getVariant(sub, localeID[0], variant, false, status);
        if (U_FAILURE(status)) { return; }
-        U_ASSERT(end != nullptr);
-        if (end != begin && pEnd != nullptr) { *pEnd = end; }
+        if (len > 0) {
+            hasVariant = true;
+            localeID.remove_prefix(skip + len);
+            if (pEnd != nullptr) { *pEnd = localeID.data(); }
+        }
+    }
+
+    if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
+
+    if (_isBCP47Extension(localeID)) {
+        localeID.remove_prefix(2);
+        constexpr char vaposix[] = "-va-posix";
+        constexpr size_t length = sizeof vaposix - 1;
+        for (size_t next;; localeID.remove_prefix(next)) {
+            next = localeID.find('-', 1);
+            if (next == std::string_view::npos) { break; }
+            next = localeID.find('-', next + 1);
+            bool finished = next == std::string_view::npos;
+            std::string_view sub = localeID;
+            if (!finished) { sub.remove_suffix(sub.length() - next); }
+
+            if (sub.length() == length && uprv_strnicmp(sub.data(), vaposix, length) == 0) {
+                if (variant != nullptr) {
+                    if (hasVariant) { variant->Append("_", 1); }
+                    constexpr char posix[] = "POSIX";
+                    variant->Append(posix, sizeof posix - 1);
+                }
+                if (pEnd != nullptr) { *pEnd = localeID.data() + length; }
+            }
+
+            if (finished) { break; }
+        }
    }
 }

@@ -1700,7 +1733,7 @@ uloc_openKeywords(const char* localeID,
    CharString tempBuffer;
    const char* tmpLocaleID;

-    if (_hasBCP47Extension(localeID)) {
+    if (localeID != nullptr && _hasBCP47Extension(localeID)) {
        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
        tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
    } else {
@@ -1753,7 +1786,7 @@ constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default);
 * This is the code underlying uloc_getName and uloc_canonicalize.
 */
 void
-_canonicalize(const char* localeID,
+_canonicalize(std::string_view localeID,
              ByteSink& sink,
              uint32_t options,
              UErrorCode& err) {
@@ -1764,33 +1797,30 @@ _canonicalize(const char* localeID,
    int32_t j, fieldCount=0;
    CharString tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
-    const char* origLocaleID;
-    const char* tmpLocaleID;
-    const char* keywordAssign = nullptr;
-    const char* separatorIndicator = nullptr;
+    std::string_view origLocaleID;
+    std::string_view tmpLocaleID;
+    size_t keywordAssign = std::string_view::npos;
+    size_t separatorIndicator = std::string_view::npos;

    if (_hasBCP47Extension(localeID)) {
-        const char* localeIDPtr = localeID;
+        std::string_view localeIDPtr = localeID;

        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
-        if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
-            localeIDWithHyphens.append(localeID, -1, err);
+        if (localeID.size() >= 2 && localeID.find('_') != std::string_view::npos && localeID[1] != '-' && localeID[1] != '_') {
+            localeIDWithHyphens.append(localeID, err);
            if (U_SUCCESS(err)) {
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
                    if (*p == '_') {
                        *p = '-';
                    }
                }
-                localeIDPtr = localeIDWithHyphens.data();
+                localeIDPtr = localeIDWithHyphens.toStringPiece();
            }
        }

-        tempBuffer = ulocimp_forLanguageTag(localeIDPtr, -1, nullptr, err);
-        tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
+        tempBuffer = ulocimp_forLanguageTag(localeIDPtr.data(), static_cast<int32_t>(localeIDPtr.size()), nullptr, err);
+        tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? static_cast<std::string_view>(tempBuffer.toStringPiece()) : localeIDPtr;
    } else {
-        if (localeID==nullptr) {
-           localeID=uloc_getDefault();
-        }
        tmpLocaleID=localeID;
    }

@@ -1801,20 +1831,25 @@ _canonicalize(const char* localeID,
    CharString script;
    CharString country;
    CharString variant;
+    const char* end = nullptr;
    ulocimp_getSubtags(
            tmpLocaleID,
            &tag,
            &script,
            &country,
            &variant,
-            &tmpLocaleID,
+            &end,
            err);
    if (U_FAILURE(err)) {
        return;
    }
+    U_ASSERT(end != nullptr);
+    if (end > tmpLocaleID.data()) {
+        tmpLocaleID.remove_prefix(end - tmpLocaleID.data());
+    }

-    if (tag.length() == I_DEFAULT_LENGTH &&
-            uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
+    if (tag.length() == I_DEFAULT_LENGTH && origLocaleID.length() >= I_DEFAULT_LENGTH &&
+            uprv_strncmp(origLocaleID.data(), i_default, I_DEFAULT_LENGTH) == 0) {
        tag.clear();
        tag.append(uloc_getDefault(), err);
    } else {
@@ -1839,15 +1874,14 @@ _canonicalize(const char* localeID,
    }

    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
-    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
+    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && !tmpLocaleID.empty() && tmpLocaleID.front() == '.') {
        tag.append('.', err);
-        ++tmpLocaleID;
-        const char *atPos = nullptr;
+        tmpLocaleID.remove_prefix(1);
        size_t length;
-        if((atPos = uprv_strchr(tmpLocaleID, '@')) != nullptr) {
-            length = atPos - tmpLocaleID;
+        if (size_t atPos = tmpLocaleID.find('@'); atPos != std::string_view::npos) {
+            length = atPos;
        } else {
-            length = uprv_strlen(tmpLocaleID);
+            length = tmpLocaleID.length();
        }
        // The longest charset name we found in IANA charset registry
        // https://www.iana.org/assignments/character-sets/ is
@@ -1859,33 +1893,34 @@ _canonicalize(const char* localeID,
           err = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
           return;
        }
-        tag.append(tmpLocaleID, static_cast<int32_t>(length), err);
-        tmpLocaleID += length;
+        if (length > 0) {
+            tag.append(tmpLocaleID.data(), static_cast<int32_t>(length), err);
+            tmpLocaleID.remove_prefix(length);
+        }
    }

    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
-       After this, tmpLocaleID either points to '@' or is nullptr */
-    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=nullptr) {
-        keywordAssign = uprv_strchr(tmpLocaleID, '=');
-        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
+       After this, tmpLocaleID either starts at '@' or is empty. */
+    if (const char* start = locale_getKeywordsStart(tmpLocaleID); start != nullptr) {
+        if (start > tmpLocaleID.data()) {
+            tmpLocaleID.remove_prefix(start - tmpLocaleID.data());
+        }
+        keywordAssign = tmpLocaleID.find('=');
+        separatorIndicator = tmpLocaleID.find(';');
+    } else {
+        tmpLocaleID = {};
    }

    /* Copy POSIX-style variant, if any [mr@FOO] */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
-        tmpLocaleID != nullptr && keywordAssign == nullptr) {
-        for (;;) {
-            char c = *tmpLocaleID;
-            if (c == 0) {
-                break;
-            }
-            tag.append(c, err);
-            ++tmpLocaleID;
-        }
+        !tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
+        tag.append(tmpLocaleID, err);
+        tmpLocaleID = {};
    }

    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
        /* Handle @FOO variant if @ is present and not followed by = */
-        if (tmpLocaleID!=nullptr && keywordAssign==nullptr) {
+        if (!tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
            /* Add missing '_' if needed */
            if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
                do {
@@ -1895,7 +1930,9 @@ _canonicalize(const char* localeID,
            }

            CharStringByteSink s(&tag);
-            _getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty(), err);
+            std::string_view sub = tmpLocaleID;
+            sub.remove_prefix(1);
+            _getVariant(sub, '@', &s, !variant.isEmpty(), err);
            if (U_FAILURE(err)) { return; }
        }

@@ -1903,7 +1940,7 @@ _canonicalize(const char* localeID,
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
            StringPiece id(CANONICALIZE_MAP[j].id);
            if (tag == id) {
-                if (id.empty() && tmpLocaleID != nullptr) {
+                if (id.empty() && !tmpLocaleID.empty()) {
                    break; /* Don't remap "" if keywords present */
                }
                tag.clear();
@@ -1916,11 +1953,12 @@ _canonicalize(const char* localeID,
    sink.Append(tag.data(), tag.length());

    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
-        if (tmpLocaleID!=nullptr && keywordAssign!=nullptr &&
-            (!separatorIndicator || separatorIndicator > keywordAssign)) {
+        if (!tmpLocaleID.empty() && keywordAssign != std::string_view::npos &&
+            (separatorIndicator == std::string_view::npos || separatorIndicator > keywordAssign)) {
            sink.Append("@", 1);
            ++fieldCount;
-            ulocimp_getKeywords(tmpLocaleID+1, '@', sink, true, err);
+            tmpLocaleID.remove_prefix(1);
+            ulocimp_getKeywords(tmpLocaleID, '@', sink, true, err);
        }
    }
 }
@@ -1989,6 +2027,10 @@ uloc_getLanguage(const char*    localeID,
         int32_t languageCapacity,
         UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
+
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        language, languageCapacity,
@@ -2011,6 +2053,10 @@ uloc_getScript(const char*    localeID,
         int32_t scriptCapacity,
         UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
+
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        script, scriptCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2032,6 +2078,10 @@ uloc_getCountry(const char* localeID,
            int32_t countryCapacity,
            UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
+
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        country, countryCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2053,6 +2103,10 @@ uloc_getVariant(const char* localeID,
                int32_t variantCapacity,
                UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
+
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        variant, variantCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2074,6 +2128,9 @@ uloc_getName(const char* localeID,
             int32_t nameCapacity,
             UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2083,7 +2140,7 @@ uloc_getName(const char* localeID,
 }

 U_EXPORT CharString
-ulocimp_getName(const char* localeID,
+ulocimp_getName(std::string_view localeID,
                UErrorCode& err)
 {
    return ByteSinkUtil::viaByteSinkToCharString(
@@ -2094,7 +2151,7 @@ ulocimp_getName(const char* localeID,
 }

 U_EXPORT void
-ulocimp_getName(const char* localeID,
+ulocimp_getName(std::string_view localeID,
                ByteSink& sink,
                UErrorCode& err)
 {
@@ -2107,6 +2164,9 @@ uloc_getBaseName(const char* localeID,
                 int32_t nameCapacity,
                 UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2116,7 +2176,7 @@ uloc_getBaseName(const char* localeID,
 }

 U_EXPORT CharString
-ulocimp_getBaseName(const char* localeID,
+ulocimp_getBaseName(std::string_view localeID,
                    UErrorCode& err)
 {
    return ByteSinkUtil::viaByteSinkToCharString(
@@ -2127,7 +2187,7 @@ ulocimp_getBaseName(const char* localeID,
 }

 U_EXPORT void
-ulocimp_getBaseName(const char* localeID,
+ulocimp_getBaseName(std::string_view localeID,
                    ByteSink& sink,
                    UErrorCode& err)
 {
@@ -2140,6 +2200,9 @@ uloc_canonicalize(const char* localeID,
                  int32_t nameCapacity,
                  UErrorCode* err)
 {
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity,
        [&](ByteSink& sink, UErrorCode& status) {
@@ -2149,7 +2212,7 @@ uloc_canonicalize(const char* localeID,
 }

 U_EXPORT CharString
-ulocimp_canonicalize(const char* localeID,
+ulocimp_canonicalize(std::string_view localeID,
                     UErrorCode& err)
 {
    return ByteSinkUtil::viaByteSinkToCharString(
@@ -2160,7 +2223,7 @@ ulocimp_canonicalize(const char* localeID,
 }

 U_EXPORT void
-ulocimp_canonicalize(const char* localeID,
+ulocimp_canonicalize(std::string_view localeID,
                     ByteSink& sink,
                     UErrorCode& err)
 {
--- a/intl/icu/source/common/uloc_tag.cpp
+++ b/intl/icu/source/common/uloc_tag.cpp
@@ -1043,7 +1043,7 @@ _initializeULanguageTag(ULanguageTag* langtag) {
 }

 void
-_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
+_appendLanguageToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
@@ -1088,7 +1088,7 @@ _appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str
 }

 void
-_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
+_appendScriptToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
@@ -1118,7 +1118,7 @@ _appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool stric
 }

 void
-_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
+_appendRegionToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
@@ -1169,7 +1169,7 @@ void _sortVariants(VariantListEntry* first) {
 }

 void
-_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
+_appendVariantsToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    UErrorCode tmpStatus = U_ZERO_ERROR;
@@ -1872,7 +1872,7 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status)
 }

 void
-_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
+_appendPrivateuseToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    UErrorCode tmpStatus = U_ZERO_ERROR;
@@ -2596,6 +2596,9 @@ ulocimp_toLanguageTag(const char* localeID,
    bool hadPosix = false;
    const char* pKeywordStart;

+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
    icu::CharString canonical = ulocimp_canonicalize(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus)) {
@@ -2604,7 +2607,7 @@ ulocimp_toLanguageTag(const char* localeID,
    }

    /* For handling special case - private use only tag */
-    pKeywordStart = locale_getKeywordsStart(canonical.data());
+    pKeywordStart = locale_getKeywordsStart(canonical.toStringPiece());
    if (pKeywordStart == canonical.data()) {
        int kwdCnt = 0;
        bool done = false;
@@ -2642,12 +2645,12 @@ ulocimp_toLanguageTag(const char* localeID,
        }
    }

-    _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
-    _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
-    _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
-    _appendVariantsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
+    _appendLanguageToLanguageTag(canonical.toStringPiece(), sink, strict, status);
+    _appendScriptToLanguageTag(canonical.toStringPiece(), sink, strict, status);
+    _appendRegionToLanguageTag(canonical.toStringPiece(), sink, strict, status);
+    _appendVariantsToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
    _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
-    _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
+    _appendPrivateuseToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
 }


--- a/intl/icu/source/common/ulocale.cpp
+++ b/intl/icu/source/common/ulocale.cpp
@@ -10,7 +10,6 @@
 #include "unicode/locid.h"

 #include "bytesinkutil.h"
-#include "charstr.h"
 #include "cmemory.h"

 U_NAMESPACE_USE
@@ -24,9 +23,7 @@ ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) {
    if (length < 0) {
        return EXTERNAL(icu::Locale::createFromName(localeID).clone());
    }
-    CharString str(localeID, length, *err);  // Make a NUL terminated copy.
-    if (U_FAILURE(*err)) { return nullptr; }
-    return EXTERNAL(icu::Locale::createFromName(str.data()).clone());
+    return EXTERNAL(icu::Locale::createFromName(StringPiece{localeID, length}).clone());
 }

 ULocale*
--- a/intl/icu/source/common/ulocimp.h
+++ b/intl/icu/source/common/ulocimp.h
@@ -68,42 +68,42 @@ U_EXPORT std::optional<std::string_view>
 ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);

 U_EXPORT icu::CharString
-ulocimp_getKeywords(const char* localeID,
+ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    bool valuesToo,
                    UErrorCode& status);

 U_EXPORT void
-ulocimp_getKeywords(const char* localeID,
+ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    icu::ByteSink& sink,
                    bool valuesToo,
                    UErrorCode& status);

 U_EXPORT icu::CharString
-ulocimp_getName(const char* localeID,
+ulocimp_getName(std::string_view localeID,
                UErrorCode& err);

 U_EXPORT void
-ulocimp_getName(const char* localeID,
+ulocimp_getName(std::string_view localeID,
                icu::ByteSink& sink,
                UErrorCode& err);

 U_EXPORT icu::CharString
-ulocimp_getBaseName(const char* localeID,
+ulocimp_getBaseName(std::string_view localeID,
                    UErrorCode& err);

 U_EXPORT void
-ulocimp_getBaseName(const char* localeID,
+ulocimp_getBaseName(std::string_view localeID,
                    icu::ByteSink& sink,
                    UErrorCode& err);

 U_EXPORT icu::CharString
-ulocimp_canonicalize(const char* localeID,
+ulocimp_canonicalize(std::string_view localeID,
                     UErrorCode& err);

 U_EXPORT void
-ulocimp_canonicalize(const char* localeID,
+ulocimp_canonicalize(std::string_view localeID,
                     icu::ByteSink& sink,
                     UErrorCode& err);

@@ -119,16 +119,16 @@ ulocimp_getKeywordValue(const char* localeID,
                        UErrorCode& status);

 U_EXPORT icu::CharString
-ulocimp_getLanguage(const char* localeID, UErrorCode& status);
+ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);

 U_EXPORT icu::CharString
-ulocimp_getScript(const char* localeID, UErrorCode& status);
+ulocimp_getScript(std::string_view localeID, UErrorCode& status);

 U_EXPORT icu::CharString
-ulocimp_getRegion(const char* localeID, UErrorCode& status);
+ulocimp_getRegion(std::string_view localeID, UErrorCode& status);

 U_EXPORT icu::CharString
-ulocimp_getVariant(const char* localeID, UErrorCode& status);
+ulocimp_getVariant(std::string_view localeID, UErrorCode& status);

 U_EXPORT void
 ulocimp_setKeywordValue(std::string_view keywordName,
@@ -145,7 +145,7 @@ ulocimp_setKeywordValue(std::string_view keywords,

 U_EXPORT void
 ulocimp_getSubtags(
-        const char* localeID,
+        std::string_view localeID,
        icu::CharString* language,
        icu::CharString* script,
        icu::CharString* region,
@@ -155,7 +155,7 @@ ulocimp_getSubtags(

 U_EXPORT void
 ulocimp_getSubtags(
-        const char* localeID,
+        std::string_view localeID,
        icu::ByteSink* language,
        icu::ByteSink* script,
        icu::ByteSink* region,
@@ -165,7 +165,7 @@ ulocimp_getSubtags(

 inline void
 ulocimp_getSubtags(
-        const char* localeID,
+        std::string_view localeID,
        std::nullptr_t,
        std::nullptr_t,
        std::nullptr_t,
@@ -364,7 +364,7 @@ ulocimp_minimizeSubtags(const char* localeID,
                        UErrorCode& err);

 U_CAPI const char * U_EXPORT2
-locale_getKeywordsStart(const char *localeID);
+locale_getKeywordsStart(std::string_view localeID);

 bool
 ultag_isExtensionSubtags(const char* s, int32_t len);
--- a/intl/icu/source/common/umapfile.cpp
+++ b/intl/icu/source/common/umapfile.cpp
@@ -237,8 +237,13 @@ typedef HANDLE MemoryMap;
        pData->map = (char *)data + length;
        pData->pHeader=(const DataHeader *)data;
        pData->mapAddr = data;
-#if U_PLATFORM == U_PF_IPHONE
+#if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID
+    // Apparently supported from Android API level 23 and higher:
+    //   https://github.com/ggml-org/llama.cpp/pull/3631
+    // Checking for the flag itself is safer than checking for __ANDROID_API__.
+#   ifdef POSIX_MADV_RANDOM
        posix_madvise(data, length, POSIX_MADV_RANDOM);
+#   endif
 #endif
        return true;
    }
--- a/intl/icu/source/common/unicode/brkiter.h
+++ b/intl/icu/source/common/unicode/brkiter.h
@@ -58,6 +58,8 @@ U_NAMESPACE_END

 U_NAMESPACE_BEGIN

+class CharString;
+
 /**
 * The BreakIterator class implements methods for finding the location
 * of boundaries in text. BreakIterator is an abstract base class.
@@ -646,9 +648,9 @@ protected:
 private:

    /** @internal (private) */
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-    char requestLocale[ULOC_FULLNAME_CAPACITY];
+    CharString* actualLocale = nullptr;
+    CharString* validLocale = nullptr;
+    CharString* requestLocale = nullptr;
 };

 #ifndef U_HIDE_DEPRECATED_API
--- a/intl/icu/source/common/unicode/char16ptr.h
+++ b/intl/icu/source/common/unicode/char16ptr.h
@@ -9,10 +9,13 @@

 #include "unicode/utypes.h"

-#if U_SHOW_CPLUSPLUS_API
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API

 #include <cstddef>
 #include <string_view>
+#include <type_traits>
+
+#endif

 /**
 * \file
@@ -21,8 +24,6 @@
 *        Also conversion functions from char16_t * to UChar * and OldUChar *.
 */

-U_NAMESPACE_BEGIN
-
 /**
 * \def U_ALIASING_BARRIER
 * Barrier for pointer anti-aliasing optimizations even across function boundaries.
@@ -36,6 +37,11 @@ U_NAMESPACE_BEGIN
 #   define U_ALIASING_BARRIER(ptr)
 #endif

+// ICU DLL-exported
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
 /**
 * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
 * @stable ICU 59
@@ -251,6 +257,60 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
 #endif
 /// \endcond

+U_NAMESPACE_END
+
+#endif  // U_SHOW_CPLUSPLUS_API
+
+// Usable in header-only definitions
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+
+namespace U_ICU_NAMESPACE_OR_INTERNAL {
+
+#ifndef U_FORCE_HIDE_INTERNAL_API
+/** Returns p as a const char16_t *: unchanged when UChar is char16_t, otherwise via a pointer-type conversion. @internal */
+template<typename T, typename = std::enable_if_t<std::is_same_v<T, UChar>>>
+inline const char16_t *uprv_char16PtrFromUChar(const T *p) {
+    if constexpr (std::is_same_v<UChar, char16_t>) {
+        return p;
+    } else {
+#if U_SHOW_CPLUSPLUS_API
+        return ConstChar16Ptr(p).get();
+#else  // only the header-only API is shown: cast directly instead of using ConstChar16Ptr
+#ifdef U_ALIASING_BARRIER
+        U_ALIASING_BARRIER(p);
+#endif
+        return reinterpret_cast<const char16_t *>(p);
+#endif
+    }
+}
+#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
+/** Returns the uint16_t pointer p reinterpreted as a const char16_t *. @internal */
+inline const char16_t *uprv_char16PtrFromUint16(const uint16_t *p) {
+#if U_SHOW_CPLUSPLUS_API
+    return ConstChar16Ptr(p).get();
+#else  // only the header-only API is shown: cast directly instead of using ConstChar16Ptr
+#ifdef U_ALIASING_BARRIER
+    U_ALIASING_BARRIER(p);
+#endif
+    return reinterpret_cast<const char16_t *>(p);
+#endif
+}
+#endif
+#if U_SIZEOF_WCHAR_T==2
+/** Returns the wchar_t pointer p reinterpreted as a const char16_t *; only compiled when wchar_t is 2 bytes wide. @internal */
+inline const char16_t *uprv_char16PtrFromWchar(const wchar_t *p) {
+#if U_SHOW_CPLUSPLUS_API
+    return ConstChar16Ptr(p).get();
+#else  // only the header-only API is shown: cast directly instead of using ConstChar16Ptr
+#ifdef U_ALIASING_BARRIER
+    U_ALIASING_BARRIER(p);
+#endif
+    return reinterpret_cast<const char16_t *>(p);
+#endif
+}
+#endif
+#endif
+
 /**
 * Converts from const char16_t * to const UChar *.
 * Includes an aliasing barrier if available.
@@ -307,6 +367,15 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
    return reinterpret_cast<OldUChar *>(p);
 }

+}  // U_ICU_NAMESPACE_OR_INTERNAL
+
+#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+
+// ICU DLL-exported
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
 #ifndef U_FORCE_HIDE_INTERNAL_API
 /**
 * Is T convertible to a std::u16string_view or some other 16-bit string view?
@@ -379,6 +448,6 @@ inline std::u16string_view toU16StringViewNullable(const T& text) {

 U_NAMESPACE_END

-#endif /* U_SHOW_CPLUSPLUS_API */
+#endif  // U_SHOW_CPLUSPLUS_API

 #endif  // __CHAR16PTR_H__
--- a/intl/icu/source/common/unicode/locid.h
+++ b/intl/icu/source/common/unicode/locid.h
@@ -449,6 +449,11 @@ public:
     */
    static Locale U_EXPORT2 createFromName(const char *name);

+#ifndef U_HIDE_INTERNAL_API
+    /** @internal */
+    static Locale U_EXPORT2 createFromName(StringPiece name);
+#endif  /* U_HIDE_INTERNAL_API */
+
    /**
     * Creates a locale from the given string after canonicalizing
     * the string according to CLDR by calling uloc_canonicalize().
@@ -1133,7 +1138,9 @@ private:
     * @param cLocaleID The new locale name.
     * @param canonicalize whether to call uloc_canonicalize on cLocaleID
     */
-    Locale& init(const char* cLocaleID, UBool canonicalize);
+    Locale& init(const char* localeID, UBool canonicalize);
+    /** @internal */
+    Locale& init(StringPiece localeID, UBool canonicalize);

    /*
     * Internal constructor to allow construction of a locale object with
--- a/intl/icu/source/common/unicode/resbund.h
+++ b/intl/icu/source/common/unicode/resbund.h
@@ -450,7 +450,7 @@ public:
     * @return a Locale object
     * @stable ICU 2.8
     */
-    const Locale
+    Locale
      getLocale(ULocDataLocaleType type, UErrorCode &status) const;
 #ifndef U_HIDE_INTERNAL_API
    /**
--- a/intl/icu/source/common/unicode/uchar.h
+++ b/intl/icu/source/common/unicode/uchar.h
@@ -675,14 +675,14 @@ typedef enum UProperty {
     * @stable ICU 63
     */
    UCHAR_VERTICAL_ORIENTATION=0x1018,
-#ifndef U_HIDE_DRAFT_API
    /**
     * Enumerated property Identifier_Status.
     * Used for UTS #39 General Security Profile for Identifiers
     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
-     * @draft ICU 75
+     * @stable ICU 75
     */
    UCHAR_IDENTIFIER_STATUS=0x1019,
+#ifndef U_HIDE_DRAFT_API
    /**
     * Enumerated property Indic_Conjunct_Break.
     * Used in the grapheme cluster break algorithm in UAX #29.
@@ -796,7 +796,6 @@ typedef enum UProperty {
    UCHAR_SCRIPT_EXTENSIONS=0x7000,
    /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
    UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
-#ifndef U_HIDE_DRAFT_API
    /**
     * Miscellaneous property Identifier_Type.
     * Used for UTS #39 General Security Profile for Identifiers
@@ -808,10 +807,9 @@ typedef enum UProperty {
     *
     * @see u_hasIDType
     * @see u_getIDTypes
-     * @draft ICU 75
+     * @stable ICU 75
     */
    UCHAR_IDENTIFIER_TYPE=0x7001,
-#endif  // U_HIDE_DRAFT_API
 #ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for Unicode properties with unusual value types.
@@ -2791,13 +2789,12 @@ typedef enum UVerticalOrientation {
    U_VO_UPRIGHT,
 } UVerticalOrientation;

-#ifndef U_HIDE_DRAFT_API
 /**
 * Identifier Status constants.
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * @see UCHAR_IDENTIFIER_STATUS
- * @draft ICU 75
+ * @stable ICU 75
 */
 typedef enum UIdentifierStatus {
    /*
@@ -2806,9 +2803,9 @@ typedef enum UIdentifierStatus {
     *     U_ID_STATUS_<Unicode Identifier_Status value name>
     */

-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_STATUS_RESTRICTED,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_STATUS_ALLOWED,
 } UIdentifierStatus;

@@ -2817,7 +2814,7 @@ typedef enum UIdentifierStatus {
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * @see UCHAR_IDENTIFIER_TYPE
- * @draft ICU 75
+ * @stable ICU 75
 */
 typedef enum UIdentifierType {
    /*
@@ -2826,32 +2823,31 @@ typedef enum UIdentifierType {
     *     U_ID_TYPE_<Unicode Identifier_Type value name>
     */

-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_NOT_CHARACTER,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_DEPRECATED,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_DEFAULT_IGNORABLE,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_NOT_NFKC,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_NOT_XID,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_EXCLUSION,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_OBSOLETE,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_TECHNICAL,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_UNCOMMON_USE,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_LIMITED_USE,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_INCLUSION,
-    /** @draft ICU 75 */
+    /** @stable ICU 75 */
    U_ID_TYPE_RECOMMENDED,
 } UIdentifierType;
-#endif  // U_HIDE_DRAFT_API

 /**
 * Check a binary Unicode property for a code point.
@@ -4057,7 +4053,6 @@ u_isIDStart(UChar32 c);
 U_CAPI UBool U_EXPORT2
 u_isIDPart(UChar32 c);

-#ifndef U_HIDE_DRAFT_API
 /**
 * Does the set of Identifier_Type values code point c contain the given type?
 *
@@ -4069,7 +4064,7 @@ u_isIDPart(UChar32 c);
 * @param c code point
 * @param type Identifier_Type to check
 * @return true if type is in Identifier_Type(c)
- * @draft ICU 75
+ * @stable ICU 75
 */
 U_CAPI bool U_EXPORT2
 u_hasIDType(UChar32 c, UIdentifierType type);
@@ -4104,11 +4099,10 @@ u_hasIDType(UChar32 c, UIdentifierType type);
 *                   function chaining. (See User Guide for details.)
 * @return number of values in c's Identifier_Type,
 *         written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
- * @draft ICU 75
+ * @stable ICU 75
 */
 U_CAPI int32_t U_EXPORT2
 u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode);
-#endif  // U_HIDE_DRAFT_API

 /**
 * Determines if the specified character should be regarded
--- a/intl/icu/source/common/unicode/uniset.h
+++ b/intl/icu/source/common/unicode/uniset.h
@@ -1173,10 +1173,12 @@ public:
    inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
        return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
    }
+#endif  // U_HIDE_DRAFT_API

+#ifndef U_HIDE_DRAFT_API
    /**
     * Returns a C++ iterator for iterating over all of the elements of this set.
-     * Convenient all-in one iteration, but creates a UnicodeString for each
+     * Convenient all-in-one iteration, but creates a std::u16string for each
     * code point or string.
     * (Similar to how Java UnicodeSet *is an* Iterable&lt;String&gt;.)
     *
@@ -1185,13 +1187,14 @@ public:
     * \code
     * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
     * for (auto el : set) {
+     *     UnicodeString us(el);
     *     std::string u8;
-     *     printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
+     *     printf("set.element length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
     * }
     * \endcode
     *
     * @return an all-elements iterator.
-     * @draft ICU 76
+     * @draft ICU 77
     * @see end
     * @see codePoints
     * @see ranges
@@ -1203,7 +1206,7 @@ public:

    /**
     * @return an exclusive-end sentinel for iterating over all of the elements of this set.
-     * @draft ICU 76
+     * @draft ICU 77
     * @see begin
     * @see codePoints
     * @see ranges
--- a/intl/icu/source/common/unicode/uset.h
+++ b/intl/icu/source/common/unicode/uset.h
@@ -32,12 +32,13 @@
 #include "unicode/utypes.h"
 #include "unicode/uchar.h"

-#if U_SHOW_CPLUSPLUS_API
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#include <string>
 #include <string_view>
 #include "unicode/char16ptr.h"
 #include "unicode/localpointer.h"
-#include "unicode/unistr.h"
-#endif   // U_SHOW_CPLUSPLUS_API
+#include "unicode/utf16.h"
+#endif

 #ifndef USET_DEFINED

@@ -1392,8 +1393,8 @@ public:
 private:
    friend class USetCodePoints;

-    USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
-            : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
+    USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
+            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
                c(U_SENTINEL), end(U_SENTINEL) {
        // Fetch the first range.
        operator++();
@@ -1429,7 +1430,7 @@ public:
     * Constructs a C++ "range" object over the code points of the USet.
     * @draft ICU 76
     */
-    USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
+    USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}

    /** @draft ICU 76 */
    USetCodePoints(const USetCodePoints &other) = default;
@@ -1460,7 +1461,7 @@ struct CodePointRange {
    /** @draft ICU 76 */
    struct iterator {
        /** @draft ICU 76 */
-        iterator(UChar32 c) : c(c) {}
+        iterator(UChar32 aC) : c(aC) {}

        /** @draft ICU 76 */
        bool operator==(const iterator &other) const { return c == other.c; }
@@ -1573,8 +1574,8 @@ public:
 private:
    friend class USetRanges;

-    USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
-            : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}
+    USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
+            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}

    const USet *uset;
    int32_t rangeIndex;
@@ -1610,7 +1611,7 @@ public:
     * Constructs a C++ "range" object over the code point ranges of the USet.
     * @draft ICU 76
     */
-    USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
+    USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}

    /** @draft ICU 76 */
    USetRanges(const USetRanges &other) = default;
@@ -1657,7 +1658,7 @@ public:
            int32_t length;
            const UChar *uchars = uset_getString(uset, index, &length);
            // assert uchars != nullptr;
-            return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
+            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
        }
        return {};
    }
@@ -1684,8 +1685,8 @@ public:
 private:
    friend class USetStrings;

-    USetStringIterator(const USet *uset, int32_t index, int32_t count)
-            : uset(uset), index(index), count(count) {}
+    USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
+            : uset(pUset), index(nIndex), count(nCount) {}

    const USet *uset;
    int32_t index;
@@ -1699,9 +1700,11 @@ private:
 * using U_HEADER_NESTED_NAMESPACE::USetStrings;
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
 * for (auto s : USetStrings(uset.getAlias())) {
- *     UnicodeString us(s);
- *     std::string u8;
- *     printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
+ *     int32_t len32 = s.length();
+ *     char utf8[200];
+ *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
+ *                        s.data(), len32, 0xFFFD, nullptr, &errorCode);
+ *     printf("uset.string length %ld \"%s\"\n", long{len32}, utf8);
 * }
 * \endcode
 *
@@ -1718,7 +1721,7 @@ public:
     * Constructs a C++ "range" object over the strings of the USet.
     * @draft ICU 76
     */
-    USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {}
+    USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}

    /** @draft ICU 76 */
    USetStrings(const USetStrings &other) = default;
@@ -1737,17 +1740,19 @@ private:
    const USet *uset;
    int32_t count;
 };
+#endif  // U_HIDE_DRAFT_API

+#ifndef U_HIDE_DRAFT_API
 /**
 * Iterator returned by USetElements.
- * @draft ICU 76
+ * @draft ICU 77
 */
 class USetElementIterator {
 public:
-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    USetElementIterator(const USetElementIterator &other) = default;

-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    bool operator==(const USetElementIterator &other) const {
        // No need to compare rangeCount & end given private constructor
        // and assuming we don't compare iterators across the set being modified.
@@ -1756,26 +1761,28 @@ public:
        return uset == other.uset && c == other.c && index == other.index;
    }

-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    bool operator!=(const USetElementIterator &other) const { return !operator==(other); }

-    /** @draft ICU 76 */
-    UnicodeString operator*() const {
+    /** @draft ICU 77 */
+    std::u16string operator*() const {
        if (c >= 0) {
-            return UnicodeString(c);
+            return c <= 0xffff ?
+                std::u16string({static_cast<char16_t>(c)}) :
+                std::u16string({U16_LEAD(c), U16_TRAIL(c)});
        } else if (index < totalCount) {
            int32_t length;
            const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
            // assert uchars != nullptr;
-            return UnicodeString(uchars, length);
+            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
        } else {
-            return UnicodeString();
+            return {};
        }
    }

    /**
     * Pre-increment.
-     * @draft ICU 76
+     * @draft ICU 77
     */
    USetElementIterator &operator++() {
        if (c < end) {
@@ -1800,7 +1807,7 @@ public:

    /**
     * Post-increment.
-     * @draft ICU 76
+     * @draft ICU 77
     */
    USetElementIterator operator++(int) {
        USetElementIterator result(*this);
@@ -1811,8 +1818,8 @@ public:
 private:
    friend class USetElements;

-    USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
-            : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
+    USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
+            : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
                c(U_SENTINEL), end(U_SENTINEL) {
        if (index < rangeCount) {
            // Fetch the first range.
@@ -1840,7 +1847,7 @@ private:

 /**
 * A C++ "range" for iterating over all of the elements of a USet.
- * Convenient all-in one iteration, but creates a UnicodeString for each
+ * Convenient all-in-one iteration, but creates a std::u16string for each
 * code point or string.
 *
 * Code points are returned first, then empty and multi-character strings.
@@ -1849,15 +1856,18 @@ private:
 * using U_HEADER_NESTED_NAMESPACE::USetElements;
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
 * for (auto el : USetElements(uset.getAlias())) {
- *     std::string u8;
- *     printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
+ *     int32_t len32 = el.length();
+ *     char utf8[200];
+ *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
+ *                        el.data(), len32, 0xFFFD, nullptr, &errorCode);
+ *     printf("uset.element length %ld \"%s\"\n", long{len32}, utf8);
 * }
 * \endcode
 *
 * C++ UnicodeSet has member functions for iteration, including begin() and end().
 *
 * @return an all-elements iterator.
- * @draft ICU 76
+ * @draft ICU 77
 * @see USetCodePoints
 * @see USetRanges
 * @see USetStrings
@@ -1866,21 +1876,21 @@ class USetElements {
 public:
    /**
     * Constructs a C++ "range" object over all of the elements of the USet.
-     * @draft ICU 76
+     * @draft ICU 77
     */
-    USetElements(const USet *uset)
-        : uset(uset), rangeCount(uset_getRangeCount(uset)),
-            stringCount(uset_getStringCount(uset)) {}
+    USetElements(const USet *pUset)
+        : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
+            stringCount(uset_getStringCount(pUset)) {}

-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    USetElements(const USetElements &other) = default;

-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    USetElementIterator begin() const {
        return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
    }

-    /** @draft ICU 76 */
+    /** @draft ICU 77 */
    USetElementIterator end() const {
        return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
    }
--- a/intl/icu/source/common/unicode/utf8.h
+++ b/intl/icu/source/common/unicode/utf8.h
@@ -124,7 +124,7 @@
 * @internal
 */
 U_CAPI UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict);

 /**
 * Function for handling "append code point" with error-checking.
@@ -148,7 +148,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
 * @internal
 */
 U_CAPI UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict);

 /**
 * Function for handling "skip backward one code point" with error-checking.
--- a/intl/icu/source/common/unicode/utypes.h
+++ b/intl/icu/source/common/unicode/utypes.h
@@ -598,12 +598,13 @@ typedef enum UErrorCode {
    U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
    U_MF_OPERAND_MISMATCH_ERROR,     /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
    U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
+    U_MF_BAD_OPTION,             /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */
 #ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal formatting API error code.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
-    U_FMT_PARSE_ERROR_LIMIT = 0x10120,
+    U_FMT_PARSE_ERROR_LIMIT = 0x10121,
 #endif  // U_HIDE_DEPRECATED_API

    /*
--- a/intl/icu/source/common/unicode/uvernum.h
+++ b/intl/icu/source/common/unicode/uvernum.h
@@ -53,7 +53,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.4
 */
-#define U_ICU_VERSION_MAJOR_NUM 76
+#define U_ICU_VERSION_MAJOR_NUM 77

 /** The current ICU minor version as an integer.
 *  This value will change in the subsequent releases of ICU
@@ -79,7 +79,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.6
 */
-#define U_ICU_VERSION_SUFFIX _76
+#define U_ICU_VERSION_SUFFIX _77

 /**
 * \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -132,7 +132,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.4
 */
-#define U_ICU_VERSION "76.1"
+#define U_ICU_VERSION "77.1"

 /**
 * The current ICU library major version number as a string, for library name suffixes.
@@ -145,13 +145,13 @@
 *
 * @stable ICU 2.6
 */
-#define U_ICU_VERSION_SHORT "76"
+#define U_ICU_VERSION_SHORT "77"

 #ifndef U_HIDE_INTERNAL_API
 /** Data version in ICU4C.
 * @internal ICU 4.4 Internal Use Only
 **/
-#define U_ICU_DATA_VERSION "76.1"
+#define U_ICU_DATA_VERSION "77.1"
 #endif  /* U_HIDE_INTERNAL_API */

 /*===========================================================================
--- a/intl/icu/source/common/unicode/uversion.h
+++ b/intl/icu/source/common/unicode/uversion.h
@@ -125,7 +125,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
        U_NAMESPACE_USE
 #   endif

-#ifndef U_HIDE_DRAFT_API
+#ifndef U_FORCE_HIDE_DRAFT_API
 /**
 * \def U_HEADER_NESTED_NAMESPACE
 * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
@@ -150,22 +150,37 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
 * @draft ICU 76
 */

+/**
+ * \def U_ICU_NAMESPACE_OR_INTERNAL
+ * Namespace used for header-only APIs that used to be regular C++ APIs.
+ * Different when used inside ICU to prevent public use of internal instantiations.
+ * Similar to U_HEADER_ONLY_NAMESPACE, but the public definition is the same as U_ICU_NAMESPACE.
+ * "U_ICU_NAMESPACE" or "U_ICU_NAMESPACE::internal".
+ *
+ * @draft ICU 77
+ */
+
 // The first test is the same as for defining U_EXPORT for Windows.
 #if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
                          UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
 #   define U_HEADER_NESTED_NAMESPACE header
+#   define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
 #elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
        defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
 #   define U_HEADER_NESTED_NAMESPACE internal
+#   define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE::internal
+    namespace U_ICU_NAMESPACE_OR_INTERNAL {}
+    using namespace U_ICU_NAMESPACE_OR_INTERNAL;
 #else
 #   define U_HEADER_NESTED_NAMESPACE header
+#   define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
 #endif

 #define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE

 namespace U_HEADER_ONLY_NAMESPACE {}
-#endif  // U_HIDE_DRAFT_API
+#endif  // U_FORCE_HIDE_DRAFT_API

 #endif /* __cplusplus */

--- a/intl/icu/source/common/unistr.cpp
+++ b/intl/icu/source/common/unistr.cpp
@@ -1945,6 +1945,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
      growCapacity = newCapacity;
    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
      growCapacity = US_STACKBUF_SIZE;
+    } else if(newCapacity > growCapacity) {
+      setToBogus();
+      return false;  // bad inputs
+    }
+    if(growCapacity > kMaxCapacity) {
+      setToBogus();
+      return false;
    }

    // save old values
--- a/intl/icu/source/common/uresbund.cpp
+++ b/intl/icu/source/common/uresbund.cpp
@@ -2716,6 +2716,9 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,

    UResourceDataEntry *entry;
    if(openType != URES_OPEN_DIRECT) {
+        if (localeID == nullptr) {
+            localeID = uloc_getDefault();
+        }
        /* first "canonicalize" the locale ID */
        CharString canonLocaleID = ulocimp_getBaseName(localeID, *status);
        if(U_FAILURE(*status)) {
@@ -3080,6 +3083,9 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
            kwVal.clear();
        }
    }
+    if (locid == nullptr) {
+        locid = uloc_getDefault();
+    }
    CharString base = ulocimp_getBaseName(locid, subStatus);
 #if defined(URES_TREE_DEBUG)
    fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", 
@@ -3244,7 +3250,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
            const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus);
            if (U_SUCCESS(subStatus) && validLoc != nullptr && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) {
                CharString validLang = ulocimp_getLanguage(validLoc, subStatus);
-                CharString parentLang = ulocimp_getLanguage(parent.data(), subStatus);
+                CharString parentLang = ulocimp_getLanguage(parent.toStringPiece(), subStatus);
                if (U_SUCCESS(subStatus) && validLang != parentLang) {
                    // validLoc is not root and has a different language than parent, use it instead
                    found.clear().append(validLoc, subStatus);
--- a/intl/icu/source/common/uscript.cpp
+++ b/intl/icu/source/common/uscript.cpp
@@ -59,6 +59,9 @@ getCodesFromLocale(const char *locale,
    if (U_FAILURE(*err)) { return 0; }
    icu::CharString lang;
    icu::CharString script;
+    if (locale == nullptr) {
+        locale = uloc_getDefault();
+    }
    ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err);
    if (U_FAILURE(*err)) { return 0; }
    // Multi-script languages, equivalent to the LocaleScript data
--- a/intl/icu/source/common/ushape.cpp
+++ b/intl/icu/source/common/ushape.cpp
@@ -28,6 +28,7 @@
 #include "ubidi_props.h"
 #include "uassert.h"

+#include <limits>
 /*
 * This implementation is designed for 16-bit Unicode strings.
 * The main assumption is that the Arabic characters and their
@@ -747,6 +748,10 @@ handleGeneratedSpaces(char16_t *dest, int32_t sourceLength,
        }
    }

+    if (static_cast<size_t>(sourceLength) + 1 > std::numeric_limits<size_t>::max() / U_SIZEOF_UCHAR) {
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
    tempbuffer = static_cast<char16_t*>(uprv_malloc((sourceLength + 1) * U_SIZEOF_UCHAR));
    /* Test for nullptr */
    if(tempbuffer == nullptr) {
--- a/intl/icu/source/common/usprep.cpp
+++ b/intl/icu/source/common/usprep.cpp
@@ -126,7 +126,7 @@ compareEntries(const UHashTok p1, const UHashTok p2) {
    name2.pointer = b2->name;
    path1.pointer = b1->path;
    path2.pointer = b2->path;
-    return uhash_compareChars(name1, name2) & uhash_compareChars(path1, path2);
+    return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2);
 }

 static void 
--- a/intl/icu/source/common/utf_impl.cpp
+++ b/intl/icu/source/common/utf_impl.cpp
@@ -124,11 +124,9 @@ errorValue(int32_t count, int8_t strict) {
 * >0  Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., true):
 *     Same as the obsolete "safe" behavior, but non-characters are also treated
 *     like illegal sequences.
- *
- * Note that a UBool is the same as an int8_t.
 */
 U_CAPI UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict) {
    // *pi is one after byte c.
    int32_t i=*pi;
    // length can be negative for NUL-terminated strings: Read and validate one byte at a time.
@@ -233,7 +231,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
 }

 U_CAPI UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) {
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict) {
    // *pi is the index of byte c.
    int32_t i=*pi;
    if(U8_IS_TRAIL(c) && i>start) {
--- a/intl/icu/source/common/utypes.cpp
+++ b/intl/icu/source/common/utypes.cpp
@@ -140,7 +140,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
    "U_MF_MISSING_SELECTOR_ANNOTATION_ERROR",
    "U_MF_DUPLICATE_DECLARATION_ERROR",
    "U_MF_OPERAND_MISMATCH_ERROR",
-    "U_MF_DUPLICATE_VARIANT_ERROR"
+    "U_MF_DUPLICATE_VARIANT_ERROR",
+    "U_MF_BAD_OPTION"
 };

 static const char * const
--- a/intl/icu/source/configure
+++ b/intl/icu/source/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.72 for ICU 76.1.
+# Generated by GNU Autoconf 2.72 for ICU 77.1.
 #
 # Report bugs to <https://icu.unicode.org/bugs>.
 #
@@ -606,8 +606,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ICU'
 PACKAGE_TARNAME='icu4c'
-PACKAGE_VERSION='76.1'
-PACKAGE_STRING='ICU 76.1'
+PACKAGE_VERSION='77.1'
+PACKAGE_STRING='ICU 77.1'
 PACKAGE_BUGREPORT='https://icu.unicode.org/bugs'
 PACKAGE_URL='https://icu.unicode.org/'

@@ -1387,7 +1387,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-'configure' configures ICU 76.1 to adapt to many kinds of systems.
+'configure' configures ICU 77.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1453,7 +1453,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of ICU 76.1:";;
+     short | recursive ) echo "Configuration of ICU 77.1:";;
   esac
  cat <<\_ACEOF

@@ -1461,30 +1461,30 @@ Optional Features:
  --disable-option-checking  ignore unrecognized --enable/--with options
  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
-  --enable-icu-config     install icu-config
-  --enable-debug          build debug libraries and enable the U_DEBUG define default=no
-  --enable-release        build release libraries default=yes
-  --enable-strict         compile with strict compiler options default=yes
+  --disable-icu-config    do not install icu-config
+  --enable-debug          build debug libraries and enable the U_DEBUG define
+  --disable-release       do not build release libraries
+  --disable-strict        do not compile with strict compiler options
  --enable-64bit-libs     (deprecated, use --with-library-bits) build 64-bit libraries default= platform default
-  --enable-shared         build shared libraries default=yes
-  --enable-static         build static libraries default=no
-  --enable-auto-cleanup   enable auto cleanup of libraries default=no
-  --enable-draft          enable draft APIs (and internal APIs) default=yes
-  --enable-renaming       add a version suffix to symbols default=yes
-  --enable-tracing        enable function and data tracing default=no
-  --enable-plugins        enable plugins default=no
-  --disable-dyload        disable dynamic loading default=no
+  --disable-shared        do not build shared libraries
+  --enable-static         build static libraries
+  --enable-auto-cleanup   enable auto cleanup of libraries
+  --disable-draft         do not enable draft APIs (and internal APIs)
+  --disable-renaming      do not add a version suffix to symbols
+  --enable-tracing        enable function and data tracing
+  --enable-plugins        enable plugins
+  --disable-dyload        disable dynamic loading
  --enable-rpath          use rpath when linking default is only if necessary
-  --enable-weak-threads   weakly reference the threading library default=no
-  --enable-extras         build ICU extras default=yes
-  --enable-icuio          build ICU's icuio library default=yes
-  --enable-layoutex         build ICU's Paragraph Layout library default=no.
+  --enable-weak-threads   weakly reference the threading library
+  --disable-extras        do not build ICU extras
+  --disable-icuio         do not build ICU's icuio library
+  --enable-layoutex       build ICU's Paragraph Layout library.
            icu-le-hb must be installed via pkg-config. See http://harfbuzz.org

-  --enable-tools         build ICU's tools default=yes
-  --enable-fuzzer        build ICU's fuzzer test targets default=no
-  --enable-tests          build ICU tests default=yes
-  --enable-samples        build ICU samples default=yes
+  --disable-tools         do not build ICU's tools
+  --enable-fuzzer         build ICU's fuzzer test targets
+  --disable-tests         do not build ICU tests
+  --disable-samples       do not build ICU samples

 Additionally, the variable FORCE_LIBS may be set before calling configure.
 If set, it will REPLACE any automatic list of libraries.
@@ -1592,7 +1592,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-ICU configure 76.1
+ICU configure 77.1
 generated by GNU Autoconf 2.72

 Copyright (C) 2023 Free Software Foundation, Inc.
@@ -2184,7 +2184,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by ICU $as_me 76.1, which was
+It was created by ICU $as_me 77.1, which was
 generated by GNU Autoconf 2.72.  Invocation command line was

  $ $0$ac_configure_args_raw
@@ -8487,7 +8487,7 @@ echo "CXXFLAGS=$CXXFLAGS"


 # output the Makefiles
-ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/icuexportdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile test/fuzzer/Makefile samples/Makefile"
+ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/icuexportdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/fuzzer/Makefile samples/Makefile"

 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@@ -9025,7 +9025,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ICU $as_me 76.1, which was
+This file was extended by ICU $as_me 77.1, which was
 generated by GNU Autoconf 2.72.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -9081,7 +9081,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-ICU config.status 76.1
+ICU config.status 77.1
 configured by $0, generated by GNU Autoconf 2.72,
  with options \\"\$ac_cs_config\\"

@@ -9256,7 +9256,6 @@ do
    "test/perf/ustrperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/ustrperf/Makefile" ;;
    "test/perf/utfperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/utfperf/Makefile" ;;
    "test/perf/utrie2perf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/utrie2perf/Makefile" ;;
-    "test/perf/leperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/leperf/Makefile" ;;
    "test/fuzzer/Makefile") CONFIG_FILES="$CONFIG_FILES test/fuzzer/Makefile" ;;
    "samples/Makefile") CONFIG_FILES="$CONFIG_FILES samples/Makefile" ;;

--- a/intl/icu/source/configure.ac
+++ b/intl/icu/source/configure.ac
@@ -89,7 +89,7 @@ UCONFIG_CFLAGS=""

 # Check whether to install icu-config
 AC_ARG_ENABLE([icu-config],
-    AS_HELP_STRING([--enable-icu-config], [install icu-config]),
+    AS_HELP_STRING([--disable-icu-config], [do not install icu-config]),
        [case "${enableval}" in
            yes) enable_icu_config=true ;;
            no) enable_icu_config=false ;;
@@ -102,7 +102,7 @@ AC_MSG_CHECKING([whether to build debug libraries])
 enabled=no
 ENABLE_DEBUG=0
 AC_ARG_ENABLE(debug,
-    [  --enable-debug          build debug libraries and enable the U_DEBUG define [default=no]],
+    [  --enable-debug          build debug libraries and enable the U_DEBUG define],
    [ case "${enableval}" in
         yes|"") enabled=yes; ENABLE_DEBUG=1; CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_DEBUG=1" ;;
         *) ;;
@@ -116,7 +116,7 @@ AC_MSG_CHECKING([whether to build release libraries])
 enabled=yes
 ENABLE_RELEASE=1
 AC_ARG_ENABLE(release,
-    [  --enable-release        build release libraries [default=yes]],
+    [  --disable-release       do not build release libraries],
    [ case "${enableval}" in
         no) enabled=no; ENABLE_RELEASE=0 ;;
         *) ;;
@@ -270,7 +270,7 @@ AC_SUBST(LIB_M)
 AC_MSG_CHECKING([whether to build shared libraries])
 enabled=no
 AC_ARG_ENABLE(shared,
-    [  --enable-shared         build shared libraries [default=yes]],
+    [  --disable-shared        do not build shared libraries],
    [ case "${enableval}" in
 	     yes|"") enabled=yes; ENABLE_SHARED=YES ;;
 	     no);;
@@ -285,7 +285,7 @@ AC_SUBST(ENABLE_SHARED)
 AC_MSG_CHECKING([whether to build static libraries])
 enabled=no
 AC_ARG_ENABLE(static,
-    [  --enable-static         build static libraries [default=no]],
+    [  --enable-static         build static libraries],
    [ case "${enableval}" in
 	     yes|"") enabled=yes; ENABLE_STATIC=YES ;;
 	     no) ;;
@@ -332,7 +332,7 @@ AC_MSG_CHECKING([whether to enable auto cleanup of libraries])
 enabled=no
 UCLN_NO_AUTO_CLEANUP=1
 AC_ARG_ENABLE(auto-cleanup,
-    [  --enable-auto-cleanup   enable auto cleanup of libraries [default=no]],
+    [  --enable-auto-cleanup   enable auto cleanup of libraries],
    [ case "${enableval}" in
         yes) enabled=yes;
         CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DUCLN_NO_AUTO_CLEANUP=0";
@@ -349,7 +349,7 @@ AC_MSG_CHECKING([whether to enable draft APIs])
 enabled=yes
 U_DEFAULT_SHOW_DRAFT=1
 AC_ARG_ENABLE(draft,
-    [  --enable-draft          enable draft APIs (and internal APIs) [default=yes]],
+    [  --disable-draft         do not enable draft APIs (and internal APIs)],
    [ case "${enableval}" in
         no) enabled=no; U_DEFAULT_SHOW_DRAFT=0;
         CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_DEFAULT_SHOW_DRAFT=0"
@@ -376,7 +376,7 @@ AC_MSG_CHECKING([whether to enable renaming of symbols])
 enabled=yes
 U_DISABLE_RENAMING=0
 AC_ARG_ENABLE(renaming,
-    [  --enable-renaming       add a version suffix to symbols [default=yes]],
+    [  --disable-renaming      do not add a version suffix to symbols],
    [ case "${enableval}" in
 	     yes|"") enabled=yes ;;
 	     no) enabled=no; U_DISABLE_RENAMING=1;
@@ -392,7 +392,7 @@ AC_MSG_CHECKING([whether to enable function and data tracing])
 enabled=no
 U_ENABLE_TRACING=0
 AC_ARG_ENABLE(tracing,
-    [  --enable-tracing        enable function and data tracing [default=no]],
+    [  --enable-tracing        enable function and data tracing],
    [ case "${enableval}" in
 	     yes|"") enabled=yes;
                     CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_ENABLE_TRACING=1";
@@ -412,7 +412,7 @@ fi

 # Enable/disable plugins
 AC_ARG_ENABLE(plugins,
-	[  --enable-plugins        enable plugins [default=no]],
+	[  --enable-plugins        enable plugins],
 	[case "${enableval}" in
 		yes) plugins=true ;;
 		no)  plugins=false ;;
@@ -430,7 +430,7 @@ U_ENABLE_DYLOAD=1
 enable=yes
 AC_MSG_CHECKING([whether to enable dynamic loading of plugins. Ignored if plugins disabled.])
 AC_ARG_ENABLE(dyload,
-    [  --disable-dyload        disable dynamic loading [default=no]],
+    [  --disable-dyload        disable dynamic loading],
    [ case "${enableval}" in
 	     yes|"")
 		     U_ENABLE_DYLOAD=1
@@ -577,7 +577,7 @@ case "${host}" in
 esac

 AC_ARG_ENABLE(weak-threads,
-	[  --enable-weak-threads   weakly reference the threading library [default=no]],
+	[  --enable-weak-threads   weakly reference the threading library],
 	[case "${enableval}" in
 		yes)
            LIB_THREAD="${LIBS%${OLD_LIBS}}"
@@ -974,7 +974,7 @@ AC_MSG_RESULT($CHECK_UTF16_STRING_RESULT)

 # Enable/disable extras
 AC_ARG_ENABLE(extras,
-	[  --enable-extras         build ICU extras [default=yes]],
+	[  --disable-extras        do not build ICU extras],
 	[case "${enableval}" in
 		yes) extras=true ;;
 		no)  extras=false ;;
@@ -983,7 +983,7 @@ AC_ARG_ENABLE(extras,
 	extras=true)
 ICU_CONDITIONAL(EXTRAS, test "$extras" = true)
 AC_ARG_ENABLE(icuio,
-	[  --enable-icuio          build ICU's icuio library [default=yes]],
+	[  --disable-icuio         do not build ICU's icuio library],
 	[case "${enableval}" in
 		yes) icuio=true ;;
 		no)  icuio=false ;;
@@ -994,7 +994,7 @@ ICU_CONDITIONAL(ICUIO, test "$icuio" = true)

 # Enable/disable layoutex
 AC_ARG_ENABLE(layoutex,
-	[  --enable-layoutex         build ICU's Paragraph Layout library [default=no].
+	[  --enable-layoutex       build ICU's Paragraph Layout library.
            icu-le-hb must be installed via pkg-config. See http://harfbuzz.org],
 	[case "${enableval}" in
 		yes) layoutex=$have_icu_le_hb ;;
@@ -1016,7 +1016,7 @@ AC_ARG_ENABLE(layout,

 # Enable/disable tools
 AC_ARG_ENABLE(tools,
-	[  --enable-tools         build ICU's tools [default=yes]],
+	[  --disable-tools         do not build ICU's tools],
 	[case "${enableval}" in
 		yes) tools=true ;;
 		no)  tools=false ;;
@@ -1027,7 +1027,7 @@ ICU_CONDITIONAL(TOOLS, test "$tools" = true)

 # Enable/disable fuzzer target tests.
 AC_ARG_ENABLE(fuzzer,
-	[  --enable-fuzzer        build ICU's fuzzer test targets [default=no]],
+	[  --enable-fuzzer         build ICU's fuzzer test targets],
 	[case "${enableval}" in
 		yes) fuzzer=true ;;
 		no)  fuzzer=false ;;
@@ -1144,7 +1144,7 @@ AC_SUBST(ICULIBSUFFIXCNAME)

 # Enable/disable tests
 AC_ARG_ENABLE(tests,
-	[  --enable-tests          build ICU tests [default=yes]],
+	[  --disable-tests         do not build ICU tests],
 	[case "${enableval}" in
 		yes) tests=true ;;
 		no)  tests=false ;;
@@ -1155,7 +1155,7 @@ ICU_CONDITIONAL(TESTS, test "$tests" = true)

 # Enable/disable samples
 AC_ARG_ENABLE(samples,
-	[  --enable-samples        build ICU samples [default=yes]
+	[  --disable-samples       do not build ICU samples

 Additionally, the variable FORCE_LIBS may be set before calling configure.
 If set, it will REPLACE any automatic list of libraries.],
@@ -1304,7 +1304,6 @@ AC_CONFIG_FILES([icudefs.mk \
 		test/perf/ustrperf/Makefile \
 		test/perf/utfperf/Makefile \
 		test/perf/utrie2perf/Makefile \
-		test/perf/leperf/Makefile \
 		test/fuzzer/Makefile \
 		samples/Makefile])
 AC_OUTPUT
--- a/intl/icu/source/data/brkitr/LOCALE_DEPS.json
+++ b/intl/icu/source/data/brkitr/LOCALE_DEPS.json
@@ -1,7 +1,7 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/

 {
-    "cldrVersion": "46"
+    "cldrVersion": "47"
 }
--- a/intl/icu/source/data/brkitr/de.txt
+++ b/intl/icu/source/data/brkitr/de.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 de{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/el.txt
+++ b/intl/icu/source/data/brkitr/el.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 el{
    boundaries{
        sentence:process(dependency){"sent_el.brk"}
--- a/intl/icu/source/data/brkitr/en.txt
+++ b/intl/icu/source/data/brkitr/en.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 en{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/en_US.txt
+++ b/intl/icu/source/data/brkitr/en_US.txt
@@ -1,5 +1,5 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 en_US{
 }
--- a/intl/icu/source/data/brkitr/en_US_POSIX.txt
+++ b/intl/icu/source/data/brkitr/en_US_POSIX.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 en_US_POSIX{
    boundaries{
        word:process(dependency){"word_POSIX.brk"}
--- a/intl/icu/source/data/brkitr/es.txt
+++ b/intl/icu/source/data/brkitr/es.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 es{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/fi.txt
+++ b/intl/icu/source/data/brkitr/fi.txt
@@ -1,8 +1,5 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 fi{
-    boundaries{
-        word:process(dependency){"word_fi_sv.brk"}
-    }
 }
--- a/intl/icu/source/data/brkitr/fr.txt
+++ b/intl/icu/source/data/brkitr/fr.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 fr{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/it.txt
+++ b/intl/icu/source/data/brkitr/it.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 it{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/ja.txt
+++ b/intl/icu/source/data/brkitr/ja.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ja{
    boundaries{
        line:process(dependency){"line_normal.brk"}
--- a/intl/icu/source/data/brkitr/ko.txt
+++ b/intl/icu/source/data/brkitr/ko.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ko{
    boundaries{
        line:process(dependency){"line_normal.brk"}
--- a/intl/icu/source/data/brkitr/pt.txt
+++ b/intl/icu/source/data/brkitr/pt.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 pt{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/root.txt
+++ b/intl/icu/source/data/brkitr/root.txt
@@ -1,8 +1,8 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 root{
-    Version{"46"}
+    Version{"47"}
    boundaries{
        grapheme:process(dependency){"char.brk"}
        line:process(dependency){"line.brk"}
--- a/intl/icu/source/data/brkitr/ru.txt
+++ b/intl/icu/source/data/brkitr/ru.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ru{
    exceptions{
        SentenceBreak:array{
--- a/intl/icu/source/data/brkitr/rules/char.txt
+++ b/intl/icu/source/data/brkitr/rules/char.txt
@@ -24,13 +24,9 @@ $Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
 $Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

-#
-#  From cldr/common/properties/segments/
-#       and issue CLDR-10994
-#
-$Virama      = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Virama}];
-$LinkingConsonant = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Consonant}];
-$ExtCccZwj   = [[\p{gcb=Extend}-\p{ccc=0}] \p{gcb=ZWJ}];
+$InCBConsonant = [\p{InCB=Consonant}];
+$InCBExtend = [\p{InCB=Extend}];
+$InCBLinker = [\p{InCB=Linker}];

 # Korean Syllable Definitions
 #
@@ -64,8 +60,8 @@ $L ($L | $V | $LV | $LVT);
 # GB 9b
 $Prepend [^$Control $CR $LF];

-# GB 9.3, from CLDR-10994
-$LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* $LinkingConsonant;
+# GB 9c
+$InCBConsonant [ $InCBExtend $InCBLinker ]* $InCBLinker [ $InCBExtend $InCBLinker ]* $InCBConsonant;

 # GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
 $Extended_Pict $Extend* $ZWJ $Extended_Pict;
--- a/intl/icu/source/data/brkitr/rules/line.txt
+++ b/intl/icu/source/data/brkitr/rules/line.txt
@@ -297,7 +297,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus; 
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_cj.txt
@@ -298,7 +298,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_loose.txt
+++ b/intl/icu/source/data/brkitr/rules/line_loose.txt
@@ -306,7 +306,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_loose_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_loose_cj.txt
@@ -318,7 +318,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_loose_phrase_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_loose_phrase_cj.txt
@@ -331,7 +331,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_normal.txt
+++ b/intl/icu/source/data/brkitr/rules/line_normal.txt
@@ -299,7 +299,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_normal_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_normal_cj.txt
@@ -304,7 +304,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_normal_phrase_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_normal_phrase_cj.txt
@@ -317,7 +317,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/line_phrase_cj.txt
+++ b/intl/icu/source/data/brkitr/rules/line_phrase_cj.txt
@@ -310,7 +310,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
--- a/intl/icu/source/data/brkitr/rules/word.txt
+++ b/intl/icu/source/data/brkitr/rules/word.txt
@@ -11,7 +11,7 @@
 #      These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
 #
 # Note:  Updates to word.txt will usually need to be merged into
-#        word_POSIX.txt and word_fi_sv.txt also.
+#        word_POSIX.txt also.

 ##############################################################################
 #
@@ -42,7 +42,7 @@ $ALetter            = [\p{Word_Break = ALetter}];
 $Single_Quote       = [\p{Word_Break = Single_Quote}];
 $Double_Quote       = [\p{Word_Break = Double_Quote}];
 $MidNumLet          = [\p{Word_Break = MidNumLet}];
-$MidLetter          = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
+$MidLetter          = [\p{Word_Break = MidLetter}];
 $MidNum             = [\p{Word_Break = MidNum}];
 $Numeric            = [\p{Word_Break = Numeric}];
 $ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
--- a/intl/icu/source/data/brkitr/rules/word_POSIX.txt
+++ b/intl/icu/source/data/brkitr/rules/word_POSIX.txt
@@ -42,7 +42,7 @@ $ALetter            = [\p{Word_Break = ALetter}];
 $Single_Quote       = [\p{Word_Break = Single_Quote}];
 $Double_Quote       = [\p{Word_Break = Double_Quote}];
 $MidNumLet          = [\p{Word_Break = MidNumLet} - [.]];
-$MidLetter          = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
+$MidLetter          = [\p{Word_Break = MidLetter} - [\:]];
 $MidNum             = [\p{Word_Break = MidNum} [.]];
 $Numeric            = [\p{Word_Break = Numeric}];
 $ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
--- a/intl/icu/source/data/brkitr/rules/word_fi_sv.txt
+++ b/intl/icu/source/data/brkitr/rules/word_fi_sv.txt
@@ -1,172 +0,0 @@
-#
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
-# License & terms of use: http://www.unicode.org/copyright.html
-# Copyright (C) 2002-2016, International Business Machines Corporation
-# and others. All Rights Reserved.
-#
-# file:  word_fi_sv.txt
-#
-# ICU Word Break Rules
-#      See Unicode Standard Annex #29.
-#      These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
-#
-# Note:  Updates to word.txt will usually need to be merged into
-#        word_fi_sv.txt also.
-
-##############################################################################
-#
-#  Character class definitions from TR 29
-#
-##############################################################################
-
-!!chain;
-!!quoted_literals_only;
-
-
-#
-#  Character Class Definitions.
-#
-
-$Han                = [:Han:];
-
-$CR                 = [\p{Word_Break = CR}];
-$LF                 = [\p{Word_Break = LF}];
-$Newline            = [\p{Word_Break = Newline}];
-$Extend             = [\p{Word_Break = Extend}-$Han];
-$ZWJ                = [\p{Word_Break = ZWJ}];
-$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
-$Format             = [\p{Word_Break = Format}];
-$Katakana           = [\p{Word_Break = Katakana}];
-$Hebrew_Letter      = [\p{Word_Break = Hebrew_Letter}];
-$ALetter            = [\p{Word_Break = ALetter}];
-$Single_Quote       = [\p{Word_Break = Single_Quote}];
-$Double_Quote       = [\p{Word_Break = Double_Quote}];
-$MidNumLet          = [\p{Word_Break = MidNumLet}];
-$MidLetter          = [\p{Word_Break = MidLetter}];
-$MidNum             = [\p{Word_Break = MidNum}];
-$Numeric            = [\p{Word_Break = Numeric}];
-$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
-$WSegSpace          = [\p{Word_Break = WSegSpace}];
-$Extended_Pict      = [\p{Extended_Pictographic}];
-
-$Hiragana           = [:Hiragana:];
-$Ideographic        = [\p{Ideographic}];
-
-
-#   Dictionary character set, for triggering language-based break engines. Currently
-#   limited to LineBreak=Complex_Context. Note that this set only works in Unicode
-#   5.0 or later as the definition of Complex_Context was corrected to include all
-#   characters requiring dictionary break.
-
-$Control        = [\p{Grapheme_Cluster_Break = Control}];
-$HangulSyllable = [\uac00-\ud7a3];
-$ComplexContext = [:LineBreak = Complex_Context:];
-$KanaKanji      = [$Han $Hiragana $Katakana];
-$dictionaryCJK  = [$KanaKanji $HangulSyllable];
-$dictionary     = [$ComplexContext $dictionaryCJK];
-
-# TODO: check if handling of katakana in dictionary makes rules incorrect/void
-
-# leave CJK scripts out of ALetterPlus
-$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
-
-
-## -------------------------------------------------
-
-# Rule 3 - CR x LF
-#
-$CR $LF;
-
-# Rule 3c   Do not break within emoji zwj sequences.
-#             ZWJ ×  \p{Extended_Pictographic}.  Precedes WB4, so no intervening Extend chars allowed.
-#
-$ZWJ $Extended_Pict;
-
-# Rule 3d - Keep horizontal whitespace together.
-#
-$WSegSpace $WSegSpace;
-
-# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
-#          of a region of Text.
-
-$ExFm  = [$Extend $Format $ZWJ];
-
-^$ExFm+;            # This rule fires only when there are format or extend characters at the
-                    # start of text, or immediately following another boundary. It groups them, in
-                    # the event there are more than one.
-
-[^$CR $LF $Newline $ExFm] $ExFm*;   # This rule rule attaches trailing format/extends to words,
-                                    # with no special rule status value.
-
-$Numeric $ExFm* {100};              # This group of rules also attach trailing format/extends, but
-$ALetterPlus $ExFm* {200};          # with rule status set based on the word's final base character.
-$HangulSyllable {200};
-$Hebrew_Letter $ExFm* {200};
-$Katakana $ExFm* {400};             # note:  these status values override those from rule 5
-$Hiragana $ExFm* {400};             #        by virtue of being numerically larger.
-$Ideographic $ExFm* {400};          #
-
-#
-# rule 5
-#    Do not break between most letters.
-#
-($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter);
-
-# rule 6 and 7
-($ALetterPlus | $Hebrew_Letter)  $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) {200};
-
-# rule 7a
-$Hebrew_Letter $ExFm* $Single_Quote {200};
-
-# rule 7b and 7c
-$Hebrew_Letter $ExFm* $Double_Quote $ExFm* $Hebrew_Letter;
-
-# rule 8
-
-$Numeric $ExFm* $Numeric;
-
-# rule 9
-
-($ALetterPlus | $Hebrew_Letter)  $ExFm* $Numeric;
-
-# rule 10
-
-$Numeric $ExFm* ($ALetterPlus | $Hebrew_Letter);
-
-# rule 11 and 12
-
-$Numeric $ExFm* ($MidNum | $MidNumLet | $Single_Quote) $ExFm* $Numeric;
-
-# rule 13
-# to be consistent with $KanaKanji $KanaKanhi, changed
-# from 300 to 400.
-# See also TestRuleStatus in intltest/rbbiapts.cpp
-$Katakana $ExFm*  $Katakana {400};
-
-# rule 13a/b
-
-$ALetterPlus   $ExFm* $ExtendNumLet {200};    #  (13a)
-$Hebrew_Letter $ExFm* $ExtendNumLet {200};    #  (13a)
-$Numeric       $ExFm* $ExtendNumLet {100};    #  (13a)
-$Katakana      $ExFm* $ExtendNumLet {400};    #  (13a)
-$ExtendNumLet  $ExFm* $ExtendNumLet {200};    #  (13a)
-
-$ExtendNumLet  $ExFm* $ALetterPlus  {200};    #  (13b)
-$ExtendNumLet  $ExFm* $Hebrew_Letter {200};    #  (13b)
-$ExtendNumLet  $ExFm* $Numeric      {100};    #  (13b)
-$ExtendNumLet  $ExFm* $Katakana     {400};    #  (13b)
-
-# rules 15 - 17
-#    Pairs of Regional Indicators stay together.
-#    With incoming rule chaining disabled by ^, this rule will match exactly two of them.
-#    No other rule begins with a Regional_Indicator, so chaining cannot extend the match.
-#
-^$Regional_Indicator $ExFm* $Regional_Indicator;
-
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
-
-# Rule 999
-#     Match a single code point if no other rule applies.
-.;
--- a/intl/icu/source/data/brkitr/sv.txt
+++ b/intl/icu/source/data/brkitr/sv.txt
@@ -1,8 +1,5 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 sv{
-    boundaries{
-        word:process(dependency){"word_fi_sv.brk"}
-    }
 }
--- a/intl/icu/source/data/brkitr/zh.txt
+++ b/intl/icu/source/data/brkitr/zh.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 zh{
    boundaries{
        line:process(dependency){"line_cj.brk"}
--- a/intl/icu/source/data/brkitr/zh_Hant.txt
+++ b/intl/icu/source/data/brkitr/zh_Hant.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 zh_Hant{
    boundaries{
        line:process(dependency){"line_cj.brk"}
--- a/intl/icu/source/data/coll/LOCALE_DEPS.json
+++ b/intl/icu/source/data/coll/LOCALE_DEPS.json
@@ -1,9 +1,9 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/

 {
-    "cldrVersion": "46",
+    "cldrVersion": "47",
    "aliases": {
        "ars": "ar_SA",
        "in": "id",
--- a/intl/icu/source/data/coll/af.txt
+++ b/intl/icu/source/data/coll/af.txt
@@ -1,11 +1,11 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 af{
    collations{
        standard{
            Sequence{"&N<<<ŉ"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/am.txt
+++ b/intl/icu/source/data/coll/am.txt
@@ -1,11 +1,11 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 am{
    collations{
        standard{
            Sequence{"[reorder Ethi]"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/ar.txt
+++ b/intl/icu/source/data/coll/ar.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ar{
    collations{
        compat{
@@ -9,7 +9,7 @@ ar{
                "&ت<<ة<<<ﺔ<<<ﺓ"
                "&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ"
            }
-            Version{"46"}
+            Version{"47"}
        }
        standard{
            Sequence{
@@ -397,7 +397,7 @@ ar{
                "‎&ۓ‎=ﮰ‎=ﮱ"
                "‎&ۀ‎=ﮤ‎=ﮥ"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/ar_SA.txt
+++ b/intl/icu/source/data/coll/ar_SA.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 /**
 * generated alias target
 */
--- a/intl/icu/source/data/coll/ars.txt
+++ b/intl/icu/source/data/coll/ars.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ars{
    "%%ALIAS"{"ar_SA"}
 }
--- a/intl/icu/source/data/coll/as.txt
+++ b/intl/icu/source/data/coll/as.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 as{
    collations{
        standard{
@@ -11,7 +11,7 @@ as{
                "&[before 1]ত<ৎ=ত্\u200D"
                "&হ<ক্ষ"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/az.txt
+++ b/intl/icu/source/data/coll/az.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 az{
    collations{
        search{
@@ -9,7 +9,7 @@ az{
                "[import az-u-co-standard]"
                "[reorder others]"
            }
-            Version{"46"}
+            Version{"47"}
        }
        standard{
            Sequence{
@@ -26,7 +26,7 @@ az{
                "&H<x<<<X"
                "&Z<w<<<W"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/be.txt
+++ b/intl/icu/source/data/coll/be.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 be{
    collations{
        standard{
@@ -9,7 +9,7 @@ be{
                "&Е<ё<<<Ё"
                "&у<ў<<<Ў"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/bg.txt
+++ b/intl/icu/source/data/coll/bg.txt
@@ -1,11 +1,11 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 bg{
    collations{
        standard{
            Sequence{"[reorder Cyrl]"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/bn.txt
+++ b/intl/icu/source/data/coll/bn.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 bn{
    collations{
        standard{
@@ -9,7 +9,7 @@ bn{
                "[reorder Beng Deva Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
                "&ঔ<ং<ঃ<ঁ"
            }
-            Version{"46"}
+            Version{"47"}
        }
        traditional{
            Sequence{
@@ -629,7 +629,7 @@ bn{
                "&যৌ<<<য়ৌ"
                "&য্<<<য়্"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/bo.txt
+++ b/intl/icu/source/data/coll/bo.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 bo{
    collations{
        standard{
@@ -65,7 +65,7 @@ bo{
                "&ྲཱྀ=ཷ"
                "&ླཱྀ=ཹ"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/br.txt
+++ b/intl/icu/source/data/coll/br.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 br{
    collations{
        standard{
@@ -8,7 +8,7 @@ br{
                "&C<ch<<<Ch<<<CH<c''h=c\u02BCh=c\u2019h<<<C''h=C\u02BCh=C\u2019h<<<C'"
                "'H=C\u02BCH=C\u2019H"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/bs.txt
+++ b/intl/icu/source/data/coll/bs.txt
@@ -1,15 +1,15 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 bs{
    collations{
        search{
            Sequence{"[import hr-u-co-search]"}
-            Version{"46"}
+            Version{"47"}
        }
        standard{
            Sequence{"[import hr]"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/bs_Cyrl.txt
+++ b/intl/icu/source/data/coll/bs_Cyrl.txt
@@ -1,11 +1,11 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 bs_Cyrl{
    collations{
        standard{
            Sequence{"[import sr]"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/ca.txt
+++ b/intl/icu/source/data/coll/ca.txt
@@ -1,6 +1,6 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ca{
    collations{
        search{
@@ -8,7 +8,7 @@ ca{
                "[import und-u-co-search]"
                "&L<ŀ=l·<<<Ŀ=L·"
            }
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/intl/icu/source/data/coll/ceb.txt
+++ b/intl/icu/source/data/coll/ceb.txt
@@ -1,11 +1,11 @@
 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
+// Generated using tools/cldr/cldr-to-icu/
 ceb{
    collations{
        standard{
            Sequence{"&N<ñ<<<Ñ<ng<<<Ng<<<NG"}
-            Version{"46"}
+            Version{"47"}
        }
    }
 }
--- a/Show More
+++ b/Show More