Done with: ./mach static-analysis check --checks="-*, readability-redundant-member-init" --fix . https://clang.llvm.org/extra/clang-tidy/checks/readability/redundant-member-init.html Differential Revision: https://phabricator.services.mozilla.com/D190002
397 lines
13 KiB
C++
397 lines
13 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
#include "ICU4CGlue.h"
|
|
#include "NumberFormatFields.h"
|
|
#include "ScopedICUObject.h"
|
|
|
|
#include "mozilla/FloatingPoint.h"
|
|
#include "unicode/uformattedvalue.h"
|
|
#include "unicode/unum.h"
|
|
#include "unicode/unumberformatter.h"
|
|
|
|
namespace mozilla::intl {
|
|
|
|
// Records one field reported by ICU: the half-open code-unit range
// [begin, end) of the formatted string together with the part type it maps to.
//
// Both offsets are expected to be non-negative and the range non-empty; this
// is only checked by debug assertions. Returns whatever
// |fields_.emplaceBack| returns, so a false result means the field could not
// be stored.
bool NumberFormatFields::append(NumberPartType type, int32_t begin,
                                int32_t end) {
  MOZ_ASSERT(begin >= 0);
  MOZ_ASSERT(end >= 0);
  MOZ_ASSERT(begin < end, "erm, aren't fields always non-empty?");

  // Offsets are stored unsigned; the assertions above guard the conversion.
  const auto fieldBegin = static_cast<uint32_t>(begin);
  const auto fieldEnd = static_cast<uint32_t>(end);
  return fields_.emplaceBack(fieldBegin, fieldEnd, type);
}
|
|
|
|
bool NumberFormatFields::toPartsVector(size_t overallLength,
|
|
const NumberPartSourceMap& sourceMap,
|
|
NumberPartVector& parts) {
|
|
std::sort(fields_.begin(), fields_.end(),
|
|
[](const NumberFormatField& left, const NumberFormatField& right) {
|
|
// Sort first by begin index, then to place
|
|
// enclosing fields before nested fields.
|
|
return left.begin < right.begin ||
|
|
(left.begin == right.begin && left.end > right.end);
|
|
});
|
|
|
|
// Then iterate over the sorted field list to generate a sequence of parts
|
|
// (what ECMA-402 actually exposes). A part is a maximal character sequence
|
|
// entirely within no field or a single most-nested field.
|
|
//
|
|
// Diagrams may be helpful to illustrate how fields map to parts. Consider
|
|
// formatting -19,766,580,028,249.41, the US national surplus (negative
|
|
// because it's actually a debt) on October 18, 2016.
|
|
//
|
|
// var options =
|
|
// { style: "currency", currency: "USD", currencyDisplay: "name" };
|
|
// var usdFormatter = new Intl.NumberFormat("en-US", options);
|
|
// usdFormatter.format(-19766580028249.41);
|
|
//
|
|
// The formatted result is "-19,766,580,028,249.41 US dollars". ICU
|
|
// identifies these fields in the string:
|
|
//
|
|
// UNUM_GROUPING_SEPARATOR_FIELD
|
|
// |
|
|
// UNUM_SIGN_FIELD | UNUM_DECIMAL_SEPARATOR_FIELD
|
|
// | __________/| |
|
|
// | / | | | |
|
|
// "-19,766,580,028,249.41 US dollars"
|
|
// \________________/ |/ \_______/
|
|
// | | |
|
|
// UNUM_INTEGER_FIELD | UNUM_CURRENCY_FIELD
|
|
// |
|
|
// UNUM_FRACTION_FIELD
|
|
//
|
|
// These fields map to parts as follows:
|
|
//
|
|
// integer decimal
|
|
// _____|________ |
|
|
// / /| |\ |\ |\ | literal
|
|
// /| / | | \ | \ | \| |
|
|
// "-19,766,580,028,249.41 US dollars"
|
|
// | \___|___|___/ |/ \________/
|
|
// | | | |
|
|
// | group | currency
|
|
// | |
|
|
// minusSign fraction
|
|
//
|
|
// The sign is a part. Each comma is a part, splitting the integer field
|
|
// into parts for trillions/billions/&c. digits. The decimal point is a
|
|
// part. Cents are a part. The space between cents and currency is a part
|
|
// (outside any field). Last, the currency field is a part.
|
|
|
|
class PartGenerator {
|
|
// The fields in order from start to end, then least to most nested.
|
|
const FieldsVector& fields;
|
|
|
|
// Index of the current field, in |fields|, being considered to
|
|
// determine part boundaries. |lastEnd <= fields[index].begin| is an
|
|
// invariant.
|
|
size_t index = 0;
|
|
|
|
// The end index of the last part produced, always less than or equal
|
|
// to |limit|, strictly increasing.
|
|
uint32_t lastEnd = 0;
|
|
|
|
// The length of the overall formatted string.
|
|
const uint32_t limit = 0;
|
|
|
|
NumberPartSourceMap sourceMap;
|
|
|
|
Vector<size_t, 4> enclosingFields;
|
|
|
|
void popEnclosingFieldsEndingAt(uint32_t end) {
|
|
MOZ_ASSERT_IF(enclosingFields.length() > 0,
|
|
fields[enclosingFields.back()].end >= end);
|
|
|
|
while (enclosingFields.length() > 0 &&
|
|
fields[enclosingFields.back()].end == end) {
|
|
enclosingFields.popBack();
|
|
}
|
|
}
|
|
|
|
bool nextPartInternal(NumberPart* part) {
|
|
size_t len = fields.length();
|
|
MOZ_ASSERT(index <= len);
|
|
|
|
// If we're out of fields, all that remains are part(s) consisting
|
|
// of trailing portions of enclosing fields, and maybe a final
|
|
// literal part.
|
|
if (index == len) {
|
|
if (enclosingFields.length() > 0) {
|
|
const auto& enclosing = fields[enclosingFields.popCopy()];
|
|
*part = {enclosing.type, sourceMap.source(enclosing), enclosing.end};
|
|
|
|
// If additional enclosing fields end where this part ends,
|
|
// pop them as well.
|
|
popEnclosingFieldsEndingAt(part->endIndex);
|
|
} else {
|
|
*part = {NumberPartType::Literal, sourceMap.source(limit), limit};
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Otherwise we still have a field to process.
|
|
const NumberFormatField* current = &fields[index];
|
|
MOZ_ASSERT(lastEnd <= current->begin);
|
|
MOZ_ASSERT(current->begin < current->end);
|
|
|
|
// But first, deal with inter-field space.
|
|
if (lastEnd < current->begin) {
|
|
if (enclosingFields.length() > 0) {
|
|
// Space between fields, within an enclosing field, is part
|
|
// of that enclosing field, until the start of the current
|
|
// field or the end of the enclosing field, whichever is
|
|
// earlier.
|
|
const auto& enclosing = fields[enclosingFields.back()];
|
|
*part = {enclosing.type, sourceMap.source(enclosing),
|
|
std::min(enclosing.end, current->begin)};
|
|
popEnclosingFieldsEndingAt(part->endIndex);
|
|
} else {
|
|
// If there's no enclosing field, the space is a literal.
|
|
*part = {NumberPartType::Literal, sourceMap.source(current->begin),
|
|
current->begin};
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Otherwise, the part spans a prefix of the current field. Find
|
|
// the most-nested field containing that prefix.
|
|
const NumberFormatField* next;
|
|
do {
|
|
current = &fields[index];
|
|
|
|
// If the current field is last, the part extends to its end.
|
|
if (++index == len) {
|
|
*part = {current->type, sourceMap.source(*current), current->end};
|
|
return true;
|
|
}
|
|
|
|
next = &fields[index];
|
|
MOZ_ASSERT(current->begin <= next->begin);
|
|
MOZ_ASSERT(current->begin < next->end);
|
|
|
|
// If the next field nests within the current field, push an
|
|
// enclosing field. (If there are no nested fields, don't
|
|
// bother pushing a field that'd be immediately popped.)
|
|
if (current->end > next->begin) {
|
|
if (!enclosingFields.append(index - 1)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Do so until the next field begins after this one.
|
|
} while (current->begin == next->begin);
|
|
|
|
if (current->end <= next->begin) {
|
|
// The next field begins after the current field ends. Therefore
|
|
// the current part ends at the end of the current field.
|
|
*part = {current->type, sourceMap.source(*current), current->end};
|
|
popEnclosingFieldsEndingAt(part->endIndex);
|
|
} else {
|
|
// The current field encloses the next one. The current part
|
|
// ends where the next field/part will start.
|
|
*part = {current->type, sourceMap.source(*current), next->begin};
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
public:
|
|
PartGenerator(const FieldsVector& vec, uint32_t limit,
|
|
const NumberPartSourceMap& sourceMap)
|
|
: fields(vec), limit(limit), sourceMap(sourceMap) {}
|
|
|
|
bool nextPart(bool* hasPart, NumberPart* part) {
|
|
// There are no parts left if we've partitioned the entire string.
|
|
if (lastEnd == limit) {
|
|
MOZ_ASSERT(enclosingFields.length() == 0);
|
|
*hasPart = false;
|
|
return true;
|
|
}
|
|
|
|
if (!nextPartInternal(part)) {
|
|
return false;
|
|
}
|
|
|
|
*hasPart = true;
|
|
lastEnd = part->endIndex;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Finally, generate the result array.
|
|
size_t lastEndIndex = 0;
|
|
|
|
PartGenerator gen(fields_, overallLength, sourceMap);
|
|
do {
|
|
bool hasPart;
|
|
NumberPart part;
|
|
if (!gen.nextPart(&hasPart, &part)) {
|
|
return false;
|
|
}
|
|
|
|
if (!hasPart) {
|
|
break;
|
|
}
|
|
|
|
MOZ_ASSERT(lastEndIndex < part.endIndex);
|
|
|
|
if (!parts.append(part)) {
|
|
return false;
|
|
}
|
|
|
|
lastEndIndex = part.endIndex;
|
|
} while (true);
|
|
|
|
MOZ_ASSERT(lastEndIndex == overallLength,
|
|
"result array must partition the entire string");
|
|
|
|
return lastEndIndex == overallLength;
|
|
}
|
|
|
|
// Overload for a UFormattedNumber: obtains the UFormattedValue view of
// |value| through unumf_resultAsValue and hands everything else unchanged to
// the UFormattedValue overload. An ICU failure while obtaining that view is
// returned as an ICUError.
Result<std::u16string_view, ICUError> FormatResultToParts(
    const UFormattedNumber* value, Maybe<double> number, bool isNegative,
    bool formatForUnit, NumberPartVector& parts) {
  UErrorCode icuStatus = U_ZERO_ERROR;

  const UFormattedValue* asFormattedValue =
      unumf_resultAsValue(value, &icuStatus);
  if (U_FAILURE(icuStatus)) {
    return Err(ToICUError(icuStatus));
  }

  return FormatResultToParts(asFormattedValue, number, isNegative,
                             formatForUnit, parts);
}
|
|
|
|
// Reads the formatted string and all UFIELD_CATEGORY_NUMBER fields out of
// |value|, converts the fields into parts appended to |parts|, and returns a
// view of the formatted string.
//
// |number|, |isNegative| and |formatForUnit| are only forwarded to
// GetPartTypeForNumberField to choose the part type for each ICU field.
//
// Errors: any ICU failure is converted with ToICUError; a field without a
// part type, a failed append, or a failed toPartsVector call yields
// ICUError::InternalError.
//
// The returned view refers to the buffer handed out by ufmtval_getString.
// NOTE(review): presumably that buffer is owned by |value| and only valid
// while |value| is alive and unmodified -- confirm against the ICU docs.
Result<std::u16string_view, ICUError> FormatResultToParts(
    const UFormattedValue* value, Maybe<double> number, bool isNegative,
    bool formatForUnit, NumberPartVector& parts) {
  UErrorCode status = U_ZERO_ERROR;

  int32_t textLength = 0;
  const char16_t* text = ufmtval_getString(value, &textLength, &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }

  UConstrainedFieldPosition* position = ucfpos_open(&status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }
  // Closes |position| on every exit path from here on.
  ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> closePosition(
      position);

  // Only fields in UFIELD_CATEGORY_NUMBER are of interest.
  ucfpos_constrainCategory(position, UFIELD_CATEGORY_NUMBER, &status);
  if (U_FAILURE(status)) {
    return Err(ToICUError(status));
  }

  // Gather every field ICU reports for the formatted string.
  NumberFormatFields collected;

  for (;;) {
    const bool found = ufmtval_nextPosition(value, position, &status);
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }
    if (!found) {
      break;
    }

    const int32_t rawField = ucfpos_getField(position, &status);
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }

    int32_t fieldBegin = 0;
    int32_t fieldEnd = 0;
    ucfpos_getIndexes(position, &fieldBegin, &fieldEnd, &status);
    if (U_FAILURE(status)) {
      return Err(ToICUError(status));
    }

    const Maybe<NumberPartType> partType = GetPartTypeForNumberField(
        static_cast<UNumberFormatFields>(rawField), number, isNegative,
        formatForUnit);
    if (!partType) {
      return Err(ICUError::InternalError);
    }
    if (!collected.append(*partType, fieldBegin, fieldEnd)) {
      return Err(ICUError::InternalError);
    }
  }

  if (!collected.toPartsVector(textLength, parts)) {
    return Err(ICUError::InternalError);
  }

  return std::u16string_view(text, static_cast<size_t>(textLength));
}
|
|
|
|
// See intl/icu/source/i18n/unicode/unum.h for a detailed field list. This
|
|
// list is deliberately exhaustive: cases might have to be added/removed if
|
|
// this code is compiled with a different ICU with more UNumberFormatFields
|
|
// enum initializers. Please guard such cases with appropriate ICU
|
|
// version-testing #ifdefs, should cross-version divergence occur.
|
|
Maybe<NumberPartType> GetPartTypeForNumberField(UNumberFormatFields fieldName,
|
|
Maybe<double> number,
|
|
bool isNegative,
|
|
bool formatForUnit) {
|
|
switch (fieldName) {
|
|
case UNUM_INTEGER_FIELD:
|
|
if (number.isSome()) {
|
|
if (std::isnan(*number)) {
|
|
return Some(NumberPartType::Nan);
|
|
}
|
|
if (!std::isfinite(*number)) {
|
|
return Some(NumberPartType::Infinity);
|
|
}
|
|
}
|
|
return Some(NumberPartType::Integer);
|
|
case UNUM_FRACTION_FIELD:
|
|
return Some(NumberPartType::Fraction);
|
|
case UNUM_DECIMAL_SEPARATOR_FIELD:
|
|
return Some(NumberPartType::Decimal);
|
|
case UNUM_EXPONENT_SYMBOL_FIELD:
|
|
return Some(NumberPartType::ExponentSeparator);
|
|
case UNUM_EXPONENT_SIGN_FIELD:
|
|
return Some(NumberPartType::ExponentMinusSign);
|
|
case UNUM_EXPONENT_FIELD:
|
|
return Some(NumberPartType::ExponentInteger);
|
|
case UNUM_GROUPING_SEPARATOR_FIELD:
|
|
return Some(NumberPartType::Group);
|
|
case UNUM_CURRENCY_FIELD:
|
|
return Some(NumberPartType::Currency);
|
|
case UNUM_PERCENT_FIELD:
|
|
if (formatForUnit) {
|
|
return Some(NumberPartType::Unit);
|
|
}
|
|
return Some(NumberPartType::Percent);
|
|
case UNUM_PERMILL_FIELD:
|
|
MOZ_ASSERT_UNREACHABLE(
|
|
"unexpected permill field found, even though "
|
|
"we don't use any user-defined patterns that "
|
|
"would require a permill field");
|
|
break;
|
|
case UNUM_SIGN_FIELD:
|
|
if (isNegative) {
|
|
return Some(NumberPartType::MinusSign);
|
|
}
|
|
return Some(NumberPartType::PlusSign);
|
|
case UNUM_MEASURE_UNIT_FIELD:
|
|
return Some(NumberPartType::Unit);
|
|
case UNUM_COMPACT_FIELD:
|
|
return Some(NumberPartType::Compact);
|
|
case UNUM_APPROXIMATELY_SIGN_FIELD:
|
|
return Some(NumberPartType::ApproximatelySign);
|
|
#ifndef U_HIDE_DEPRECATED_API
|
|
case UNUM_FIELD_COUNT:
|
|
MOZ_ASSERT_UNREACHABLE(
|
|
"format field sentinel value returned by iterator!");
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
MOZ_ASSERT_UNREACHABLE(
|
|
"unenumerated, undocumented format field returned by iterator");
|
|
return Nothing();
|
|
}
|
|
|
|
} // namespace mozilla::intl
|