Bug 1490601 part 2 - Move C++ entry points to encoding_c_mem to mfbt/. r=jwalden

Differential Revision: https://phabricator.services.mozilla.com/D43957
This commit is contained in:
Henri Sivonen
2019-09-18 08:26:34 +00:00
parent 838f9adfa7
commit c18206b0e8
54 changed files with 621 additions and 355 deletions

View File

@@ -23,6 +23,7 @@
#include "mozilla/dom/Document.h"
#include "mozilla/Logging.h"
#include "mozilla/StaticPtr.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Unused.h"
#include "nsGlobalWindowOuter.h"
@@ -433,7 +434,7 @@ NS_IMETHODIMP
ThirdPartyUtil::GetBaseDomainFromSchemeHost(const nsACString& aScheme,
const nsACString& aAsciiHost,
nsACString& aBaseDomain) {
MOZ_DIAGNOSTIC_ASSERT(IsASCII(aAsciiHost));
MOZ_DIAGNOSTIC_ASSERT(IsAscii(aAsciiHost));
// Get the base domain. this will fail if the host contains a leading dot,
// more than one trailing dot, or is otherwise malformed.

View File

@@ -8038,7 +8038,7 @@ class BulkAppender {
void Append(Span<const char> aStr) {
size_t len = aStr.Length();
MOZ_ASSERT(mPosition + len <= mHandle.Length());
ConvertLatin1toUTF16(aStr, mHandle.AsSpan().From(mPosition));
ConvertLatin1toUtf16(aStr, mHandle.AsSpan().From(mPosition));
mPosition += len;
}

View File

@@ -314,7 +314,7 @@ bool nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength,
}
// Copy data
LossyConvertUTF16toLatin1(MakeSpan(aBuffer, aLength),
LossyConvertUtf16toLatin1(MakeSpan(aBuffer, aLength),
MakeSpan(buff, aLength));
m1b = buff;
mState.mIs2b = false;
@@ -344,7 +344,7 @@ void nsTextFragment::CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount) {
memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
} else {
const char* cp = m1b + aOffset;
ConvertLatin1toUTF16(MakeSpan(cp, aCount), MakeSpan(aDest, aCount));
ConvertLatin1toUtf16(MakeSpan(cp, aCount), MakeSpan(aDest, aCount));
}
}
}
@@ -429,7 +429,7 @@ bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
// Copy data into buff
char16_t* data = static_cast<char16_t*>(buff->Data());
ConvertLatin1toUTF16(MakeSpan(m1b, mState.mLength),
ConvertLatin1toUtf16(MakeSpan(m1b, mState.mLength),
MakeSpan(data, mState.mLength));
memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
@@ -471,7 +471,7 @@ bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
}
// Copy aBuffer into buff.
LossyConvertUTF16toLatin1(MakeSpan(aBuffer, aLength),
LossyConvertUtf16toLatin1(MakeSpan(aBuffer, aLength),
MakeSpan(buff + mState.mLength, aLength));
m1b = buff;

View File

@@ -2654,7 +2654,7 @@ bool NonVoidByteStringToJsval(JSContext* cx, const nsACString& str,
void NormalizeUSVString(nsAString& aString) { EnsureUTF16Validity(aString); }
void NormalizeUSVString(binding_detail::FakeString& aString) {
EnsureUTF16ValiditySpan(aString);
EnsureUtf16ValiditySpan(aString);
}
bool ConvertJSValueToByteString(JSContext* cx, JS::Handle<JS::Value> v,

View File

@@ -12,6 +12,7 @@
#include "jsapi.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Utf8.h"
#include "mozilla/dom/CryptoBuffer.h"
#include "mozilla/dom/CryptoKey.h"
#include "mozilla/dom/KeyAlgorithmProxy.h"
@@ -1365,7 +1366,7 @@ class ImportKeyTask : public WebCryptoTask {
nsDependentCSubstring utf8(
(const char*)mKeyData.Elements(),
(const char*)(mKeyData.Elements() + mKeyData.Length()));
if (!IsUTF8(utf8)) {
if (!IsUtf8(utf8)) {
mEarlyRv = NS_ERROR_DOM_DATA_ERR;
return;
}

View File

@@ -34,7 +34,7 @@ void TextEncoder::Encode(JSContext* aCx, JS::Handle<JSObject*> aObj,
return;
}
size_t utf8Len = ConvertUTF16toUTF8(
size_t utf8Len = ConvertUtf16toUtf8(
aString, MakeSpan(reinterpret_cast<char*>(data.get()), bufLen.value()));
MOZ_ASSERT(utf8Len <= bufLen.value());
@@ -53,7 +53,7 @@ void TextEncoder::EncodeInto(const nsAString& aSrc, const Uint8Array& aDst,
aDst.ComputeLengthAndData();
size_t read;
size_t written;
Tie(read, written) = ConvertUTF16toUTF8Partial(
Tie(read, written) = ConvertUtf16toUtf8Partial(
aSrc, MakeSpan(reinterpret_cast<char*>(aDst.Data()), aDst.Length()));
aResult.mRead.Construct() = read;
aResult.mWritten.Construct() = written;

View File

@@ -246,7 +246,7 @@ namespace {
void PopulateBufferForBinaryString(char16_t* aDest, const char* aSource,
uint32_t aCount) {
// Zero-extend each char to char16_t.
ConvertLatin1toUTF16(MakeSpan(aSource, aCount), MakeSpan(aDest, aCount));
ConvertLatin1toUtf16(MakeSpan(aSource, aCount), MakeSpan(aDest, aCount));
}
nsresult ReadFuncBinaryString(nsIInputStream* aInputStream, void* aClosure,

View File

@@ -18,6 +18,7 @@
#include "mozilla/dom/WheelEventBinding.h"
#include "mozilla/PresShell.h"
#include "mozilla/StaticPrefs_dom.h"
#include "mozilla/TextUtils.h"
#include "nsAttrValueInlines.h"
#include "nsCRTGlue.h"
#include "nsQueryObject.h"
@@ -1540,7 +1541,7 @@ int32_t HTMLInputElement::MonthsSinceJan1970(uint32_t aYear,
/* static */
Decimal HTMLInputElement::StringToDecimal(const nsAString& aValue) {
if (!IsASCII(aValue)) {
if (!IsAscii(aValue)) {
return Decimal::nan();
}
NS_LossyConvertUTF16toASCII asciiString(aValue);

View File

@@ -52,8 +52,10 @@
#include "mozilla/LoadInfo.h"
#include "mozilla/Maybe.h"
#include "mozilla/TextUtils.h"
#include "mozilla/ipc/URIUtils.h"
using mozilla::IsAscii;
using mozilla::dom::AutoEntryScript;
static NS_DEFINE_CID(kJSURICID, NS_JSURI_CID);
@@ -1078,7 +1080,7 @@ nsresult nsJSProtocolHandler::Create(nsISupports* aOuter, REFNSIID aIID,
uStr);
NS_ENSURE_SUCCESS(rv, rv);
if (!IsASCII(uStr)) {
if (!IsAscii(uStr)) {
rv = NS_EscapeURL(NS_ConvertUTF16toUTF8(uStr),
esc_AlwaysCopy | esc_OnlyNonASCII, aUTF8Spec,
mozilla::fallible);

View File

@@ -7,6 +7,8 @@
#include <string.h>
#include "mozilla/EndianUtils.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include <stdint.h>
#include <algorithm>
#include <opus/opus.h>
@@ -109,7 +111,7 @@ bool OggCodecState::AddVorbisComment(UniquePtr<MetadataTags>& aTags,
}
uint32_t valueLength = aLength - (div - aComment);
nsCString value = nsCString(div + 1, valueLength);
if (!IsUTF8(value)) {
if (!IsUtf8(value)) {
LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value"));
return false;
}
@@ -1598,7 +1600,7 @@ bool SkeletonState::DecodeFisbone(ogg_packet* aPacket) {
return false;
}
if ((i == 0 && IsASCII(strMsg)) || (i != 0 && IsUTF8(strMsg))) {
if ((i == 0 && IsAscii(strMsg)) || (i != 0 && IsUtf8(strMsg))) {
EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType;
field->mValuesStore.LookupForAdd(eHeaderType)
.OrInsert([i, msgHead, msgProbe]() {

View File

@@ -11,6 +11,7 @@
#include "mozilla/Assertions.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/Utf8.h"
#include "BufferReader.h"
#include "VideoUtils.h"
#include "TimeUnits.h"
@@ -229,7 +230,7 @@ bool WAVTrackDemuxer::ListChunkParserInit(uint32_t aChunkSize) {
bytesRead += length;
if (!IsUTF8(val)) {
if (!IsUtf8(val)) {
mHeaderParser.Reset();
continue;
}

View File

@@ -58,6 +58,7 @@
#include "mozilla/LoadInfo.h"
#include "mozilla/plugins/PluginBridge.h"
#include "mozilla/plugins/PluginTypes.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Preferences.h"
#include "mozilla/ipc/URIUtils.h"
@@ -1375,7 +1376,7 @@ nsresult nsPluginHost::GetPlugin(const nsACString& aMimeType,
// Normalize 'host' to ACE.
nsresult nsPluginHost::NormalizeHostname(nsCString& host) {
if (IsASCII(host)) {
if (IsAscii(host)) {
ToLowerCase(host);
return NS_OK;
}

View File

@@ -22,9 +22,11 @@
#include "nsJSPrincipals.h"
#include "nsIScriptError.h"
#include "js/Wrapper.h"
#include "mozilla/Utf8.h"
extern mozilla::LazyLogModule MCD;
using mozilla::AutoSafeJSContext;
using mozilla::IsUtf8;
using mozilla::NullPrincipal;
using mozilla::dom::AutoJSAPI;
@@ -138,7 +140,7 @@ nsresult EvaluateAdminConfigScript(JS::HandleObject sandbox,
JS::RootedValue v(cx);
nsString convertedScript;
bool isUTF8 = IsUTF8(script);
bool isUTF8 = IsUtf8(script);
if (isUTF8) {
convertedScript = NS_ConvertUTF8toUTF16(script);
} else {

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include "mozilla/Logging.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Sprintf.h"
#include "gfxGDIFontList.h"
@@ -638,7 +639,7 @@ int CALLBACK gfxGDIFontList::EnumFontFamExProc(ENUMLOGFONTEXW* lpelfe,
// GDI, then if a family name is non-ASCII immediately read in other
// family names. This assures that MS Gothic, MS Mincho are all found
// before lookups begin.
if (!IsASCII(faceName)) {
if (!IsAscii(faceName)) {
family->ReadOtherFamilyNames(gfxPlatformFontList::PlatformFontList());
}

View File

@@ -36,6 +36,7 @@
#include "mozilla/gfx/2D.h"
#include "mozilla/ipc/FileDescriptorUtils.h"
#include "mozilla/ResultExtensions.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Unused.h"
#include "base/eintr_wrapper.h"
@@ -944,7 +945,7 @@ bool gfxPlatformFontList::FindAndAddFamilies(
// since reading name table entries is expensive.
// Although ASCII localized family names are possible they don't occur
// in practice, so avoid pulling in names at startup.
if (!mOtherFamilyNamesInitialized && !IsASCII(aFamily)) {
if (!mOtherFamilyNamesInitialized && !IsAscii(aFamily)) {
InitOtherFamilyNames(
!(aFlags & FindFamiliesFlags::eForceOtherFamilyNamesLoading));
family = SharedFontList()->FindFamily(key);
@@ -981,7 +982,7 @@ bool gfxPlatformFontList::FindAndAddFamilies(
// since reading name table entries is expensive.
// although ASCII localized family names are possible they don't occur
// in practice so avoid pulling in names at startup
if (!familyEntry && !mOtherFamilyNamesInitialized && !IsASCII(aFamily)) {
if (!familyEntry && !mOtherFamilyNamesInitialized && !IsAscii(aFamily)) {
InitOtherFamilyNames(
!(aFlags & FindFamiliesFlags::eForceOtherFamilyNamesLoading));
familyEntry = mOtherFamilyNames.GetWeak(key);

View File

@@ -3,6 +3,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/TextUtils.h"
#include "mozilla/intl/MozLocale.h"
#include "nsReadableUtils.h"
@@ -11,13 +12,14 @@
#include "unicode/uloc.h"
using namespace mozilla::intl;
using mozilla::IsAscii;
/**
* Note: The file name is `MozLocale` to avoid compilation problems on
* case-insensitive Windows. The class name is `Locale`.
*/
Locale::Locale(const nsACString& aLocale) {
if (aLocale.IsEmpty() || !IsASCII(aLocale)) {
if (aLocale.IsEmpty() || !IsAscii(aLocale)) {
mIsWellFormed = false;
return;
}

View File

@@ -32,7 +32,6 @@ EXPORTS.mozilla += [
EXPORTS += [
'../third_party/rust/encoding_c/include/encoding_rs.h',
'../third_party/rust/encoding_c/include/encoding_rs_statics.h',
'../third_party/rust/encoding_c_mem/include/encoding_rs_mem.h',
'../third_party/rust/shift_or_euc_c/include/shift_or_euc.h',
]

View File

@@ -10,6 +10,8 @@
#include "mozilla/ArrayUtils.h"
#include "mozilla/Encoding.h"
#include "mozilla/Preferences.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "nsISupportsPrimitives.h"
using namespace mozilla;
@@ -79,11 +81,11 @@ nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
bool isStatefulCharset = statefulCharset(aCharset.get());
if (!isStatefulCharset) {
if (IsASCII(aURI)) {
if (IsAscii(aURI)) {
CopyASCIItoUTF16(aURI, aOut);
return NS_OK;
}
if (IsUTF8(aURI)) {
if (IsUtf8(aURI)) {
CopyUTF8toUTF16(aURI, aOut);
return NS_OK;
}
@@ -148,7 +150,7 @@ nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
// leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
// superset since converting "http:" with such an encoding is always a bad
// idea.
if (!IsUTF8(unescapedSpec) &&
if (!IsUtf8(unescapedSpec) &&
(aCharset.LowerCaseEqualsLiteral("utf-16") ||
aCharset.LowerCaseEqualsLiteral("utf-16be") ||
aCharset.LowerCaseEqualsLiteral("utf-16le") ||

View File

@@ -1763,11 +1763,13 @@ bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
"should only see UTF-8 here");
bool simple = utf16WindowLength == encodedWindowLength;
#ifdef DEBUG
auto isAscii = [](Unit u) { return IsAscii(u); };
MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
IsAscii<Unit>) == simple,
isAscii) == simple,
"equal window lengths in UTF-8 should correspond only to "
"wholly-ASCII text");
#endif
if (simple) {
err->tokenOffset = encodedTokenOffset;
err->lineLength = encodedWindowLength;

View File

@@ -1418,7 +1418,7 @@ class TokenStreamCharsShared {
*/
static constexpr MOZ_ALWAYS_INLINE MOZ_MUST_USE bool isAsciiCodePoint(
int32_t unit) {
return mozilla::IsAscii(unit);
return mozilla::IsAscii(static_cast<char32_t>(unit));
}
JSAtom* drainCharBufferIntoAtom(JSContext* cx) {

View File

@@ -33,9 +33,11 @@ using namespace js;
using namespace js::jit;
using namespace js::wasm;
using mozilla::AsChars;
using mozilla::CheckedInt;
using mozilla::CheckedInt32;
using mozilla::IsValidUtf8;
using mozilla::IsUtf8;
using mozilla::MakeSpan;
using mozilla::Unused;
// Decoder implementation.
@@ -1544,7 +1546,7 @@ static UniqueChars DecodeName(Decoder& d) {
return nullptr;
}
if (!IsValidUtf8(bytes, numBytes)) {
if (!IsUtf8(AsChars(MakeSpan(bytes, numBytes)))) {
return nullptr;
}

View File

@@ -261,7 +261,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
// almost always ASCII, so the inexact allocations below
// should be fine.
if (IsUTF8Latin1(*utf8String)) {
if (IsUtf8Latin1(*utf8String)) {
using UniqueLatin1Chars =
js::UniquePtr<JS::Latin1Char[], JS::FreePolicy>;
@@ -271,7 +271,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
return false;
}
size_t written = LossyConvertUTF8toLatin1(
size_t written = LossyConvertUtf8toLatin1(
*utf8String, MakeSpan(reinterpret_cast<char*>(buffer.get()), len));
buffer[written] = 0;
@@ -310,7 +310,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
// code units in the source. That's why it's OK to claim the
// output buffer has len + 1 space but then still expect to
// have space for the zero terminator.
size_t written = ConvertUTF8toUTF16(
size_t written = ConvertUtf8toUtf16(
*utf8String, MakeSpan(buffer.get(), allocLen.value()));
MOZ_RELEASE_ASSERT(written <= len);
buffer[written] = 0;

View File

@@ -10,6 +10,7 @@
#include "ExampleStylesheet.h"
#include "ServoBindings.h"
#include "mozilla/Encoding.h"
#include "mozilla/Utf8.h"
#include "mozilla/NullPrincipalURI.h"
#include "mozilla/css/SheetParsingMode.h"
#include "ReferrerInfo.h"
@@ -54,7 +55,7 @@ static void ServoSetPropertyByIdBench(const nsACString& css) {
RefPtr<URLExtraData> data =
new URLExtraData(NullPrincipalURI::Create(), referrerInfo.forget(),
NullPrincipal::CreateWithoutOriginAttributes());
ASSERT_TRUE(IsUTF8(css));
ASSERT_TRUE(IsUtf8(css));
for (int i = 0; i < SETPROPERTY_REPETITIONS; i++) {
Servo_DeclarationBlock_SetPropertyById(

21
mfbt/JsRust.h Normal file
View File

@@ -0,0 +1,21 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* Checking for jsrust crate availability for linking.
* For testing, define MOZ_PRETEND_NO_JSRUST to pretend
* that we don't have jsrust.
*/
#ifndef mozilla_JsRust_h
#define mozilla_JsRust_h
#if (defined(MOZ_HAS_MOZGLUE) || defined(MOZILLA_INTERNAL_API)) && \
!defined(MOZ_PRETEND_NO_JSRUST)
# define MOZ_HAS_JSRUST() 1
#else
# define MOZ_HAS_JSRUST() 0
#endif
#endif // mozilla_JsRust_h

180
mfbt/Latin1.h Normal file
View File

@@ -0,0 +1,180 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* Latin-1 operations (i.e. a byte is the corresponding code point).
* (Note: this is *not* the same as the encoding of windows-1252 or
* latin1 content on the web. In Web terms, this encoding
* corresponds to "isomorphic decode" / "isomorphic encoding" from
* the Infra Standard.)
*/
#ifndef mozilla_Latin1_h
#define mozilla_Latin1_h
#include "mozilla/JsRust.h"
#include "mozilla/Span.h"
#include "mozilla/Tuple.h"
#include "mozilla/TypeTraits.h"
#if MOZ_HAS_JSRUST()
# include "encoding_rs_mem.h"
#endif
namespace mozilla {
namespace detail {
// Maps a character type |Char| to the type used when its value is compared
// numerically as an unsigned quantity (see IsNonAsciiLatin1 in this header).
//
// Generic case: inherit whatever MakeUnsigned<Char> provides.
template <typename Char>
class MakeUnsignedChar : public MakeUnsigned<Char> {};
// char16_t maps to itself instead of going through MakeUnsigned.
// NOTE(review): presumably because MakeUnsigned does not accept char16_t --
// confirm against mozilla/TypeTraits.h.
template <>
class MakeUnsignedChar<char16_t> {
public:
using Type = char16_t;
};
// char32_t likewise maps to itself.
template <>
class MakeUnsignedChar<char32_t> {
public:
using Type = char32_t;
};
} // namespace detail
/**
* Returns true iff |aChar| is Latin-1 but not ASCII, i.e. in the range
* [0x80, 0xFF].
*/
template <typename Char>
constexpr bool IsNonAsciiLatin1(Char aChar) {
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
auto uc = static_cast<UnsignedChar>(aChar);
return uc >= 0x80 && uc <= 0xFF;
}
#if MOZ_HAS_JSRUST()
/**
 * Checks whether every code unit of |aString| is a Latin1 character, i.e.
 * lies in the range [U+0000, U+00FF].
 *
 * @param aString a potentially-invalid UTF-16 string to scan
 * @return |true| iff the whole string is Latin1
 */
inline bool IsUtf16Latin1(mozilla::Span<const char16_t> aString) {
  const char16_t* const units = aString.Elements();
  const size_t unitCount = aString.Length();
  return encoding_mem_is_utf16_latin1(units, unitCount);
}
/**
 * Checks whether |aString| is valid UTF-8 whose characters are all Latin-1.
 *
 * When the argument is absolutely guaranteed to be valid UTF-8 already,
 * prefer the faster UnsafeIsValidUtf8Latin1().
 *
 * @param aString potentially-invalid UTF-8 string to scan
 * @return |true| iff the string is valid UTF-8 made only of Latin-1
 *         characters
 */
inline bool IsUtf8Latin1(mozilla::Span<const char> aString) {
  const char* const bytes = aString.Elements();
  const size_t byteCount = aString.Length();
  return encoding_mem_is_utf8_latin1(bytes, byteCount);
}
/**
 * Checks whether |aString| contains only Latin1 characters, that is,
 * characters in the range [U+0000, U+00FF].
 *
 * Precondition: |aString| MUST be valid UTF-8. For input that might be
 * invalid, call |IsUtf8Latin1| instead.
 *
 * @param aString known-valid UTF-8 string to scan
 * @return |true| iff every character is Latin1
 */
inline bool UnsafeIsValidUtf8Latin1(mozilla::Span<const char> aString) {
  const char* const bytes = aString.Elements();
  const size_t byteCount = aString.Length();
  return encoding_mem_is_str_latin1(bytes, byteCount);
}
/**
 * Converts UTF-16 to Latin1 (unsigned byte value is the Unicode scalar
 * value), provided all code points in the input are below U+0100.
 *
 * If the input contains code points above U+00FF, the output is unspecified
 * garbage produced in a memory-safe way; the nature of that garbage must not
 * be relied upon.
 *
 * The length of aDest must not be less than the length of aSource.
 */
inline void LossyConvertUtf16toLatin1(mozilla::Span<const char16_t> aSource,
                                      mozilla::Span<char> aDest) {
  const char16_t* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  encoding_mem_convert_utf16_to_latin1_lossy(src, srcLen, dst, dstLen);
}
/**
 * Converts UTF-8 to Latin1 (unsigned byte value is the Unicode scalar
 * value), provided all code points in the input are below U+0100.
 *
 * If the input contains code points above U+00FF, the output is unspecified
 * garbage produced in a memory-safe way; the nature of that garbage must not
 * be relied upon.
 *
 * The length of aDest must not be less than the length of aSource.
 *
 * @return the number of code units written
 */
inline size_t LossyConvertUtf8toLatin1(mozilla::Span<const char> aSource,
                                       mozilla::Span<char> aDest) {
  const char* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  return encoding_mem_convert_utf8_to_latin1_lossy(src, srcLen, dst, dstLen);
}
/**
 * Writes to |aDest| the UTF-8 representation of each byte of |aSource|,
 * where a byte is interpreted as the Unicode scalar value having that
 * unsigned value.
 *
 * The length of aDest must be at least twice the length of aSource.
 *
 * @return the number of code units written
 */
inline size_t ConvertLatin1toUtf8(mozilla::Span<const char> aSource,
                                  mozilla::Span<char> aDest) {
  const char* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  return encoding_mem_convert_latin1_to_utf8(src, srcLen, dst, dstLen);
}
/**
 * Converts Latin-1 bytes (each byte's unsigned value is the Unicode code
 * point, U+0000 to U+00FF inclusive) to UTF-8 when the output buffer may be
 * too small.
 *
 * Returns the pair (number of bytes read, number of bytes written).
 *
 * When the output isn't large enough, only part of the input is consumed.
 * The conversion is guaranteed to be complete if the length of aDest is at
 * least the length of aSource times two.
 *
 * Even for a partial conversion the output is always valid UTF-8 ending on
 * a scalar value boundary.
 *
 * The semantics of this function match the semantics of
 * TextEncoder.encodeInto.
 * https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto
 */
inline mozilla::Tuple<size_t, size_t> ConvertLatin1toUtf8Partial(
    mozilla::Span<const char> aSource, mozilla::Span<char> aDest) {
  // Both counters are in/out: they carry the buffer lengths into the call
  // and are handed back afterwards as the read/written counts.
  size_t read = aSource.Length();
  size_t written = aDest.Length();
  encoding_mem_convert_latin1_to_utf8_partial(aSource.Elements(), &read,
                                              aDest.Elements(), &written);
  return mozilla::MakeTuple(read, written);
}
/**
 * Widens the Latin-1 code points in |aSource| (each byte is the identical
 * code point) into UTF-16 code points stored in |aDest|.
 *
 * The length of aDest must not be less than the length of aSource.
 */
inline void ConvertLatin1toUtf16(mozilla::Span<const char> aSource,
                                 mozilla::Span<char16_t> aDest) {
  const char* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char16_t* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  encoding_mem_convert_latin1_to_utf16(src, srcLen, dst, dstLen);
}
#endif
}; // namespace mozilla
#endif // mozilla_Latin1_h

View File

@@ -10,35 +10,86 @@
#define mozilla_TextUtils_h
#include "mozilla/Assertions.h"
#include "mozilla/Latin1.h"
#include "mozilla/TypeTraits.h"
namespace mozilla {
namespace detail {
// See Utf8.h for IsUtf8() and conversions between UTF-8 and UTF-16.
// See Latin1.h for testing UTF-16 and UTF-8 for Latin1ness and
// for conversions to and from Latin1.
template <typename Char>
class MakeUnsignedChar : public MakeUnsigned<Char> {};
template <>
class MakeUnsignedChar<char16_t> {
public:
using Type = char16_t;
};
template <>
class MakeUnsignedChar<char32_t> {
public:
using Type = char32_t;
};
} // namespace detail
// The overloads below are not templated in order to make
// implicit conversions to span work as expected for the Span
// overloads.
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
template <typename Char>
constexpr bool IsAscii(Char aChar) {
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
auto uc = static_cast<UnsignedChar>(aChar);
return uc < 0x80;
constexpr bool IsAscii(unsigned char aChar) { return aChar <= 0x7F; }
/**
 * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
 * The value is examined as an unsigned byte.
 */
constexpr bool IsAscii(signed char aChar) {
  return static_cast<unsigned char>(aChar) <= 0x7F;
}
/**
 * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
 * The value is examined as an unsigned byte.
 */
constexpr bool IsAscii(char aChar) {
  return static_cast<unsigned char>(aChar) <= 0x7F;
}
/** Returns true iff the UTF-16 code unit |aChar| is in the range [0, 0x80). */
constexpr bool IsAscii(char16_t aChar) { return aChar <= 0x7F; }
/** Returns true iff the 32-bit unit |aChar| is in the range [0, 0x80). */
constexpr bool IsAscii(char32_t aChar) { return aChar <= 0x7F; }
/**
 * Checks whether |aString| consists solely of ASCII characters, that is,
 * characters in the range [0x00, 0x80).
 *
 * @param aString an 8-bit wide string to scan
 * @return |true| iff no unit of the string is outside the ASCII range
 */
inline bool IsAscii(mozilla::Span<const char> aString) {
#if MOZ_HAS_JSRUST()
  const char* const chars = aString.Elements();
  const size_t count = aString.Length();
  if (count >= 16) {
    return encoding_mem_is_ascii(chars, count);
  }
  // Short input: skip the function call, since the SIMD code would not get
  // a chance to kick in anyway. OR all the bytes together and check the
  // combined value once at the end.
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(chars);
  uint8_t merged = 0;
  for (size_t idx = 0; idx != count; ++idx) {
    merged |= bytes[idx];
  }
  return merged < 0x80;
#else
  for (char unit : aString) {
    if (IsAscii(unit)) {
      continue;
    }
    return false;
  }
  return true;
#endif
}
/**
 * Checks whether |aString| consists solely of ASCII characters, that is,
 * characters in the range [0x00, 0x80).
 *
 * @param aString a 16-bit wide string to scan
 * @return |true| iff no unit of the string is outside the ASCII range
 */
inline bool IsAscii(mozilla::Span<const char16_t> aString) {
#if MOZ_HAS_JSRUST()
  const char16_t* const units = aString.Elements();
  const size_t unitCount = aString.Length();
  return encoding_mem_is_basic_latin(units, unitCount);
#else
  for (char16_t unit : aString) {
    if (IsAscii(unit)) {
      continue;
    }
    return false;
  }
  return true;
#endif
}
/**
@@ -55,17 +106,40 @@ constexpr bool IsAsciiNullTerminated(const Char* aChar) {
return true;
}
#if MOZ_HAS_JSRUST()
/**
* Returns true iff |aChar| is Latin-1 but not ASCII, i.e. in the range
* [0x80, 0xFF].
* Returns the index of the first unpaired surrogate or
* the length of the string if there are none.
*/
template <typename Char>
constexpr bool IsNonAsciiLatin1(Char aChar) {
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
auto uc = static_cast<UnsignedChar>(aChar);
return uc >= 0x80 && uc <= 0xFF;
inline size_t Utf16ValidUpTo(mozilla::Span<const char16_t> aString) {
  // Hand the raw buffer and its length to the encoding_rs_mem scanner and
  // pass its result straight back to the caller.
  const char16_t* const units = aString.Elements();
  const size_t unitCount = aString.Length();
  return encoding_mem_utf16_valid_up_to(units, unitCount);
}
/**
 * Rewrites |aString| in place so that every unpaired surrogate becomes
 * U+FFFD.
 *
 * Note: For an nsAString, call EnsureUTF16Validity() from nsReadableUtils.h
 * instead; that avoids unsharing a valid shared string.
 */
inline void EnsureUtf16ValiditySpan(mozilla::Span<char16_t> aString) {
  char16_t* const units = aString.Elements();
  const size_t unitCount = aString.Length();
  encoding_mem_ensure_utf16_validity(units, unitCount);
}
/**
 * Converts ASCII in |aSource| to UTF-16 in |aDest|. Debug builds assert
 * that the input really is ASCII.
 *
 * The length of aDest must not be less than the length of aSource.
 */
inline void ConvertAsciitoUtf16(mozilla::Span<const char> aSource,
                                mozilla::Span<char16_t> aDest) {
  MOZ_ASSERT(IsAscii(aSource));
  // The conversion itself is the Latin-1 to UTF-16 conversion; this is the
  // same call ConvertLatin1toUtf16() makes.
  encoding_mem_convert_latin1_to_utf16(aSource.Elements(), aSource.Length(),
                                       aDest.Elements(), aDest.Length());
}
#endif // MOZ_HAS_JSRUST
/**
* Returns true iff |aChar| matches Ascii Whitespace.
*

View File

@@ -12,8 +12,9 @@
#include <stddef.h>
#include <stdint.h>
MFBT_API bool mozilla::IsValidUtf8(const void* aCodeUnits, size_t aCount) {
const auto* s = static_cast<const unsigned char*>(aCodeUnits);
MFBT_API bool mozilla::detail::IsValidUtf8(const void* aCodeUnits,
size_t aCount) {
const auto* s = reinterpret_cast<const unsigned char*>(aCodeUnits);
const auto* const limit = s + aCount;
while (s < limit) {

View File

@@ -15,13 +15,30 @@
#include "mozilla/Casting.h" // for mozilla::AssertedCast
#include "mozilla/Likely.h" // for MOZ_UNLIKELY
#include "mozilla/Maybe.h" // for mozilla::Maybe
#include "mozilla/TextUtils.h" // for mozilla::IsAscii
#include "mozilla/Span.h" // for mozilla::Span
#include "mozilla/TextUtils.h" // for mozilla::IsAscii and via Latin1.h for
// encoding_rs_mem.h and MOZ_HAS_JSRUST.
#include "mozilla/Tuple.h" // for mozilla::Tuple
#include "mozilla/Types.h" // for MFBT_API
#include <limits.h> // for CHAR_BIT
#include <stddef.h> // for size_t
#include <stdint.h> // for uint8_t
#if MOZ_HAS_JSRUST()
// Can't include mozilla/Encoding.h here.
extern "C" {
// Declared as uint8_t instead of char to match declaration in another header.
size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
}
#else
namespace mozilla {
namespace detail {
extern MFBT_API bool IsValidUtf8(const void* aCodeUnits, size_t aCount);
}; // namespace detail
}; // namespace mozilla
#endif // MOZ_HAS_JSRUST
namespace mozilla {
union Utf8Unit;
@@ -224,20 +241,127 @@ inline const unsigned char* Utf8AsUnsignedChars(const Utf8Unit* aUnits) {
}
/** Returns true iff |aUnit| is an ASCII value. */
template <>
inline bool IsAscii<Utf8Unit>(Utf8Unit aUnit) {
return IsAscii(aUnit.toUint8());
constexpr bool IsAscii(Utf8Unit aUnit) {
return IsAscii(aUnit.toUnsignedChar());
}
/**
* Returns true if the given length-delimited memory consists of a valid UTF-8
* string, false otherwise.
* Return true if the given span of memory consists of a valid UTF-8
* string and false otherwise.
*
* A valid UTF-8 string contains no overlong-encoded code points (as one would
* expect) and contains no code unit sequence encoding a UTF-16 surrogate. The
* string *may* contain U+0000 NULL code points.
* The string *may* contain U+0000 NULL code points.
*/
extern MFBT_API bool IsValidUtf8(const void* aCodeUnits, size_t aCount);
inline bool IsUtf8(mozilla::Span<const char> aString) {
#if MOZ_HAS_JSRUST()
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(aString.Elements());
  size_t remaining = aString.Length();
  // For short strings, the function call is a pessimization, and the SIMD
  // code won't have a chance to kick in anyway. So scan the leading ASCII
  // run inline and only call out for whatever follows it.
  if (remaining < 16) {
    size_t asciiPrefix = 0;
    while (asciiPrefix < remaining && bytes[asciiPrefix] < 0x80U) {
      ++asciiPrefix;
    }
    if (asciiPrefix == remaining) {
      // Entirely ASCII (or empty): valid without any call.
      return true;
    }
    bytes += asciiPrefix;
    remaining -= asciiPrefix;
  }
  // Valid iff the validator accepts the whole (remaining) buffer.
  return encoding_utf8_valid_up_to(bytes, remaining) == remaining;
#else
  return detail::IsValidUtf8(aString.Elements(), aString.Length());
#endif
}
#if MOZ_HAS_JSRUST()
// See Latin1.h for conversions between Latin1 and UTF-8.
/**
 * Finds where |aString| stops being valid UTF-8.
 *
 * @return the index of the start of the first malformed byte sequence, or
 *         the length of the string if there are none
 */
inline size_t Utf8ValidUpTo(mozilla::Span<const char> aString) {
  // The validator is declared in terms of uint8_t, hence the cast.
  const uint8_t* const bytes =
      reinterpret_cast<const uint8_t*>(aString.Elements());
  const size_t byteCount = aString.Length();
  return encoding_utf8_valid_up_to(bytes, byteCount);
}
/**
 * Converts potentially-invalid UTF-16 in |aSource| to UTF-8 in |aDest|;
 * lone surrogates are replaced with the REPLACEMENT CHARACTER.
 *
 * The length of aDest must be at least the length of aSource times three.
 *
 * @return the number of code units written
 */
inline size_t ConvertUtf16toUtf8(mozilla::Span<const char16_t> aSource,
                                 mozilla::Span<char> aDest) {
  const char16_t* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  return encoding_mem_convert_utf16_to_utf8(src, srcLen, dst, dstLen);
}
/**
 * Converts potentially-invalid UTF-16 to UTF-8 replacing lone surrogates
 * with the REPLACEMENT CHARACTER with potentially insufficient output
 * space.
 *
 * Returns the number of UTF-16 code units read and the number of bytes
 * written.
 *
 * If the output isn't large enough, not all input is consumed.
 *
 * The conversion is guaranteed to be complete if the length of aDest is
 * at least the length of aSource times three.
 *
 * The output is always valid UTF-8 ending on scalar value boundary
 * even in the case of partial conversion.
 *
 * The semantics of this function match the semantics of
 * TextEncoder.encodeInto.
 * https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto
 */
inline mozilla::Tuple<size_t, size_t> ConvertUtf16toUtf8Partial(
    mozilla::Span<const char16_t> aSource, mozilla::Span<char> aDest) {
  // Both counters are in/out: they carry the buffer lengths into the call
  // and are handed back afterwards as the read/written counts.
  size_t srcLen = aSource.Length();
  size_t dstLen = aDest.Length();
  encoding_mem_convert_utf16_to_utf8_partial(aSource.Elements(), &srcLen,
                                             aDest.Elements(), &dstLen);
  return mozilla::MakeTuple(srcLen, dstLen);
}
/**
 * Converts potentially-invalid UTF-8 in |aSource| to UTF-16 in |aDest|;
 * malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
 *
 * The length of aDest must be at least one greater than the length of
 * aSource even though the last slot isn't written to.
 *
 * If the input is known for sure to be valid, use
 * UnsafeConvertValidUtf8toUtf16() instead.
 *
 * @return the number of code units written
 */
inline size_t ConvertUtf8toUtf16(mozilla::Span<const char> aSource,
                                 mozilla::Span<char16_t> aDest) {
  const char* const src = aSource.Elements();
  const size_t srcLen = aSource.Length();
  char16_t* const dst = aDest.Elements();
  const size_t dstLen = aDest.Length();
  return encoding_mem_convert_utf8_to_utf16(src, srcLen, dst, dstLen);
}
/**
 * Converts known-valid UTF-8 to UTF-16. If the input might be invalid,
 * use ConvertUtf8toUtf16() instead.
 *
 * Returns the number of code units written.
 *
 * The length of aDest must be at least the length of aSource.
 */
inline size_t UnsafeConvertValidUtf8toUtf16(mozilla::Span<const char> aSource,
                                            mozilla::Span<char16_t> aDest) {
  // Use the entry point for known-valid input. The checked
  // encoding_mem_convert_utf8_to_utf16() requires one more destination slot
  // than the source length, which contradicts the length contract
  // documented above for this function.
  return encoding_mem_convert_str_to_utf16(
      aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
}
#endif // MOZ_HAS_JSRUST
/**
* Returns true iff |aUnit| is a UTF-8 trailing code unit matching the pattern

View File

@@ -9,6 +9,10 @@ with Files("**"):
Library('mfbt')
EXPORTS += [
'../third_party/rust/encoding_c_mem/include/encoding_rs_mem.h',
]
EXPORTS.mozilla = [
'Algorithm.h',
'Alignment.h',
@@ -51,6 +55,8 @@ EXPORTS.mozilla = [
'IntegerRange.h',
'IntegerTypeTraits.h',
'JSONWriter.h',
'JsRust.h',
'Latin1.h',
'Likely.h',
'LinkedList.h',
'MacroArgs.h',

View File

@@ -4,6 +4,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#define MOZ_PRETEND_NO_JSRUST 1
#include "mozilla/Utf8.h"
#include "mozilla/ArrayUtils.h"
@@ -13,11 +15,13 @@
#include "mozilla/TextUtils.h"
using mozilla::ArrayLength;
using mozilla::AsChars;
using mozilla::DecodeOneUtf8CodePoint;
using mozilla::EnumSet;
using mozilla::IntegerRange;
using mozilla::IsAscii;
using mozilla::IsValidUtf8;
using mozilla::IsUtf8;
using mozilla::MakeSpan;
using mozilla::Utf8Unit;
// Disable the C++ 2a warning. See bug #1509926
@@ -243,17 +247,18 @@ static void ExpectBadCodePoint(const Char (&aCharN)[N],
aExpectedUnitsObserved);
}
static void TestIsValidUtf8() {
static void TestIsUtf8() {
// Note we include the U+0000 NULL in this one -- and that's fine.
static const char asciiBytes[] = u8"How about a nice game of chess?";
MOZ_RELEASE_ASSERT(IsValidUtf8(asciiBytes, ArrayLength(asciiBytes)));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(asciiBytes, ArrayLength(asciiBytes))));
static const char endNonAsciiBytes[] = u8"Life is like a 🌯";
MOZ_RELEASE_ASSERT(
IsValidUtf8(endNonAsciiBytes, ArrayLength(endNonAsciiBytes) - 1));
IsUtf8(MakeSpan(endNonAsciiBytes, ArrayLength(endNonAsciiBytes) - 1)));
static const unsigned char badLeading[] = {0x80};
MOZ_RELEASE_ASSERT(!IsValidUtf8(badLeading, ArrayLength(badLeading)));
MOZ_RELEASE_ASSERT(
!IsUtf8(AsChars(MakeSpan(badLeading, ArrayLength(badLeading)))));
// Byte-counts
@@ -261,13 +266,13 @@ static void TestIsValidUtf8() {
static const char oneBytes[] = u8"A"; // U+0041 LATIN CAPITAL LETTER A
constexpr size_t oneBytesLen = ArrayLength(oneBytes);
static_assert(oneBytesLen == 2, "U+0041 plus nul");
MOZ_RELEASE_ASSERT(IsValidUtf8(oneBytes, oneBytesLen));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(oneBytes, oneBytesLen)));
// 2
static const char twoBytes[] = u8"؆"; // U+0606 ARABIC-INDIC CUBE ROOT
constexpr size_t twoBytesLen = ArrayLength(twoBytes);
static_assert(twoBytesLen == 3, "U+0606 in two bytes plus nul");
MOZ_RELEASE_ASSERT(IsValidUtf8(twoBytes, twoBytesLen));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(twoBytes, twoBytesLen)));
ExpectValidCodePoint(twoBytes, 0x0606);
@@ -275,7 +280,7 @@ static void TestIsValidUtf8() {
static const char threeBytes[] = u8""; // U+1A1E BUGINESE PALLAWA
constexpr size_t threeBytesLen = ArrayLength(threeBytes);
static_assert(threeBytesLen == 4, "U+1A1E in three bytes plus nul");
MOZ_RELEASE_ASSERT(IsValidUtf8(threeBytes, threeBytesLen));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(threeBytes, threeBytesLen)));
ExpectValidCodePoint(threeBytes, 0x1A1E);
@@ -284,7 +289,7 @@ static void TestIsValidUtf8() {
u8"🁡"; // U+1F061 DOMINO TILE HORIZONTAL-06-06
constexpr size_t fourBytesLen = ArrayLength(fourBytes);
static_assert(fourBytesLen == 5, "U+1F061 in four bytes plus nul");
MOZ_RELEASE_ASSERT(IsValidUtf8(fourBytes, fourBytesLen));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(fourBytes, fourBytesLen)));
ExpectValidCodePoint(fourBytes, 0x1F061);
@@ -292,7 +297,7 @@ static void TestIsValidUtf8() {
static const char maxCodePoint[] = u8"􏿿"; // U+10FFFF
constexpr size_t maxCodePointLen = ArrayLength(maxCodePoint);
static_assert(maxCodePointLen == 5, "U+10FFFF in four bytes plus nul");
MOZ_RELEASE_ASSERT(IsValidUtf8(maxCodePoint, maxCodePointLen));
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(maxCodePoint, maxCodePointLen)));
ExpectValidCodePoint(maxCodePoint, 0x10FFFF);
@@ -300,7 +305,8 @@ static void TestIsValidUtf8() {
static const unsigned char onePastMaxCodePoint[] = {0xF4, 0x90, 0x80, 0x80,
0x0};
constexpr size_t onePastMaxCodePointLen = ArrayLength(onePastMaxCodePoint);
MOZ_RELEASE_ASSERT(!IsValidUtf8(onePastMaxCodePoint, onePastMaxCodePointLen));
MOZ_RELEASE_ASSERT(
!IsUtf8(AsChars(MakeSpan(onePastMaxCodePoint, onePastMaxCodePointLen))));
ExpectBadCodePoint(onePastMaxCodePoint, 0x110000, 4);
@@ -313,42 +319,45 @@ static void TestIsValidUtf8() {
constexpr size_t justBeforeSurrogatesLen =
ArrayLength(justBeforeSurrogates) - 1;
MOZ_RELEASE_ASSERT(
IsValidUtf8(justBeforeSurrogates, justBeforeSurrogatesLen));
IsUtf8(AsChars(MakeSpan(justBeforeSurrogates, justBeforeSurrogatesLen))));
ExpectValidCodePoint(justBeforeSurrogates, 0xD7FF);
static const unsigned char leastSurrogate[] = {0xED, 0xA0, 0x80, 0x0};
constexpr size_t leastSurrogateLen = ArrayLength(leastSurrogate) - 1;
MOZ_RELEASE_ASSERT(!IsValidUtf8(leastSurrogate, leastSurrogateLen));
MOZ_RELEASE_ASSERT(
!IsUtf8(AsChars(MakeSpan(leastSurrogate, leastSurrogateLen))));
ExpectBadCodePoint(leastSurrogate, 0xD800, 3);
static const unsigned char arbitraryHighSurrogate[] = {0xED, 0xA2, 0x87, 0x0};
constexpr size_t arbitraryHighSurrogateLen =
ArrayLength(arbitraryHighSurrogate) - 1;
MOZ_RELEASE_ASSERT(
!IsValidUtf8(arbitraryHighSurrogate, arbitraryHighSurrogateLen));
MOZ_RELEASE_ASSERT(!IsUtf8(
AsChars(MakeSpan(arbitraryHighSurrogate, arbitraryHighSurrogateLen))));
ExpectBadCodePoint(arbitraryHighSurrogate, 0xD887, 3);
static const unsigned char arbitraryLowSurrogate[] = {0xED, 0xB7, 0xAF, 0x0};
constexpr size_t arbitraryLowSurrogateLen =
ArrayLength(arbitraryLowSurrogate) - 1;
MOZ_RELEASE_ASSERT(
!IsValidUtf8(arbitraryLowSurrogate, arbitraryLowSurrogateLen));
MOZ_RELEASE_ASSERT(!IsUtf8(
AsChars(MakeSpan(arbitraryLowSurrogate, arbitraryLowSurrogateLen))));
ExpectBadCodePoint(arbitraryLowSurrogate, 0xDDEF, 3);
static const unsigned char greatestSurrogate[] = {0xED, 0xBF, 0xBF, 0x0};
constexpr size_t greatestSurrogateLen = ArrayLength(greatestSurrogate) - 1;
MOZ_RELEASE_ASSERT(!IsValidUtf8(greatestSurrogate, greatestSurrogateLen));
MOZ_RELEASE_ASSERT(
!IsUtf8(AsChars(MakeSpan(greatestSurrogate, greatestSurrogateLen))));
ExpectBadCodePoint(greatestSurrogate, 0xDFFF, 3);
static const unsigned char justAfterSurrogates[] = {0xEE, 0x80, 0x80, 0x0};
constexpr size_t justAfterSurrogatesLen =
ArrayLength(justAfterSurrogates) - 1;
MOZ_RELEASE_ASSERT(IsValidUtf8(justAfterSurrogates, justAfterSurrogatesLen));
MOZ_RELEASE_ASSERT(
IsUtf8(AsChars(MakeSpan(justAfterSurrogates, justAfterSurrogatesLen))));
ExpectValidCodePoint(justAfterSurrogates, 0xE000);
}
@@ -737,7 +746,7 @@ static void TestDecodeOneUtf8CodePoint() {
int main() {
TestUtf8Unit();
TestIsValidUtf8();
TestIsUtf8();
TestDecodeOneUtf8CodePoint();
return 0;
}

View File

@@ -63,7 +63,6 @@ CppUnitTests([
'TestTypedEnum',
'TestTypeTraits',
'TestUniquePtr',
'TestUtf8',
'TestVariant',
'TestVector',
'TestWeakPtr',
@@ -71,6 +70,13 @@ CppUnitTests([
'TestXorShift128PlusRNG',
])
# Not to be unified with the rest, because this test
# sets MOZ_PRETEND_NO_JSRUST, which changes the behavior
# of the included headers.
CppUnitTests([
'TestUtf8',
])
if not CONFIG['MOZ_ASAN']:
CppUnitTests([
'TestPoisonArea',

View File

@@ -720,7 +720,7 @@ nsresult ProxyAutoConfig::SetupJS() {
// and otherwise inflate Latin-1 to UTF-16 and compile that.
const char* scriptData = this->mConcatenatedPACData.get();
size_t scriptLength = this->mConcatenatedPACData.Length();
if (mozilla::IsValidUtf8(scriptData, scriptLength)) {
if (mozilla::IsUtf8(mozilla::MakeSpan(scriptData, scriptLength))) {
JS::SourceText<Utf8Unit> srcBuf;
if (!srcBuf.init(cx, scriptData, scriptLength,
JS::SourceOwnership::Borrowed)) {

View File

@@ -19,6 +19,7 @@
#include "nsEscape.h"
#include "nsError.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/TextUtils.h"
#include "mozilla/ipc/URIUtils.h"
#include "nsIURIMutator.h"
#include "mozilla/net/MozURL.h"
@@ -624,7 +625,7 @@ NS_IMETHODIMP
nsSimpleURI::GetAsciiSpec(nsACString& aResult) {
nsresult rv = GetSpec(aResult);
if (NS_FAILED(rv)) return rv;
MOZ_ASSERT(IsASCII(aResult), "The spec should be ASCII");
MOZ_ASSERT(IsAscii(aResult), "The spec should be ASCII");
return NS_OK;
}

View File

@@ -27,6 +27,7 @@
#include "nsReadableUtils.h"
#include "mozilla/net/MozURL_ffi.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
//
// setenv MOZ_LOG nsStandardURL:5
@@ -120,7 +121,7 @@ int32_t nsStandardURL::nsSegmentEncoder::EncodeSegmentCount(
auto encoder = mEncoding->NewEncoder();
nsAutoCString valid; // has to be declared in this scope
if (MOZ_UNLIKELY(!IsUTF8(span.From(upTo)))) {
if (MOZ_UNLIKELY(!IsUtf8(span.From(upTo)))) {
MOZ_ASSERT_UNREACHABLE("Invalid UTF-8 passed to nsStandardURL.");
// It's UB to pass invalid UTF-8 to
// EncodeFromUTF8WithoutReplacement(), so let's make our input valid

View File

@@ -9,6 +9,9 @@
#include "nsEscape.h"
#include "nsIFile.h"
#include "nsNativeCharsetUtils.h"
#include "mozilla/Utf8.h"
using mozilla::IsUtf8;
nsresult net_GetURLSpecFromActualFile(nsIFile* aFile, nsACString& result) {
nsresult rv;
@@ -80,7 +83,7 @@ nsresult net_GetFileFromURLSpec(const nsACString& aURL, nsIFile** result) {
NS_UnescapeURL(path);
if (path.Length() != strlen(path.get())) return NS_ERROR_FILE_INVALID_PATH;
if (IsUTF8(path)) {
if (IsUtf8(path)) {
// speed up the start-up where UTF-8 is the native charset
// (e.g. on recent Linux distributions)
if (NS_IsNativeUTF8())

View File

@@ -9,6 +9,7 @@
#include "nsEscape.h"
#include "nsIFile.h"
#include <windows.h>
#include "mozilla/Utf8.h"
nsresult net_GetURLSpecFromActualFile(nsIFile* aFile, nsACString& result) {
nsresult rv;
@@ -92,7 +93,8 @@ nsresult net_GetFileFromURLSpec(const nsACString& aURL, nsIFile** result) {
// remove leading '\'
if (path.CharAt(0) == '\\') path.Cut(0, 1);
if (IsUTF8(path)) rv = localFile->InitWithPath(NS_ConvertUTF8toUTF16(path));
if (mozilla::IsUtf8(path))
rv = localFile->InitWithPath(NS_ConvertUTF8toUTF16(path));
// XXX In rare cases, a valid UTF-8 string can be valid as a native
// encoding (e.g. 0xC5 0x83 is valid both as UTF-8 and Windows-125x).
// However, the chance is very low that a meaningful word in a legacy

View File

@@ -64,6 +64,7 @@
#include "mozilla/StaticPrefs_network.h"
#include "mozilla/StaticPrefs_privacy.h"
#include "mozilla/Telemetry.h"
#include "mozilla/TextUtils.h"
#include "nsIConsoleService.h"
#include "nsTPriorityQueue.h"
#include "nsVariant.h"
@@ -3948,7 +3949,7 @@ nsresult nsCookieService::GetBaseDomainFromHost(
// components are lower-cased, and UTF-8 components are normalized per
// RFC 3454 and converted to ACE.
nsresult nsCookieService::NormalizeHost(nsCString& aHost) {
if (!IsASCII(aHost)) {
if (!IsAscii(aHost)) {
nsAutoCString host;
nsresult rv = mIDNService->ConvertUTF8toACE(aHost, host);
if (NS_FAILED(rv)) return rv;

View File

@@ -42,6 +42,8 @@
#include "mozilla/net/DNSListenerProxy.h"
#include "mozilla/Services.h"
#include "mozilla/StaticPtr.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
using namespace mozilla;
using namespace mozilla::net;
@@ -776,12 +778,12 @@ nsresult nsDNSService::PreprocessHostname(bool aLocalDomain,
}
}
if (!aIDN || IsASCII(aInput)) {
if (!aIDN || IsAscii(aInput)) {
aACE = aInput;
return NS_OK;
}
if (!(IsUTF8(aInput) && NS_SUCCEEDED(aIDN->ConvertUTF8toACE(aInput, aACE)))) {
if (!(IsUtf8(aInput) && NS_SUCCEEDED(aIDN->ConvertUTF8toACE(aInput, aACE)))) {
return NS_ERROR_FAILURE;
}
return NS_OK;

View File

@@ -12,6 +12,7 @@
#include "mozilla/HashFunctions.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/ResultExtensions.h"
#include "mozilla/TextUtils.h"
#include "MainThreadUtils.h"
#include "nsCRT.h"
@@ -400,7 +401,7 @@ nsresult nsEffectiveTLDService::GetBaseDomainInternal(nsCString& aHostname,
// components are lower-cased, and UTF-8 components are normalized per
// RFC 3454 and converted to ACE.
nsresult nsEffectiveTLDService::NormalizeHostname(nsCString& aHostname) {
if (!IsASCII(aHostname)) {
if (!IsAscii(aHostname)) {
nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
if (NS_FAILED(rv)) return rv;
}

View File

@@ -15,6 +15,8 @@
#include "nsIObserverService.h"
#include "nsISupportsPrimitives.h"
#include "punycode.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
// Currently we use the non-transitional processing option -- see
// http://unicode.org/reports/tr46/
@@ -339,7 +341,7 @@ NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
nsACString& output) {
// protect against bogus input
NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
NS_ConvertUTF8toUTF16 inUTF16(input);
normalizeFullStops(inUTF16);
@@ -419,7 +421,7 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
bool isACE;
IsACE(input, &isACE);
if (IsASCII(input)) {
if (IsAscii(input)) {
// first, canonicalize the host to lowercase, for whitelist lookup
_retval = input;
ToLowerCase(_retval);
@@ -433,7 +435,7 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
ACEtoUTF8(
temp, _retval,
isInWhitelist(temp) ? eStringPrepIgnoreErrors : eStringPrepForUI);
*_isASCII = IsASCII(_retval);
*_isASCII = IsAscii(_retval);
} else {
*_isASCII = true;
}
@@ -463,13 +465,13 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
// the host is converted to ACE by the normalizer, then the host may contain
// unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
// and bug 309311.
*_isASCII = IsASCII(_retval);
*_isASCII = IsAscii(_retval);
if (!*_isASCII && !isInWhitelist(_retval)) {
// UTF8toACE with eStringPrepForUI may return a domain name where
// some labels are in UTF-8 and some are in ACE, depending on
// whether they are considered safe for display
rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
*_isASCII = IsASCII(_retval);
*_isASCII = IsAscii(_retval);
return rv;
}
}
@@ -567,7 +569,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
return NS_ERROR_MALFORMED_URI;
}
if (IsASCII(in)) {
if (IsAscii(in)) {
LossyCopyUTF16toASCII(in, out);
return NS_OK;
}
@@ -578,7 +580,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
NS_ENSURE_SUCCESS(rv, rv);
}
if (IsASCII(strPrep)) {
if (IsAscii(strPrep)) {
LossyCopyUTF16toASCII(strPrep, out);
return NS_OK;
}
@@ -673,7 +675,7 @@ bool nsIDNService::isInWhitelist(const nsACString& host) {
nsAutoCString tld(host);
// make sure the host is ACE for lookup and check that there are no
// unassigned codepoints
if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
if (!IsAscii(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
return false;
}
@@ -702,7 +704,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
}
// We should never get here if the label is ASCII
NS_ASSERTION(!IsASCII(label), "ASCII label in IDN checking");
NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
if (mRestrictionProfile == eASCIIOnlyProfile) {
return false;
}

View File

@@ -20,8 +20,12 @@
#include "nsNativeCharsetUtils.h"
#include "nsError.h"
#include "mozilla/Encoding.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
using mozilla::Encoding;
using mozilla::IsAscii;
using mozilla::IsUtf8;
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
@@ -64,7 +68,7 @@ static nsresult ConvertStringToUTF8(const nsACString& aString,
// check is requested. It may not be asked for if a caller suspects
// that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
// it's in a charset other than UTF-8 that can be mistaken for UTF-8.
if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
if (!aSkipCheck && (IsAscii(aString) || IsUtf8(aString))) {
aUTF8String = aString;
return NS_OK;
}
@@ -77,7 +81,7 @@ static nsresult ConvertStringToUTF8(const nsACString& aString,
// is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
// was wrong.) We don't check ASCIIness assuming there's no charset
// incompatible with ASCII (we don't support EBCDIC).
if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
if (aSkipCheck && NS_FAILED(rv) && IsUtf8(aString)) {
aUTF8String = aString;
return NS_OK;
}
@@ -160,7 +164,7 @@ nsresult nsMIMEHeaderParamImpl::DoGetParameter(
}
}
if (IsUTF8(str1)) {
if (IsUtf8(str1)) {
CopyUTF8toUTF16(str1, aResult);
return NS_OK;
}
@@ -741,7 +745,7 @@ nsresult internalDecodeRFC2047Header(const char* aHeaderVal,
// to UTF-8. Otherwise, just strips away CRLF.
if (PL_strstr(aHeaderVal, "=?") ||
(!aDefaultCharset.IsEmpty() &&
(!IsUTF8(nsDependentCString(aHeaderVal)) ||
(!IsUtf8(nsDependentCString(aHeaderVal)) ||
Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
} else if (aEatContinuations &&

View File

@@ -12,6 +12,7 @@
#include "mozilla/Attributes.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Utf8.h"
#include "mozilla/net/WebSocketEventService.h"
#include "nsIURI.h"
@@ -1654,7 +1655,7 @@ nsresult WebSocketChannel::ProcessInput(uint8_t* buffer, uint32_t count) {
}
// Section 8.1 says to fail connection if invalid utf-8 in text message
if (!IsUTF8(utf8Data)) {
if (!IsUtf8(utf8Data)) {
LOG(("WebSocketChannel:: text frame invalid utf-8\n"));
return NS_ERROR_CANNOT_CONVERT_DATA;
}
@@ -1703,7 +1704,7 @@ nsresult WebSocketChannel::ProcessInput(uint8_t* buffer, uint32_t count) {
// (which are non-conformant to send) with u+fffd,
// but secteam feels that silently rewriting messages is
// inappropriate - so we will fail the connection instead.
if (!IsUTF8(mServerCloseReason)) {
if (!IsUtf8(mServerCloseReason)) {
LOG(("WebSocketChannel:: close frame invalid utf-8\n"));
return NS_ERROR_CANNOT_CONVERT_DATA;
}

View File

@@ -4,6 +4,7 @@
#include <regex>
#include "json/json.h"
#include "json/reader.h"
#include "mozilla/TextUtils.h"
#include "mozilla/net/MozURL.h"
#include "nsCOMPtr.h"
#include "nsDirectoryServiceDefs.h"
@@ -252,7 +253,7 @@ bool OriginMatchesExpectedOrigin(const nsACString& aOrigin,
}
bool IsUUID(const nsACString& aString) {
if (!IsASCII(aString)) {
if (!IsAscii(aString)) {
return false;
}

View File

@@ -150,7 +150,7 @@ nsHtml5String nsHtml5String::FromLiteral(const char* aLiteral) {
MOZ_CRASH("Out of memory.");
}
char16_t* data = reinterpret_cast<char16_t*>(buffer->Data());
ConvertLatin1toUTF16(MakeSpan(aLiteral, length), MakeSpan(data, length));
ConvertAsciitoUtf16(MakeSpan(aLiteral, length), MakeSpan(data, length));
data[length] = 0;
return nsHtml5String(reinterpret_cast<uintptr_t>(buffer.forget().take()) |
eStringBuffer);

View File

@@ -11,6 +11,7 @@
#include "SharedSSLState.h"
#include "mozilla/Assertions.h"
#include "mozilla/Telemetry.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Unused.h"
#include "nsAppDirectoryServiceDefs.h"
#include "nsCRT.h"
@@ -336,7 +337,7 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName,
uint32_t aOverrideBits,
bool aTemporary) {
NS_ENSURE_ARG_POINTER(aCert);
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
return NS_ERROR_INVALID_ARG;
}
if (aPort < -1) return NS_ERROR_INVALID_ARG;
@@ -392,7 +393,7 @@ nsCertOverrideService::RememberTemporaryValidityOverrideUsingFingerprint(
const nsACString& aHostName, int32_t aPort,
const nsACString& aCertFingerprint, uint32_t aOverrideBits) {
if (aCertFingerprint.IsEmpty() || aHostName.IsEmpty() ||
!IsASCII(aCertFingerprint) || !IsASCII(aHostName) || (aPort < -1)) {
!IsAscii(aCertFingerprint) || !IsAscii(aHostName) || (aPort < -1)) {
return NS_ERROR_INVALID_ARG;
}
@@ -427,7 +428,7 @@ nsCertOverrideService::HasMatchingOverride(const nsACString& aHostName,
return NS_OK;
}
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
return NS_ERROR_INVALID_ARG;
}
if (aPort < -1) return NS_ERROR_INVALID_ARG;
@@ -501,7 +502,7 @@ nsresult nsCertOverrideService::AddEntryToList(
NS_IMETHODIMP
nsCertOverrideService::ClearValidityOverride(const nsACString& aHostName,
int32_t aPort) {
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
return NS_ERROR_INVALID_ARG;
}
if (!NS_IsMainThread()) {

View File

@@ -13,6 +13,7 @@
#include "mozilla/NotNull.h"
#include "mozilla/Sprintf.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Utf8.h"
#include "nsCOMPtr.h"
#include "nsIStringBundle.h"
#include "nsNSSASN1Object.h"
@@ -743,7 +744,7 @@ static nsresult ProcessExtKeyUsage(SECItem* extData, nsAString& text) {
void LossyUTF8ToUTF16(const char* str, uint32_t len,
/*out*/ nsAString& result) {
auto span = MakeSpan(str, len);
if (IsUTF8(span)) {
if (IsUtf8(span)) {
CopyUTF8toUTF16(span, result);
} else {
// Actually Latin1 despite ASCII in the legacy name

View File

@@ -16,6 +16,7 @@
#include "mozilla/ipc/TransportSecurityInfoUtils.h"
#include "mozilla/NotNull.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Unused.h"
#include "nsArray.h"
#include "nsCOMPtr.h"
@@ -556,7 +557,7 @@ void nsNSSCertificate::GetSubjectAltNames() {
current->name.other.len);
// dNSName fields are defined as type IA5String and thus should
// be limited to ASCII characters.
if (IsASCII(nameFromCert)) {
if (IsAscii(nameFromCert)) {
name.Assign(NS_ConvertASCIItoUTF16(nameFromCert));
mSubjectAltNames.push_back(name);
}

View File

@@ -18,6 +18,7 @@
#include "nsPrintfCString.h"
#include "nsNavHistory.h"
#include "mozilla/Likely.h"
#include "mozilla/Utf8.h"
#include "nsVariant.h"
// Maximum number of chars to search through.
@@ -373,7 +374,7 @@ nsDependentCSubstring MatchAutoCompleteFunction::fixupURISpec(
// Otherwise, we will simply use our original string.
bool unescaped = NS_UnescapeURL(aURISpec.BeginReading(), aURISpec.Length(),
esc_SkipControl, aSpecBuf);
if (unescaped && IsUTF8(aSpecBuf)) {
if (unescaped && IsUtf8(aSpecBuf)) {
fixedSpec.Rebind(aSpecBuf, 0);
} else {
fixedSpec.Rebind(aURISpec, 0);

View File

@@ -24,6 +24,7 @@
#include "mozilla/ScopeExit.h"
#include "mozilla/Services.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Utf8.h"
#include "mozilla/intl/LocaleService.h"
#include "mozilla/recordreplay/ParentIPC.h"
#include "mozilla/JSONWriter.h"
@@ -4391,7 +4392,7 @@ nsresult XREMain::XRE_mainRun() {
nsAutoCString path;
rv = mDirProvider.GetProfileStartupDir(getter_AddRefs(profileDir));
if (NS_SUCCEEDED(rv) && NS_SUCCEEDED(profileDir->GetNativePath(path)) &&
!IsUTF8(path)) {
!IsUtf8(path)) {
PR_fprintf(
PR_STDERR,
"Error: The profile path is not valid UTF-8. Unable to continue.\n");

View File

@@ -9,6 +9,7 @@
#include "nsISupportsUtils.h"
#include "nsStringEnumerator.h"
#include "nsNetUtil.h"
#include "mozilla/Utf8.h"
using namespace mozilla;
@@ -55,7 +56,7 @@ bool nsMIMEInfoAndroid::GetMimeInfoForMimeType(const nsACString& aMimeType,
nsIHandlerApp* systemDefault = nullptr;
if (!IsUTF8(aMimeType)) return false;
if (!IsUtf8(aMimeType)) return false;
NS_ConvertUTF8toUTF16 mimeType(aMimeType);

View File

@@ -7,6 +7,7 @@
#include "mozilla/ArrayUtils.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/UniquePtrExtensions.h"
#include "mozilla/Utf8.h"
#include "nsCOMPtr.h"
#include "nsAutoPtr.h"
@@ -2953,7 +2954,7 @@ nsLocalFile::GetPersistentDescriptor(nsACString& aPersistentDescriptor) {
NS_IMETHODIMP
nsLocalFile::SetPersistentDescriptor(const nsACString& aPersistentDescriptor) {
if (IsUTF8(aPersistentDescriptor)) {
if (IsUtf8(aPersistentDescriptor)) {
return InitWithPath(NS_ConvertUTF8toUTF16(aPersistentDescriptor));
} else {
return InitWithNativePath(aPersistentDescriptor);

View File

@@ -41,7 +41,7 @@ char* ToNewCString(const nsAString& aSource) {
}
auto len = aSource.Length();
LossyConvertUTF16toLatin1(aSource, MakeSpan(dest, len));
LossyConvertUtf16toLatin1(aSource, MakeSpan(dest, len));
dest[len] = 0;
return dest;
}
@@ -64,7 +64,7 @@ char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
size_t destLenVal = destLen.value();
char* dest = static_cast<char*>(moz_xmalloc(destLenVal));
size_t written = ConvertUTF16toUTF8(aSource, MakeSpan(dest, destLenVal));
size_t written = ConvertUtf16toUtf8(aSource, MakeSpan(dest, destLenVal));
dest[written] = 0;
if (aUTF8Count) {
@@ -111,7 +111,7 @@ char16_t* ToNewUnicode(const nsACString& aSource) {
}
auto len = aSource.Length();
ConvertLatin1toUTF16(aSource, MakeSpan(dest, len));
ConvertLatin1toUtf16(aSource, MakeSpan(dest, len));
dest[len] = 0;
return dest;
}
@@ -132,7 +132,7 @@ char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
char16_t* dest = (char16_t*)moz_xmalloc(allocLength.value());
size_t written = ConvertUTF8toUTF16(aSource, MakeSpan(dest, lengthPlusOne));
size_t written = ConvertUtf8toUtf16(aSource, MakeSpan(dest, lengthPlusOne));
dest[written] = 0;
if (aUTF16Count) {

View File

@@ -15,18 +15,10 @@
#include "mozilla/Assertions.h"
#include "nsAString.h"
#include "mozilla/Tuple.h"
#include "encoding_rs_mem.h"
#include "mozilla/TextUtils.h"
#include "nsTArrayForwardDeclare.h"
// Can't include mozilla/Encoding.h here. The implementation is in
// the encoding_rs crate.
extern "C" {
// Declared as uint8_t instead of char to match declaration in another header.
size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
}
// From the nsstring crate
extern "C" {
bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther,
@@ -55,95 +47,6 @@ bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
size_t aOldLen);
}
/**
* If all the code points in the input are below U+0100, converts to Latin1,
* i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
* there are code points above U+00FF, produces garbage in a memory-safe way
* and will likely start asserting in future debug builds. The nature of the
* garbage depends on the CPU architecture and must not be relied upon.
*
* The length of aDest must be not be less than the length of aSource.
*/
inline void LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource,
mozilla::Span<char> aDest) {
encoding_mem_convert_utf16_to_latin1_lossy(
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
}
/**
* If all the code points in the input are below U+0100, converts to Latin1,
* i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
* there are code points above U+00FF, asserts in debug builds and produces
* garbage in memory-safe way in release builds. The nature of the garbage
* may depend on the CPU architecture and must not be relied upon.
*
* The length of aDest must be not be less than the length of aSource.
*/
inline size_t LossyConvertUTF8toLatin1(mozilla::Span<const char> aSource,
mozilla::Span<char> aDest) {
return encoding_mem_convert_utf8_to_latin1_lossy(
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
}
/**
* Interprets unsigned byte value as Unicode scalar value (i.e. not
* windows-1252!).
*
* The length of aDest must be not be less than the length of aSource.
*/
inline void ConvertLatin1toUTF16(mozilla::Span<const char> aSource,
mozilla::Span<char16_t> aDest) {
encoding_mem_convert_latin1_to_utf16(aSource.Elements(), aSource.Length(),
aDest.Elements(), aDest.Length());
}
/**
* Lone surrogates are replaced with the REPLACEMENT CHARACTER.
*
* The length of aDest must be at least the length of aSource times three.
*
* Returns the number of code units written.
*/
inline size_t ConvertUTF16toUTF8(mozilla::Span<const char16_t> aSource,
mozilla::Span<char> aDest) {
return encoding_mem_convert_utf16_to_utf8(
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
}
/**
* Lone surrogates are replaced with the REPLACEMENT CHARACTER.
*
* The conversion is guaranteed to be complete if the length of aDest is
* at least the length of aSource times three.
*
* The output is always valid UTF-8 ending on scalar value boundary
* even in the case of partial conversion.
*
* Returns the number of code units read and the number of code
* units written.
*/
inline mozilla::Tuple<size_t, size_t> ConvertUTF16toUTF8Partial(
mozilla::Span<const char16_t> aSource, mozilla::Span<char> aDest) {
size_t srcLen = aSource.Length();
size_t dstLen = aDest.Length();
encoding_mem_convert_utf16_to_utf8_partial(aSource.Elements(), &srcLen,
aDest.Elements(), &dstLen);
return mozilla::MakeTuple(srcLen, dstLen);
}
/**
* Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
*
* The length of aDest must at least one greater than the length of aSource.
*
* Returns the number of code units written.
*/
inline size_t ConvertUTF8toUTF16(mozilla::Span<const char> aSource,
mozilla::Span<char16_t> aDest) {
return encoding_mem_convert_utf8_to_utf16(
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
}
inline size_t Distance(const nsReadingIterator<char16_t>& aStart,
const nsReadingIterator<char16_t>& aEnd) {
MOZ_ASSERT(aStart.get() <= aEnd.get());
@@ -156,6 +59,9 @@ inline size_t Distance(const nsReadingIterator<char>& aStart,
return static_cast<size_t>(aEnd.get() - aStart.get());
}
// NOTE: Operations that don't need an operand to be an XPCOM string
// are in mozilla/TextUtils.h and mozilla/Utf8.h.
// UTF-8 to UTF-16
// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
@@ -473,116 +379,6 @@ char16_t* UTF8ToNewUnicode(const nsACString& aSource,
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
char16_t* aDest, uint32_t aLength);
/**
* Returns |true| if |aString| contains only ASCII characters, that is,
* characters in the range (0x00, 0x7F).
*
* @param aString a 16-bit wide string to scan
*/
inline bool IsASCII(mozilla::Span<const char16_t> aString) {
return encoding_mem_is_basic_latin(aString.Elements(), aString.Length());
}
/**
* Returns |true| if |aString| contains only ASCII characters, that is,
* characters in the range (0x00, 0x7F).
*
* @param aString a 8-bit wide string to scan
*/
inline bool IsASCII(mozilla::Span<const char> aString) {
size_t length = aString.Length();
const char* ptr = aString.Elements();
// For short strings, avoid the function call, since, the SIMD
// code won't have a chance to kick in anyway.
if (length < 16) {
const uint8_t* uptr = reinterpret_cast<const uint8_t*>(ptr);
uint8_t accu = 0;
for (size_t i = 0; i < length; i++) {
accu |= uptr[i];
}
return accu < 0x80U;
}
return encoding_mem_is_ascii(ptr, length);
}
/**
* Returns |true| if |aString| contains only Latin1 characters, that is,
* characters in the range (U+0000, U+00FF).
*
* @param aString a potentially-invalid UTF-16 string to scan
*/
inline bool IsUTF16Latin1(mozilla::Span<const char16_t> aString) {
return encoding_mem_is_utf16_latin1(aString.Elements(), aString.Length());
}
/**
* Returns |true| if |aString| contains only Latin1 characters, that is,
* characters in the range (U+0000, U+00FF).
*
* If you know that the argument is always absolutely guaranteed to be valid
* UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead.
*
* @param aString potentially-invalid UTF-8 string to scan
*/
inline bool IsUTF8Latin1(mozilla::Span<const char> aString) {
return encoding_mem_is_utf8_latin1(aString.Elements(), aString.Length());
}
/**
* Returns |true| if |aString| contains only Latin1 characters, that is,
* characters in the range (U+0000, U+00FF).
*
* The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1
* instead!
*
* @param aString known-valid UTF-8 string to scan
*/
inline bool UnsafeIsValidUTF8Latin1(mozilla::Span<const char> aString) {
return encoding_mem_is_str_latin1(aString.Elements(), aString.Length());
}
/**
* Returns |true| if |aString| is a valid UTF-8 string.
*
* Note that this doesn't check whether the string might look like a valid
* string in another encoding, too, e.g. ISO-2022-JP.
*
* @param aString an 8-bit wide string to scan
*/
inline bool IsUTF8(mozilla::Span<const char> aString) {
size_t length = aString.Length();
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
// For short strings, the function call is a pessimization, and the SIMD
// code won't have a chance to kick in anyway.
if (length < 16) {
for (size_t i = 0; i < length; i++) {
if (ptr[i] >= 0x80U) {
ptr += i;
length -= i;
goto end;
}
}
return true;
}
end:
return length == encoding_utf8_valid_up_to(ptr, length);
}
/**
* Returns the index of the first unpaired surrogate or
* the length of the string if there are none.
*/
inline uint32_t UTF16ValidUpTo(mozilla::Span<const char16_t> aString) {
return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
}
/**
* Replaces unpaired surrogates with U+FFFD in the argument.
*/
inline void EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString) {
encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
}
/**
* Replaces unpaired surrogates with U+FFFD in the argument.
*
@@ -590,7 +386,7 @@ inline void EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString) {
* buffer only if there are unpaired surrogates.
*/
inline void EnsureUTF16Validity(nsAString& aString) {
// NOTE(review): the two |upTo| declarations below look like the old and new
// sides of one changed line in a diff whose +/- markers were lost (the callee
// is renamed to mozilla::Utf16ValidUpTo) — confirm only one is kept.
uint32_t upTo = UTF16ValidUpTo(aString);
uint32_t upTo = mozilla::Utf16ValidUpTo(aString);
uint32_t len = aString.Length();
// |upTo| equal to the length means no unpaired surrogate was found, so the
// string is left untouched and BeginWriting() below is never reached.
if (upTo == len) {
return;
@@ -598,7 +394,7 @@ inline void EnsureUTF16Validity(nsAString& aString) {
char16_t* ptr = aString.BeginWriting();
auto span = mozilla::MakeSpan(ptr, len);
// The code unit at |upTo| is the first unpaired surrogate: replace it with
// U+FFFD here, then repair only the remainder after it.
span[upTo] = 0xFFFD;
// NOTE(review): as above, these two calls look like the old/new pair of a
// single renamed call — confirm only one is kept.
EnsureUTF16ValiditySpan(span.From(upTo + 1));
mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1));
}
bool ParseString(const nsACString& aAstring, char aDelimiter,

View File

@@ -12,7 +12,9 @@
#include "nsReadableUtils.h"
#include "nsCRTGlue.h"
#include "mozilla/RefPtr.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Unused.h"
#include "mozilla/Utf8.h"
#include "nsTArray.h"
#include "gtest/gtest.h"
#include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH
@@ -2025,56 +2027,56 @@ MOZ_GTEST_BENCH_F(Strings, PerfStripCharsCRLF, [this] {
// Benchmark: runs the UTF-8 validity check on mAsciiOneUtf8 200000 times.
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8One, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsUTF8) and
// new (IsUtf8) sides of one changed diff line — confirm only one is kept.
bool b = IsUTF8(*BlackBox(&mAsciiOneUtf8));
bool b = IsUtf8(*BlackBox(&mAsciiOneUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the UTF-8 validity check on mAsciiFifteenUtf8 200000 times.
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Fifteen, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsUTF8) and
// new (IsUtf8) sides of one changed diff line — confirm only one is kept.
bool b = IsUTF8(*BlackBox(&mAsciiFifteenUtf8));
bool b = IsUtf8(*BlackBox(&mAsciiFifteenUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the UTF-8 validity check on mAsciiHundredUtf8 200000 times.
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Hundred, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsUTF8) and
// new (IsUtf8) sides of one changed diff line — confirm only one is kept.
bool b = IsUTF8(*BlackBox(&mAsciiHundredUtf8));
bool b = IsUtf8(*BlackBox(&mAsciiHundredUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the UTF-8 validity check on mExample3Utf8 100000 times
// (half the iteration count used by the ASCII-fixture benchmarks).
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Example3, [this] {
for (int i = 0; i < 100000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsUTF8) and
// new (IsUtf8) sides of one changed diff line — confirm only one is kept.
bool b = IsUTF8(*BlackBox(&mExample3Utf8));
bool b = IsUtf8(*BlackBox(&mExample3Utf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the ASCII check on mAsciiOneUtf8 200000 times.
// NOTE(review): the name "PerfIsASCII8One" carries an extra "8" compared with
// the sibling PerfIsASCIIFifteen/Hundred/Example3 names — possibly a typo.
MOZ_GTEST_BENCH_F(Strings, PerfIsASCII8One, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsASCII) and
// new (IsAscii) sides of one changed diff line — confirm only one is kept.
bool b = IsASCII(*BlackBox(&mAsciiOneUtf8));
bool b = IsAscii(*BlackBox(&mAsciiOneUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the ASCII check on mAsciiFifteenUtf8 200000 times.
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIFifteen, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsASCII) and
// new (IsAscii) sides of one changed diff line — confirm only one is kept.
bool b = IsASCII(*BlackBox(&mAsciiFifteenUtf8));
bool b = IsAscii(*BlackBox(&mAsciiFifteenUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the ASCII check on mAsciiHundredUtf8 200000 times.
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIHundred, [this] {
for (int i = 0; i < 200000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsASCII) and
// new (IsAscii) sides of one changed diff line — confirm only one is kept.
bool b = IsASCII(*BlackBox(&mAsciiHundredUtf8));
bool b = IsAscii(*BlackBox(&mAsciiHundredUtf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});
// Benchmark: runs the ASCII check on mExample3Utf8 100000 times (half the
// iteration count used by the ASCII-fixture benchmarks).
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIExample3, [this] {
for (int i = 0; i < 100000; i++) {
// NOTE(review): |b| is declared twice; this looks like the old (IsASCII) and
// new (IsAscii) sides of one changed diff line — confirm only one is kept.
bool b = IsASCII(*BlackBox(&mExample3Utf8));
bool b = IsAscii(*BlackBox(&mExample3Utf8));
// Presumably keeps the result observable so the call is not optimized
// away — confirm against BlackBox's definition.
BlackBox(&b);
}
});