Bug 1490601 part 2 - Move C++ entry points to encoding_c_mem to mfbt/. r=jwalden
Differential Revision: https://phabricator.services.mozilla.com/D43957
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
#include "mozilla/dom/Document.h"
|
||||
#include "mozilla/Logging.h"
|
||||
#include "mozilla/StaticPtr.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "nsGlobalWindowOuter.h"
|
||||
|
||||
@@ -433,7 +434,7 @@ NS_IMETHODIMP
|
||||
ThirdPartyUtil::GetBaseDomainFromSchemeHost(const nsACString& aScheme,
|
||||
const nsACString& aAsciiHost,
|
||||
nsACString& aBaseDomain) {
|
||||
MOZ_DIAGNOSTIC_ASSERT(IsASCII(aAsciiHost));
|
||||
MOZ_DIAGNOSTIC_ASSERT(IsAscii(aAsciiHost));
|
||||
|
||||
// Get the base domain. this will fail if the host contains a leading dot,
|
||||
// more than one trailing dot, or is otherwise malformed.
|
||||
|
||||
@@ -8038,7 +8038,7 @@ class BulkAppender {
|
||||
void Append(Span<const char> aStr) {
|
||||
size_t len = aStr.Length();
|
||||
MOZ_ASSERT(mPosition + len <= mHandle.Length());
|
||||
ConvertLatin1toUTF16(aStr, mHandle.AsSpan().From(mPosition));
|
||||
ConvertLatin1toUtf16(aStr, mHandle.AsSpan().From(mPosition));
|
||||
mPosition += len;
|
||||
}
|
||||
|
||||
|
||||
@@ -314,7 +314,7 @@ bool nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength,
|
||||
}
|
||||
|
||||
// Copy data
|
||||
LossyConvertUTF16toLatin1(MakeSpan(aBuffer, aLength),
|
||||
LossyConvertUtf16toLatin1(MakeSpan(aBuffer, aLength),
|
||||
MakeSpan(buff, aLength));
|
||||
m1b = buff;
|
||||
mState.mIs2b = false;
|
||||
@@ -344,7 +344,7 @@ void nsTextFragment::CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount) {
|
||||
memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
|
||||
} else {
|
||||
const char* cp = m1b + aOffset;
|
||||
ConvertLatin1toUTF16(MakeSpan(cp, aCount), MakeSpan(aDest, aCount));
|
||||
ConvertLatin1toUtf16(MakeSpan(cp, aCount), MakeSpan(aDest, aCount));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -429,7 +429,7 @@ bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
|
||||
|
||||
// Copy data into buff
|
||||
char16_t* data = static_cast<char16_t*>(buff->Data());
|
||||
ConvertLatin1toUTF16(MakeSpan(m1b, mState.mLength),
|
||||
ConvertLatin1toUtf16(MakeSpan(m1b, mState.mLength),
|
||||
MakeSpan(data, mState.mLength));
|
||||
|
||||
memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
|
||||
@@ -471,7 +471,7 @@ bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
|
||||
}
|
||||
|
||||
// Copy aBuffer into buff.
|
||||
LossyConvertUTF16toLatin1(MakeSpan(aBuffer, aLength),
|
||||
LossyConvertUtf16toLatin1(MakeSpan(aBuffer, aLength),
|
||||
MakeSpan(buff + mState.mLength, aLength));
|
||||
|
||||
m1b = buff;
|
||||
|
||||
@@ -2654,7 +2654,7 @@ bool NonVoidByteStringToJsval(JSContext* cx, const nsACString& str,
|
||||
void NormalizeUSVString(nsAString& aString) { EnsureUTF16Validity(aString); }
|
||||
|
||||
void NormalizeUSVString(binding_detail::FakeString& aString) {
|
||||
EnsureUTF16ValiditySpan(aString);
|
||||
EnsureUtf16ValiditySpan(aString);
|
||||
}
|
||||
|
||||
bool ConvertJSValueToByteString(JSContext* cx, JS::Handle<JS::Value> v,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "jsapi.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "mozilla/dom/CryptoBuffer.h"
|
||||
#include "mozilla/dom/CryptoKey.h"
|
||||
#include "mozilla/dom/KeyAlgorithmProxy.h"
|
||||
@@ -1365,7 +1366,7 @@ class ImportKeyTask : public WebCryptoTask {
|
||||
nsDependentCSubstring utf8(
|
||||
(const char*)mKeyData.Elements(),
|
||||
(const char*)(mKeyData.Elements() + mKeyData.Length()));
|
||||
if (!IsUTF8(utf8)) {
|
||||
if (!IsUtf8(utf8)) {
|
||||
mEarlyRv = NS_ERROR_DOM_DATA_ERR;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ void TextEncoder::Encode(JSContext* aCx, JS::Handle<JSObject*> aObj,
|
||||
return;
|
||||
}
|
||||
|
||||
size_t utf8Len = ConvertUTF16toUTF8(
|
||||
size_t utf8Len = ConvertUtf16toUtf8(
|
||||
aString, MakeSpan(reinterpret_cast<char*>(data.get()), bufLen.value()));
|
||||
MOZ_ASSERT(utf8Len <= bufLen.value());
|
||||
|
||||
@@ -53,7 +53,7 @@ void TextEncoder::EncodeInto(const nsAString& aSrc, const Uint8Array& aDst,
|
||||
aDst.ComputeLengthAndData();
|
||||
size_t read;
|
||||
size_t written;
|
||||
Tie(read, written) = ConvertUTF16toUTF8Partial(
|
||||
Tie(read, written) = ConvertUtf16toUtf8Partial(
|
||||
aSrc, MakeSpan(reinterpret_cast<char*>(aDst.Data()), aDst.Length()));
|
||||
aResult.mRead.Construct() = read;
|
||||
aResult.mWritten.Construct() = written;
|
||||
|
||||
@@ -246,7 +246,7 @@ namespace {
|
||||
void PopulateBufferForBinaryString(char16_t* aDest, const char* aSource,
|
||||
uint32_t aCount) {
|
||||
// Zero-extend each char to char16_t.
|
||||
ConvertLatin1toUTF16(MakeSpan(aSource, aCount), MakeSpan(aDest, aCount));
|
||||
ConvertLatin1toUtf16(MakeSpan(aSource, aCount), MakeSpan(aDest, aCount));
|
||||
}
|
||||
|
||||
nsresult ReadFuncBinaryString(nsIInputStream* aInputStream, void* aClosure,
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "mozilla/dom/WheelEventBinding.h"
|
||||
#include "mozilla/PresShell.h"
|
||||
#include "mozilla/StaticPrefs_dom.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "nsAttrValueInlines.h"
|
||||
#include "nsCRTGlue.h"
|
||||
#include "nsQueryObject.h"
|
||||
@@ -1540,7 +1541,7 @@ int32_t HTMLInputElement::MonthsSinceJan1970(uint32_t aYear,
|
||||
|
||||
/* static */
|
||||
Decimal HTMLInputElement::StringToDecimal(const nsAString& aValue) {
|
||||
if (!IsASCII(aValue)) {
|
||||
if (!IsAscii(aValue)) {
|
||||
return Decimal::nan();
|
||||
}
|
||||
NS_LossyConvertUTF16toASCII asciiString(aValue);
|
||||
|
||||
@@ -52,8 +52,10 @@
|
||||
|
||||
#include "mozilla/LoadInfo.h"
|
||||
#include "mozilla/Maybe.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/ipc/URIUtils.h"
|
||||
|
||||
using mozilla::IsAscii;
|
||||
using mozilla::dom::AutoEntryScript;
|
||||
|
||||
static NS_DEFINE_CID(kJSURICID, NS_JSURI_CID);
|
||||
@@ -1078,7 +1080,7 @@ nsresult nsJSProtocolHandler::Create(nsISupports* aOuter, REFNSIID aIID,
|
||||
uStr);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
if (!IsASCII(uStr)) {
|
||||
if (!IsAscii(uStr)) {
|
||||
rv = NS_EscapeURL(NS_ConvertUTF16toUTF8(uStr),
|
||||
esc_AlwaysCopy | esc_OnlyNonASCII, aUTF8Spec,
|
||||
mozilla::fallible);
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "mozilla/EndianUtils.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <opus/opus.h>
|
||||
@@ -109,7 +111,7 @@ bool OggCodecState::AddVorbisComment(UniquePtr<MetadataTags>& aTags,
|
||||
}
|
||||
uint32_t valueLength = aLength - (div - aComment);
|
||||
nsCString value = nsCString(div + 1, valueLength);
|
||||
if (!IsUTF8(value)) {
|
||||
if (!IsUtf8(value)) {
|
||||
LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value"));
|
||||
return false;
|
||||
}
|
||||
@@ -1598,7 +1600,7 @@ bool SkeletonState::DecodeFisbone(ogg_packet* aPacket) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((i == 0 && IsASCII(strMsg)) || (i != 0 && IsUTF8(strMsg))) {
|
||||
if ((i == 0 && IsAscii(strMsg)) || (i != 0 && IsUtf8(strMsg))) {
|
||||
EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType;
|
||||
field->mValuesStore.LookupForAdd(eHeaderType)
|
||||
.OrInsert([i, msgHead, msgProbe]() {
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/EndianUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "BufferReader.h"
|
||||
#include "VideoUtils.h"
|
||||
#include "TimeUnits.h"
|
||||
@@ -229,7 +230,7 @@ bool WAVTrackDemuxer::ListChunkParserInit(uint32_t aChunkSize) {
|
||||
|
||||
bytesRead += length;
|
||||
|
||||
if (!IsUTF8(val)) {
|
||||
if (!IsUtf8(val)) {
|
||||
mHeaderParser.Reset();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "mozilla/LoadInfo.h"
|
||||
#include "mozilla/plugins/PluginBridge.h"
|
||||
#include "mozilla/plugins/PluginTypes.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Preferences.h"
|
||||
#include "mozilla/ipc/URIUtils.h"
|
||||
|
||||
@@ -1375,7 +1376,7 @@ nsresult nsPluginHost::GetPlugin(const nsACString& aMimeType,
|
||||
|
||||
// Normalize 'host' to ACE.
|
||||
nsresult nsPluginHost::NormalizeHostname(nsCString& host) {
|
||||
if (IsASCII(host)) {
|
||||
if (IsAscii(host)) {
|
||||
ToLowerCase(host);
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -22,9 +22,11 @@
|
||||
#include "nsJSPrincipals.h"
|
||||
#include "nsIScriptError.h"
|
||||
#include "js/Wrapper.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
extern mozilla::LazyLogModule MCD;
|
||||
using mozilla::AutoSafeJSContext;
|
||||
using mozilla::IsUtf8;
|
||||
using mozilla::NullPrincipal;
|
||||
using mozilla::dom::AutoJSAPI;
|
||||
|
||||
@@ -138,7 +140,7 @@ nsresult EvaluateAdminConfigScript(JS::HandleObject sandbox,
|
||||
JS::RootedValue v(cx);
|
||||
|
||||
nsString convertedScript;
|
||||
bool isUTF8 = IsUTF8(script);
|
||||
bool isUTF8 = IsUtf8(script);
|
||||
if (isUTF8) {
|
||||
convertedScript = NS_ConvertUTF8toUTF16(script);
|
||||
} else {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "mozilla/Logging.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Sprintf.h"
|
||||
|
||||
#include "gfxGDIFontList.h"
|
||||
@@ -638,7 +639,7 @@ int CALLBACK gfxGDIFontList::EnumFontFamExProc(ENUMLOGFONTEXW* lpelfe,
|
||||
// GDI, then if a family name is non-ASCII immediately read in other
|
||||
// family names. This assures that MS Gothic, MS Mincho are all found
|
||||
// before lookups begin.
|
||||
if (!IsASCII(faceName)) {
|
||||
if (!IsAscii(faceName)) {
|
||||
family->ReadOtherFamilyNames(gfxPlatformFontList::PlatformFontList());
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "mozilla/gfx/2D.h"
|
||||
#include "mozilla/ipc/FileDescriptorUtils.h"
|
||||
#include "mozilla/ResultExtensions.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Unused.h"
|
||||
|
||||
#include "base/eintr_wrapper.h"
|
||||
@@ -944,7 +945,7 @@ bool gfxPlatformFontList::FindAndAddFamilies(
|
||||
// since reading name table entries is expensive.
|
||||
// Although ASCII localized family names are possible they don't occur
|
||||
// in practice, so avoid pulling in names at startup.
|
||||
if (!mOtherFamilyNamesInitialized && !IsASCII(aFamily)) {
|
||||
if (!mOtherFamilyNamesInitialized && !IsAscii(aFamily)) {
|
||||
InitOtherFamilyNames(
|
||||
!(aFlags & FindFamiliesFlags::eForceOtherFamilyNamesLoading));
|
||||
family = SharedFontList()->FindFamily(key);
|
||||
@@ -981,7 +982,7 @@ bool gfxPlatformFontList::FindAndAddFamilies(
|
||||
// since reading name table entries is expensive.
|
||||
// although ASCII localized family names are possible they don't occur
|
||||
// in practice so avoid pulling in names at startup
|
||||
if (!familyEntry && !mOtherFamilyNamesInitialized && !IsASCII(aFamily)) {
|
||||
if (!familyEntry && !mOtherFamilyNamesInitialized && !IsAscii(aFamily)) {
|
||||
InitOtherFamilyNames(
|
||||
!(aFlags & FindFamiliesFlags::eForceOtherFamilyNamesLoading));
|
||||
familyEntry = mOtherFamilyNames.GetWeak(key);
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/intl/MozLocale.h"
|
||||
|
||||
#include "nsReadableUtils.h"
|
||||
@@ -11,13 +12,14 @@
|
||||
#include "unicode/uloc.h"
|
||||
|
||||
using namespace mozilla::intl;
|
||||
using mozilla::IsAscii;
|
||||
|
||||
/**
|
||||
* Note: The file name is `MozLocale` to avoid compilation problems on
|
||||
* case-insensitive Windows. The class name is `Locale`.
|
||||
*/
|
||||
Locale::Locale(const nsACString& aLocale) {
|
||||
if (aLocale.IsEmpty() || !IsASCII(aLocale)) {
|
||||
if (aLocale.IsEmpty() || !IsAscii(aLocale)) {
|
||||
mIsWellFormed = false;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -32,7 +32,6 @@ EXPORTS.mozilla += [
|
||||
EXPORTS += [
|
||||
'../third_party/rust/encoding_c/include/encoding_rs.h',
|
||||
'../third_party/rust/encoding_c/include/encoding_rs_statics.h',
|
||||
'../third_party/rust/encoding_c_mem/include/encoding_rs_mem.h',
|
||||
'../third_party/rust/shift_or_euc_c/include/shift_or_euc.h',
|
||||
]
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/Preferences.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "nsISupportsPrimitives.h"
|
||||
|
||||
using namespace mozilla;
|
||||
@@ -79,11 +81,11 @@ nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
|
||||
bool isStatefulCharset = statefulCharset(aCharset.get());
|
||||
|
||||
if (!isStatefulCharset) {
|
||||
if (IsASCII(aURI)) {
|
||||
if (IsAscii(aURI)) {
|
||||
CopyASCIItoUTF16(aURI, aOut);
|
||||
return NS_OK;
|
||||
}
|
||||
if (IsUTF8(aURI)) {
|
||||
if (IsUtf8(aURI)) {
|
||||
CopyUTF8toUTF16(aURI, aOut);
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -148,7 +150,7 @@ nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
|
||||
// leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
|
||||
// superset since converting "http:" with such an encoding is always a bad
|
||||
// idea.
|
||||
if (!IsUTF8(unescapedSpec) &&
|
||||
if (!IsUtf8(unescapedSpec) &&
|
||||
(aCharset.LowerCaseEqualsLiteral("utf-16") ||
|
||||
aCharset.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
aCharset.LowerCaseEqualsLiteral("utf-16le") ||
|
||||
|
||||
@@ -1763,11 +1763,13 @@ bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
|
||||
"should only see UTF-8 here");
|
||||
|
||||
bool simple = utf16WindowLength == encodedWindowLength;
|
||||
#ifdef DEBUG
|
||||
auto isAscii = [](Unit u) { return IsAscii(u); };
|
||||
MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
|
||||
IsAscii<Unit>) == simple,
|
||||
isAscii) == simple,
|
||||
"equal window lengths in UTF-8 should correspond only to "
|
||||
"wholly-ASCII text");
|
||||
|
||||
#endif
|
||||
if (simple) {
|
||||
err->tokenOffset = encodedTokenOffset;
|
||||
err->lineLength = encodedWindowLength;
|
||||
|
||||
@@ -1418,7 +1418,7 @@ class TokenStreamCharsShared {
|
||||
*/
|
||||
static constexpr MOZ_ALWAYS_INLINE MOZ_MUST_USE bool isAsciiCodePoint(
|
||||
int32_t unit) {
|
||||
return mozilla::IsAscii(unit);
|
||||
return mozilla::IsAscii(static_cast<char32_t>(unit));
|
||||
}
|
||||
|
||||
JSAtom* drainCharBufferIntoAtom(JSContext* cx) {
|
||||
|
||||
@@ -33,9 +33,11 @@ using namespace js;
|
||||
using namespace js::jit;
|
||||
using namespace js::wasm;
|
||||
|
||||
using mozilla::AsChars;
|
||||
using mozilla::CheckedInt;
|
||||
using mozilla::CheckedInt32;
|
||||
using mozilla::IsValidUtf8;
|
||||
using mozilla::IsUtf8;
|
||||
using mozilla::MakeSpan;
|
||||
using mozilla::Unused;
|
||||
|
||||
// Decoder implementation.
|
||||
@@ -1544,7 +1546,7 @@ static UniqueChars DecodeName(Decoder& d) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!IsValidUtf8(bytes, numBytes)) {
|
||||
if (!IsUtf8(AsChars(MakeSpan(bytes, numBytes)))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
@@ -261,7 +261,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
|
||||
// almost always ASCII, so the inexact allocations below
|
||||
// should be fine.
|
||||
|
||||
if (IsUTF8Latin1(*utf8String)) {
|
||||
if (IsUtf8Latin1(*utf8String)) {
|
||||
using UniqueLatin1Chars =
|
||||
js::UniquePtr<JS::Latin1Char[], JS::FreePolicy>;
|
||||
|
||||
@@ -271,7 +271,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t written = LossyConvertUTF8toLatin1(
|
||||
size_t written = LossyConvertUtf8toLatin1(
|
||||
*utf8String, MakeSpan(reinterpret_cast<char*>(buffer.get()), len));
|
||||
buffer[written] = 0;
|
||||
|
||||
@@ -310,7 +310,7 @@ bool XPCConvert::NativeData2JS(JSContext* cx, MutableHandleValue d,
|
||||
// code units in the source. That's why it's OK to claim the
|
||||
// output buffer has len + 1 space but then still expect to
|
||||
// have space for the zero terminator.
|
||||
size_t written = ConvertUTF8toUTF16(
|
||||
size_t written = ConvertUtf8toUtf16(
|
||||
*utf8String, MakeSpan(buffer.get(), allocLen.value()));
|
||||
MOZ_RELEASE_ASSERT(written <= len);
|
||||
buffer[written] = 0;
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "ExampleStylesheet.h"
|
||||
#include "ServoBindings.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "mozilla/NullPrincipalURI.h"
|
||||
#include "mozilla/css/SheetParsingMode.h"
|
||||
#include "ReferrerInfo.h"
|
||||
@@ -54,7 +55,7 @@ static void ServoSetPropertyByIdBench(const nsACString& css) {
|
||||
RefPtr<URLExtraData> data =
|
||||
new URLExtraData(NullPrincipalURI::Create(), referrerInfo.forget(),
|
||||
NullPrincipal::CreateWithoutOriginAttributes());
|
||||
ASSERT_TRUE(IsUTF8(css));
|
||||
ASSERT_TRUE(IsUtf8(css));
|
||||
|
||||
for (int i = 0; i < SETPROPERTY_REPETITIONS; i++) {
|
||||
Servo_DeclarationBlock_SetPropertyById(
|
||||
|
||||
21
mfbt/JsRust.h
Normal file
21
mfbt/JsRust.h
Normal file
@@ -0,0 +1,21 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/*
|
||||
* Checking for jsrust crate availability for linking.
|
||||
* For testing, define MOZ_PRETEND_NO_JSRUST to pretend
|
||||
* that we don't have jsrust.
|
||||
*/
|
||||
|
||||
#ifndef mozilla_JsRust_h
|
||||
#define mozilla_JsRust_h
|
||||
|
||||
#if (defined(MOZ_HAS_MOZGLUE) || defined(MOZILLA_INTERNAL_API)) && \
|
||||
!defined(MOZ_PRETEND_NO_JSRUST)
|
||||
# define MOZ_HAS_JSRUST() 1
|
||||
#else
|
||||
# define MOZ_HAS_JSRUST() 0
|
||||
#endif
|
||||
|
||||
#endif // mozilla_JsRust_h
|
||||
180
mfbt/Latin1.h
Normal file
180
mfbt/Latin1.h
Normal file
@@ -0,0 +1,180 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* Latin-1 operations (i.e. a byte is the corresponding code point).
|
||||
* (Note: this is *not* the same as the encoding of windows-1252 or
|
||||
* latin1 content on the web. In Web terms, this encoding
|
||||
* corresponds to "isomorphic decode" / "isomorphic encoding" from
|
||||
* the Infra Standard.)
|
||||
*/
|
||||
|
||||
#ifndef mozilla_Latin1_h
|
||||
#define mozilla_Latin1_h
|
||||
|
||||
#include "mozilla/JsRust.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/Tuple.h"
|
||||
#include "mozilla/TypeTraits.h"
|
||||
|
||||
#if MOZ_HAS_JSRUST()
|
||||
# include "encoding_rs_mem.h"
|
||||
#endif
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Maps a character type to the type used for unsigned range checks, exposed
// as the nested |Type|. The primary template simply inherits whatever
// MakeUnsigned<Char> provides.
template <typename Char>
|
||||
class MakeUnsignedChar : public MakeUnsigned<Char> {};
|
||||
|
||||
// char16_t maps to itself.
// NOTE(review): presumably specialized because MakeUnsigned does not cover
// char16_t -- confirm against mozilla/TypeTraits.h.
template <>
|
||||
class MakeUnsignedChar<char16_t> {
|
||||
public:
|
||||
using Type = char16_t;
|
||||
};
|
||||
|
||||
// char32_t maps to itself, mirroring the char16_t specialization.
template <>
|
||||
class MakeUnsignedChar<char32_t> {
|
||||
public:
|
||||
using Type = char32_t;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* Returns true iff |aChar| is Latin-1 but not ASCII, i.e. in the range
|
||||
* [0x80, 0xFF].
|
||||
*/
|
||||
template <typename Char>
|
||||
constexpr bool IsNonAsciiLatin1(Char aChar) {
|
||||
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
||||
auto uc = static_cast<UnsignedChar>(aChar);
|
||||
return uc >= 0x80 && uc <= 0xFF;
|
||||
}
|
||||
|
||||
#if MOZ_HAS_JSRUST()
|
||||
|
||||
/**
|
||||
* Returns |true| iff |aString| contains only Latin1 characters, that is,
|
||||
* characters in the range [U+0000, U+00FF].
|
||||
*
|
||||
* @param aString a potentially-invalid UTF-16 string to scan
|
||||
*/
|
||||
inline bool IsUtf16Latin1(mozilla::Span<const char16_t> aString) {
  // Unpack the span into the (pointer, length) pair the C entry point takes.
  const auto* units = aString.Elements();
  const auto unitCount = aString.Length();
  return encoding_mem_is_utf16_latin1(units, unitCount);
}
|
||||
|
||||
/**
|
||||
* Returns |true| iff |aString| is valid UTF-8 containing only Latin-1
|
||||
* characters.
|
||||
*
|
||||
* If you know that the argument is always absolutely guaranteed to be valid
|
||||
* UTF-8, use the faster UnsafeIsValidUtf8Latin1() instead.
|
||||
*
|
||||
* @param aString potentially-invalid UTF-8 string to scan
|
||||
*/
|
||||
inline bool IsUtf8Latin1(mozilla::Span<const char> aString) {
  // Unpack the span into the (pointer, length) pair the C entry point takes.
  const auto* bytes = aString.Elements();
  const auto byteCount = aString.Length();
  return encoding_mem_is_utf8_latin1(bytes, byteCount);
}
|
||||
|
||||
/**
|
||||
* Returns |true| iff |aString|, which MUST be valid UTF-8, contains only
|
||||
* Latin1 characters, that is, characters in the range [U+0000, U+00FF].
|
||||
* (If |aString| might not be valid UTF-8, use |IsUtf8Latin1| instead.)
|
||||
*
|
||||
* @param aString known-valid UTF-8 string to scan
|
||||
*/
|
||||
inline bool UnsafeIsValidUtf8Latin1(mozilla::Span<const char> aString) {
  // Forwards to the "str" variant of the check; the caller is responsible
  // for the input already being valid UTF-8.
  const auto* bytes = aString.Elements();
  const auto byteCount = aString.Length();
  return encoding_mem_is_str_latin1(bytes, byteCount);
}
|
||||
|
||||
/**
|
||||
* If all the code points in the input are below U+0100, converts to Latin1,
|
||||
* i.e. unsigned byte value is Unicode scalar value. If there are code points
|
||||
* above U+00FF, produces unspecified garbage in a memory-safe way. The
|
||||
* nature of the garbage must not be relied upon.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline void LossyConvertUtf16toLatin1(mozilla::Span<const char16_t> aSource,
                                      mozilla::Span<char> aDest) {
  // Split both spans into the (pointer, length) pairs the C API expects.
  const auto* src = aSource.Elements();
  const auto srcLen = aSource.Length();
  auto* dst = aDest.Elements();
  const auto dstLen = aDest.Length();
  encoding_mem_convert_utf16_to_latin1_lossy(src, srcLen, dst, dstLen);
}
|
||||
|
||||
/**
|
||||
* If all the code points in the input are below U+0100, converts to Latin1,
|
||||
* i.e. unsigned byte value is Unicode scalar value. If there are code points
|
||||
* above U+00FF, produces unspecified garbage in a memory-safe way. The
|
||||
* nature of the garbage must not be relied upon.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline size_t LossyConvertUtf8toLatin1(mozilla::Span<const char> aSource,
                                       mozilla::Span<char> aDest) {
  // Split both spans into the (pointer, length) pairs the C API expects.
  const auto* src = aSource.Elements();
  const auto srcLen = aSource.Length();
  auto* dst = aDest.Elements();
  const auto dstLen = aDest.Length();
  // The C function's return value is passed straight back to the caller.
  return encoding_mem_convert_utf8_to_latin1_lossy(src, srcLen, dst, dstLen);
}
|
||||
|
||||
/**
|
||||
* Converts each byte of |aSource|, interpreted as a Unicode scalar value
|
||||
* having that unsigned value, to its UTF-8 representation in |aDest|.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*
|
||||
* The length of aDest must be at least twice the length of aSource.
|
||||
*/
|
||||
inline size_t ConvertLatin1toUtf8(mozilla::Span<const char> aSource,
                                  mozilla::Span<char> aDest) {
  // Split both spans into the (pointer, length) pairs the C API expects.
  const auto* src = aSource.Elements();
  const auto srcLen = aSource.Length();
  auto* dst = aDest.Elements();
  const auto dstLen = aDest.Length();
  // The C function's return value is passed straight back to the caller.
  return encoding_mem_convert_latin1_to_utf8(src, srcLen, dst, dstLen);
}
|
||||
|
||||
/**
|
||||
 * Converts bytes whose unsigned value is interpreted as a Unicode code point
|
||||
* (i.e. U+0000 to U+00FF, inclusive) to UTF-8 with potentially insufficient
|
||||
* output space.
|
||||
*
|
||||
* Returns the number of bytes read and the number of bytes written.
|
||||
*
|
||||
* If the output isn't large enough, not all input is consumed.
|
||||
*
|
||||
* The conversion is guaranteed to be complete if the length of aDest is
|
||||
* at least the length of aSource times two.
|
||||
*
|
||||
* The output is always valid UTF-8 ending on scalar value boundary
|
||||
* even in the case of partial conversion.
|
||||
*
|
||||
* The semantics of this function match the semantics of
|
||||
* TextEncoder.encodeInto.
|
||||
* https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto
|
||||
*/
|
||||
inline mozilla::Tuple<size_t, size_t> ConvertLatin1toUtf8Partial(
    mozilla::Span<const char> aSource, mozilla::Span<char> aDest) {
  // Both counters are handed to the C function by address: they start out
  // holding the buffer lengths, and whatever they hold after the call is
  // returned to the caller as the (read, written) pair.
  size_t read = aSource.Length();
  size_t written = aDest.Length();
  encoding_mem_convert_latin1_to_utf8_partial(aSource.Elements(), &read,
                                              aDest.Elements(), &written);
  return mozilla::MakeTuple(read, written);
}
|
||||
|
||||
/**
|
||||
* Converts Latin-1 code points (i.e. each byte is the identical code
|
||||
 * point) from |aSource| to UTF-16 code units in |aDest|.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline void ConvertLatin1toUtf16(mozilla::Span<const char> aSource,
                                 mozilla::Span<char16_t> aDest) {
  // Split both spans into the (pointer, length) pairs the C API expects.
  const auto* src = aSource.Elements();
  const auto srcLen = aSource.Length();
  auto* dst = aDest.Elements();
  const auto dstLen = aDest.Length();
  encoding_mem_convert_latin1_to_utf16(src, srcLen, dst, dstLen);
}
|
||||
|
||||
#endif
|
||||
|
||||
}; // namespace mozilla
|
||||
|
||||
#endif // mozilla_Latin1_h
|
||||
132
mfbt/TextUtils.h
132
mfbt/TextUtils.h
@@ -10,35 +10,86 @@
|
||||
#define mozilla_TextUtils_h
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Latin1.h"
|
||||
#include "mozilla/TypeTraits.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
namespace detail {
|
||||
// See Utf8.h for IsUtf8() and conversions between UTF-8 and UTF-16.
|
||||
// See Latin1.h for testing UTF-16 and UTF-8 for Latin1ness and
|
||||
// for conversions to and from Latin1.
|
||||
|
||||
template <typename Char>
|
||||
class MakeUnsignedChar : public MakeUnsigned<Char> {};
|
||||
|
||||
template <>
|
||||
class MakeUnsignedChar<char16_t> {
|
||||
public:
|
||||
using Type = char16_t;
|
||||
};
|
||||
|
||||
template <>
|
||||
class MakeUnsignedChar<char32_t> {
|
||||
public:
|
||||
using Type = char32_t;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
// The overloads below are not templated in order to make
|
||||
// implicit conversions to span work as expected for the Span
|
||||
// overloads.
|
||||
|
||||
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
||||
template <typename Char>
|
||||
constexpr bool IsAscii(Char aChar) {
|
||||
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
||||
auto uc = static_cast<UnsignedChar>(aChar);
|
||||
return uc < 0x80;
|
||||
// ASCII is the seven-bit range, so 0x7F is the largest ASCII value.
constexpr bool IsAscii(unsigned char aChar) {
  return aChar <= 0x7F;
}
|
||||
|
||||
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
||||
constexpr bool IsAscii(signed char aChar) {
  // Convert to unsigned char first so the unsigned char overload does the
  // actual range check.
  const auto asUnsigned = static_cast<unsigned char>(aChar);
  return IsAscii(asUnsigned);
}
|
||||
|
||||
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
||||
constexpr bool IsAscii(char aChar) {
  // Convert to unsigned char first so the unsigned char overload does the
  // actual range check.
  const auto asUnsigned = static_cast<unsigned char>(aChar);
  return IsAscii(asUnsigned);
}
|
||||
|
||||
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
||||
// ASCII is the seven-bit range, so 0x7F is the largest ASCII value.
constexpr bool IsAscii(char16_t aChar) {
  return aChar <= 0x7F;
}
|
||||
|
||||
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
||||
// ASCII is the seven-bit range, so 0x7F is the largest ASCII value.
constexpr bool IsAscii(char32_t aChar) {
  return aChar <= 0x7F;
}
|
||||
|
||||
/**
|
||||
* Returns |true| iff |aString| contains only ASCII characters, that is,
|
||||
* characters in the range [0x00, 0x80).
|
||||
*
|
||||
* @param aString a 8-bit wide string to scan
|
||||
*/
|
||||
inline bool IsAscii(mozilla::Span<const char> aString) {
#if MOZ_HAS_JSRUST()
  const size_t byteCount = aString.Length();
  const char* chars = aString.Elements();
  if (byteCount >= 16) {
    return encoding_mem_is_ascii(chars, byteCount);
  }
  // Short strings are scanned inline: the function call is avoided since
  // the SIMD code would not have a chance to kick in anyway. OR-ing every
  // byte together leaves the high bit set iff some byte is non-ASCII.
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(chars);
  uint8_t merged = 0;
  for (size_t idx = 0; idx != byteCount; ++idx) {
    merged |= bytes[idx];
  }
  return (merged & 0x80) == 0;
#else
  // Without jsrust, check each code unit with the scalar overload.
  for (const char unit : aString) {
    if (IsAscii(unit)) {
      continue;
    }
    return false;
  }
  return true;
#endif
}
|
||||
|
||||
/**
|
||||
* Returns |true| iff |aString| contains only ASCII characters, that is,
|
||||
* characters in the range [0x00, 0x80).
|
||||
*
|
||||
* @param aString a 16-bit wide string to scan
|
||||
*/
|
||||
inline bool IsAscii(mozilla::Span<const char16_t> aString) {
#if MOZ_HAS_JSRUST()
  // Delegate the whole scan to the C entry point.
  const auto* units = aString.Elements();
  const auto unitCount = aString.Length();
  return encoding_mem_is_basic_latin(units, unitCount);
#else
  // Without jsrust, check each code unit with the scalar overload.
  for (const char16_t unit : aString) {
    if (IsAscii(unit)) {
      continue;
    }
    return false;
  }
  return true;
#endif
}
|
||||
|
||||
/**
|
||||
@@ -55,17 +106,40 @@ constexpr bool IsAsciiNullTerminated(const Char* aChar) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#if MOZ_HAS_JSRUST()
|
||||
/**
|
||||
* Returns true iff |aChar| is Latin-1 but not ASCII, i.e. in the range
|
||||
* [0x80, 0xFF].
|
||||
* Returns the index of the first unpaired surrogate or
|
||||
* the length of the string if there are none.
|
||||
*/
|
||||
template <typename Char>
|
||||
constexpr bool IsNonAsciiLatin1(Char aChar) {
|
||||
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
||||
auto uc = static_cast<UnsignedChar>(aChar);
|
||||
return uc >= 0x80 && uc <= 0xFF;
|
||||
inline size_t Utf16ValidUpTo(mozilla::Span<const char16_t> aString) {
  // Unpack the span into the (pointer, length) pair the C entry point takes
  // and pass its result straight back.
  const auto* units = aString.Elements();
  const auto unitCount = aString.Length();
  return encoding_mem_utf16_valid_up_to(units, unitCount);
}
|
||||
|
||||
/**
|
||||
* Replaces unpaired surrogates with U+FFFD in the argument.
|
||||
*
|
||||
* Note: If you have an nsAString, use EnsureUTF16Validity() from
|
||||
* nsReadableUtils.h instead to avoid unsharing a valid shared
|
||||
* string.
|
||||
*/
|
||||
inline void EnsureUtf16ValiditySpan(mozilla::Span<char16_t> aString) {
  // The buffer is passed as a mutable (pointer, length) pair, so the fix-up
  // happens in place.
  auto* units = aString.Elements();
  const auto unitCount = aString.Length();
  encoding_mem_ensure_utf16_validity(units, unitCount);
}
|
||||
|
||||
/**
|
||||
* Convert ASCII to UTF-16. In debug builds, assert that the input is
|
||||
* ASCII.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline void ConvertAsciitoUtf16(mozilla::Span<const char> aSource,
                                mozilla::Span<char16_t> aDest) {
  // The ASCII precondition is only asserted, not enforced.
  MOZ_ASSERT(IsAscii(aSource));

  // The conversion itself is delegated to the Latin-1 to UTF-16 routine.
  ConvertLatin1toUtf16(aSource, aDest);
}
|
||||
|
||||
#endif // MOZ_HAS_JSRUST
|
||||
|
||||
/**
|
||||
* Returns true iff |aChar| matches Ascii Whitespace.
|
||||
*
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
MFBT_API bool mozilla::IsValidUtf8(const void* aCodeUnits, size_t aCount) {
|
||||
const auto* s = static_cast<const unsigned char*>(aCodeUnits);
|
||||
MFBT_API bool mozilla::detail::IsValidUtf8(const void* aCodeUnits,
|
||||
size_t aCount) {
|
||||
const auto* s = reinterpret_cast<const unsigned char*>(aCodeUnits);
|
||||
const auto* const limit = s + aCount;
|
||||
|
||||
while (s < limit) {
|
||||
|
||||
144
mfbt/Utf8.h
144
mfbt/Utf8.h
@@ -15,13 +15,30 @@
|
||||
#include "mozilla/Casting.h" // for mozilla::AssertedCast
|
||||
#include "mozilla/Likely.h" // for MOZ_UNLIKELY
|
||||
#include "mozilla/Maybe.h" // for mozilla::Maybe
|
||||
#include "mozilla/TextUtils.h" // for mozilla::IsAscii
|
||||
#include "mozilla/Span.h" // for mozilla::Span
|
||||
#include "mozilla/TextUtils.h" // for mozilla::IsAscii and via Latin1.h for
|
||||
// encoding_rs_mem.h and MOZ_HAS_JSRUST.
|
||||
#include "mozilla/Tuple.h" // for mozilla::Tuple
|
||||
#include "mozilla/Types.h" // for MFBT_API
|
||||
|
||||
#include <limits.h> // for CHAR_BIT
|
||||
#include <stddef.h> // for size_t
|
||||
#include <stdint.h> // for uint8_t
|
||||
|
||||
#if MOZ_HAS_JSRUST()
|
||||
// Can't include mozilla/Encoding.h here.
|
||||
extern "C" {
|
||||
// Declared as uint8_t instead of char to match declaration in another header.
|
||||
size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
|
||||
}
|
||||
#else
|
||||
namespace mozilla {
|
||||
namespace detail {
|
||||
extern MFBT_API bool IsValidUtf8(const void* aCodeUnits, size_t aCount);
|
||||
}; // namespace detail
|
||||
}; // namespace mozilla
|
||||
#endif // MOZ_HAS_JSRUST
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
union Utf8Unit;
|
||||
@@ -224,20 +241,127 @@ inline const unsigned char* Utf8AsUnsignedChars(const Utf8Unit* aUnits) {
|
||||
}
|
||||
|
||||
/** Returns true iff |aUnit| is an ASCII value. */
|
||||
template <>
|
||||
inline bool IsAscii<Utf8Unit>(Utf8Unit aUnit) {
|
||||
return IsAscii(aUnit.toUint8());
|
||||
constexpr bool IsAscii(Utf8Unit aUnit) {
|
||||
return IsAscii(aUnit.toUnsignedChar());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given length-delimited memory consists of a valid UTF-8
|
||||
* string, false otherwise.
|
||||
* Return true if the given span of memory consists of a valid UTF-8
|
||||
* string and false otherwise.
|
||||
*
|
||||
* A valid UTF-8 string contains no overlong-encoded code points (as one would
|
||||
* expect) and contains no code unit sequence encoding a UTF-16 surrogate. The
|
||||
* string *may* contain U+0000 NULL code points.
|
||||
* The string *may* contain U+0000 NULL code points.
|
||||
*/
|
||||
extern MFBT_API bool IsValidUtf8(const void* aCodeUnits, size_t aCount);
|
||||
inline bool IsUtf8(mozilla::Span<const char> aString) {
#if MOZ_HAS_JSRUST()
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(aString.Elements());
  size_t remaining = aString.Length();
  // For short strings, the function call is a pessimization, and the SIMD
  // code won't have a chance to kick in anyway, so skip the leading ASCII
  // run inline first.
  if (remaining < 16) {
    size_t asciiPrefix = 0;
    while (asciiPrefix < remaining && bytes[asciiPrefix] < 0x80U) {
      ++asciiPrefix;
    }
    if (asciiPrefix == remaining) {
      // Entirely ASCII (or empty): trivially valid UTF-8.
      return true;
    }
    // Only the tail starting at the first non-ASCII byte still needs
    // validation.
    bytes += asciiPrefix;
    remaining -= asciiPrefix;
  }
  // Valid iff the validator accepts every remaining byte.
  return remaining == encoding_utf8_valid_up_to(bytes, remaining);
#else
  return detail::IsValidUtf8(aString.Elements(), aString.Length());
#endif
}
|
||||
|
||||
#if MOZ_HAS_JSRUST()
|
||||
|
||||
// See Latin1.h for conversions between Latin1 and UTF-8.
|
||||
|
||||
/**
|
||||
* Returns the index of the start of the first malformed byte
|
||||
* sequence or the length of the string if there are none.
|
||||
*/
|
||||
inline size_t Utf8ValidUpTo(mozilla::Span<const char> aString) {
|
||||
return encoding_utf8_valid_up_to(
|
||||
reinterpret_cast<const uint8_t*>(aString.Elements()), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts potentially-invalid UTF-16 to UTF-8 replacing lone surrogates
|
||||
* with the REPLACEMENT CHARACTER.
|
||||
*
|
||||
* The length of aDest must be at least the length of aSource times three.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*/
|
||||
inline size_t ConvertUtf16toUtf8(mozilla::Span<const char16_t> aSource,
|
||||
mozilla::Span<char> aDest) {
|
||||
return encoding_mem_convert_utf16_to_utf8(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts potentially-invalid UTF-16 to UTF-8 replacing lone surrogates
|
||||
* with the REPLACEMENT CHARACTER with potentially insufficient
|
||||
* output space.
|
||||
*
|
||||
* Returns the number of code units read and the number of bytes written.
|
||||
*
|
||||
* If the output isn't large enough, not all input is consumed.
|
||||
*
|
||||
* The conversion is guaranteed to be complete if the length of aDest is
|
||||
* at least the length of aSource times three.
|
||||
*
|
||||
* The output is always valid UTF-8 ending on scalar value boundary
|
||||
* even in the case of partial conversion.
|
||||
*
|
||||
* The semantics of this function match the semantics of
|
||||
* TextEncoder.encodeInto.
|
||||
* https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto
|
||||
*/
|
||||
inline mozilla::Tuple<size_t, size_t> ConvertUtf16toUtf8Partial(
    mozilla::Span<const char16_t> aSource, mozilla::Span<char> aDest) {
  // Both counters are passed by pointer: they start out as the span lengths
  // and whatever the converter leaves in them is reported to the caller as
  // (code units read, bytes written).
  size_t read = aSource.Length();
  size_t written = aDest.Length();
  encoding_mem_convert_utf16_to_utf8_partial(aSource.Elements(), &read,
                                             aDest.Elements(), &written);
  return mozilla::MakeTuple(read, written);
}
|
||||
|
||||
/**
|
||||
* Converts potentially-invalid UTF-8 to UTF-16 replacing malformed byte
|
||||
* sequences with the REPLACEMENT CHARACTER.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*
|
||||
* The length of aDest must be at least one greater than the length of aSource
|
||||
* even though the last slot isn't written to.
|
||||
*
|
||||
* If you know that the input is valid for sure, use
|
||||
* UnsafeConvertValidUtf8toUtf16() instead.
|
||||
*/
|
||||
inline size_t ConvertUtf8toUtf16(mozilla::Span<const char> aSource,
|
||||
mozilla::Span<char16_t> aDest) {
|
||||
return encoding_mem_convert_utf8_to_utf16(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts known-valid UTF-8 to UTF-16. If the input might be invalid,
|
||||
* use ConvertUtf8toUtf16() instead.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*
|
||||
* The length of aDest must be at least the length of aSource.
|
||||
*/
|
||||
inline size_t UnsafeConvertValidUtf8toUtf16(mozilla::Span<const char> aSource,
|
||||
mozilla::Span<char16_t> aDest) {
|
||||
return encoding_mem_convert_utf8_to_utf16(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
#endif // MOZ_HAS_JSRUST
|
||||
|
||||
/**
|
||||
* Returns true iff |aUnit| is a UTF-8 trailing code unit matching the pattern
|
||||
|
||||
@@ -9,6 +9,10 @@ with Files("**"):
|
||||
|
||||
Library('mfbt')
|
||||
|
||||
EXPORTS += [
|
||||
'../third_party/rust/encoding_c_mem/include/encoding_rs_mem.h',
|
||||
]
|
||||
|
||||
EXPORTS.mozilla = [
|
||||
'Algorithm.h',
|
||||
'Alignment.h',
|
||||
@@ -51,6 +55,8 @@ EXPORTS.mozilla = [
|
||||
'IntegerRange.h',
|
||||
'IntegerTypeTraits.h',
|
||||
'JSONWriter.h',
|
||||
'JsRust.h',
|
||||
'Latin1.h',
|
||||
'Likely.h',
|
||||
'LinkedList.h',
|
||||
'MacroArgs.h',
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#define MOZ_PRETEND_NO_JSRUST 1
|
||||
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
@@ -13,11 +15,13 @@
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
using mozilla::ArrayLength;
|
||||
using mozilla::AsChars;
|
||||
using mozilla::DecodeOneUtf8CodePoint;
|
||||
using mozilla::EnumSet;
|
||||
using mozilla::IntegerRange;
|
||||
using mozilla::IsAscii;
|
||||
using mozilla::IsValidUtf8;
|
||||
using mozilla::IsUtf8;
|
||||
using mozilla::MakeSpan;
|
||||
using mozilla::Utf8Unit;
|
||||
|
||||
// Disable the C++ 2a warning. See bug #1509926
|
||||
@@ -243,17 +247,18 @@ static void ExpectBadCodePoint(const Char (&aCharN)[N],
|
||||
aExpectedUnitsObserved);
|
||||
}
|
||||
|
||||
static void TestIsValidUtf8() {
|
||||
static void TestIsUtf8() {
|
||||
// Note we include the U+0000 NULL in this one -- and that's fine.
|
||||
static const char asciiBytes[] = u8"How about a nice game of chess?";
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(asciiBytes, ArrayLength(asciiBytes)));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(asciiBytes, ArrayLength(asciiBytes))));
|
||||
|
||||
static const char endNonAsciiBytes[] = u8"Life is like a 🌯";
|
||||
MOZ_RELEASE_ASSERT(
|
||||
IsValidUtf8(endNonAsciiBytes, ArrayLength(endNonAsciiBytes) - 1));
|
||||
IsUtf8(MakeSpan(endNonAsciiBytes, ArrayLength(endNonAsciiBytes) - 1)));
|
||||
|
||||
static const unsigned char badLeading[] = {0x80};
|
||||
MOZ_RELEASE_ASSERT(!IsValidUtf8(badLeading, ArrayLength(badLeading)));
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsUtf8(AsChars(MakeSpan(badLeading, ArrayLength(badLeading)))));
|
||||
|
||||
// Byte-counts
|
||||
|
||||
@@ -261,13 +266,13 @@ static void TestIsValidUtf8() {
|
||||
static const char oneBytes[] = u8"A"; // U+0041 LATIN CAPITAL LETTER A
|
||||
constexpr size_t oneBytesLen = ArrayLength(oneBytes);
|
||||
static_assert(oneBytesLen == 2, "U+0041 plus nul");
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(oneBytes, oneBytesLen));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(oneBytes, oneBytesLen)));
|
||||
|
||||
// 2
|
||||
static const char twoBytes[] = u8"؆"; // U+0606 ARABIC-INDIC CUBE ROOT
|
||||
constexpr size_t twoBytesLen = ArrayLength(twoBytes);
|
||||
static_assert(twoBytesLen == 3, "U+0606 in two bytes plus nul");
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(twoBytes, twoBytesLen));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(twoBytes, twoBytesLen)));
|
||||
|
||||
ExpectValidCodePoint(twoBytes, 0x0606);
|
||||
|
||||
@@ -275,7 +280,7 @@ static void TestIsValidUtf8() {
|
||||
static const char threeBytes[] = u8"᨞"; // U+1A1E BUGINESE PALLAWA
|
||||
constexpr size_t threeBytesLen = ArrayLength(threeBytes);
|
||||
static_assert(threeBytesLen == 4, "U+1A1E in three bytes plus nul");
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(threeBytes, threeBytesLen));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(threeBytes, threeBytesLen)));
|
||||
|
||||
ExpectValidCodePoint(threeBytes, 0x1A1E);
|
||||
|
||||
@@ -284,7 +289,7 @@ static void TestIsValidUtf8() {
|
||||
u8"🁡"; // U+1F061 DOMINO TILE HORIZONTAL-06-06
|
||||
constexpr size_t fourBytesLen = ArrayLength(fourBytes);
|
||||
static_assert(fourBytesLen == 5, "U+1F061 in four bytes plus nul");
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(fourBytes, fourBytesLen));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(fourBytes, fourBytesLen)));
|
||||
|
||||
ExpectValidCodePoint(fourBytes, 0x1F061);
|
||||
|
||||
@@ -292,7 +297,7 @@ static void TestIsValidUtf8() {
|
||||
static const char maxCodePoint[] = u8""; // U+10FFFF
|
||||
constexpr size_t maxCodePointLen = ArrayLength(maxCodePoint);
|
||||
static_assert(maxCodePointLen == 5, "U+10FFFF in four bytes plus nul");
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(maxCodePoint, maxCodePointLen));
|
||||
MOZ_RELEASE_ASSERT(IsUtf8(MakeSpan(maxCodePoint, maxCodePointLen)));
|
||||
|
||||
ExpectValidCodePoint(maxCodePoint, 0x10FFFF);
|
||||
|
||||
@@ -300,7 +305,8 @@ static void TestIsValidUtf8() {
|
||||
static const unsigned char onePastMaxCodePoint[] = {0xF4, 0x90, 0x80, 0x80,
|
||||
0x0};
|
||||
constexpr size_t onePastMaxCodePointLen = ArrayLength(onePastMaxCodePoint);
|
||||
MOZ_RELEASE_ASSERT(!IsValidUtf8(onePastMaxCodePoint, onePastMaxCodePointLen));
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsUtf8(AsChars(MakeSpan(onePastMaxCodePoint, onePastMaxCodePointLen))));
|
||||
|
||||
ExpectBadCodePoint(onePastMaxCodePoint, 0x110000, 4);
|
||||
|
||||
@@ -313,42 +319,45 @@ static void TestIsValidUtf8() {
|
||||
constexpr size_t justBeforeSurrogatesLen =
|
||||
ArrayLength(justBeforeSurrogates) - 1;
|
||||
MOZ_RELEASE_ASSERT(
|
||||
IsValidUtf8(justBeforeSurrogates, justBeforeSurrogatesLen));
|
||||
IsUtf8(AsChars(MakeSpan(justBeforeSurrogates, justBeforeSurrogatesLen))));
|
||||
|
||||
ExpectValidCodePoint(justBeforeSurrogates, 0xD7FF);
|
||||
|
||||
static const unsigned char leastSurrogate[] = {0xED, 0xA0, 0x80, 0x0};
|
||||
constexpr size_t leastSurrogateLen = ArrayLength(leastSurrogate) - 1;
|
||||
MOZ_RELEASE_ASSERT(!IsValidUtf8(leastSurrogate, leastSurrogateLen));
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsUtf8(AsChars(MakeSpan(leastSurrogate, leastSurrogateLen))));
|
||||
|
||||
ExpectBadCodePoint(leastSurrogate, 0xD800, 3);
|
||||
|
||||
static const unsigned char arbitraryHighSurrogate[] = {0xED, 0xA2, 0x87, 0x0};
|
||||
constexpr size_t arbitraryHighSurrogateLen =
|
||||
ArrayLength(arbitraryHighSurrogate) - 1;
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsValidUtf8(arbitraryHighSurrogate, arbitraryHighSurrogateLen));
|
||||
MOZ_RELEASE_ASSERT(!IsUtf8(
|
||||
AsChars(MakeSpan(arbitraryHighSurrogate, arbitraryHighSurrogateLen))));
|
||||
|
||||
ExpectBadCodePoint(arbitraryHighSurrogate, 0xD887, 3);
|
||||
|
||||
static const unsigned char arbitraryLowSurrogate[] = {0xED, 0xB7, 0xAF, 0x0};
|
||||
constexpr size_t arbitraryLowSurrogateLen =
|
||||
ArrayLength(arbitraryLowSurrogate) - 1;
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsValidUtf8(arbitraryLowSurrogate, arbitraryLowSurrogateLen));
|
||||
MOZ_RELEASE_ASSERT(!IsUtf8(
|
||||
AsChars(MakeSpan(arbitraryLowSurrogate, arbitraryLowSurrogateLen))));
|
||||
|
||||
ExpectBadCodePoint(arbitraryLowSurrogate, 0xDDEF, 3);
|
||||
|
||||
static const unsigned char greatestSurrogate[] = {0xED, 0xBF, 0xBF, 0x0};
|
||||
constexpr size_t greatestSurrogateLen = ArrayLength(greatestSurrogate) - 1;
|
||||
MOZ_RELEASE_ASSERT(!IsValidUtf8(greatestSurrogate, greatestSurrogateLen));
|
||||
MOZ_RELEASE_ASSERT(
|
||||
!IsUtf8(AsChars(MakeSpan(greatestSurrogate, greatestSurrogateLen))));
|
||||
|
||||
ExpectBadCodePoint(greatestSurrogate, 0xDFFF, 3);
|
||||
|
||||
static const unsigned char justAfterSurrogates[] = {0xEE, 0x80, 0x80, 0x0};
|
||||
constexpr size_t justAfterSurrogatesLen =
|
||||
ArrayLength(justAfterSurrogates) - 1;
|
||||
MOZ_RELEASE_ASSERT(IsValidUtf8(justAfterSurrogates, justAfterSurrogatesLen));
|
||||
MOZ_RELEASE_ASSERT(
|
||||
IsUtf8(AsChars(MakeSpan(justAfterSurrogates, justAfterSurrogatesLen))));
|
||||
|
||||
ExpectValidCodePoint(justAfterSurrogates, 0xE000);
|
||||
}
|
||||
@@ -737,7 +746,7 @@ static void TestDecodeOneUtf8CodePoint() {
|
||||
|
||||
int main() {
|
||||
TestUtf8Unit();
|
||||
TestIsValidUtf8();
|
||||
TestIsUtf8();
|
||||
TestDecodeOneUtf8CodePoint();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -63,7 +63,6 @@ CppUnitTests([
|
||||
'TestTypedEnum',
|
||||
'TestTypeTraits',
|
||||
'TestUniquePtr',
|
||||
'TestUtf8',
|
||||
'TestVariant',
|
||||
'TestVector',
|
||||
'TestWeakPtr',
|
||||
@@ -71,6 +70,13 @@ CppUnitTests([
|
||||
'TestXorShift128PlusRNG',
|
||||
])
|
||||
|
||||
# Not to be unified with the rest, because this test
|
||||
# sets MOZ_PRETEND_NO_JSRUST, which changes the behavior
|
||||
# of the included headers.
|
||||
CppUnitTests([
|
||||
'TestUtf8',
|
||||
])
|
||||
|
||||
if not CONFIG['MOZ_ASAN']:
|
||||
CppUnitTests([
|
||||
'TestPoisonArea',
|
||||
|
||||
@@ -720,7 +720,7 @@ nsresult ProxyAutoConfig::SetupJS() {
|
||||
// and otherwise inflate Latin-1 to UTF-16 and compile that.
|
||||
const char* scriptData = this->mConcatenatedPACData.get();
|
||||
size_t scriptLength = this->mConcatenatedPACData.Length();
|
||||
if (mozilla::IsValidUtf8(scriptData, scriptLength)) {
|
||||
if (mozilla::IsUtf8(mozilla::MakeSpan(scriptData, scriptLength))) {
|
||||
JS::SourceText<Utf8Unit> srcBuf;
|
||||
if (!srcBuf.init(cx, scriptData, scriptLength,
|
||||
JS::SourceOwnership::Borrowed)) {
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "nsEscape.h"
|
||||
#include "nsError.h"
|
||||
#include "mozilla/MemoryReporting.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/ipc/URIUtils.h"
|
||||
#include "nsIURIMutator.h"
|
||||
#include "mozilla/net/MozURL.h"
|
||||
@@ -624,7 +625,7 @@ NS_IMETHODIMP
|
||||
nsSimpleURI::GetAsciiSpec(nsACString& aResult) {
|
||||
nsresult rv = GetSpec(aResult);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
MOZ_ASSERT(IsASCII(aResult), "The spec should be ASCII");
|
||||
MOZ_ASSERT(IsAscii(aResult), "The spec should be ASCII");
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "nsReadableUtils.h"
|
||||
#include "mozilla/net/MozURL_ffi.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
//
|
||||
// setenv MOZ_LOG nsStandardURL:5
|
||||
@@ -120,7 +121,7 @@ int32_t nsStandardURL::nsSegmentEncoder::EncodeSegmentCount(
|
||||
auto encoder = mEncoding->NewEncoder();
|
||||
|
||||
nsAutoCString valid; // has to be declared in this scope
|
||||
if (MOZ_UNLIKELY(!IsUTF8(span.From(upTo)))) {
|
||||
if (MOZ_UNLIKELY(!IsUtf8(span.From(upTo)))) {
|
||||
MOZ_ASSERT_UNREACHABLE("Invalid UTF-8 passed to nsStandardURL.");
|
||||
// It's UB to pass invalid UTF-8 to
|
||||
// EncodeFromUTF8WithoutReplacement(), so let's make our input valid
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include "nsEscape.h"
|
||||
#include "nsIFile.h"
|
||||
#include "nsNativeCharsetUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
using mozilla::IsUtf8;
|
||||
|
||||
nsresult net_GetURLSpecFromActualFile(nsIFile* aFile, nsACString& result) {
|
||||
nsresult rv;
|
||||
@@ -80,7 +83,7 @@ nsresult net_GetFileFromURLSpec(const nsACString& aURL, nsIFile** result) {
|
||||
NS_UnescapeURL(path);
|
||||
if (path.Length() != strlen(path.get())) return NS_ERROR_FILE_INVALID_PATH;
|
||||
|
||||
if (IsUTF8(path)) {
|
||||
if (IsUtf8(path)) {
|
||||
// speed up the start-up where UTF-8 is the native charset
|
||||
// (e.g. on recent Linux distributions)
|
||||
if (NS_IsNativeUTF8())
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "nsEscape.h"
|
||||
#include "nsIFile.h"
|
||||
#include <windows.h>
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
nsresult net_GetURLSpecFromActualFile(nsIFile* aFile, nsACString& result) {
|
||||
nsresult rv;
|
||||
@@ -92,7 +93,8 @@ nsresult net_GetFileFromURLSpec(const nsACString& aURL, nsIFile** result) {
|
||||
// remove leading '\'
|
||||
if (path.CharAt(0) == '\\') path.Cut(0, 1);
|
||||
|
||||
if (IsUTF8(path)) rv = localFile->InitWithPath(NS_ConvertUTF8toUTF16(path));
|
||||
if (mozilla::IsUtf8(path))
|
||||
rv = localFile->InitWithPath(NS_ConvertUTF8toUTF16(path));
|
||||
// XXX In rare cases, a valid UTF-8 string can be valid as a native
|
||||
// encoding (e.g. 0xC5 0x83 is valid both as UTF-8 and Windows-125x).
|
||||
// However, the chance is very low that a meaningful word in a legacy
|
||||
|
||||
@@ -64,6 +64,7 @@
|
||||
#include "mozilla/StaticPrefs_network.h"
|
||||
#include "mozilla/StaticPrefs_privacy.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "nsIConsoleService.h"
|
||||
#include "nsTPriorityQueue.h"
|
||||
#include "nsVariant.h"
|
||||
@@ -3948,7 +3949,7 @@ nsresult nsCookieService::GetBaseDomainFromHost(
|
||||
// components are lower-cased, and UTF-8 components are normalized per
|
||||
// RFC 3454 and converted to ACE.
|
||||
nsresult nsCookieService::NormalizeHost(nsCString& aHost) {
|
||||
if (!IsASCII(aHost)) {
|
||||
if (!IsAscii(aHost)) {
|
||||
nsAutoCString host;
|
||||
nsresult rv = mIDNService->ConvertUTF8toACE(aHost, host);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
@@ -42,6 +42,8 @@
|
||||
#include "mozilla/net/DNSListenerProxy.h"
|
||||
#include "mozilla/Services.h"
|
||||
#include "mozilla/StaticPtr.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
using namespace mozilla;
|
||||
using namespace mozilla::net;
|
||||
@@ -776,12 +778,12 @@ nsresult nsDNSService::PreprocessHostname(bool aLocalDomain,
|
||||
}
|
||||
}
|
||||
|
||||
if (!aIDN || IsASCII(aInput)) {
|
||||
if (!aIDN || IsAscii(aInput)) {
|
||||
aACE = aInput;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
if (!(IsUTF8(aInput) && NS_SUCCEEDED(aIDN->ConvertUTF8toACE(aInput, aACE)))) {
|
||||
if (!(IsUtf8(aInput) && NS_SUCCEEDED(aIDN->ConvertUTF8toACE(aInput, aACE)))) {
|
||||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
return NS_OK;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "mozilla/HashFunctions.h"
|
||||
#include "mozilla/MemoryReporting.h"
|
||||
#include "mozilla/ResultExtensions.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include "MainThreadUtils.h"
|
||||
#include "nsCRT.h"
|
||||
@@ -400,7 +401,7 @@ nsresult nsEffectiveTLDService::GetBaseDomainInternal(nsCString& aHostname,
|
||||
// components are lower-cased, and UTF-8 components are normalized per
|
||||
// RFC 3454 and converted to ACE.
|
||||
nsresult nsEffectiveTLDService::NormalizeHostname(nsCString& aHostname) {
|
||||
if (!IsASCII(aHostname)) {
|
||||
if (!IsAscii(aHostname)) {
|
||||
nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
#include "nsIObserverService.h"
|
||||
#include "nsISupportsPrimitives.h"
|
||||
#include "punycode.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
// Currently we use the non-transitional processing option -- see
|
||||
// http://unicode.org/reports/tr46/
|
||||
@@ -339,7 +341,7 @@ NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
|
||||
NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
|
||||
nsACString& output) {
|
||||
// protect against bogus input
|
||||
NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
|
||||
NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
|
||||
|
||||
NS_ConvertUTF8toUTF16 inUTF16(input);
|
||||
normalizeFullStops(inUTF16);
|
||||
@@ -419,7 +421,7 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
|
||||
bool isACE;
|
||||
IsACE(input, &isACE);
|
||||
|
||||
if (IsASCII(input)) {
|
||||
if (IsAscii(input)) {
|
||||
// first, canonicalize the host to lowercase, for whitelist lookup
|
||||
_retval = input;
|
||||
ToLowerCase(_retval);
|
||||
@@ -433,7 +435,7 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
|
||||
ACEtoUTF8(
|
||||
temp, _retval,
|
||||
isInWhitelist(temp) ? eStringPrepIgnoreErrors : eStringPrepForUI);
|
||||
*_isASCII = IsASCII(_retval);
|
||||
*_isASCII = IsAscii(_retval);
|
||||
} else {
|
||||
*_isASCII = true;
|
||||
}
|
||||
@@ -463,13 +465,13 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
|
||||
// the host is converted to ACE by the normalizer, then the host may contain
|
||||
// unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
|
||||
// and bug 309311.
|
||||
*_isASCII = IsASCII(_retval);
|
||||
*_isASCII = IsAscii(_retval);
|
||||
if (!*_isASCII && !isInWhitelist(_retval)) {
|
||||
// UTF8toACE with eStringPrepForUI may return a domain name where
|
||||
// some labels are in UTF-8 and some are in ACE, depending on
|
||||
// whether they are considered safe for display
|
||||
rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
|
||||
*_isASCII = IsASCII(_retval);
|
||||
*_isASCII = IsAscii(_retval);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -567,7 +569,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
|
||||
return NS_ERROR_MALFORMED_URI;
|
||||
}
|
||||
|
||||
if (IsASCII(in)) {
|
||||
if (IsAscii(in)) {
|
||||
LossyCopyUTF16toASCII(in, out);
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -578,7 +580,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
|
||||
if (IsASCII(strPrep)) {
|
||||
if (IsAscii(strPrep)) {
|
||||
LossyCopyUTF16toASCII(strPrep, out);
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -673,7 +675,7 @@ bool nsIDNService::isInWhitelist(const nsACString& host) {
|
||||
nsAutoCString tld(host);
|
||||
// make sure the host is ACE for lookup and check that there are no
|
||||
// unassigned codepoints
|
||||
if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
|
||||
if (!IsAscii(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -702,7 +704,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
|
||||
}
|
||||
|
||||
// We should never get here if the label is ASCII
|
||||
NS_ASSERTION(!IsASCII(label), "ASCII label in IDN checking");
|
||||
NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
|
||||
if (mRestrictionProfile == eASCIIOnlyProfile) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,12 @@
|
||||
#include "nsNativeCharsetUtils.h"
|
||||
#include "nsError.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
using mozilla::Encoding;
|
||||
using mozilla::IsAscii;
|
||||
using mozilla::IsUtf8;
|
||||
|
||||
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
|
||||
|
||||
@@ -64,7 +68,7 @@ static nsresult ConvertStringToUTF8(const nsACString& aString,
|
||||
// check is requested. It may not be asked for if a caller suspects
|
||||
// that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
|
||||
// it's in a charset other than UTF-8 that can be mistaken for UTF-8.
|
||||
if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
|
||||
if (!aSkipCheck && (IsAscii(aString) || IsUtf8(aString))) {
|
||||
aUTF8String = aString;
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -77,7 +81,7 @@ static nsresult ConvertStringToUTF8(const nsACString& aString,
|
||||
// is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
|
||||
// was wrong.) We don't check ASCIIness assuming there's no charset
|
||||
// incompatible with ASCII (we don't support EBCDIC).
|
||||
if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
|
||||
if (aSkipCheck && NS_FAILED(rv) && IsUtf8(aString)) {
|
||||
aUTF8String = aString;
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -160,7 +164,7 @@ nsresult nsMIMEHeaderParamImpl::DoGetParameter(
|
||||
}
|
||||
}
|
||||
|
||||
if (IsUTF8(str1)) {
|
||||
if (IsUtf8(str1)) {
|
||||
CopyUTF8toUTF16(str1, aResult);
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -741,7 +745,7 @@ nsresult internalDecodeRFC2047Header(const char* aHeaderVal,
|
||||
// to UTF-8. Otherwise, just strips away CRLF.
|
||||
if (PL_strstr(aHeaderVal, "=?") ||
|
||||
(!aDefaultCharset.IsEmpty() &&
|
||||
(!IsUTF8(nsDependentCString(aHeaderVal)) ||
|
||||
(!IsUtf8(nsDependentCString(aHeaderVal)) ||
|
||||
Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
|
||||
DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
|
||||
} else if (aEatContinuations &&
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "mozilla/Attributes.h"
|
||||
#include "mozilla/EndianUtils.h"
|
||||
#include "mozilla/MathAlgorithms.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "mozilla/net/WebSocketEventService.h"
|
||||
|
||||
#include "nsIURI.h"
|
||||
@@ -1654,7 +1655,7 @@ nsresult WebSocketChannel::ProcessInput(uint8_t* buffer, uint32_t count) {
|
||||
}
|
||||
|
||||
// Section 8.1 says to fail connection if invalid utf-8 in text message
|
||||
if (!IsUTF8(utf8Data)) {
|
||||
if (!IsUtf8(utf8Data)) {
|
||||
LOG(("WebSocketChannel:: text frame invalid utf-8\n"));
|
||||
return NS_ERROR_CANNOT_CONVERT_DATA;
|
||||
}
|
||||
@@ -1703,7 +1704,7 @@ nsresult WebSocketChannel::ProcessInput(uint8_t* buffer, uint32_t count) {
|
||||
// (which are non-conformant to send) with u+fffd,
|
||||
// but secteam feels that silently rewriting messages is
|
||||
// inappropriate - so we will fail the connection instead.
|
||||
if (!IsUTF8(mServerCloseReason)) {
|
||||
if (!IsUtf8(mServerCloseReason)) {
|
||||
LOG(("WebSocketChannel:: close frame invalid utf-8\n"));
|
||||
return NS_ERROR_CANNOT_CONVERT_DATA;
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <regex>
|
||||
#include "json/json.h"
|
||||
#include "json/reader.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/net/MozURL.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsDirectoryServiceDefs.h"
|
||||
@@ -252,7 +253,7 @@ bool OriginMatchesExpectedOrigin(const nsACString& aOrigin,
|
||||
}
|
||||
|
||||
bool IsUUID(const nsACString& aString) {
|
||||
if (!IsASCII(aString)) {
|
||||
if (!IsAscii(aString)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -150,7 +150,7 @@ nsHtml5String nsHtml5String::FromLiteral(const char* aLiteral) {
|
||||
MOZ_CRASH("Out of memory.");
|
||||
}
|
||||
char16_t* data = reinterpret_cast<char16_t*>(buffer->Data());
|
||||
ConvertLatin1toUTF16(MakeSpan(aLiteral, length), MakeSpan(data, length));
|
||||
ConvertAsciitoUtf16(MakeSpan(aLiteral, length), MakeSpan(data, length));
|
||||
data[length] = 0;
|
||||
return nsHtml5String(reinterpret_cast<uintptr_t>(buffer.forget().take()) |
|
||||
eStringBuffer);
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "SharedSSLState.h"
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "nsAppDirectoryServiceDefs.h"
|
||||
#include "nsCRT.h"
|
||||
@@ -336,7 +337,7 @@ nsCertOverrideService::RememberValidityOverride(const nsACString& aHostName,
|
||||
uint32_t aOverrideBits,
|
||||
bool aTemporary) {
|
||||
NS_ENSURE_ARG_POINTER(aCert);
|
||||
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
|
||||
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
if (aPort < -1) return NS_ERROR_INVALID_ARG;
|
||||
@@ -392,7 +393,7 @@ nsCertOverrideService::RememberTemporaryValidityOverrideUsingFingerprint(
|
||||
const nsACString& aHostName, int32_t aPort,
|
||||
const nsACString& aCertFingerprint, uint32_t aOverrideBits) {
|
||||
if (aCertFingerprint.IsEmpty() || aHostName.IsEmpty() ||
|
||||
!IsASCII(aCertFingerprint) || !IsASCII(aHostName) || (aPort < -1)) {
|
||||
!IsAscii(aCertFingerprint) || !IsAscii(aHostName) || (aPort < -1)) {
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
|
||||
@@ -427,7 +428,7 @@ nsCertOverrideService::HasMatchingOverride(const nsACString& aHostName,
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
|
||||
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
if (aPort < -1) return NS_ERROR_INVALID_ARG;
|
||||
@@ -501,7 +502,7 @@ nsresult nsCertOverrideService::AddEntryToList(
|
||||
NS_IMETHODIMP
|
||||
nsCertOverrideService::ClearValidityOverride(const nsACString& aHostName,
|
||||
int32_t aPort) {
|
||||
if (aHostName.IsEmpty() || !IsASCII(aHostName)) {
|
||||
if (aHostName.IsEmpty() || !IsAscii(aHostName)) {
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
if (!NS_IsMainThread()) {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "mozilla/NotNull.h"
|
||||
#include "mozilla/Sprintf.h"
|
||||
#include "mozilla/UniquePtr.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsIStringBundle.h"
|
||||
#include "nsNSSASN1Object.h"
|
||||
@@ -743,7 +744,7 @@ static nsresult ProcessExtKeyUsage(SECItem* extData, nsAString& text) {
|
||||
void LossyUTF8ToUTF16(const char* str, uint32_t len,
|
||||
/*out*/ nsAString& result) {
|
||||
auto span = MakeSpan(str, len);
|
||||
if (IsUTF8(span)) {
|
||||
if (IsUtf8(span)) {
|
||||
CopyUTF8toUTF16(span, result);
|
||||
} else {
|
||||
// Actually Latin1 despite ASCII in the legacy name
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "mozilla/ipc/TransportSecurityInfoUtils.h"
|
||||
#include "mozilla/NotNull.h"
|
||||
#include "mozilla/Span.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "nsArray.h"
|
||||
#include "nsCOMPtr.h"
|
||||
@@ -556,7 +557,7 @@ void nsNSSCertificate::GetSubjectAltNames() {
|
||||
current->name.other.len);
|
||||
// dNSName fields are defined as type IA5String and thus should
|
||||
// be limited to ASCII characters.
|
||||
if (IsASCII(nameFromCert)) {
|
||||
if (IsAscii(nameFromCert)) {
|
||||
name.Assign(NS_ConvertASCIItoUTF16(nameFromCert));
|
||||
mSubjectAltNames.push_back(name);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "nsPrintfCString.h"
|
||||
#include "nsNavHistory.h"
|
||||
#include "mozilla/Likely.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "nsVariant.h"
|
||||
|
||||
// Maximum number of chars to search through.
|
||||
@@ -373,7 +374,7 @@ nsDependentCSubstring MatchAutoCompleteFunction::fixupURISpec(
|
||||
// Otherwise, we will simply use our original string.
|
||||
bool unescaped = NS_UnescapeURL(aURISpec.BeginReading(), aURISpec.Length(),
|
||||
esc_SkipControl, aSpecBuf);
|
||||
if (unescaped && IsUTF8(aSpecBuf)) {
|
||||
if (unescaped && IsUtf8(aSpecBuf)) {
|
||||
fixedSpec.Rebind(aSpecBuf, 0);
|
||||
} else {
|
||||
fixedSpec.Rebind(aURISpec, 0);
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "mozilla/ScopeExit.h"
|
||||
#include "mozilla/Services.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "mozilla/intl/LocaleService.h"
|
||||
#include "mozilla/recordreplay/ParentIPC.h"
|
||||
#include "mozilla/JSONWriter.h"
|
||||
@@ -4391,7 +4392,7 @@ nsresult XREMain::XRE_mainRun() {
|
||||
nsAutoCString path;
|
||||
rv = mDirProvider.GetProfileStartupDir(getter_AddRefs(profileDir));
|
||||
if (NS_SUCCEEDED(rv) && NS_SUCCEEDED(profileDir->GetNativePath(path)) &&
|
||||
!IsUTF8(path)) {
|
||||
!IsUtf8(path)) {
|
||||
PR_fprintf(
|
||||
PR_STDERR,
|
||||
"Error: The profile path is not valid UTF-8. Unable to continue.\n");
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "nsISupportsUtils.h"
|
||||
#include "nsStringEnumerator.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
using namespace mozilla;
|
||||
|
||||
@@ -55,7 +56,7 @@ bool nsMIMEInfoAndroid::GetMimeInfoForMimeType(const nsACString& aMimeType,
|
||||
|
||||
nsIHandlerApp* systemDefault = nullptr;
|
||||
|
||||
if (!IsUTF8(aMimeType)) return false;
|
||||
if (!IsUtf8(aMimeType)) return false;
|
||||
|
||||
NS_ConvertUTF8toUTF16 mimeType(aMimeType);
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
#include "mozilla/DebugOnly.h"
|
||||
#include "mozilla/UniquePtrExtensions.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsAutoPtr.h"
|
||||
@@ -2953,7 +2954,7 @@ nsLocalFile::GetPersistentDescriptor(nsACString& aPersistentDescriptor) {
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsLocalFile::SetPersistentDescriptor(const nsACString& aPersistentDescriptor) {
|
||||
if (IsUTF8(aPersistentDescriptor)) {
|
||||
if (IsUtf8(aPersistentDescriptor)) {
|
||||
return InitWithPath(NS_ConvertUTF8toUTF16(aPersistentDescriptor));
|
||||
} else {
|
||||
return InitWithNativePath(aPersistentDescriptor);
|
||||
|
||||
@@ -41,7 +41,7 @@ char* ToNewCString(const nsAString& aSource) {
|
||||
}
|
||||
|
||||
auto len = aSource.Length();
|
||||
LossyConvertUTF16toLatin1(aSource, MakeSpan(dest, len));
|
||||
LossyConvertUtf16toLatin1(aSource, MakeSpan(dest, len));
|
||||
dest[len] = 0;
|
||||
return dest;
|
||||
}
|
||||
@@ -64,7 +64,7 @@ char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
|
||||
size_t destLenVal = destLen.value();
|
||||
char* dest = static_cast<char*>(moz_xmalloc(destLenVal));
|
||||
|
||||
size_t written = ConvertUTF16toUTF8(aSource, MakeSpan(dest, destLenVal));
|
||||
size_t written = ConvertUtf16toUtf8(aSource, MakeSpan(dest, destLenVal));
|
||||
dest[written] = 0;
|
||||
|
||||
if (aUTF8Count) {
|
||||
@@ -111,7 +111,7 @@ char16_t* ToNewUnicode(const nsACString& aSource) {
|
||||
}
|
||||
|
||||
auto len = aSource.Length();
|
||||
ConvertLatin1toUTF16(aSource, MakeSpan(dest, len));
|
||||
ConvertLatin1toUtf16(aSource, MakeSpan(dest, len));
|
||||
dest[len] = 0;
|
||||
return dest;
|
||||
}
|
||||
@@ -132,7 +132,7 @@ char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
|
||||
|
||||
char16_t* dest = (char16_t*)moz_xmalloc(allocLength.value());
|
||||
|
||||
size_t written = ConvertUTF8toUTF16(aSource, MakeSpan(dest, lengthPlusOne));
|
||||
size_t written = ConvertUtf8toUtf16(aSource, MakeSpan(dest, lengthPlusOne));
|
||||
dest[written] = 0;
|
||||
|
||||
if (aUTF16Count) {
|
||||
|
||||
@@ -15,18 +15,10 @@
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "nsAString.h"
|
||||
#include "mozilla/Tuple.h"
|
||||
#include "encoding_rs_mem.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include "nsTArrayForwardDeclare.h"
|
||||
|
||||
// Can't include mozilla/Encoding.h here. The implementation is in
|
||||
// the encoding_rs crate.
|
||||
extern "C" {
|
||||
// Declared as uint8_t instead of char to match declaration in another header.
|
||||
size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
|
||||
}
|
||||
|
||||
// From the nsstring crate
|
||||
extern "C" {
|
||||
bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const char* aOther,
|
||||
@@ -55,95 +47,6 @@ bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
|
||||
size_t aOldLen);
|
||||
}
|
||||
|
||||
/**
|
||||
* If all the code points in the input are below U+0100, converts to Latin1,
|
||||
* i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
|
||||
* there are code points above U+00FF, produces garbage in a memory-safe way
|
||||
* and will likely start asserting in future debug builds. The nature of the
|
||||
* garbage depends on the CPU architecture and must not be relied upon.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline void LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource,
|
||||
mozilla::Span<char> aDest) {
|
||||
encoding_mem_convert_utf16_to_latin1_lossy(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* If all the code points in the input are below U+0100, converts to Latin1,
|
||||
* i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
|
||||
* there are code points above U+00FF, asserts in debug builds and produces
|
||||
* garbage in a memory-safe way in release builds. The nature of the garbage
|
||||
* may depend on the CPU architecture and must not be relied upon.
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline size_t LossyConvertUTF8toLatin1(mozilla::Span<const char> aSource,
|
||||
mozilla::Span<char> aDest) {
|
||||
return encoding_mem_convert_utf8_to_latin1_lossy(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Interprets unsigned byte value as Unicode scalar value (i.e. not
|
||||
* windows-1252!).
|
||||
*
|
||||
* The length of aDest must not be less than the length of aSource.
|
||||
*/
|
||||
inline void ConvertLatin1toUTF16(mozilla::Span<const char> aSource,
|
||||
mozilla::Span<char16_t> aDest) {
|
||||
encoding_mem_convert_latin1_to_utf16(aSource.Elements(), aSource.Length(),
|
||||
aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Lone surrogates are replaced with the REPLACEMENT CHARACTER.
|
||||
*
|
||||
* The length of aDest must be at least the length of aSource times three.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*/
|
||||
inline size_t ConvertUTF16toUTF8(mozilla::Span<const char16_t> aSource,
|
||||
mozilla::Span<char> aDest) {
|
||||
return encoding_mem_convert_utf16_to_utf8(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Lone surrogates are replaced with the REPLACEMENT CHARACTER.
|
||||
*
|
||||
* The conversion is guaranteed to be complete if the length of aDest is
|
||||
* at least the length of aSource times three.
|
||||
*
|
||||
* The output is always valid UTF-8 ending on a scalar value boundary
|
||||
* even in the case of partial conversion.
|
||||
*
|
||||
* Returns the number of code units read and the number of code
|
||||
* units written.
|
||||
*/
|
||||
inline mozilla::Tuple<size_t, size_t> ConvertUTF16toUTF8Partial(
|
||||
mozilla::Span<const char16_t> aSource, mozilla::Span<char> aDest) {
|
||||
size_t srcLen = aSource.Length();
|
||||
size_t dstLen = aDest.Length();
|
||||
encoding_mem_convert_utf16_to_utf8_partial(aSource.Elements(), &srcLen,
|
||||
aDest.Elements(), &dstLen);
|
||||
return mozilla::MakeTuple(srcLen, dstLen);
|
||||
}
|
||||
|
||||
/**
|
||||
* Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
|
||||
*
|
||||
* The length of aDest must be at least one greater than the length of aSource.
|
||||
*
|
||||
* Returns the number of code units written.
|
||||
*/
|
||||
inline size_t ConvertUTF8toUTF16(mozilla::Span<const char> aSource,
|
||||
mozilla::Span<char16_t> aDest) {
|
||||
return encoding_mem_convert_utf8_to_utf16(
|
||||
aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
|
||||
}
|
||||
|
||||
inline size_t Distance(const nsReadingIterator<char16_t>& aStart,
|
||||
const nsReadingIterator<char16_t>& aEnd) {
|
||||
MOZ_ASSERT(aStart.get() <= aEnd.get());
|
||||
@@ -156,6 +59,9 @@ inline size_t Distance(const nsReadingIterator<char>& aStart,
|
||||
return static_cast<size_t>(aEnd.get() - aStart.get());
|
||||
}
|
||||
|
||||
// NOTE: Operations that don't need an operand to be an XPCOM string
|
||||
// are in mozilla/TextUtils.h and mozilla/Utf8.h.
|
||||
|
||||
// UTF-8 to UTF-16
|
||||
// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
|
||||
|
||||
@@ -473,116 +379,6 @@ char16_t* UTF8ToNewUnicode(const nsACString& aSource,
|
||||
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
|
||||
char16_t* aDest, uint32_t aLength);
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| contains only ASCII characters, that is,
|
||||
* characters in the range 0x00 through 0x7F, inclusive.
|
||||
*
|
||||
* @param aString a 16-bit wide string to scan
|
||||
*/
|
||||
inline bool IsASCII(mozilla::Span<const char16_t> aString) {
|
||||
return encoding_mem_is_basic_latin(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| contains only ASCII characters, that is,
|
||||
* characters in the range 0x00 through 0x7F, inclusive.
|
||||
*
|
||||
* @param aString a 8-bit wide string to scan
|
||||
*/
|
||||
inline bool IsASCII(mozilla::Span<const char> aString) {
|
||||
size_t length = aString.Length();
|
||||
const char* ptr = aString.Elements();
|
||||
// For short strings, avoid the function call, since the SIMD
|
||||
// code won't have a chance to kick in anyway.
|
||||
if (length < 16) {
|
||||
const uint8_t* uptr = reinterpret_cast<const uint8_t*>(ptr);
|
||||
uint8_t accu = 0;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
accu |= uptr[i];
|
||||
}
|
||||
return accu < 0x80U;
|
||||
}
|
||||
return encoding_mem_is_ascii(ptr, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| contains only Latin1 characters, that is,
|
||||
* characters in the range U+0000 through U+00FF, inclusive.
|
||||
*
|
||||
* @param aString a potentially-invalid UTF-16 string to scan
|
||||
*/
|
||||
inline bool IsUTF16Latin1(mozilla::Span<const char16_t> aString) {
|
||||
return encoding_mem_is_utf16_latin1(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| contains only Latin1 characters, that is,
|
||||
* characters in the range U+0000 through U+00FF, inclusive.
|
||||
*
|
||||
* If you know that the argument is always absolutely guaranteed to be valid
|
||||
* UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead.
|
||||
*
|
||||
* @param aString potentially-invalid UTF-8 string to scan
|
||||
*/
|
||||
inline bool IsUTF8Latin1(mozilla::Span<const char> aString) {
|
||||
return encoding_mem_is_utf8_latin1(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| contains only Latin1 characters, that is,
|
||||
* characters in the range U+0000 through U+00FF, inclusive.
|
||||
*
|
||||
* The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1
|
||||
* instead!
|
||||
*
|
||||
* @param aString known-valid UTF-8 string to scan
|
||||
*/
|
||||
inline bool UnsafeIsValidUTF8Latin1(mozilla::Span<const char> aString) {
|
||||
return encoding_mem_is_str_latin1(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns |true| if |aString| is a valid UTF-8 string.
|
||||
*
|
||||
* Note that this doesn't check whether the string might look like a valid
|
||||
* string in another encoding, too, e.g. ISO-2022-JP.
|
||||
*
|
||||
* @param aString an 8-bit wide string to scan
|
||||
*/
|
||||
inline bool IsUTF8(mozilla::Span<const char> aString) {
|
||||
size_t length = aString.Length();
|
||||
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
|
||||
// For short strings, the function call is a pessimization, and the SIMD
|
||||
// code won't have a chance to kick in anyway.
|
||||
if (length < 16) {
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
if (ptr[i] >= 0x80U) {
|
||||
ptr += i;
|
||||
length -= i;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
end:
|
||||
return length == encoding_utf8_valid_up_to(ptr, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the first unpaired surrogate or
|
||||
* the length of the string if there are none.
|
||||
*/
|
||||
inline uint32_t UTF16ValidUpTo(mozilla::Span<const char16_t> aString) {
|
||||
return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces unpaired surrogates with U+FFFD in the argument.
|
||||
*/
|
||||
inline void EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString) {
|
||||
encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces unpaired surrogates with U+FFFD in the argument.
|
||||
*
|
||||
@@ -590,7 +386,7 @@ inline void EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString) {
|
||||
* buffer only if there are unpaired surrogates.
|
||||
*/
|
||||
inline void EnsureUTF16Validity(nsAString& aString) {
|
||||
uint32_t upTo = UTF16ValidUpTo(aString);
|
||||
uint32_t upTo = mozilla::Utf16ValidUpTo(aString);
|
||||
uint32_t len = aString.Length();
|
||||
if (upTo == len) {
|
||||
return;
|
||||
@@ -598,7 +394,7 @@ inline void EnsureUTF16Validity(nsAString& aString) {
|
||||
char16_t* ptr = aString.BeginWriting();
|
||||
auto span = mozilla::MakeSpan(ptr, len);
|
||||
span[upTo] = 0xFFFD;
|
||||
EnsureUTF16ValiditySpan(span.From(upTo + 1));
|
||||
mozilla::EnsureUtf16ValiditySpan(span.From(upTo + 1));
|
||||
}
|
||||
|
||||
bool ParseString(const nsACString& aAstring, char aDelimiter,
|
||||
|
||||
@@ -12,7 +12,9 @@
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsCRTGlue.h"
|
||||
#include "mozilla/RefPtr.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Unused.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
#include "nsTArray.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH
|
||||
@@ -2025,56 +2027,56 @@ MOZ_GTEST_BENCH_F(Strings, PerfStripCharsCRLF, [this] {
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8One, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsUTF8(*BlackBox(&mAsciiOneUtf8));
|
||||
bool b = IsUtf8(*BlackBox(&mAsciiOneUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Fifteen, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsUTF8(*BlackBox(&mAsciiFifteenUtf8));
|
||||
bool b = IsUtf8(*BlackBox(&mAsciiFifteenUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Hundred, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsUTF8(*BlackBox(&mAsciiHundredUtf8));
|
||||
bool b = IsUtf8(*BlackBox(&mAsciiHundredUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsUTF8Example3, [this] {
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
bool b = IsUTF8(*BlackBox(&mExample3Utf8));
|
||||
bool b = IsUtf8(*BlackBox(&mExample3Utf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsASCII8One, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsASCII(*BlackBox(&mAsciiOneUtf8));
|
||||
bool b = IsAscii(*BlackBox(&mAsciiOneUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIFifteen, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsASCII(*BlackBox(&mAsciiFifteenUtf8));
|
||||
bool b = IsAscii(*BlackBox(&mAsciiFifteenUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIHundred, [this] {
|
||||
for (int i = 0; i < 200000; i++) {
|
||||
bool b = IsASCII(*BlackBox(&mAsciiHundredUtf8));
|
||||
bool b = IsAscii(*BlackBox(&mAsciiHundredUtf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
MOZ_GTEST_BENCH_F(Strings, PerfIsASCIIExample3, [this] {
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
bool b = IsASCII(*BlackBox(&mExample3Utf8));
|
||||
bool b = IsAscii(*BlackBox(&mExample3Utf8));
|
||||
BlackBox(&b);
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user