From 2ad061895d06393fd2298bed40feb1bdfb1c7b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bargull?= Date: Fri, 6 Dec 2024 10:58:22 +0000 Subject: [PATCH] Bug 1910084 - Part 3: Use StringChars for case conversion functions. r=jandem Add `StringChars::maybeRealloc` and `StringChars::toStringDontDeflate`, because case conversion functions require realloc support and may return static strings. Differential Revision: https://phabricator.services.mozilla.com/D230784 --- js/src/builtin/String.cpp | 47 +++---- js/src/jit-test/tests/basic/stringbuffer-4.js | 21 +++ js/src/vm/StringType.cpp | 123 ++++++++++++++++++ js/src/vm/StringType.h | 15 +++ mfbt/StringBuffer.h | 8 +- 5 files changed, 189 insertions(+), 25 deletions(-) diff --git a/js/src/builtin/String.cpp b/js/src/builtin/String.cpp index ce69a08d15cf..760abb046056 100644 --- a/js/src/builtin/String.cpp +++ b/js/src/builtin/String.cpp @@ -827,7 +827,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) { // Unlike toUpperCase, toLowerCase has the nice invariant that if the // input is a Latin-1 string, the output is also a Latin-1 string. - InlineCharBuffer newChars; + StringChars newChars(cx); const size_t length = str->length(); size_t resultLength; @@ -888,10 +888,10 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) { return nullptr; } - PodCopy(newChars.get(), chars, i); + PodCopy(newChars.data(nogc), chars, i); size_t readChars = - ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength); + ToLowerCaseImpl(newChars.data(nogc), chars, i, length, resultLength); if constexpr (!std::is_same_v) { if (readChars < length) { resultLength = ToLowerCaseLength(chars, readChars, length); @@ -900,7 +900,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) { return nullptr; } - MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars, + MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.data(nogc), chars, readChars, length, resultLength)); } @@ -910,7 +910,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) { } } - return newChars.toStringDontDeflate(cx, resultLength); + return newChars.template toStringDontDeflate(cx, resultLength); } JSLinearString* js::StringToLowerCase(JSContext* cx, JSString* string) { @@ -1184,21 +1184,22 @@ static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex, } template -static inline bool ToUpperCase(JSContext* cx, - InlineCharBuffer& newChars, +static inline bool ToUpperCase(JSContext* cx, StringChars& newChars, const SrcChar* chars, size_t startIndex, size_t length, size_t* resultLength) { MOZ_ASSERT(startIndex < length); + AutoCheckCannotGC nogc; + *resultLength = length; if (!newChars.maybeAlloc(cx, length)) { return false; } - CopyChars(newChars.get(), chars, startIndex); + CopyChars(newChars.data(nogc), chars, startIndex); size_t readChars = - ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length); + ToUpperCaseImpl(newChars.data(nogc), chars, startIndex, length, length); if (readChars < length) { size_t actualLength = ToUpperCaseLength(chars, readChars, length); @@ -1207,8 +1208,8 @@ static inline bool ToUpperCase(JSContext* cx, return false; } - MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars, - length, actualLength)); + MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.data(nogc), chars, + readChars, length, actualLength)); } return true; @@ -1216,10 +1217,10 @@ static inline bool ToUpperCase(JSContext* cx, template static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) { - using Latin1Buffer = InlineCharBuffer; - using TwoByteBuffer = InlineCharBuffer; + using Latin1StringChars = StringChars; + using TwoByteStringChars = StringChars; - mozilla::MaybeOneOf newChars; + mozilla::MaybeOneOf newChars; const size_t length = str->length(); size_t resultLength; { @@ -1298,24 +1299,24 @@ static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) { }); if (resultIsLatin1) { - newChars.construct(); + newChars.construct(cx); - if (!ToUpperCase(cx, newChars.ref(), chars, i, length, - &resultLength)) { + if (!ToUpperCase(cx, newChars.ref(), chars, i, + length, &resultLength)) { return nullptr; } } else { - newChars.construct(); + newChars.construct(cx); - if (!ToUpperCase(cx, newChars.ref(), chars, i, length, - &resultLength)) { + if (!ToUpperCase(cx, newChars.ref(), chars, i, + length, &resultLength)) { return nullptr; } } } else { - newChars.construct(); + newChars.construct(cx); - if (!ToUpperCase(cx, newChars.ref(), chars, i, length, + if (!ToUpperCase(cx, newChars.ref(), chars, i, length, &resultLength)) { return nullptr; } @@ -1323,7 +1324,7 @@ static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) { } auto toString = [&](auto& chars) { - return chars.toStringDontDeflate(cx, resultLength); + return chars.template toStringDontDeflate(cx, resultLength); }; return newChars.mapNonEmpty(toString); diff --git a/js/src/jit-test/tests/basic/stringbuffer-4.js b/js/src/jit-test/tests/basic/stringbuffer-4.js index f8a51920f4d3..c0a25a75b057 100644 --- a/js/src/jit-test/tests/basic/stringbuffer-4.js +++ b/js/src/jit-test/tests/basic/stringbuffer-4.js @@ -33,3 +33,24 @@ function testJoin() { assertEq(repr.bufferRefCount, 1); } testJoin(); + +// Strings created by StringChars. +function testLowerCase() { + var str = "A".repeat(2000).toLowerCase(); + var repr = JSON.parse(stringRepresentation(str)); + assertEq(repr.flags.includes("HAS_STRING_BUFFER_BIT"), true); + assertEq(repr.bufferRefCount, 1); +} +testLowerCase(); + +function testUpperCase(N) { + // Use ß to cover reallocation. The initial buffer has the same size as the + // input string, but because Upper(ß) = SS, the final buffer has twice the + // size. + var str = "ß".repeat(N).toUpperCase(); + var repr = JSON.parse(stringRepresentation(str)); + assertEq(repr.flags.includes("HAS_STRING_BUFFER_BIT"), true); + assertEq(repr.bufferRefCount, 1); +} +testUpperCase(1000); // Initial allocation not a string buffer. +testUpperCase(2000); // Reallocate string buffer. diff --git a/js/src/vm/StringType.cpp b/js/src/vm/StringType.cpp index 329db8f529ac..484cd4a8e2f5 100644 --- a/js/src/vm/StringType.cpp +++ b/js/src/vm/StringType.cpp @@ -2867,6 +2867,129 @@ template bool js::StringChars::maybeAlloc(JSContext*, size_t, template bool js::StringChars::maybeAlloc(JSContext*, size_t, gc::Heap); +template +bool js::StringChars::maybeRealloc(JSContext* cx, size_t oldLength, + size_t newLength, gc::Heap heap) { + assertValidRequest(oldLength, newLength); + + // Nothing to do if new length still fits into inline storage. + if (JSInlineString::lengthFits(newLength)) { + return true; + } + + if (MOZ_UNLIKELY(!JSString::validateLength(cx, newLength))) { + return false; + } + + // Unused |ownedChars_| means we were previously using inlining storage. + if (!ownedChars_) { + auto chars = AllocChars(cx, newLength, heap); + if (!chars) { + return false; + } + std::memcpy(chars.data(), inlineChars_, InlineLength); + + ownedChars_.set(std::move(chars)); + return true; + } + + // Use |realloc| for malloc'ed characters. + if (ownedChars_.isMalloced()) { + CharT* oldChars = ownedChars_.release(); + CharT* newChars = cx->pod_arena_realloc(js::StringBufferArena, oldChars, + oldLength, newLength); + if (!newChars) { + js_free(oldChars); + return false; + } + + using Kind = typename JSString::OwnedChars::Kind; + ownedChars_.set({newChars, newLength, Kind::Malloc}); + return true; + } + + // Use |StringBuffer::Realloc| for string buffers. + if (ownedChars_.hasStringBuffer()) { + static_assert( + mozilla::StringBuffer::IsValidLength(JSString::MAX_LENGTH), + "JSString length must be valid for StringBuffer"); + + auto* oldBuffer = mozilla::StringBuffer::FromData(ownedChars_.release()); + + // Note: StringBuffers must be null-terminated. + auto* newBuffer = mozilla::StringBuffer::Realloc( + oldBuffer, (newLength + 1) * sizeof(CharT), + mozilla::Some(js::StringBufferArena)); + if (!newBuffer) { + oldBuffer->Release(); + ReportOutOfMemory(cx); + return false; + } + auto* newChars = static_cast(newBuffer->Data()); + newChars[newLength] = '\0'; + + using Kind = typename JSString::OwnedChars::Kind; + ownedChars_.set({newChars, newLength, Kind::StringBuffer}); + + MOZ_ASSERT(newBuffer->RefCount() == 1); + return true; + } + + // Keep the previous nursery allocated chars alive. + Rooted> oldOwnedChars( + cx, std::move(ownedChars_.get())); + + // Nursery allocated characters can't be reallocated, so perform a new + // allocation instead. + auto chars = AllocChars(cx, newLength, heap); + if (!chars) { + return false; + } + mozilla::PodCopy(chars.data(), oldOwnedChars.data(), oldLength); + + ownedChars_.set(std::move(chars)); + return true; +} + +template bool js::StringChars::maybeRealloc(JSContext*, size_t, + size_t, gc::Heap); +template bool js::StringChars::maybeRealloc(JSContext*, size_t, + size_t, gc::Heap); + +template +template +JSLinearString* js::StringChars::toStringDontDeflate(JSContext* cx, + size_t length, + gc::Heap heap) { + MOZ_ASSERT(length == lastRequestedLength_); + + if (JSInlineString::lengthFits(length)) { + MOZ_ASSERT(!ownedChars_, + "unexpected OwnedChars allocation for inline strings"); + if (auto* str = TryEmptyOrStaticString(cx, inlineChars_, length)) { + return str; + } + return NewInlineString(cx, inlineChars_, length, heap); + } + + MOZ_ASSERT(ownedChars_, + "missing OwnedChars allocation for non-inline strings"); + MOZ_ASSERT(length == ownedChars_.length(), + "requested length doesn't match allocation"); + return JSLinearString::newValidLength(cx, &ownedChars_, heap); +} + +template JSLinearString* +js::StringChars::toStringDontDeflate(JSContext*, size_t, + gc::Heap); +template JSLinearString* js::StringChars::toStringDontDeflate( + JSContext*, size_t, gc::Heap); +template JSLinearString* +js::StringChars::toStringDontDeflate(JSContext*, size_t, + gc::Heap); +template JSLinearString* js::StringChars::toStringDontDeflate( + JSContext*, size_t, gc::Heap); + template template JSLinearString* js::StringChars::toStringDontDeflateNonStatic( diff --git a/js/src/vm/StringType.h b/js/src/vm/StringType.h index cb9cb2daea1d..5e84e0a47ade 100644 --- a/js/src/vm/StringType.h +++ b/js/src/vm/StringType.h @@ -2099,6 +2099,21 @@ class MOZ_NON_PARAM StringChars { bool maybeAlloc(JSContext* cx, size_t length, gc::Heap heap = gc::Heap::Default); + /** + * Increase the string characters storage. Allocates iff `newLength` exceeds + * the inline storage of this class. + */ + bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength, + gc::Heap heap = gc::Heap::Default); + + /** + * Build the result string. Does not deflate two-byte characters if all + * characters fit into Latin-1. + */ + template + JSLinearString* toStringDontDeflate(JSContext* cx, size_t length, + gc::Heap heap = gc::Heap::Default); + /** * Build the result string. Does not deflate two-byte characters if all * characters fit into Latin-1. And does not check static strings. diff --git a/mfbt/StringBuffer.h b/mfbt/StringBuffer.h index 448ad6600d1d..2ba42627eae4 100644 --- a/mfbt/StringBuffer.h +++ b/mfbt/StringBuffer.h @@ -131,7 +131,9 @@ class StringBuffer { * * @see IsReadonly */ - static StringBuffer* Realloc(StringBuffer* aHdr, size_t aSize) { + static StringBuffer* Realloc( + StringBuffer* aHdr, size_t aSize, + mozilla::Maybe aArena = mozilla::Nothing()) { MOZ_ASSERT(aSize != 0, "zero capacity allocation not allowed"); MOZ_ASSERT(sizeof(StringBuffer) + aSize <= size_t(uint32_t(-1)) && sizeof(StringBuffer) + aSize > aSize, @@ -148,7 +150,9 @@ class StringBuffer { logger.logRelease(0); } - aHdr = (StringBuffer*)realloc(aHdr, sizeof(StringBuffer) + aSize); + size_t bytes = sizeof(StringBuffer) + aSize; + aHdr = aArena ? (StringBuffer*)moz_arena_realloc(*aArena, aHdr, bytes) + : (StringBuffer*)realloc(aHdr, bytes); if (aHdr) { detail::RefCountLogger::logAddRef(aHdr, 1); aHdr->mStorageSize = aSize;