Bug 1910084 - Part 3: Use StringChars for case conversion functions. r=jandem

Add `StringChars::maybeRealloc` and `StringChars::toStringDontDeflate`, because
case conversion functions require realloc support and may return static strings.

Differential Revision: https://phabricator.services.mozilla.com/D230784
This commit is contained in:
André Bargull
2024-12-06 10:58:22 +00:00
parent 965dc3a625
commit 2ad061895d
5 changed files with 189 additions and 25 deletions

View File

@@ -827,7 +827,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) {
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
// input is a Latin-1 string, the output is also a Latin-1 string.
InlineCharBuffer<CharT> newChars;
StringChars<CharT> newChars(cx);
const size_t length = str->length();
size_t resultLength;
@@ -888,10 +888,10 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) {
return nullptr;
}
PodCopy(newChars.get(), chars, i);
PodCopy(newChars.data(nogc), chars, i);
size_t readChars =
ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
ToLowerCaseImpl(newChars.data(nogc), chars, i, length, resultLength);
if constexpr (!std::is_same_v<CharT, Latin1Char>) {
if (readChars < length) {
resultLength = ToLowerCaseLength(chars, readChars, length);
@@ -900,7 +900,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) {
return nullptr;
}
MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.data(nogc), chars,
readChars, length,
resultLength));
}
@@ -910,7 +910,7 @@ static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) {
}
}
return newChars.toStringDontDeflate(cx, resultLength);
return newChars.template toStringDontDeflate<CanGC>(cx, resultLength);
}
JSLinearString* js::StringToLowerCase(JSContext* cx, JSString* string) {
@@ -1184,21 +1184,22 @@ static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
}
template <typename DestChar, typename SrcChar>
static inline bool ToUpperCase(JSContext* cx,
InlineCharBuffer<DestChar>& newChars,
static inline bool ToUpperCase(JSContext* cx, StringChars<DestChar>& newChars,
const SrcChar* chars, size_t startIndex,
size_t length, size_t* resultLength) {
MOZ_ASSERT(startIndex < length);
AutoCheckCannotGC nogc;
*resultLength = length;
if (!newChars.maybeAlloc(cx, length)) {
return false;
}
CopyChars(newChars.get(), chars, startIndex);
CopyChars(newChars.data(nogc), chars, startIndex);
size_t readChars =
ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
ToUpperCaseImpl(newChars.data(nogc), chars, startIndex, length, length);
if (readChars < length) {
size_t actualLength = ToUpperCaseLength(chars, readChars, length);
@@ -1207,8 +1208,8 @@ static inline bool ToUpperCase(JSContext* cx,
return false;
}
MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
length, actualLength));
MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.data(nogc), chars,
readChars, length, actualLength));
}
return true;
@@ -1216,10 +1217,10 @@ static inline bool ToUpperCase(JSContext* cx,
template <typename CharT>
static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) {
using Latin1Buffer = InlineCharBuffer<Latin1Char>;
using TwoByteBuffer = InlineCharBuffer<char16_t>;
using Latin1StringChars = StringChars<Latin1Char>;
using TwoByteStringChars = StringChars<char16_t>;
mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
mozilla::MaybeOneOf<Latin1StringChars, TwoByteStringChars> newChars;
const size_t length = str->length();
size_t resultLength;
{
@@ -1298,24 +1299,24 @@ static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) {
});
if (resultIsLatin1) {
newChars.construct<Latin1Buffer>();
newChars.construct<Latin1StringChars>(cx);
if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
&resultLength)) {
if (!ToUpperCase(cx, newChars.ref<Latin1StringChars>(), chars, i,
length, &resultLength)) {
return nullptr;
}
} else {
newChars.construct<TwoByteBuffer>();
newChars.construct<TwoByteStringChars>(cx);
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
&resultLength)) {
if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i,
length, &resultLength)) {
return nullptr;
}
}
} else {
newChars.construct<TwoByteBuffer>();
newChars.construct<TwoByteStringChars>(cx);
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i, length,
&resultLength)) {
return nullptr;
}
@@ -1323,7 +1324,7 @@ static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) {
}
auto toString = [&](auto& chars) {
return chars.toStringDontDeflate(cx, resultLength);
return chars.template toStringDontDeflate<CanGC>(cx, resultLength);
};
return newChars.mapNonEmpty(toString);

View File

@@ -33,3 +33,24 @@ function testJoin() {
assertEq(repr.bufferRefCount, 1);
}
testJoin();
// Strings created by StringChars.
function testLowerCase() {
var str = "A".repeat(2000).toLowerCase();
var repr = JSON.parse(stringRepresentation(str));
assertEq(repr.flags.includes("HAS_STRING_BUFFER_BIT"), true);
assertEq(repr.bufferRefCount, 1);
}
testLowerCase();
function testUpperCase(N) {
// Use ß to cover reallocation. The initial buffer has the same size as the
// input string, but because Upper(ß) = SS, the final buffer has twice the
// size.
var str = "ß".repeat(N).toUpperCase();
var repr = JSON.parse(stringRepresentation(str));
assertEq(repr.flags.includes("HAS_STRING_BUFFER_BIT"), true);
assertEq(repr.bufferRefCount, 1);
}
testUpperCase(1000); // Initial allocation not a string buffer.
testUpperCase(2000); // Reallocate string buffer.

View File

@@ -2867,6 +2867,129 @@ template bool js::StringChars<JS::Latin1Char>::maybeAlloc(JSContext*, size_t,
template bool js::StringChars<char16_t>::maybeAlloc(JSContext*, size_t,
gc::Heap);
template <typename CharT>
bool js::StringChars<CharT>::maybeRealloc(JSContext* cx, size_t oldLength,
size_t newLength, gc::Heap heap) {
assertValidRequest(oldLength, newLength);
// Nothing to do if new length still fits into inline storage.
if (JSInlineString::lengthFits<CharT>(newLength)) {
return true;
}
if (MOZ_UNLIKELY(!JSString::validateLength(cx, newLength))) {
return false;
}
// Unused |ownedChars_| means we were previously using inlining storage.
if (!ownedChars_) {
auto chars = AllocChars<CharT>(cx, newLength, heap);
if (!chars) {
return false;
}
std::memcpy(chars.data(), inlineChars_, InlineLength);
ownedChars_.set(std::move(chars));
return true;
}
// Use |realloc| for malloc'ed characters.
if (ownedChars_.isMalloced()) {
CharT* oldChars = ownedChars_.release();
CharT* newChars = cx->pod_arena_realloc(js::StringBufferArena, oldChars,
oldLength, newLength);
if (!newChars) {
js_free(oldChars);
return false;
}
using Kind = typename JSString::OwnedChars<CharT>::Kind;
ownedChars_.set({newChars, newLength, Kind::Malloc});
return true;
}
// Use |StringBuffer::Realloc| for string buffers.
if (ownedChars_.hasStringBuffer()) {
static_assert(
mozilla::StringBuffer::IsValidLength<CharT>(JSString::MAX_LENGTH),
"JSString length must be valid for StringBuffer");
auto* oldBuffer = mozilla::StringBuffer::FromData(ownedChars_.release());
// Note: StringBuffers must be null-terminated.
auto* newBuffer = mozilla::StringBuffer::Realloc(
oldBuffer, (newLength + 1) * sizeof(CharT),
mozilla::Some(js::StringBufferArena));
if (!newBuffer) {
oldBuffer->Release();
ReportOutOfMemory(cx);
return false;
}
auto* newChars = static_cast<CharT*>(newBuffer->Data());
newChars[newLength] = '\0';
using Kind = typename JSString::OwnedChars<CharT>::Kind;
ownedChars_.set({newChars, newLength, Kind::StringBuffer});
MOZ_ASSERT(newBuffer->RefCount() == 1);
return true;
}
// Keep the previous nursery allocated chars alive.
Rooted<JSString::OwnedChars<CharT>> oldOwnedChars(
cx, std::move(ownedChars_.get()));
// Nursery allocated characters can't be reallocated, so perform a new
// allocation instead.
auto chars = AllocChars<CharT>(cx, newLength, heap);
if (!chars) {
return false;
}
mozilla::PodCopy(chars.data(), oldOwnedChars.data(), oldLength);
ownedChars_.set(std::move(chars));
return true;
}
template bool js::StringChars<JS::Latin1Char>::maybeRealloc(JSContext*, size_t,
size_t, gc::Heap);
template bool js::StringChars<char16_t>::maybeRealloc(JSContext*, size_t,
size_t, gc::Heap);
template <typename CharT>
template <AllowGC allowGC>
JSLinearString* js::StringChars<CharT>::toStringDontDeflate(JSContext* cx,
size_t length,
gc::Heap heap) {
MOZ_ASSERT(length == lastRequestedLength_);
if (JSInlineString::lengthFits<CharT>(length)) {
MOZ_ASSERT(!ownedChars_,
"unexpected OwnedChars allocation for inline strings");
if (auto* str = TryEmptyOrStaticString(cx, inlineChars_, length)) {
return str;
}
return NewInlineString<CanGC>(cx, inlineChars_, length, heap);
}
MOZ_ASSERT(ownedChars_,
"missing OwnedChars allocation for non-inline strings");
MOZ_ASSERT(length == ownedChars_.length(),
"requested length doesn't match allocation");
return JSLinearString::newValidLength<allowGC, CharT>(cx, &ownedChars_, heap);
}
template JSLinearString*
js::StringChars<JS::Latin1Char>::toStringDontDeflate<CanGC>(JSContext*, size_t,
gc::Heap);
template JSLinearString* js::StringChars<char16_t>::toStringDontDeflate<CanGC>(
JSContext*, size_t, gc::Heap);
template JSLinearString*
js::StringChars<JS::Latin1Char>::toStringDontDeflate<NoGC>(JSContext*, size_t,
gc::Heap);
template JSLinearString* js::StringChars<char16_t>::toStringDontDeflate<NoGC>(
JSContext*, size_t, gc::Heap);
template <typename CharT>
template <AllowGC allowGC>
JSLinearString* js::StringChars<CharT>::toStringDontDeflateNonStatic(

View File

@@ -2099,6 +2099,21 @@ class MOZ_NON_PARAM StringChars {
bool maybeAlloc(JSContext* cx, size_t length,
gc::Heap heap = gc::Heap::Default);
/**
* Increase the string characters storage. Allocates iff `newLength` exceeds
* the inline storage of this class.
*/
bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength,
gc::Heap heap = gc::Heap::Default);
/**
* Build the result string. Does not deflate two-byte characters if all
* characters fit into Latin-1.
*/
template <AllowGC allowGC>
JSLinearString* toStringDontDeflate(JSContext* cx, size_t length,
gc::Heap heap = gc::Heap::Default);
/**
* Build the result string. Does not deflate two-byte characters if all
* characters fit into Latin-1. And does not check static strings.

View File

@@ -131,7 +131,9 @@ class StringBuffer {
*
* @see IsReadonly
*/
static StringBuffer* Realloc(StringBuffer* aHdr, size_t aSize) {
static StringBuffer* Realloc(
StringBuffer* aHdr, size_t aSize,
mozilla::Maybe<arena_id_t> aArena = mozilla::Nothing()) {
MOZ_ASSERT(aSize != 0, "zero capacity allocation not allowed");
MOZ_ASSERT(sizeof(StringBuffer) + aSize <= size_t(uint32_t(-1)) &&
sizeof(StringBuffer) + aSize > aSize,
@@ -148,7 +150,9 @@ class StringBuffer {
logger.logRelease(0);
}
aHdr = (StringBuffer*)realloc(aHdr, sizeof(StringBuffer) + aSize);
size_t bytes = sizeof(StringBuffer) + aSize;
aHdr = aArena ? (StringBuffer*)moz_arena_realloc(*aArena, aHdr, bytes)
: (StringBuffer*)realloc(aHdr, bytes);
if (aHdr) {
detail::RefCountLogger::logAddRef(aHdr, 1);
aHdr->mStorageSize = aSize;