Bug 1806054 - pt 4. Add a jemalloc_stats_lite interface r=glandium

This new method collects less information and also does not need to lock
arenas to access their stats.  It may be used on any thread.

The thread-safety rules for main thread only arenas have become more complex,
so I've added a comment to explain what is safe.

Differential Revision: https://phabricator.services.mozilla.com/D225942
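
As a rough illustration (not part of this patch), a caller such as a profiler
or telemetry thread might use the new interface as sketched below. The include
is an assumption (mozmemory.h declares the other jemalloc_* entry points, so it
is presumably where this one lands too):

  #include <cstdio>
  #include "mozmemory.h"  // assumed: declares jemalloc_stats_lite()

  static void ReportHeapLite() {
    jemalloc_stats_lite_t stats;
    // Safe on any thread; does not take the per-arena locks.
    jemalloc_stats_lite(&stats);
    printf("heap: %zu bytes allocated, %zu malloc/free operations so far\n",
           stats.allocated_bytes, stats.num_operations);
  }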
Paul Bone
2025-01-08 03:15:54 +00:00
parent d3554fc22d
commit f2973aa9fb
6 changed files with 198 additions and 1 deletion

View File

@@ -1905,6 +1905,10 @@ inline void MozJemallocPHC::jemalloc_stats_internal(
aStats->bookkeeping += bookkeeping;
}
inline void MozJemallocPHC::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) {
MozJemalloc::jemalloc_stats_lite(aStats);
}
inline void MozJemallocPHC::jemalloc_ptr_info(const void* aPtr,
jemalloc_ptr_info_t* aInfo) {
if (!maybe_init()) {

View File

@@ -72,6 +72,10 @@ MALLOC_DECL(jemalloc_stats_internal, void, jemalloc_stats_t*,
// Return the size of the jemalloc_bin_stats_t array.
MALLOC_DECL(jemalloc_stats_num_bins, size_t)
// Return some of the information that jemalloc_stats returns, but this works
// off-main-thread and is faster.
MALLOC_DECL(jemalloc_stats_lite, void, jemalloc_stats_lite_t*)
// Tell jemalloc this is the main thread. jemalloc will use this to validate
// that main thread only arenas are only used on the main thread.
MALLOC_DECL(jemalloc_set_main_thread, void)

View File

@@ -703,6 +703,9 @@ struct arena_stats_t {
size_t allocated_small;
size_t allocated_large;
// The number of "memory operations" aka mallocs/frees.
size_t operations;
};
// ***************************************************************************
@@ -1151,13 +1154,26 @@ struct arena_t {
// and it keeps the value it had after the destructor.
arena_id_t mId;
// Operations on this arena require that lock be locked. The MaybeMutex
// class will elude locking if the arena is accessed from a single thread
// only (currently only the main thread can be used like this).
MaybeMutex mLock MOZ_UNANNOTATED;
// The lock is required to write to fields of mStats, but it is not needed to
// read them, so long as inconsistent reads are okay (fields might not make
// sense together).
arena_stats_t mStats MOZ_GUARDED_BY(mLock);
// We can read the allocated counts from mStats without a lock:
size_t AllocatedBytes() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
return mStats.allocated_small + mStats.allocated_large;
}
// We can read the operations field from mStats without a lock:
size_t Operations() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
return mStats.operations;
}
private:
// Tree of dirty-page-containing chunks this arena manages.
RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty
@@ -1519,6 +1535,10 @@ class ArenaCollection {
return Iterator(&mArenas, &mPrivateArenas);
}
Iterator iter_all() {
return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas);
}
inline arena_t* GetDefault() { return mDefaultArena; }
Mutex mLock MOZ_UNANNOTATED;
@@ -1603,6 +1623,7 @@ static RedBlackTree<extent_node_t, ExtentTreeTrait> huge
// Huge allocation statistics.
static size_t huge_allocated MOZ_GUARDED_BY(huge_mtx);
static size_t huge_mapped MOZ_GUARDED_BY(huge_mtx);
static size_t huge_operations MOZ_GUARDED_BY(huge_mtx);
// **************************
// base (internal allocation).
@@ -3904,6 +3925,7 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
}
mStats.allocated_small += aSize;
mStats.operations++;
}
if (!aZero) {
@@ -3928,6 +3950,7 @@ void* arena_t::MallocLarge(size_t aSize, bool aZero) {
return nullptr;
}
mStats.allocated_large += aSize;
mStats.operations++;
}
if (!aZero) {
@@ -3992,6 +4015,7 @@ void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
}
mStats.allocated_large += aSize;
mStats.operations++;
}
// Note that since Bug 1488780 we don't attempt to purge dirty memory on this code
@@ -4360,6 +4384,7 @@ arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
// the book-keeping overhead via measurements.
mStats.allocated_small -= size;
mStats.operations++;
return dealloc_chunk;
}
@@ -4370,6 +4395,7 @@ arena_chunk_t* arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) {
size_t size = aChunk->map[pageind].bits & ~gPageSizeMask;
mStats.allocated_large -= size;
mStats.operations++;
return DallocRun((arena_run_t*)aPtr, true);
}
@@ -4447,6 +4473,7 @@ void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
MaybeMutexAutoLock lock(mLock);
TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
mStats.allocated_large -= aOldSize - aSize;
mStats.operations++;
should_purge = mNumDirty > EffectiveMaxDirty();
}
@@ -4485,6 +4512,7 @@ bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
mStats.allocated_large += aSize - aOldSize;
mStats.operations++;
return true;
}
@@ -4749,6 +4777,7 @@ static void huge_init() MOZ_REQUIRES(gInitLock) {
huge.Init();
huge_allocated = 0;
huge_mapped = 0;
huge_operations = 0;
MOZ_POP_THREAD_SAFETY
}
@@ -4820,6 +4849,7 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) {
// reasonably claim we never "allocated" them in the first place.
huge_allocated += psize;
huge_mapped += csize;
huge_operations++;
}
pages_decommit((void*)((uintptr_t)ret + psize), csize - psize);
@@ -4855,6 +4885,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
MOZ_ASSERT(node->mSize == aOldSize);
MOZ_RELEASE_ASSERT(node->mArena == this);
huge_allocated -= aOldSize - psize;
huge_operations++;
// No need to change huge_mapped, because we didn't (un)map anything.
node->mSize = psize;
} else if (psize > aOldSize) {
@@ -4874,6 +4905,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
MOZ_ASSERT(node->mSize == aOldSize);
MOZ_RELEASE_ASSERT(node->mArena == this);
huge_allocated += psize - aOldSize;
huge_operations++;
// No need to change huge_mapped, because we didn't
// (un)map anything.
node->mSize = psize;
@@ -4926,6 +4958,7 @@ static void huge_dalloc(void* aPtr, arena_t* aArena) {
mapped = CHUNK_CEILING(node->mSize + gPageSize);
huge_allocated -= node->mSize;
huge_mapped -= mapped;
huge_operations++;
}
// Unmap chunk.
@@ -5332,6 +5365,7 @@ inline void MozJemalloc::jemalloc_stats_internal(
MutexAutoLock lock(huge_mtx);
non_arena_mapped += huge_mapped;
aStats->allocated += huge_allocated;
aStats->num_operations += huge_operations;
MOZ_ASSERT(huge_mapped >= huge_allocated);
}
@@ -5375,6 +5409,8 @@ inline void MozJemalloc::jemalloc_stats_internal(
arena_fresh = arena->mNumFresh << gPageSize2Pow;
arena_madvised = arena->mNumMAdvised << gPageSize2Pow;
aStats->num_operations += arena->mStats.operations;
for (j = 0; j < NUM_SMALL_CLASSES; j++) {
arena_bin_t* bin = &arena->mBins[j];
size_t bin_unused = 0;
@@ -5436,6 +5472,36 @@ inline void MozJemalloc::jemalloc_stats_internal(
aStats->pages_dirty + aStats->bookkeeping);
}
inline void MozJemalloc::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) {
if (!aStats) {
return;
}
if (!malloc_init()) {
memset(aStats, 0, sizeof(*aStats));
return;
}
aStats->allocated_bytes = 0;
aStats->num_operations = 0;
// Get huge mapped/allocated.
{
MutexAutoLock lock(huge_mtx);
aStats->allocated_bytes += huge_allocated;
aStats->num_operations += huge_operations;
MOZ_ASSERT(huge_mapped >= huge_allocated);
}
{
MutexAutoLock lock(gArenas.mLock);
for (auto arena : gArenas.iter_all()) {
// We don't need to lock the arena to access these fields.
aStats->allocated_bytes += arena->AllocatedBytes();
aStats->num_operations += arena->Operations();
}
}
}
inline size_t MozJemalloc::jemalloc_stats_num_bins() {
return NUM_SMALL_CLASSES;
}

View File

@@ -108,6 +108,8 @@ struct MozJemallocPHC : public MozJemalloc {
static void jemalloc_stats_internal(jemalloc_stats_t*, jemalloc_bin_stats_t*);
static void jemalloc_stats_lite(jemalloc_stats_lite_t*);
static void jemalloc_ptr_info(const void*, jemalloc_ptr_info_t*);
# define MALLOC_DECL(name, return_type, ...) \

View File

@@ -122,6 +122,13 @@ typedef struct {
size_t bookkeeping; // Committed bytes used internally by the
// allocator.
size_t bin_unused; // Bytes committed to a bin but currently unused.
size_t num_operations; // The number of malloc()+free() calls. Note that
// realloc calls count as 0, 1 or 2 operations depending on the internal
// operations they perform; exactly which cases (eg an in-place resize, a
// move, or a change of size class) count as how many operations is
// unspecified.
} jemalloc_stats_t;
typedef struct {
@@ -134,6 +141,18 @@ typedef struct {
size_t bytes_per_run; // The number of bytes per run, including headers.
} jemalloc_bin_stats_t;
// jemalloc_stats_lite() is not a stable interface. When using
// jemalloc_stats_lite_t, be sure that the compiled results of mozjemalloc.cpp
// are in sync with this header file.
typedef struct {
size_t allocated_bytes;
// The number of malloc()+free() calls. realloc calls count as 0, 1 or 2
// operations depending on whether they do nothing, resize in-place, or move
// the memory.
size_t num_operations;
} jemalloc_stats_lite_t;
enum PtrInfoTag {
// The pointer is not currently known to the allocator.
// 'addr', 'size', and 'arenaId' are always 0.
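
As a hedged sketch of how the struct above is meant to be consumed (not part of
this patch): take two snapshots and subtract. The fields are read without
per-arena locks, so the two counters may be mutually inconsistent and should
only be used for approximate accounting. DoSomeWork() is a hypothetical
workload.

  jemalloc_stats_lite_t before, after;
  jemalloc_stats_lite(&before);
  DoSomeWork();
  jemalloc_stats_lite(&after);
  // Number of malloc/free operations and net change in allocated bytes.
  size_t ops = after.num_operations - before.num_operations;
  ptrdiff_t delta = static_cast<ptrdiff_t>(after.allocated_bytes) -
                    static_cast<ptrdiff_t>(before.allocated_bytes);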

View File

@@ -764,3 +764,105 @@ TEST(Jemalloc, DisposeArena)
RESTORE_GDB_SLEEP_LOCAL();
}
static void CheckPtr(void* ptr, size_t size) {
EXPECT_TRUE(ptr);
jemalloc_ptr_info_t info;
jemalloc_ptr_info(ptr, &info);
EXPECT_EQ(info.tag, TagLiveAlloc);
EXPECT_EQ(info.size, malloc_good_size(size));
}
static void CheckStats(const char* operation, unsigned iteration,
jemalloc_stats_lite_t& baseline,
jemalloc_stats_lite_t& stats, size_t num_ops,
ptrdiff_t bytes_diff) {
if ((baseline.allocated_bytes + bytes_diff != stats.allocated_bytes
|| baseline.num_operations + num_ops != stats.num_operations)) {
// All the tests that check stats, perform some operation, then check stats
// again can race with other threads, but the test can't be made thread-safe
// without a significant amount of work. This IS a problem when stepping
// through the test using a debugger, since other threads are likely to run
// while the current thread is paused. Instead of needing a debugger, the
// printf here can help understand a failing test.
fprintf(stderr, "Check stats failed after iteration %u operation %s\n",
iteration, operation);
EXPECT_EQ(baseline.allocated_bytes + bytes_diff, stats.allocated_bytes);
EXPECT_EQ(baseline.num_operations + num_ops, stats.num_operations);
}
}
TEST(Jemalloc, StatsLite)
{
// Disable PHC allocations for this test, because even a single PHC
// allocation occurring can throw it off.
AutoDisablePHCOnCurrentThread disable;
// Use this data to make an allocation, resize it twice, then free it.
// The data uses a few size classes and does a combination of in-place and
// moving reallocations.
struct {
// The initial allocation size.
size_t initial;
// The first reallocation size and number of operations of the reallocation.
size_t next;
size_t next_ops;
// The final reallocation size and number of operations of the reallocation.
size_t last;
size_t last_ops;
} TestData[] = {
/* clang-format off */
{ 16, 15, 0, 256, 2},
{128_KiB, 64_KiB, 1, 68_KiB, 1},
{ 4_MiB, 16_MiB, 2, 3_MiB, 2},
{ 16_KiB, 512, 2, 32_MiB, 2},
/* clang-format on */
};
arena_id_t my_arena = moz_create_arena();
unsigned i = 0;
for (auto data : TestData) {
// Assert that the API returns /something/ a bit sensible.
jemalloc_stats_lite_t baseline;
jemalloc_stats_lite(&baseline);
// Allocate an object.
void* ptr = moz_arena_malloc(my_arena, data.initial);
CheckPtr(ptr, data.initial);
jemalloc_stats_lite_t stats1;
jemalloc_stats_lite(&stats1);
CheckStats("malloc()", i, baseline, stats1, 1,
malloc_good_size(data.initial));
// realloc the item in-place.
ptr = moz_arena_realloc(my_arena, ptr, data.next);
CheckPtr(ptr, data.next);
jemalloc_stats_lite_t stats2;
jemalloc_stats_lite(&stats2);
CheckStats("realloc() 1", i, stats1, stats2, data.next_ops,
malloc_good_size(data.next) - malloc_good_size(data.initial));
// realloc so it has to move to a different location
ptr = moz_arena_realloc(my_arena, ptr, data.last);
CheckPtr(ptr, data.last);
jemalloc_stats_lite_t stats3;
jemalloc_stats_lite(&stats3);
CheckStats("realloc() 2", i, stats2, stats3, data.last_ops,
malloc_good_size(data.last) - malloc_good_size(data.next));
moz_arena_free(my_arena, ptr);
jemalloc_stats_lite_t stats4;
jemalloc_stats_lite(&stats4);
CheckStats("free()", i, stats3, stats4, 1, -malloc_good_size(data.last));
i++;
}
moz_dispose_arena(my_arena);
}