Bug 1806054 - pt 4. Add a jemalloc_stats_lite interface r=glandium
This new method collects less information and does not need to lock arenas to access their stats, so it may be used on any thread. The thread-safety rules for main-thread-only arenas have become more complex; I've added a comment to explain what is safe.

Differential Revision: https://phabricator.services.mozilla.com/D225942
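For illustration only (not part of the patch): a minimal sketch of how a caller might poll the new interface from any thread. It assumes the declaration is visible the same way as the existing jemalloc_stats(), e.g. via mozmemory.h; the helper name and the logging are hypothetical.

#include <cstdio>

#include "mozmemory.h"  // assumed include, as for jemalloc_stats()

// Hypothetical helper: read the lightweight stats; safe on any thread.
static void LogAllocatorActivity() {
  jemalloc_stats_lite_t stats;
  jemalloc_stats_lite(&stats);

  // Unlike jemalloc_stats(), this does not take the per-arena locks, so the
  // two counters may be mutually inconsistent, but it is cheap enough to
  // call frequently.
  fprintf(stderr, "heap: %zu bytes allocated, %zu malloc/free operations\n",
          stats.allocated_bytes, stats.num_operations);
}

Because the per-arena counters are read without the arena lock, the fields may not be consistent with each other, which is acceptable for lightweight telemetry.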
@@ -1905,6 +1905,10 @@ inline void MozJemallocPHC::jemalloc_stats_internal(
aStats->bookkeeping += bookkeeping;
}

inline void MozJemallocPHC::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) {
MozJemalloc::jemalloc_stats_lite(aStats);
}

inline void MozJemallocPHC::jemalloc_ptr_info(const void* aPtr,
jemalloc_ptr_info_t* aInfo) {
if (!maybe_init()) {

@@ -72,6 +72,10 @@ MALLOC_DECL(jemalloc_stats_internal, void, jemalloc_stats_t*,
// Return the size of the jemalloc_bin_stats_t array.
MALLOC_DECL(jemalloc_stats_num_bins, size_t)

// Return some of the information that jemalloc_stats returns but works
// off-main-thread and is faster.
MALLOC_DECL(jemalloc_stats_lite, void, jemalloc_stats_lite_t*)

// Tell jemalloc this is the main thread. jemalloc will use this to validate
// that main thread only arenas are only used on the main thread.
MALLOC_DECL(jemalloc_set_main_thread, void)

@@ -703,6 +703,9 @@ struct arena_stats_t {
size_t allocated_small;

size_t allocated_large;

// The number of "memory operations" aka mallocs/frees.
size_t operations;
};

// ***************************************************************************

@@ -1151,13 +1154,26 @@ struct arena_t {
// and it keeps the value it had after the destructor.
arena_id_t mId;

// All operations on this arena require that lock be locked. The MaybeMutex
// Operations on this arena require that lock be locked. The MaybeMutex
// class will elude locking if the arena is accessed from a single thread
// only (currently only the main thread can be used like this).
MaybeMutex mLock MOZ_UNANNOTATED;

// The lock is required to write to fields of mStats, but it is not needed to
// read them, so long as inconsistent reads are okay (fields might not make
// sense together).
arena_stats_t mStats MOZ_GUARDED_BY(mLock);

// We can read the allocated counts from mStats without a lock:
size_t AllocatedBytes() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
return mStats.allocated_small + mStats.allocated_large;
}

// We can read the operations field from mStats without a lock:
size_t Operations() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
return mStats.operations;
}

private:
// Tree of dirty-page-containing chunks this arena manages.
RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty

@@ -1519,6 +1535,10 @@ class ArenaCollection {
return Iterator(&mArenas, &mPrivateArenas);
}

Iterator iter_all() {
return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas);
}

inline arena_t* GetDefault() { return mDefaultArena; }

Mutex mLock MOZ_UNANNOTATED;

@@ -1603,6 +1623,7 @@ static RedBlackTree<extent_node_t, ExtentTreeTrait> huge
// Huge allocation statistics.
static size_t huge_allocated MOZ_GUARDED_BY(huge_mtx);
static size_t huge_mapped MOZ_GUARDED_BY(huge_mtx);
static size_t huge_operations MOZ_GUARDED_BY(huge_mtx);

// **************************
// base (internal allocation).

@@ -3904,6 +3925,7 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
}

mStats.allocated_small += aSize;
mStats.operations++;
}

if (!aZero) {

@@ -3928,6 +3950,7 @@ void* arena_t::MallocLarge(size_t aSize, bool aZero) {
return nullptr;
}
mStats.allocated_large += aSize;
mStats.operations++;
}

if (!aZero) {

@@ -3992,6 +4015,7 @@ void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
}

mStats.allocated_large += aSize;
mStats.operations++;
}

// Note that since Bug 1488780 we don't attempt to purge dirty memory on this code

@@ -4360,6 +4384,7 @@ arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
// the book-keeping overhead via measurements.

mStats.allocated_small -= size;
mStats.operations++;

return dealloc_chunk;
}

@@ -4370,6 +4395,7 @@ arena_chunk_t* arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) {
size_t size = aChunk->map[pageind].bits & ~gPageSizeMask;

mStats.allocated_large -= size;
mStats.operations++;

return DallocRun((arena_run_t*)aPtr, true);
}

@@ -4447,6 +4473,7 @@ void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
MaybeMutexAutoLock lock(mLock);
TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
mStats.allocated_large -= aOldSize - aSize;
mStats.operations++;

should_purge = mNumDirty > EffectiveMaxDirty();
}

@@ -4485,6 +4512,7 @@ bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;

mStats.allocated_large += aSize - aOldSize;
mStats.operations++;
return true;
}

@@ -4749,6 +4777,7 @@ static void huge_init() MOZ_REQUIRES(gInitLock) {
huge.Init();
huge_allocated = 0;
huge_mapped = 0;
huge_operations = 0;
MOZ_POP_THREAD_SAFETY
}

@@ -4820,6 +4849,7 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) {
// reasonably claim we never "allocated" them in the first place.
huge_allocated += psize;
huge_mapped += csize;
huge_operations++;
}

pages_decommit((void*)((uintptr_t)ret + psize), csize - psize);

@@ -4855,6 +4885,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
MOZ_ASSERT(node->mSize == aOldSize);
MOZ_RELEASE_ASSERT(node->mArena == this);
huge_allocated -= aOldSize - psize;
huge_operations++;
// No need to change huge_mapped, because we didn't (un)map anything.
node->mSize = psize;
} else if (psize > aOldSize) {

@@ -4874,6 +4905,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
MOZ_ASSERT(node->mSize == aOldSize);
MOZ_RELEASE_ASSERT(node->mArena == this);
huge_allocated += psize - aOldSize;
huge_operations++;
// No need to change huge_mapped, because we didn't
// (un)map anything.
node->mSize = psize;

@@ -4926,6 +4958,7 @@ static void huge_dalloc(void* aPtr, arena_t* aArena) {
mapped = CHUNK_CEILING(node->mSize + gPageSize);
huge_allocated -= node->mSize;
huge_mapped -= mapped;
huge_operations++;
}

// Unmap chunk.

@@ -5332,6 +5365,7 @@ inline void MozJemalloc::jemalloc_stats_internal(
MutexAutoLock lock(huge_mtx);
non_arena_mapped += huge_mapped;
aStats->allocated += huge_allocated;
aStats->num_operations += huge_operations;
MOZ_ASSERT(huge_mapped >= huge_allocated);
}

@@ -5375,6 +5409,8 @@ inline void MozJemalloc::jemalloc_stats_internal(
arena_fresh = arena->mNumFresh << gPageSize2Pow;
arena_madvised = arena->mNumMAdvised << gPageSize2Pow;

aStats->num_operations += arena->mStats.operations;

for (j = 0; j < NUM_SMALL_CLASSES; j++) {
arena_bin_t* bin = &arena->mBins[j];
size_t bin_unused = 0;

@@ -5436,6 +5472,36 @@ inline void MozJemalloc::jemalloc_stats_internal(
aStats->pages_dirty + aStats->bookkeeping);
}

inline void MozJemalloc::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) {
if (!aStats) {
return;
}
if (!malloc_init()) {
memset(aStats, 0, sizeof(*aStats));
return;
}

aStats->allocated_bytes = 0;
aStats->num_operations = 0;

// Get huge mapped/allocated.
{
MutexAutoLock lock(huge_mtx);
aStats->allocated_bytes += huge_allocated;
aStats->num_operations += huge_operations;
MOZ_ASSERT(huge_mapped >= huge_allocated);
}

{
MutexAutoLock lock(gArenas.mLock);
for (auto arena : gArenas.iter_all()) {
// We don't need to lock the arena to access these fields.
aStats->allocated_bytes += arena->AllocatedBytes();
aStats->num_operations += arena->Operations();
}
}
}

inline size_t MozJemalloc::jemalloc_stats_num_bins() {
return NUM_SMALL_CLASSES;
}

@@ -108,6 +108,8 @@ struct MozJemallocPHC : public MozJemalloc {

static void jemalloc_stats_internal(jemalloc_stats_t*, jemalloc_bin_stats_t*);

static void jemalloc_stats_lite(jemalloc_stats_lite_t*);

static void jemalloc_ptr_info(const void*, jemalloc_ptr_info_t*);

# define MALLOC_DECL(name, return_type, ...) \

@@ -122,6 +122,13 @@ typedef struct {
size_t bookkeeping; // Committed bytes used internally by the
// allocator.
size_t bin_unused; // Bytes committed to a bin but currently unused.

size_t num_operations; // The number of malloc()+free() calls. Note that
// realloc calls count as 0, 1 or 2 operations depending on what they do
// internally; which cases (eg in-place vs move, or different size classes)
// count as how many operations is unspecified.
} jemalloc_stats_t;

typedef struct {

@@ -134,6 +141,18 @@ typedef struct {
size_t bytes_per_run; // The number of bytes per run, including headers.
} jemalloc_bin_stats_t;

// jemalloc_stats_lite() is not a stable interface. When using
// jemalloc_stats_lite_t, be sure that the compiled results of mozjemalloc.cpp
// are in sync with this header file.
typedef struct {
size_t allocated_bytes;

// The number of malloc()+free() calls. realloc calls count as 0, 1 or 2
// operations depending on whether they do nothing, resize in-place, or move
// the memory.
size_t num_operations;
} jemalloc_stats_lite_t;

enum PtrInfoTag {
// The pointer is not currently known to the allocator.
// 'addr', 'size', and 'arenaId' are always 0.

@@ -764,3 +764,105 @@ TEST(Jemalloc, DisposeArena)

RESTORE_GDB_SLEEP_LOCAL();
}

static void CheckPtr(void* ptr, size_t size) {
EXPECT_TRUE(ptr);
jemalloc_ptr_info_t info;
jemalloc_ptr_info(ptr, &info);
EXPECT_EQ(info.tag, TagLiveAlloc);
EXPECT_EQ(info.size, malloc_good_size(size));
}

static void CheckStats(const char* operation, unsigned iteration,
jemalloc_stats_lite_t& baseline,
jemalloc_stats_lite_t& stats, size_t num_ops,
ptrdiff_t bytes_diff) {
if ((baseline.allocated_bytes + bytes_diff != stats.allocated_bytes
|| baseline.num_operations + num_ops != stats.num_operations)) {
// All the tests that check stats, perform some operation, then check stats
// again can race with other threads, but the test can't be made thread-safe
// without a significant amount of work. This is especially a problem when
// stepping through the test using a debugger, since other threads are
// likely to run while the current thread is paused. Instead of needing a
// debugger, the fprintf here can help understand a failing test.
fprintf(stderr, "Check stats failed after iteration %u operation %s\n",
iteration, operation);

EXPECT_EQ(baseline.allocated_bytes + bytes_diff, stats.allocated_bytes);
EXPECT_EQ(baseline.num_operations + num_ops, stats.num_operations);
}
}

TEST(Jemalloc, StatsLite)
{
// Disable PHC allocations for this test, because even a single PHC
// allocation occurring can throw it off.
AutoDisablePHCOnCurrentThread disable;

// Use this data to make an allocation, resize it twice, then free it. The
// data uses a few size classes and does a combination of in-place and
// moving reallocations.
struct {
// The initial allocation size.
size_t initial;
// The first reallocation size and the number of operations it counts as.
size_t next;
size_t next_ops;
// The final reallocation size and the number of operations it counts as.
size_t last;
size_t last_ops;
} TestData[] = {
/* clang-format off */
{ 16, 15, 0, 256, 2},
{128_KiB, 64_KiB, 1, 68_KiB, 1},
{ 4_MiB, 16_MiB, 2, 3_MiB, 2},
{ 16_KiB, 512, 2, 32_MiB, 2},
/* clang-format on */
};

arena_id_t my_arena = moz_create_arena();

unsigned i = 0;
for (auto data : TestData) {
// Assert that the API returns /something/ a bit sensible.
jemalloc_stats_lite_t baseline;
jemalloc_stats_lite(&baseline);

// Allocate an object.
void* ptr = moz_arena_malloc(my_arena, data.initial);
CheckPtr(ptr, data.initial);

jemalloc_stats_lite_t stats1;
jemalloc_stats_lite(&stats1);
CheckStats("malloc()", i, baseline, stats1, 1,
malloc_good_size(data.initial));

// realloc the item; for some of the test data this is done in-place.
ptr = moz_arena_realloc(my_arena, ptr, data.next);
CheckPtr(ptr, data.next);

jemalloc_stats_lite_t stats2;
jemalloc_stats_lite(&stats2);
CheckStats("realloc() 1", i, stats1, stats2, data.next_ops,
malloc_good_size(data.next) - malloc_good_size(data.initial));

// realloc again; for some of the test data this moves the allocation.
ptr = moz_arena_realloc(my_arena, ptr, data.last);
CheckPtr(ptr, data.last);

jemalloc_stats_lite_t stats3;
jemalloc_stats_lite(&stats3);
CheckStats("realloc() 2", i, stats2, stats3, data.last_ops,
malloc_good_size(data.last) - malloc_good_size(data.next));

moz_arena_free(my_arena, ptr);
jemalloc_stats_lite_t stats4;
jemalloc_stats_lite(&stats4);
CheckStats("free()", i, stats3, stats4, 1, -malloc_good_size(data.last));

i++;
}

moz_dispose_arena(my_arena);
}