On 64-bit Windows (x86_64, aarch64), stack walking relies on RtlLookupFunctionEntry to navigate from one frame to the next. This function acquires up to two ntdll internal locks when it is called. The profiler and the background hang monitor both need to walk the stacks of suspended threads. This can lead to deadlock situations, which so far we have avoided with stack walk suppressions. We guard some critical paths to mark them as suppressing stack walk, and we forbid stack walking when any thread is currently on such a path. While stack walk suppression has helped remove most deadlock situations, some can remain because it is hard to detect and manually annotate all the paths that could lead to a deadlock situation. Another drawback is that stack walk suppression disables stack walking for much larger portions of code than required. For example, we disable stack walking for LdrLoadDll, so we cannot collect stacks while we are loading a DLL. Yet, the lock that could lead to a deadlock situation is only held during a very small portion of the whole time spent in LdrLoadDll. This patch addresses these two issues by implementing a finer-grained strategy to avoid deadlock situations. We acquire the pointers to the internal ntdll locks through a single-stepped execution of RtlLookupFunctionEntry. This allows us to try to acquire the locks non-blockingly so that we can guarantee safe stack walking with no deadlock. If we fail to collect pointers to the locks, we fall back to using stack walk suppressions like before. This way we get the best of both worlds: if we are confident that the situation is under control, we will use the new strategy and get better profiler accuracy and no deadlock; in case of doubt, we can still use the profiler thanks to stack walk suppressions. Differential Revision: https://phabricator.services.mozilla.com/D223498
253 lines
8.6 KiB
C++
253 lines
8.6 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "mozilla/WindowsStackWalkInitialization.h"
|
|
|
|
#include "nsWindowsDllInterceptor.h"
|
|
#include "mozilla/NativeNt.h"
|
|
#include "mozilla/StackWalk_windows.h"
|
|
#include "mozilla/WindowsDiagnostics.h"
|
|
|
|
namespace mozilla {
|
|
|
|
#if defined(_M_AMD64) || defined(_M_ARM64)
|
|
// Interceptor used by WindowsStackWalkInitialization() to hook ntdll.dll
// exports when we fall back to stack walk suppressions.
static WindowsDllInterceptor NtDllIntercept;
|
|
|
|
typedef NTSTATUS(NTAPI* LdrUnloadDll_func)(HMODULE module);
|
|
static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll;
|
|
|
|
static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) {
|
|
// Prevent the stack walker from suspending this thread when LdrUnloadDll
|
|
// holds the RtlLookupFunctionEntry lock.
|
|
AutoSuppressStackWalking suppress;
|
|
return stub_LdrUnloadDll(module);
|
|
}
|
|
|
|
// These pointers are disguised as PVOID to avoid pulling in obscure headers
|
|
typedef PVOID(WINAPI* LdrResolveDelayLoadedAPI_func)(
|
|
PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
|
|
PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags);
|
|
static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func>
|
|
stub_LdrResolveDelayLoadedAPI;
|
|
|
|
static PVOID WINAPI patched_LdrResolveDelayLoadedAPI(
|
|
PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
|
|
PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) {
|
|
// Prevent the stack walker from suspending this thread when
|
|
// LdrResolveDelayLoadAPI holds the RtlLookupFunctionEntry lock.
|
|
AutoSuppressStackWalking suppress;
|
|
return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor,
|
|
FailureDllHook, FailureSystemHook,
|
|
ThunkAddress, Flags);
|
|
}
|
|
|
|
void WindowsStackWalkInitialization() {
|
|
// This function could be called by both profilers, but we only want to run
|
|
// it once.
|
|
static bool ran = false;
|
|
if (ran) {
|
|
return;
|
|
}
|
|
ran = true;
|
|
|
|
// Attempt to initialize strategy (1) for avoiding deadlocks. See comments in
|
|
// StackWalk.cpp near InitializeStackWalkLocks().
|
|
Array<void*, 2> stackWalkLocks;
|
|
if (CollectStackWalkLocks(stackWalkLocks)) {
|
|
bool locksArePlausible = ValidateStackWalkLocks(stackWalkLocks);
|
|
|
|
// If this crashes then most likely our lock collection code is broken.
|
|
MOZ_ASSERT(locksArePlausible);
|
|
|
|
if (locksArePlausible) {
|
|
InitializeStackWalkLocks(stackWalkLocks);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Strategy (2): We will rely on stack walk suppressions. We use hooking
|
|
// to install stack walk suppression on specific Windows calls which are
|
|
// known to acquire the locks exclusively. Some of these calls, e.g.
|
|
// LdrLoadDll, are already hooked by other parts of our code base; in this
|
|
// case the stack walk suppressions are already added there directly.
|
|
NtDllIntercept.Init("ntdll.dll");
|
|
stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll);
|
|
stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept, "LdrResolveDelayLoadedAPI",
|
|
&patched_LdrResolveDelayLoadedAPI);
|
|
}
|
|
|
|
[[clang::optnone]] void UnoptimizedLookup() {
|
|
DWORD64 imageBase;
|
|
::RtlLookupFunctionEntry(0, &imageBase, nullptr);
|
|
}
|
|
|
|
// Tries to discover the locks used by RtlLookupFunctionEntry by running a
// call to it under single-stepping and inspecting every executed instruction.
//
// On success, returns true and stores the two collected lock pointers in
// aStackWalkLocks. Returns false, leaving aStackWalkLocks untouched, if
// single-stepping reported an error, if the number of distinct locks seen is
// not exactly two, or on architectures other than x86-64.
MFBT_API
bool CollectStackWalkLocks(Array<void*, 2>& aStackWalkLocks) {
  // Strategy (1) can currently only be enabled on x86-64, because
  // WindowsDiagnostics.h does not implement single-stepping for arm64.
#  if defined(_M_AMD64)
  // State handed to the single-step callback through its opaque pointer.
  struct LockCollectionData {
    Array<void*, 2> mCollectedLocks;
    int mCollectedLocksCount;
    DebugOnly<bool> mLookupCalled;
  };

  LockCollectionData collection{};

  // Single-step through a call to RtlLookupFunctionEntry and watch for the
  // calls to RtlAcquireSRWLockShared and RtlReleaseSRWLockShared.
  const WindowsDiagnosticsError stepError = CollectSingleStepData(
      UnoptimizedLookup,
      [](void* aState, CONTEXT* aContext) {
        auto* const state = reinterpret_cast<LockCollectionData*>(aState);

#    ifdef DEBUG
        // Remember that execution really entered RtlLookupFunctionEntry.
        if (reinterpret_cast<DWORD64>(::RtlLookupFunctionEntry) ==
            aContext->Rip) {
          state->mLookupCalled = true;
        }
#    endif

        void* const candidate = ExtractLockFromCurrentCpuContext(aContext);
        if (!candidate) {
          // Not a lock operation; keep single-stepping.
          return true;
        }

        for (void* const knownLock : state->mCollectedLocks) {
          if (knownLock == candidate) {
            // Already recorded; keep single-stepping.
            return true;
          }
        }

        // Count every distinct lock, saturating rather than overflowing, but
        // only store the first two.
        using CountType = decltype(state->mCollectedLocksCount);
        if (state->mCollectedLocksCount <
            std::numeric_limits<CountType>::max()) {
          ++state->mCollectedLocksCount;
        }
        if (state->mCollectedLocksCount <= 2) {
          state->mCollectedLocks[state->mCollectedLocksCount - 1] = candidate;
        }

        // Keep single-stepping.
        return true;
      },
      &collection);

  // The only failure we expect is the one caused by an attached debugger.
  MOZ_ASSERT(stepError == WindowsDiagnosticsError::None ||
             stepError == WindowsDiagnosticsError::DebuggerPresent);

  if (stepError != WindowsDiagnosticsError::None) {
    return false;
  }

  // Crashing here most likely means that the optimizer was too aggressive.
  MOZ_ASSERT(collection.mLookupCalled);

  // Seeing exactly two distinct locks is taken to mean that they are the
  // locks we are looking for.
  const bool foundBothLocks = collection.mCollectedLocksCount == 2;

  // RtlLookupFunctionEntry is always expected to yield a successful
  // acquisition. If this crashes then ExtractLockFromCurrentCpuContext most
  // likely failed to detect the instructions that acquire and release the
  // locks.
  MOZ_ASSERT(foundBothLocks);
  if (!foundBothLocks) {
    return false;
  }

  for (size_t i = 0; i < 2; ++i) {
    aStackWalkLocks[i] = collection.mCollectedLocks[i];
  }
  return true;
#  else
  return false;
#  endif  // _M_AMD64
}
|
|
|
|
// Based on a single-step CPU context, extract a pointer to a lock that is
|
|
// being acquired or released (if any).
|
|
MFBT_API
|
|
void* ExtractLockFromCurrentCpuContext(void* aContext) {
|
|
# if defined(_M_AMD64)
|
|
// rex bits
|
|
constexpr BYTE kMaskHighNibble = 0xF0;
|
|
constexpr BYTE kRexOpcode = 0x40;
|
|
constexpr BYTE kMaskRexW = 0x08;
|
|
constexpr BYTE kMaskRexB = 0x01;
|
|
|
|
// mod r/m bits
|
|
constexpr BYTE kMaskMod = 0xC0;
|
|
constexpr BYTE kMaskRm = 0x07;
|
|
constexpr BYTE kModNoRegDisp = 0x00;
|
|
constexpr BYTE kRmNeedSib = 0x04;
|
|
constexpr BYTE kRmNoRegDispDisp32 = 0x05;
|
|
|
|
auto context = reinterpret_cast<CONTEXT*>(aContext);
|
|
auto opcode = reinterpret_cast<uint8_t*>(context->Rip);
|
|
// lock rex.w(?rxb) cmpxchg r/m64, r64
|
|
if (opcode[0] == 0xf0 &&
|
|
(opcode[1] & (kMaskHighNibble | kMaskRexW)) == (kRexOpcode | kMaskRexW) &&
|
|
opcode[2] == 0x0f && opcode[3] == 0xb1) {
|
|
if ((opcode[4] & kMaskMod) == kModNoRegDisp) {
|
|
BYTE const rm = opcode[4] & kMaskRm; // low 3 bits, no offset
|
|
|
|
if (rm == kRmNeedSib) {
|
|
// uses SIB byte; decoding not implemented
|
|
return nullptr;
|
|
}
|
|
|
|
if (rm == kRmNoRegDispDisp32) {
|
|
// rip-relative
|
|
return reinterpret_cast<void*>(
|
|
static_cast<int64_t>(context->Rip) + 9i64 +
|
|
static_cast<int64_t>(*reinterpret_cast<int32_t*>(opcode + 5)));
|
|
}
|
|
|
|
// otherwise, this reads/writes from [reg] -- and conveniently, the
|
|
// registers in the CONTEXT struct form an indexable subarray in "opcode
|
|
// order"
|
|
BYTE const regIndex = ((opcode[1] & kMaskRexB) << 3) | rm;
|
|
DWORD64 const regValue = (&context->Rax)[regIndex];
|
|
return reinterpret_cast<void*>(regValue);
|
|
}
|
|
}
|
|
return nullptr;
|
|
# else
|
|
return nullptr;
|
|
# endif // _M_AMD64
|
|
}
|
|
|
|
// Best-effort sanity check of the lock pointers returned by
// CollectStackWalkLocks(): both must be non-null and must designate a
// pointer-sized object located entirely within the .data section of
// ntdll.dll.
//
// Returns true if both pointers pass the check; false otherwise, including
// when ntdll.dll or its .data section cannot be inspected.
MFBT_API
bool ValidateStackWalkLocks(const Array<void*, 2>& aStackWalkLocks) {
  if (!aStackWalkLocks[0] || !aStackWalkLocks[1]) {
    return false;
  }

  // We check that the pointers live in ntdll's .data section as a best effort.
  mozilla::nt::PEHeaders ntdllImage(::GetModuleHandleW(L"ntdll.dll"));
  if (!ntdllImage) {
    return false;
  }

  auto dataSection = ntdllImage.GetDataSectionInfo();
  if (dataSection.isNothing()) {
    return false;
  }

  // Never dereference the iterators of an empty section.
  const auto sectionBegin = dataSection->cbegin();
  const auto sectionEnd = dataSection->cend();
  if (sectionBegin == sectionEnd) {
    return false;
  }

  // The locks are SRW locks, which are pointer-sized. Require the whole lock,
  // not just its first byte, to fit inside the section.
  constexpr uintptr_t kLockSize = sizeof(void*);
  const auto sectionStart = reinterpret_cast<uintptr_t>(&*sectionBegin);
  const auto sectionSize = static_cast<uintptr_t>(sectionEnd - sectionBegin);
  if (sectionSize < kLockSize) {
    return false;
  }

  const auto lockFitsInSection = [&](void* aLock) {
    const auto lockAddress = reinterpret_cast<uintptr_t>(aLock);
    // Written with subtractions so that nothing can overflow.
    return sectionStart <= lockAddress &&
           lockAddress - sectionStart <= sectionSize - kLockSize;
  };

  return lockFitsInSection(aStackWalkLocks[0]) &&
         lockFitsInSection(aStackWalkLocks[1]);
}
|
|
|
|
#endif // _M_AMD64 || _M_ARM64
|
|
|
|
} // namespace mozilla
|