Files
tubestation/toolkit/xre/dllservices/tests/TestStackWalkInitialization.cpp
Yannis Juglaret addbb1c6db Bug 1839299 - Use a finer-grained strategy to protect RtlLookupFunctionEntry against deadlocks. r=win-reviewers,rkraesig
On 64-bit Windows (x86_64, aarch64), stack walking relies on
RtlLookupFunctionEntry to navigate from one frame to the next. This
function acquires up to two ntdll internal locks when it is called.

The profiler and the background hang monitor both need to walk the
stacks of suspended threads. This can lead to deadlock situations,
which so far we have avoided with stack walk suppressions. We guard some
critical paths to mark them as suppressing stack walk, and we forbid
stack walking when any thread is currently on such a path.

While stack walk suppression has helped remove most deadlock situations,
some can remain because it is hard to detect and manually annotate all
the paths that could lead to a deadlock situation. Another drawback is
that stack walk suppression disables stack walking for much larger
portions of code than required. For example, we disable stack walking
for LdrLoadDll, so we cannot collect stacks while we are loading a DLL.
Yet, the lock that could lead to a deadlock situation is only held
during a very small portion of the whole time spent in LdrLoadDll.

This patch addresses these two issues by implementing a finer-grained
strategy to avoid deadlock situations. We acquire the pointers to the
internal ntdll locks through a single-stepped execution of
RtlLookupFunctionEntry. This allows us to try to acquire the locks
non-blockingly so that we can guarantee safe stack walking with no
deadlock.

If we fail to collect pointers to the locks, we fall back to using stack
walk suppressions like before. This way we get the best of both worlds:
if we are confident that the situation is under control, we will use the
new strategy and get better profiler accuracy and no deadlock; in case
of doubt, we can still use the profiler thanks to stack walk
suppressions.

Differential Revision: https://phabricator.services.mozilla.com/D223498
2024-10-17 13:47:04 +00:00

222 lines
7.5 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsWindowsHelpers.h"
#include "mozilla/Array.h"
#include "mozilla/Attributes.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/WindowsStackWalkInitialization.h"
#include <windows.h>
#include <cstdio>
// Reports a test failure and terminates the process.
//
// Prints "TEST-FAILED | TestStackWalkInitialization | " followed by the
// wprintf-formatted message, then calls ::exit(1).
//
// `format` must be a wide string literal: it is placed directly after the
// L"..." prefix and relies on adjacent string literal concatenation. The
// optional trailing arguments are forwarded to wprintf.
//
// NOTE: ::exit does not unwind the stack, so destructors of automatic objects
// in the calling function (e.g. scope-exit guards) do not run after this macro
// fires.
#define TEST_FAILED(format, ...) \
do { \
wprintf(L"TEST-FAILED | TestStackWalkInitialization | " format __VA_OPT__( \
, ) __VA_ARGS__); \
::exit(1); \
} while (0)
// Reports a passed check and lets execution continue.
//
// Prints "TEST-PASS | TestStackWalkInitialization | " followed by the
// wprintf-formatted message. As with TEST_FAILED, `format` must be a wide
// string literal because it is concatenated with the L"..." prefix.
#define TEST_PASS(format, ...) \
do { \
wprintf(L"TEST-PASS | TestStackWalkInitialization | " format __VA_OPT__( \
, ) __VA_ARGS__); \
} while (0)
// Timeout, in milliseconds, passed to every WaitForSingleObject call in this
// file. TestLocksPreventLookup also uses it as the delay after which a lookup
// thread that has not signaled completion is considered stuck.
#define MAX_TIMEOUT_MS 5000
// Stand-in for a lock. TestLockExtraction loads its address into registers of
// a synthetic CONTEXT and expects the extraction routine to hand that address
// back; LockThroughRipRelativeAddr references it by symbol name from inline
// assembly.
// NOTE(review): extern "C" presumably keeps the symbol name unmangled so that
// the assembly reference resolves — confirm before changing the linkage.
extern "C" __declspec(dllexport) uint64_t gPseudoLock{};
// Naked stub whose body is a single `lock cmpxchg` with (%rsi) as its memory
// operand. Nothing in this file calls it: TestLockExtraction only takes its
// address and uses it as the instruction pointer of a synthetic CONTEXT.
MOZ_NEVER_INLINE MOZ_NAKED __declspec(dllexport) void LockThroughRegisterRsi() {
asm volatile(
// Found in RtlAcquireSRWLockShared
"lock cmpxchgq %rcx, (%rsi)");
}
// Naked stub whose body is a single `lock cmpxchg` with (%rcx) as its memory
// operand. Nothing in this file calls it: TestLockExtraction only takes its
// address and uses it as the instruction pointer of a synthetic CONTEXT.
MOZ_NEVER_INLINE MOZ_NAKED __declspec(dllexport) void LockThroughRegisterRcx() {
asm volatile(
// Found in RtlReleaseSRWLockShared
"lock cmpxchgq %r10, (%rcx)");
}
// Naked stub whose body is a single `lock cmpxchg` with (%r10) as its memory
// operand. Nothing in this file calls it: TestLockExtraction only takes its
// address and uses it as the instruction pointer of a synthetic CONTEXT.
MOZ_NEVER_INLINE MOZ_NAKED __declspec(dllexport) void LockThroughRegisterR10() {
asm volatile("lock cmpxchgq %rcx, (%r10)");
}
// Naked stub whose body is a single `lock cmpxchg` that addresses gPseudoLock
// RIP-relatively instead of through a general-purpose register. Nothing in
// this file calls it: TestLockExtraction only takes its address and uses it as
// the instruction pointer of a synthetic CONTEXT.
MOZ_NEVER_INLINE MOZ_NAKED __declspec(dllexport) void
LockThroughRipRelativeAddr() {
asm volatile(
// Found in an inlined call to RtlAcquireSRWLockShared in
// RtlpxLookupFunctionTable on Windows 10
"lock cmpxchgq %r11, gPseudoLock(%rip)");
}
void TestLockExtraction() {
void* extractedLock{};
CONTEXT context{};
context.Rip = reinterpret_cast<DWORD64>(LockThroughRegisterRsi);
context.Rsi = reinterpret_cast<DWORD64>(&gPseudoLock);
extractedLock = mozilla::ExtractLockFromCurrentCpuContext(&context);
context.Rsi = 0;
if (extractedLock != &gPseudoLock) {
TEST_FAILED(
L"Failed to extract the lock through register RSI (expected: %p, got: "
L"%p)\n",
&gPseudoLock, extractedLock);
}
context.Rip = reinterpret_cast<DWORD64>(LockThroughRegisterRcx);
context.Rcx = reinterpret_cast<DWORD64>(&gPseudoLock);
extractedLock = mozilla::ExtractLockFromCurrentCpuContext(&context);
context.Rcx = 0;
if (extractedLock != &gPseudoLock) {
TEST_FAILED(
L"Failed to extract the lock through register RCX (expected: %p, got: "
L"%p)\n",
&gPseudoLock, extractedLock);
}
context.Rip = reinterpret_cast<DWORD64>(LockThroughRegisterR10);
context.R10 = reinterpret_cast<DWORD64>(&gPseudoLock);
extractedLock = mozilla::ExtractLockFromCurrentCpuContext(&context);
context.R10 = 0;
if (extractedLock != &gPseudoLock) {
TEST_FAILED(
L"Failed to extract the lock through register R10 (expected: %p, got: "
L"%p)\n",
&gPseudoLock, extractedLock);
}
context.Rip = reinterpret_cast<DWORD64>(LockThroughRipRelativeAddr);
extractedLock = mozilla::ExtractLockFromCurrentCpuContext(&context);
if (extractedLock != &gPseudoLock) {
TEST_FAILED(
L"Failed to extract the lock through RIP-relative address (expected: "
L"%p, got: %p)\n",
&gPseudoLock, extractedLock);
}
TEST_PASS(L"Managed to extract the lock with all test patterns\n");
}
// Runs mozilla::CollectStackWalkLocks followed by
// mozilla::ValidateStackWalkLocks on aStackWalkLocks and fails the test as
// soon as either one reports failure.
//
// aStackWalkLocks is passed by mutable reference to the collection step
// (presumably so that it can be filled in — see the callee for the exact
// contract) and is reused by the caller afterwards.
void TestLockCollectionAndValidation(
    mozilla::Array<void*, 2>& aStackWalkLocks) {
  const bool collected = mozilla::CollectStackWalkLocks(aStackWalkLocks);
  if (!collected) {
    TEST_FAILED(L"Failed to collect stack walk locks\n");
  }

  const bool validated = mozilla::ValidateStackWalkLocks(aStackWalkLocks);
  if (!validated) {
    TEST_FAILED(L"Failed to validate stack walk locks\n");
  }

  TEST_PASS(L"Collected and validated locks successfully\n");
}
// Thread entry point used by TestLocksPreventLookup.
//
// aEvents points to an array of three nsAutoHandle events:
//   [0] signaled by this thread once it is running,
//   [1] signaled by the main thread to request the lookup,
//   [2] signaled by this thread after RtlLookupFunctionEntry has returned.
//
// If event [1] is not signaled within MAX_TIMEOUT_MS, the thread exits without
// doing the lookup and without signaling event [2]. Always returns 0.
DWORD WINAPI LookupThreadProc(LPVOID aEvents) {
  nsAutoHandle* const handles = reinterpret_cast<nsAutoHandle*>(aEvents);
  nsAutoHandle& readyEvent = handles[0];
  nsAutoHandle& startEvent = handles[1];
  nsAutoHandle& doneEvent = handles[2];

  // Tell the main thread that we are about to wait for its go-ahead.
  ::SetEvent(readyEvent);

  // The main thread signals startEvent once it holds the locks exclusively.
  const DWORD waitResult = ::WaitForSingleObject(startEvent, MAX_TIMEOUT_MS);
  if (waitResult != WAIT_OBJECT_0) {
    return 0;
  }

  // This lookup is expected to block until the main thread releases the locks.
  DWORD64 imageBase;
  ::RtlLookupFunctionEntry(reinterpret_cast<DWORD64>(LookupThreadProc),
                           &imageBase, nullptr);

  // Reaching this point means the lookup was not, or is no longer, blocked.
  ::SetEvent(doneEvent);
  return 0;
}
// This test checks that the locks in aStackWalkLocks cause
// RtlLookupFunctionEntry to get stuck if they are held exclusively, i.e. there
// is a good chance that these are indeed the locks we are looking for.
//
// Sequence:
//   1. Create three manual-reset events and start LookupThreadProc.
//   2. Wait for the lookup thread to report that it is ready.
//   3. Acquire both locks exclusively (non-blocking).
//   4. Ask the lookup thread to call RtlLookupFunctionEntry and expect that it
//      does NOT finish within MAX_TIMEOUT_MS.
//   5. Release the locks and expect the lookup thread to finish.
//
// Failures are always reported after the locks have been released.
// TEST_FAILED ends in ::exit(1), which does not unwind the stack, so a
// scope-exit guard would never run and the process would begin shutting down
// while still holding the locks exclusively.
void TestLocksPreventLookup(const mozilla::Array<void*, 2>& aStackWalkLocks) {
  nsAutoHandle events[3]{};
  for (int i = 0; i < 3; ++i) {
    nsAutoHandle event(::CreateEventW(nullptr, /* bManualReset */ TRUE,
                                      /* bInitialState */ FALSE, nullptr));
    if (!event) {
      TEST_FAILED(L"Failed to create event %d\n", i);
    }
    events[i].swap(event);
  }

  // Same layout as the one LookupThreadProc expects.
  auto& lookupThreadReady = events[0];
  auto& initiateLookup = events[1];
  auto& lookupThreadDone = events[2];

  nsAutoHandle lookupThread(::CreateThread(nullptr, 0, LookupThreadProc,
                                           reinterpret_cast<void*>(events), 0,
                                           nullptr));
  if (!lookupThread) {
    TEST_FAILED(L"Failed to create lookup thread\n");
  }

  if (::WaitForSingleObject(lookupThreadReady, MAX_TIMEOUT_MS) !=
      WAIT_OBJECT_0) {
    TEST_FAILED(L"Lookup thread did not signal the lookupThreadReady event\n");
  }

  mozilla::Array<SRWLOCK*, 2> stackWalkLocks{
      reinterpret_cast<SRWLOCK*>(aStackWalkLocks[0]),
      reinterpret_cast<SRWLOCK*>(aStackWalkLocks[1])};

  if (!::TryAcquireSRWLockExclusive(stackWalkLocks[0])) {
    TEST_FAILED(L"Failed to acquire lock 0\n");
  }
  if (!::TryAcquireSRWLockExclusive(stackWalkLocks[1])) {
    ::ReleaseSRWLockExclusive(stackWalkLocks[0]);
    TEST_FAILED(L"Failed to acquire lock 1\n");
  }

  // Outcomes of the steps performed while the locks are held. They are only
  // recorded here and evaluated after the locks have been released.
  BOOL lookupInitiated = FALSE;
  DWORD waitWhileLocked = WAIT_FAILED;
  {
    // Releases the locks, in reverse acquisition order, when the scope ends.
    auto onExitScope = mozilla::MakeScopeExit([&stackWalkLocks]() {
      ::ReleaseSRWLockExclusive(stackWalkLocks[1]);
      ::ReleaseSRWLockExclusive(stackWalkLocks[0]);
    });

    lookupInitiated = ::SetEvent(initiateLookup);
    if (lookupInitiated) {
      // A timeout is the expected outcome: the lookup thread should be blocked
      // for as long as we hold the locks.
      waitWhileLocked =
          ::WaitForSingleObject(lookupThreadDone, MAX_TIMEOUT_MS);
    }
  }

  if (!lookupInitiated) {
    TEST_FAILED(L"Failed to signal the initiateLookup event\n");
  }
  if (waitWhileLocked != WAIT_TIMEOUT) {
    TEST_FAILED(
        L"Lookup thread was not stuck during lookup while we acquired the "
        L"locks exclusively\n");
  }

  // With the locks released the lookup must now be able to complete.
  if (::WaitForSingleObject(lookupThreadDone, MAX_TIMEOUT_MS) !=
      WAIT_OBJECT_0) {
    TEST_FAILED(
        L"Lookup thread did not signal the lookupThreadDone event after locks "
        L"were released\n");
  }

  TEST_PASS(L"Locks prevented lookup while acquired exclusively\n");
}
int wmain(int argc, wchar_t* argv[]) {
TestLockExtraction();
mozilla::Array<void*, 2> stackWalkLocks;
TestLockCollectionAndValidation(stackWalkLocks);
TestLocksPreventLookup(stackWalkLocks);
return 0;
}