diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js index 734d867923e4..101065afc3a2 100644 --- a/browser/app/profile/firefox.js +++ b/browser/app/profile/firefox.js @@ -1849,3 +1849,6 @@ pref("extensions.interposition.enabled", true); #endif pref("browser.defaultbrowser.notificationbar", false); + +// How many milliseconds to wait for a CPOW response from the child process. +pref("dom.ipc.cpow.timeout", 0); diff --git a/dom/ipc/ContentParent.cpp b/dom/ipc/ContentParent.cpp index 76f854b7018e..892817723465 100755 --- a/dom/ipc/ContentParent.cpp +++ b/dom/ipc/ContentParent.cpp @@ -2011,6 +2011,9 @@ ContentParent::ContentParent(mozIApplication* aApp, true /* Send registered chrome */); ContentProcessManager::GetSingleton()->AddContentProcess(this); + + // Set a reply timeout for CPOWs. + SetReplyTimeoutMs(Preferences::GetInt("dom.ipc.cpow.timeout", 0)); } #ifdef MOZ_NUWA_PROCESS diff --git a/dom/plugins/ipc/PluginModuleParent.cpp b/dom/plugins/ipc/PluginModuleParent.cpp index ad7ef6f79fb1..63bfd58de221 100755 --- a/dom/plugins/ipc/PluginModuleParent.cpp +++ b/dom/plugins/ipc/PluginModuleParent.cpp @@ -571,6 +571,7 @@ PluginModuleChromeParent::ShouldContinueFromReplyTimeout() FinishHangUI(); #endif // XP_WIN TerminateChildProcess(MessageLoop::current()); + GetIPCChannel()->CloseWithTimeout(); return false; } diff --git a/ipc/glue/MessageChannel.cpp b/ipc/glue/MessageChannel.cpp index 5681468796ec..dcf0ae4ae050 100644 --- a/ipc/glue/MessageChannel.cpp +++ b/ipc/glue/MessageChannel.cpp @@ -292,6 +292,8 @@ MessageChannel::MessageChannel(MessageListener *aListener) mDispatchingSyncMessage(false), mDispatchingSyncMessagePriority(0), mCurrentTransaction(0), + mTimedOutMessageSeqno(0), + mRecvdErrors(0), mRemoteStackDepthGuess(false), mSawInterruptOutMsg(false), mAbortOnError(false), @@ -611,8 +613,30 @@ MessageChannel::OnMessageReceivedFromLink(const Message& aMsg) // Regardless of the Interrupt stack, if we're awaiting a sync reply, // we know that it needs to be immediately handled to unblock us. - if (AwaitingSyncReply() && aMsg.is_sync() && aMsg.is_reply()) { + if (aMsg.is_sync() && aMsg.is_reply()) { + if (aMsg.seqno() == mTimedOutMessageSeqno) { + // Drop the message, but allow future sync messages to be sent. + mTimedOutMessageSeqno = 0; + return; + } + + MOZ_ASSERT(AwaitingSyncReply()); MOZ_ASSERT(!mRecvd); + + // Rather than storing errors in mRecvd, we mark them in + // mRecvdErrors. We need a counter because multiple replies can arrive + // when a timeout happens, as in the following example. Imagine the + // child is running slowly. The parent sends a sync message P1. It times + // out. The child eventually sends a sync message C1. While waiting for + // the C1 response, the child dispatches P1. In doing so, it sends sync + // message C2. At that point, it's valid for the parent to send error + // responses for both C1 and C2. + if (aMsg.is_reply_error()) { + mRecvdErrors++; + NotifyWorkerThread(); + return; + } + mRecvd = new Message(aMsg); NotifyWorkerThread(); return; @@ -698,6 +722,14 @@ MessageChannel::Send(Message* aMsg, Message* aReply) MonitorAutoLock lock(*mMonitor); + if (mTimedOutMessageSeqno) { + // Don't bother sending another sync message if a previous one timed out + // and we haven't received a reply for it. Once the original timed-out + // message receives a reply, we'll be able to send more sync messages + // again. + return false; + } + IPC_ASSERT(aMsg->is_sync(), "can only Send() sync messages here"); IPC_ASSERT(aMsg->priority() >= DispatchingSyncMessagePriority(), "can't send sync message of a lesser priority than what's being dispatched"); @@ -713,12 +745,12 @@ MessageChannel::Send(Message* aMsg, Message* aReply) msg->set_seqno(NextSeqno()); - DebugOnly replySeqno = msg->seqno(); + int32_t seqno = msg->seqno(); DebugOnly replyType = msg->type() + 1; AutoSetValue replies(mAwaitingSyncReply, true); AutoSetValue prio(mAwaitingSyncReplyPriority, msg->priority()); - AutoEnterTransaction transact(this, msg->seqno()); + AutoEnterTransaction transact(this, seqno); int32_t transaction = mCurrentTransaction; msg->set_transaction_id(transaction); @@ -750,16 +782,16 @@ MessageChannel::Send(Message* aMsg, Message* aReply) } // See if we've received a reply. + if (mRecvdErrors) { + mRecvdErrors--; + return false; + } + if (mRecvd) { MOZ_ASSERT(mRecvd->is_reply(), "expected reply"); - - if (mRecvd->is_reply_error()) { - mRecvd = nullptr; - return false; - } - + MOZ_ASSERT(!mRecvd->is_reply_error()); MOZ_ASSERT(mRecvd->type() == replyType, "wrong reply type"); - MOZ_ASSERT(mRecvd->seqno() == replySeqno); + MOZ_ASSERT(mRecvd->seqno() == seqno); MOZ_ASSERT(mRecvd->is_sync()); *aReply = Move(*mRecvd); @@ -767,6 +799,8 @@ MessageChannel::Send(Message* aMsg, Message* aReply) return true; } + MOZ_ASSERT(!mTimedOutMessageSeqno); + bool maybeTimedOut = !WaitForSyncNotify(); if (!Connected()) { @@ -774,8 +808,13 @@ MessageChannel::Send(Message* aMsg, Message* aReply) return false; } - if (maybeTimedOut && !ShouldContinueFromTimeout()) + // We only time out a message if it initiated a new transaction (i.e., + // if neither side has any other message Sends on the stack). + bool canTimeOut = transaction == seqno; + if (maybeTimedOut && canTimeOut && !ShouldContinueFromTimeout()) { + mTimedOutMessageSeqno = seqno; return false; + } } return true; @@ -1089,7 +1128,14 @@ MessageChannel::DispatchSyncMessage(const Message& aMsg) bool& blockingVar = ShouldBlockScripts() ? gParentIsBlocked : dummy; Result rv; - { + if (mTimedOutMessageSeqno) { + // If the other side sends a message in response to one of our messages + // that we've timed out, then we reply with an error. + // + // We even reject messages that were sent before the other side even got + // to our timed out message. + rv = MsgNotAllowed; + } else { AutoSetValue blocked(blockingVar, true); AutoSetValue sync(mDispatchingSyncMessage, true); AutoSetValue prioSet(mDispatchingSyncMessagePriority, prio); @@ -1104,6 +1150,7 @@ MessageChannel::DispatchSyncMessage(const Message& aMsg) reply->set_reply_error(); } reply->set_seqno(aMsg.seqno()); + reply->set_transaction_id(aMsg.transaction_id()); MonitorAutoLock lock(*mMonitor); if (ChannelConnected == mChannelState) { @@ -1361,21 +1408,6 @@ MessageChannel::ShouldContinueFromTimeout() return true; } - if (!cont) { - // NB: there's a sublety here. If parents were allowed to send sync - // messages to children, then it would be possible for this - // synchronous close-on-timeout to race with async |OnMessageReceived| - // tasks arriving from the child, posted to the worker thread's event - // loop. This would complicate cleanup of the *Channel. But since - // IPDL forbids this (and since it doesn't support children timing out - // on parents), the parent can only block on interrupt messages to the child, - // and in that case arriving async messages are enqueued to the interrupt - // channel's special queue. They're then ignored because the channel - // state changes to ChannelTimeout (i.e. !Connected). - SynchronouslyClose(); - mChannelState = ChannelTimeout; - } - return cont; } @@ -1618,6 +1650,19 @@ MessageChannel::CloseWithError() PostErrorNotifyTask(); } +void +MessageChannel::CloseWithTimeout() +{ + AssertWorkerThread(); + + MonitorAutoLock lock(*mMonitor); + if (ChannelConnected != mChannelState) { + return; + } + SynchronouslyClose(); + mChannelState = ChannelTimeout; +} + void MessageChannel::BlockScripts() { diff --git a/ipc/glue/MessageChannel.h b/ipc/glue/MessageChannel.h index 323e0450bc19..68c46302d96d 100644 --- a/ipc/glue/MessageChannel.h +++ b/ipc/glue/MessageChannel.h @@ -84,6 +84,8 @@ class MessageChannel : HasResultCodes // for process links only, not thread links. void CloseWithError(); + void CloseWithTimeout(); + void SetAbortOnError(bool abort) { mAbortOnError = true; @@ -518,10 +520,29 @@ class MessageChannel : HasResultCodes int32_t mOldTransaction; }; + // If a sync message times out, we store its sequence number here. Any + // future sync messages will fail immediately. Once the reply for original + // sync message is received, we allow sync messages again. + // + // When a message times out, nothing is done to inform the other side. The + // other side will eventually dispatch the message and send a reply. Our + // side is responsible for replying to all sync messages sent by the other + // side when it dispatches the timed out message. The response is always an + // error. + // + // A message is only timed out if it initiated a transaction. This avoids + // hitting a lot of corner cases with message nesting that we don't really + // care about. + int32_t mTimedOutMessageSeqno; + // If waiting for the reply to a sync out-message, it will be saved here // on the I/O thread and then read and cleared by the worker thread. nsAutoPtr mRecvd; + // If a sync message reply that is an error arrives, we increment this + // counter rather than storing it in mRecvd. + size_t mRecvdErrors; + // Queue of all incoming messages, except for replies to sync and urgent // messages, which are delivered directly to mRecvd, and any pending urgent // incall, which is stored in mPendingUrgentRequest. diff --git a/ipc/ipdl/test/cxx/PTestUrgentHangs.ipdl b/ipc/ipdl/test/cxx/PTestUrgentHangs.ipdl new file mode 100644 index 000000000000..07fc403e97fb --- /dev/null +++ b/ipc/ipdl/test/cxx/PTestUrgentHangs.ipdl @@ -0,0 +1,24 @@ +namespace mozilla { +namespace _ipdltest { + +prio(normal upto high) sync protocol PTestUrgentHangs +{ +parent: + prio(high) sync Test1_2(); + + prio(high) sync TestInner(); + +child: + prio(high) sync Test1_1(); + prio(high) sync Test1_3(); + + prio(high) sync Test2(); + + prio(high) sync Test3(); + + async Test4(); + prio(high) sync Test4_1(); +}; + +} // namespace _ipdltest +} // namespace mozilla diff --git a/ipc/ipdl/test/cxx/TestUrgentHangs.cpp b/ipc/ipdl/test/cxx/TestUrgentHangs.cpp new file mode 100644 index 000000000000..743597593b03 --- /dev/null +++ b/ipc/ipdl/test/cxx/TestUrgentHangs.cpp @@ -0,0 +1,170 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: sw=4 ts=4 et : + */ +#include "TestUrgentHangs.h" + +#include "IPDLUnitTests.h" // fail etc. +#if defined(OS_POSIX) +#include +#else +#include +#endif + +template<> +struct RunnableMethodTraits +{ + static void RetainCallee(mozilla::_ipdltest::TestUrgentHangsParent* obj) { } + static void ReleaseCallee(mozilla::_ipdltest::TestUrgentHangsParent* obj) { } +}; + +namespace mozilla { +namespace _ipdltest { + +//----------------------------------------------------------------------------- +// parent + +TestUrgentHangsParent::TestUrgentHangsParent() +{ + MOZ_COUNT_CTOR(TestUrgentHangsParent); +} + +TestUrgentHangsParent::~TestUrgentHangsParent() +{ + MOZ_COUNT_DTOR(TestUrgentHangsParent); +} + +void +TestUrgentHangsParent::Main() +{ + SetReplyTimeoutMs(1000); + + // Should succeed despite the nested sleep call because the content process + // responded to the transaction. + if (!SendTest1_1()) + fail("sending Test1_1"); + + // Fails with a timeout. + if (SendTest2()) + fail("sending Test2"); + + // Also fails since we haven't gotten a response for Test2 yet. + if (SendTest3()) + fail("sending Test3"); + + // Do a second round of testing once the reply to Test2 comes back. + MessageLoop::current()->PostDelayedTask( + FROM_HERE, + NewRunnableMethod(this, &TestUrgentHangsParent::FinishTesting), + 3000); +} + +void +TestUrgentHangsParent::FinishTesting() +{ + // Send an async message that waits 2 seconds and then sends a sync message + // (which should be processed). + if (!SendTest4()) + fail("sending Test4"); + + // Send a sync message that will time out because the child is waiting + // inside RecvTest4. + if (SendTest4_1()) + fail("sending Test4_1"); + + // Close the channel after the child finishes its work in RecvTest4. + MessageLoop::current()->PostDelayedTask( + FROM_HERE, + NewRunnableMethod(this, &TestUrgentHangsParent::Close), + 3000); +} + +bool +TestUrgentHangsParent::RecvTest1_2() +{ + if (!SendTest1_3()) + fail("sending Test1_3"); + return true; +} + +bool +TestUrgentHangsParent::RecvTestInner() +{ + fail("TestInner should never be dispatched"); + return true; +} + +//----------------------------------------------------------------------------- +// child + +bool +TestUrgentHangsChild::RecvTest1_1() +{ + if (!SendTest1_2()) + fail("sending Test1_2"); + + return true; +} + +bool +TestUrgentHangsChild::RecvTest1_3() +{ + sleep(2); + + return true; +} + +bool +TestUrgentHangsChild::RecvTest2() +{ + sleep(2); + + // Should fail because of the timeout. + if (SendTestInner()) + fail("sending TestInner"); + + return true; +} + +bool +TestUrgentHangsChild::RecvTest3() +{ + fail("RecvTest3 should never be called"); + return true; +} + +bool +TestUrgentHangsChild::RecvTest4() +{ + sleep(2); + + // This should fail because Test4_1 timed out and hasn't gotten a response + // yet. + if (SendTestInner()) + fail("sending TestInner"); + + return true; +} + +bool +TestUrgentHangsChild::RecvTest4_1() +{ + // This should fail because Test4_1 timed out and hasn't gotten a response + // yet. + if (SendTestInner()) + fail("sending TestInner"); + + return true; +} + +TestUrgentHangsChild::TestUrgentHangsChild() +{ + MOZ_COUNT_CTOR(TestUrgentHangsChild); +} + +TestUrgentHangsChild::~TestUrgentHangsChild() +{ + MOZ_COUNT_DTOR(TestUrgentHangsChild); +} + +} // namespace _ipdltest +} // namespace mozilla diff --git a/ipc/ipdl/test/cxx/TestUrgentHangs.h b/ipc/ipdl/test/cxx/TestUrgentHangs.h new file mode 100644 index 000000000000..55379f0be395 --- /dev/null +++ b/ipc/ipdl/test/cxx/TestUrgentHangs.h @@ -0,0 +1,66 @@ +#ifndef mozilla__ipdltest_TestUrgentHangs_h +#define mozilla__ipdltest_TestUrgentHangs_h 1 + +#include "mozilla/_ipdltest/IPDLUnitTests.h" + +#include "mozilla/_ipdltest/PTestUrgentHangsParent.h" +#include "mozilla/_ipdltest/PTestUrgentHangsChild.h" + +namespace mozilla { +namespace _ipdltest { + + +class TestUrgentHangsParent : + public PTestUrgentHangsParent +{ +public: + TestUrgentHangsParent(); + virtual ~TestUrgentHangsParent(); + + static bool RunTestInProcesses() { return true; } + static bool RunTestInThreads() { return false; } + + void Main(); + void FinishTesting(); + + bool RecvTest1_2(); + bool RecvTestInner(); + + bool ShouldContinueFromReplyTimeout() MOZ_OVERRIDE + { + return false; + } + virtual void ActorDestroy(ActorDestroyReason why) MOZ_OVERRIDE + { + passed("ok"); + QuitParent(); + } +}; + + +class TestUrgentHangsChild : + public PTestUrgentHangsChild +{ +public: + TestUrgentHangsChild(); + virtual ~TestUrgentHangsChild(); + + bool RecvTest1_1(); + bool RecvTest1_3(); + bool RecvTest2(); + bool RecvTest3(); + bool RecvTest4(); + bool RecvTest4_1(); + + virtual void ActorDestroy(ActorDestroyReason why) MOZ_OVERRIDE + { + QuitChild(); + } +}; + + +} // namespace _ipdltest +} // namespace mozilla + + +#endif // ifndef mozilla__ipdltest_TestUrgentHangs_h diff --git a/ipc/ipdl/test/cxx/moz.build b/ipc/ipdl/test/cxx/moz.build index d25759b9c4db..12797434c66f 100644 --- a/ipc/ipdl/test/cxx/moz.build +++ b/ipc/ipdl/test/cxx/moz.build @@ -46,6 +46,7 @@ SOURCES += [ 'TestSyncHang.cpp', 'TestSyncWakeup.cpp', 'TestUrgency.cpp', + 'TestUrgentHangs.cpp', ] if CONFIG['OS_ARCH'] == 'Linux': @@ -120,6 +121,7 @@ IPDL_SOURCES += [ 'PTestSyncWakeup.ipdl', 'PTestSysVShmem.ipdl', 'PTestUrgency.ipdl', + 'PTestUrgentHangs.ipdl', ] include('/ipc/chromium/chromium-config.mozbuild') diff --git a/testing/mochitest/mochitest_options.py b/testing/mochitest/mochitest_options.py index ec5153bb9128..c9f901123eaa 100644 --- a/testing/mochitest/mochitest_options.py +++ b/testing/mochitest/mochitest_options.py @@ -809,7 +809,7 @@ class B2GOptions(MochitestOptions): defaults["testPath"] = "" defaults["extensionsToExclude"] = ["specialpowers"] # See dependencies of bug 1038943. - defaults["defaultLeakThreshold"] = 5308 + defaults["defaultLeakThreshold"] = 5404 self.set_defaults(**defaults) def verifyRemoteOptions(self, options):