Bug 1852924 - poll QUIC UDP socket for PR_POLL_WRITE on WOULD_BLOCK r=necko-reviewers,kershaw

`neqo_glue` first asks the Neqo connection state machine for outbound
UDP datagrams (`conn.process_output`) and then sends each on a UDP
socket. It does so in a loop. Previously, in case the second step fails
with WOULD_BLOCK, `neqo_glue` would simply dropp the datagram, relying
on QUIC's retransmit mechanism for the data to be delivered eventually
in consecutive UDP datagrams.

The above "local" packet drops lead to unnecessary congestion window
reductions due to packet loss.  This is especially relevant on high
throughput uploads, where `neqo_glue` might fill the OS UDP socket send
buffer frequently, thus leading to many WOULD_BLOCKs, thus leading to
many datagram drops, thus leading to many congestion control window
reductions. Such scenario is for example detailed in the conversation
below:

https://bugzilla.mozilla.org/show_bug.cgi?id=1852924#c38

With this commit `neqo_glue` temporarily buffers the datagram on WOULD_BLOCK and
instructs the `nsUDPSocket` to be polled with PR_POLL_WRITE, i.e. be polled for
being writable. Once writable, `neqo_glue` is called back and the buffered
datagram is sent, before proceeding as usual.

Differential Revision: https://phabricator.services.mozilla.com/D239162
This commit is contained in:
Max Leonard Inden
2025-03-31 17:49:54 +00:00
parent 1062485959
commit c625c10752
5 changed files with 68 additions and 6 deletions

View File

@@ -14671,6 +14671,16 @@
mirror: always
rust: true
# Poll UDP socket via PR_POLL_WRITE on WOULD_BLOCK. Noop if
# network.http.http3.use_nspr_for_io is true.
#
# See <https://phabricator.services.mozilla.com/D239162> for details.
- name: network.http.http3.pr_poll_write
type: RelaxedAtomicBool
value: true
mirror: always
rust: true
- name: network.http.http3.enable_qlog
type: RelaxedAtomicBool
value: false

View File

@@ -263,6 +263,16 @@ interface nsIUDPSocket : nsISupports
*/
[noscript, notxpcom] int64_t getFileDescriptor();
/**
* enableWritePoll
*
* Request that the UDP socket polls for write-availability.
* Typically called after a non-blocking send returns WOULD_BLOCK.
*
* Note that the socket always polls for read-availability.
*/
[noscript, notxpcom] void enableWritePoll();
/**
* addOutputBytes
*

View File

@@ -416,6 +416,13 @@ void nsUDPSocket::OnSocketReady(PRFileDesc* fd, int16_t outFlags) {
}
if (mSyncListener) {
if (outFlags & PR_POLL_WRITE) {
// If we see PR_POLL_WRITE, we revert the poll to read+except only, and we
// call OnPacketReceived() to trigger the read code (which eventually
// calls SendData). We might consider splitting out a separate
// write-ready-only callback in the future.
mPollFlags = (PR_POLL_READ | PR_POLL_EXCEPT);
}
mSyncListener->OnPacketReceived(this);
return;
}
@@ -1207,6 +1214,16 @@ int64_t nsUDPSocket::GetFileDescriptor() {
return PR_FileDesc2NativeHandle(mFD);
}
/**
* Request that the UDP socket polls for write-availability.
* Typically called after a non-blocking send returns WOULD_BLOCK.
*
* Note that the socket always polls for read-availability.
*/
void nsUDPSocket::EnableWritePoll() {
mPollFlags = (PR_POLL_WRITE | PR_POLL_READ | PR_POLL_EXCEPT);
}
NS_IMETHODIMP
nsUDPSocket::SendBinaryStream(const nsACString& aHost, uint16_t aPort,
nsIInputStream* aStream) {
@@ -1251,7 +1268,7 @@ nsUDPSocket::RecvWithAddr(NetAddr* addr, nsTArray<uint8_t>& aData) {
if (!aData.AppendElements(buff, count, fallible)) {
UDPSOCKET_LOG((
"nsUDPSocket::OnSocketReady: AppendElements FAILED [this=%p]\n", this));
"nsUDPSocket::RecvWithAddr: AppendElements FAILED [this=%p]\n", this));
mCondition = NS_ERROR_UNEXPECTED;
}
return NS_OK;

View File

@@ -990,8 +990,11 @@ nsresult Http3Session::ProcessOutput(nsIUDPSocket* socket) {
Http3Session* self = (Http3Session*)aContext;
self->SetupTimer(timeout);
});
// Note: WOULD_BLOCK is handled in neqo_glue.
if (NS_FAILED(rv.result)) {
if (rv.result == NS_BASE_STREAM_WOULD_BLOCK) {
// The OS buffer was full. Tell the UDP socket to poll for
// write-availability.
socket->EnableWritePoll();
} else if (NS_FAILED(rv.result)) {
mSocketError = rv.result;
// If there was an error return from here. We do not need to set a timer,
// because we will close the connection.

View File

@@ -76,6 +76,9 @@ pub struct NeqoHttp3Conn {
// would close the file descriptor on `Drop`. The lifetime of the underlying
// OS socket is managed not by `neqo_glue` but `NSPR`.
socket: Option<neqo_udp::Socket<BorrowedSocket>>,
/// Buffered outbound datagram from previous send that failed with
/// WouldBlock. To be sent once UDP socket has write-availability again.
buffered_outbound_datagram: Option<Datagram>,
datagram_segment_size_sent: LocalMemoryDistribution<'static>,
datagram_segment_size_received: LocalMemoryDistribution<'static>,
@@ -349,6 +352,7 @@ impl NeqoHttp3Conn {
datagram_size_received: networking::http_3_udp_datagram_size_received.start_buffer(),
datagram_segments_received: networking::http_3_udp_datagram_segments_received
.start_buffer(),
buffered_outbound_datagram: None,
}));
unsafe { RefPtr::from_raw(conn).ok_or(NS_ERROR_NOT_CONNECTED) }
}
@@ -822,7 +826,13 @@ pub extern "C" fn neqo_http3conn_process_output_and_send(
} else {
now + accumulated_time
};
match conn.conn.process_output(conn.last_output_time) {
let output = conn
.buffered_outbound_datagram
.take()
.map(Output::Datagram)
.unwrap_or_else(|| conn.conn.process_output(conn.last_output_time));
match output {
Output::Datagram(mut dg) => {
if !static_prefs::pref!("network.http.http3.ecn") {
dg.set_tos(IpTos::default());
@@ -841,8 +851,20 @@ pub extern "C" fn neqo_http3conn_process_output_and_send(
match conn.socket.as_mut().expect("non NSPR IO").send(&dg) {
Ok(()) => {}
Err(e) if e.kind() == io::ErrorKind::WouldBlock => {
qwarn!("dropping datagram as socket would block");
break;
if static_prefs::pref!("network.http.http3.pr_poll_write") {
qdebug!("Buffer outbound datagram to be sent once UDP socket has write-availability.");
conn.buffered_outbound_datagram = Some(dg);
return ProcessOutputAndSendResult {
// Propagate WouldBlock error, thus indicating that
// the UDP socket should be polled for
// write-availability.
result: NS_BASE_STREAM_WOULD_BLOCK,
bytes_written: bytes_written.try_into().unwrap_or(u32::MAX),
};
} else {
qwarn!("dropping datagram as socket would block");
break;
}
}
Err(e) => {
qwarn!("failed to send datagram: {}", e);