Merge branch 'tcp-make-tcp_ack-faster'

Eric Dumazet says:

====================
tcp: make tcp_ack() faster

Move tcp_rack_update_reo_wnd() and tcp_rack_advance() to tcp_input.c
to allow their (auto)inlining.

No functional change in this series.
====================

Link: https://patch.msgid.link/20260127032147.3498272-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-01-28 19:31:53 -08:00
commit c0f38f3136
3 changed files with 77 additions and 80 deletions

View file

@ -2514,10 +2514,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
u32 reo_wnd);
extern bool tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
/* tcp_plb.c */

View file

@ -1558,6 +1558,38 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
return in_sack;
}
/* Record the most recently (re)sent time among the (s)acked packets
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt
*/
static void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
u32 end_seq, u64 xmit_time)
{
u32 rtt_us;
rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
* retransmission) was sacked.
*
* If the original is lost, there is no ambiguity. Otherwise
* we assume the original can be delayed up to aRTT + min_rtt.
* the aRTT term is bounded by the fast recovery or timeout,
* so it's at least one RTT (i.e., retransmission is at least
* an RTT later).
*/
return;
}
tp->rack.advanced = 1;
tp->rack.rtt_us = rtt_us;
if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
end_seq, tp->rack.end_seq)) {
tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
}
}
/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
@ -4149,6 +4181,49 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered,
return delivered;
}
/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
*
* If a DSACK is received that seems like it may have been due to reordering
* triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
* by srtt), since there is possibility that spurious retransmission was
* due to reordering delay longer than reo_wnd.
*
* Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
* no. of successful recoveries (accounts for full DSACK-based loss
* recovery undo). After that, reset it to default (min_rtt/4).
*
* At max, reo_wnd is incremented only once per rtt. So that the new
* DSACK on which we are reacting, is due to the spurious retx (approx)
* after the reo_wnd has been updated last time.
*
* reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
* absolute value to account for change in rtt.
*/
static void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
if (before(rs->prior_delivered, tp->rack.last_delivered))
tp->rack.dsack_seen = 0;
/* Adjust the reo_wnd if update is pending */
if (tp->rack.dsack_seen) {
tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
tp->rack.reo_wnd_steps + 1);
tp->rack.dsack_seen = 0;
tp->rack.last_delivered = tp->delivered;
tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
} else if (!tp->rack.reo_wnd_persist) {
tp->rack.reo_wnd_steps = 1;
}
}
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@ -4283,7 +4358,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_in_ack_event(sk, flag);
if (tp->tlp_high_seq)
if (unlikely(tp->tlp_high_seq))
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
@ -4333,7 +4408,7 @@ no_queue:
*/
tcp_ack_probe(sk);
if (tp->tlp_high_seq)
if (unlikely(tp->tlp_high_seq))
tcp_process_tlp_ack(sk, ack, flag);
return 1;

View file

@ -111,38 +111,6 @@ bool tcp_rack_mark_lost(struct sock *sk)
return !!timeout;
}
/* Record the most recently (re)sent time among the (s)acked packets
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time)
{
u32 rtt_us;
rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
* retransmission) was sacked.
*
* If the original is lost, there is no ambiguity. Otherwise
* we assume the original can be delayed up to aRTT + min_rtt.
* the aRTT term is bounded by the fast recovery or timeout,
* so it's at least one RTT (i.e., retransmission is at least
* an RTT later).
*/
return;
}
tp->rack.advanced = 1;
tp->rack.rtt_us = rtt_us;
if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
end_seq, tp->rack.end_seq)) {
tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
}
}
/* We have waited long enough to accommodate reordering. Mark the expired
* packets lost and retransmit them.
*/
@ -166,49 +134,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
tcp_rearm_rto(sk);
}
/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
*
* If a DSACK is received that seems like it may have been due to reordering
* triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
* by srtt), since there is possibility that spurious retransmission was
* due to reordering delay longer than reo_wnd.
*
* Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
* no. of successful recoveries (accounts for full DSACK-based loss
* recovery undo). After that, reset it to default (min_rtt/4).
*
* At max, reo_wnd is incremented only once per rtt. So that the new
* DSACK on which we are reacting, is due to the spurious retx (approx)
* after the reo_wnd has been updated last time.
*
* reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
* absolute value to account for change in rtt.
*/
void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
if (before(rs->prior_delivered, tp->rack.last_delivered))
tp->rack.dsack_seen = 0;
/* Adjust the reo_wnd if update is pending */
if (tp->rack.dsack_seen) {
tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
tp->rack.reo_wnd_steps + 1);
tp->rack.dsack_seen = 0;
tp->rack.last_delivered = tp->delivered;
tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
} else if (!tp->rack.reo_wnd_persist) {
tp->rack.reo_wnd_steps = 1;
}
}
/* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
* the next unacked packet upon receiving
* a) three or more DUPACKs to start the fast recovery