*/
static void tcp_disable_fack(struct tcp_sock *tp)
{
+ /* RFC3517 uses different metric in lost marker => reset on change */
+ if (tcp_is_fack(tp))
+ tp->lost_skb_hint = NULL;
tp->rx_opt.sack_ok &= ~2;
}
}
if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
tp->mdev = dst_metric(dst, RTAX_RTTVAR);
- tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
tcp_bound_rto(sk);
if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
return 0;
+ if (!tp->packets_out)
+ goto out;
+
/* SACK fastpath:
* if the only SACK change is the increase of the end_seq of
* the first block then only apply that SACK block
/* DSACK info lost if out-of-mem, try SACK still */
if (in_sack <= 0)
in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
- if (in_sack < 0)
+ if (unlikely(in_sack < 0))
break;
- fack_count += tcp_skb_pcount(skb);
+ if (!in_sack) {
+ fack_count += tcp_skb_pcount(skb);
+ continue;
+ }
sacked = TCP_SKB_CB(skb)->sacked;
/* Account D-SACK for retransmitted packet. */
- if ((dup_sack && in_sack) &&
- (sacked & TCPCB_RETRANS) &&
- after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ if (dup_sack && (sacked & TCPCB_RETRANS)) {
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+ tp->undo_retrans--;
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
+ (sacked & TCPCB_SACKED_ACKED))
+ reord = min(fack_count, reord);
+ }
- /* The frame is ACKed. */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
- if (sacked&TCPCB_RETRANS) {
- if ((dup_sack && in_sack) &&
- (sacked&TCPCB_SACKED_ACKED))
- reord = min(fack_count, reord);
- } else {
- /* If it was in a hole, we detected reordering. */
- if (fack_count < prior_fackets &&
- !(sacked&TCPCB_SACKED_ACKED))
- reord = min(fack_count, reord);
- }
- /* Nothing to do; acked frame is about to be dropped. */
+ /* Nothing to do; acked frame is about to be dropped (was ACKed). */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
+ fack_count += tcp_skb_pcount(skb);
continue;
}
- if (!in_sack)
- continue;
-
if (!(sacked&TCPCB_SACKED_ACKED)) {
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
tp->retransmit_skb_hint = NULL;
}
} else {
- /* New sack for not retransmitted frame,
- * which was in hole. It is reordering.
- */
- if (!(sacked & TCPCB_RETRANS) &&
- fack_count < prior_fackets)
- reord = min(fack_count, reord);
+ if (!(sacked & TCPCB_RETRANS)) {
+ /* New sack for not retransmitted frame,
+ * which was in hole. It is reordering.
+ */
+ if (fack_count < prior_fackets)
+ reord = min(fack_count, reord);
+
+ /* SACK enhanced F-RTO (RFC4138; Appendix B) */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+ flag |= FLAG_ONLY_ORIG_SACKED;
+ }
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
/* clear lost hint */
tp->retransmit_skb_hint = NULL;
}
- /* SACK enhanced F-RTO detection.
- * Set flag if and only if non-rexmitted
- * segments below frto_highmark are
- * SACKed (RFC4138; Appendix B).
- * Clearing correct due to in-order walk
- */
- if (after(end_seq, tp->frto_highmark)) {
- flag &= ~FLAG_ONLY_ORIG_SACKED;
- } else {
- if (!(sacked & TCPCB_RETRANS))
- flag |= FLAG_ONLY_ORIG_SACKED;
- }
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
flag |= FLAG_DATA_SACKED;
tp->sacked_out += tcp_skb_pcount(skb);
+ fack_count += tcp_skb_pcount(skb);
+
+ /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
+ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->lost_skb_hint)->seq))
+ tp->lost_cnt_hint += tcp_skb_pcount(skb);
+
if (fack_count > tp->fackets_out)
tp->fackets_out = fack_count;
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
+
+ fack_count += tcp_skb_pcount(skb);
}
/* D-SACK. We can detect redundant retransmission
tp->retransmit_skb_hint = NULL;
}
}
+
+ /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
+ * due to in-order walk
+ */
+ if (after(end_seq, tp->frto_highmark))
+ flag &= ~FLAG_ONLY_ORIG_SACKED;
}
- if (tp->retrans_out &&
+ if (tcp_is_fack(tp) && tp->retrans_out &&
after(highest_sack_end_seq, tp->lost_retrans_low) &&
icsk->icsk_ca_state == TCP_CA_Recovery)
flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
tcp_verify_left_out(tp);
- if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+ if ((reord < tp->fackets_out) &&
+ ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
(!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
- tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
+ tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+out:
#if FASTRETRANS_DEBUG > 0
BUG_TRAP((int)tp->sacked_out >= 0);
}
tcp_verify_left_out(tp);
+ /* Too bad if TCP was application limited */
+ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
+
/* Earlier loss recovery underway (see RFC4138; Appendix B).
* The last condition is necessary at least in tp->frto_counter case.
*/
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
break;
+
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
/*
* Count the retransmission made on RTO correctly (only when
* waiting for the first ACK and did not get it)...
} else {
if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
tp->undo_marker = 0;
- TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
}
/* Don't lost mark skbs that were fwd transmitted after RTO */
return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
}
+/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
+ * counter when SACK is enabled (without SACK, sacked_out is used for
+ * that purpose).
+ *
+ * Instead, with FACK TCP uses fackets_out that includes both SACKed
+ * segments up to the highest received SACK block so far and holes in
+ * between them.
+ *
+ * With reordering, holes may still be in flight, so RFC3517 recovery
+ * uses pure sacked_out (total number of SACKed segments) even though
+ * it violates the RFC that uses duplicate ACKs, often these are equal
+ * but when e.g. out-of-window ACKs or packet duplication occurs,
+ * they differ. Since neither occurs due to loss, TCP should really
+ * ignore them.
+ */
+static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
+{
+ return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+}
+
static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
{
return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
return 1;
/* Not-A-Trick#2 : Classic rule... */
- if (tcp_fackets_out(tp) > tp->reordering)
+ if (tcp_dupack_heurestics(tp) > tp->reordering)
return 1;
/* Trick#3 : when we use RFC2988 timer restart, fast
* retransmit can be triggered by timeout of queue head.
*/
- if (tcp_head_timedout(sk))
+ if (tcp_is_fack(tp) && tcp_head_timedout(sk))
return 1;
/* Trick#4: It is still not OK... But will it be useful to delay
tp->retransmit_skb_hint = NULL;
}
-/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
+ * is against sacked "cnt", otherwise it's against facked "cnt"
+ */
+static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
- cnt += tcp_skb_pcount(skb);
- if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+
+ if (tcp_is_fack(tp) ||
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+ cnt += tcp_skb_pcount(skb);
+
+ if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
+ after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
/* Account newly detected lost packet(s) */
-static void tcp_update_scoreboard(struct sock *sk)
+static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_fack(tp)) {
+ if (tcp_is_reno(tp)) {
+ tcp_mark_head_lost(sk, 1, fast_rexmit);
+ } else if (tcp_is_fack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost);
+ tcp_mark_head_lost(sk, lost, fast_rexmit);
} else {
- tcp_mark_head_lost(sk, 1);
+ int sacked_upto = tp->sacked_out - tp->reordering;
+ if (sacked_upto < 0)
+ sacked_upto = 0;
+ tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
}
/* New heuristics: it is possible only after we switched
* Hence, we can detect timed out packets during fast
* retransmit without falling to slow start.
*/
- if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
+ if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
struct sk_buff *skb;
skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
{
struct tcp_sock *tp = tcp_sk(sk);
/* Partial ACK arrived. Force Hoe's retransmit. */
- int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
+ int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
if (tcp_may_undo(tp)) {
/* Plain luck! Hole if filled with delayed
struct tcp_sock *tp = tcp_sk(sk);
int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
- (tp->fackets_out > tp->reordering));
+ (tcp_fackets_out(tp) > tp->reordering));
+ int fast_rexmit = 0;
/* Some technical things:
* 1. Reno does not count dupacks (sacked_out) automatically. */
return;
/* C. Process data loss notification, provided it is valid. */
- if ((flag&FLAG_DATA_LOST) &&
+ if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+ tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
+ fast_rexmit = 1;
}
- if (do_lost || tcp_head_timedout(sk))
- tcp_update_scoreboard(sk);
+ if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+ tcp_update_scoreboard(sk, fast_rexmit);
tcp_cwnd_down(sk, flag);
tcp_xmit_retransmit_queue(sk);
}
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
+static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
+ int prior_fackets)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
int fully_acked = 1;
int flag = 0;
int prior_packets = tp->packets_out;
+ u32 cnt = 0;
+ u32 reord = tp->packets_out;
s32 seq_rtt = -1;
+ s32 ca_seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
u32 packets_acked;
u8 sacked = scb->sacked;
+ /* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 ||
!after(tp->snd_una, scb->seq))
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= packets_acked;
flag |= FLAG_RETRANS_DATA_ACKED;
+ ca_seq_rtt = -1;
seq_rtt = -1;
if ((flag & FLAG_DATA_ACKED) ||
(packets_acked > 1))
flag |= FLAG_NONHEAD_RETRANS_ACKED;
- } else if (seq_rtt < 0) {
- seq_rtt = now - scb->when;
- if (fully_acked)
- last_ackt = skb->tstamp;
+ } else {
+ ca_seq_rtt = now - scb->when;
+ last_ackt = skb->tstamp;
+ if (seq_rtt < 0) {
+ seq_rtt = ca_seq_rtt;
+ }
+ if (!(sacked & TCPCB_SACKED_ACKED))
+ reord = min(cnt, reord);
}
if (sacked & TCPCB_SACKED_ACKED)
if ((sacked & TCPCB_URG) && tp->urg_mode &&
!before(end_seq, tp->snd_up))
tp->urg_mode = 0;
- } else if (seq_rtt < 0) {
- seq_rtt = now - scb->when;
- if (fully_acked)
- last_ackt = skb->tstamp;
+ } else {
+ ca_seq_rtt = now - scb->when;
+ last_ackt = skb->tstamp;
+ if (seq_rtt < 0) {
+ seq_rtt = ca_seq_rtt;
+ }
+ reord = min(cnt, reord);
}
tp->packets_out -= packets_acked;
+ cnt += packets_acked;
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
tcp_ack_update_rtt(sk, flag, seq_rtt);
tcp_rearm_rto(sk);
+ if (tcp_is_reno(tp)) {
+ tcp_remove_reno_sacks(sk, pkts_acked);
+ } else {
+ /* Non-retransmitted hole got filled? That's reordering */
+ if (reord < prior_fackets)
+ tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ }
+
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
/* hint's skb might be NULL but we don't need to care */
tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
tp->fastpath_cnt_hint);
- if (tcp_is_reno(tp))
- tcp_remove_reno_sacks(sk, pkts_acked);
-
if (ca_ops->pkts_acked) {
s32 rtt_us = -1;
net_invalid_timestamp()))
rtt_us = ktime_us_delta(ktime_get_real(),
last_ackt);
- else if (seq_rtt > 0)
- rtt_us = jiffies_to_usecs(seq_rtt);
+ else if (ca_seq_rtt > 0)
+ rtt_us = jiffies_to_usecs(ca_seq_rtt);
}
ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
}
if (tp->frto_counter == 1) {
- /* Sending of the next skb must be allowed or no F-RTO */
- if (!tcp_send_head(sk) ||
- after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
- tp->snd_una + tp->snd_wnd)) {
- tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
- flag);
- return 1;
- }
-
+ /* tcp_may_send_now needs to see updated state */
tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
tp->frto_counter = 2;
+
+ if (!tcp_may_send_now(sk))
+ tcp_enter_frto_loss(sk, 2, flag);
+
return 1;
} else {
switch (sysctl_tcp_frto_response) {
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
u32 prior_in_flight;
+ u32 prior_fackets;
s32 seq_rtt;
int prior_packets;
int frto_cwnd = 0;
tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
}
+ prior_fackets = tp->fackets_out;
+ prior_in_flight = tcp_packets_in_flight(tp);
+
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
if (!prior_packets)
goto no_queue;
- prior_in_flight = tcp_packets_in_flight(tp);
-
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
+ if (tp->frto_counter)
+ frto_cwnd = tcp_process_frto(sk, flag);
/* Guarantee sacktag reordering detection against wrap-arounds */
if (before(tp->frto_highmark, tp->snd_una))
tp->frto_highmark = 0;
- if (tp->frto_counter)
- frto_cwnd = tcp_process_frto(sk, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */