#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
+#include <net/netdma.h>
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
* Hence, we can detect timed out packets during fast
* retransmit without falling to slow start.
*/
- if (tcp_head_timedout(sk, tp)) {
+ if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
struct sk_buff *skb;
skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
}
}
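+/* The MTU probe was lost: the path cannot carry a probe_size packet.
+ * Lower the top of the search range below the failed size and clear
+ * probe_size so a new probe can be attempted later.
+ */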
+static void tcp_mtup_probe_failed(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
+ icsk->icsk_mtup.probe_size = 0;
+}
+
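+/* The MTU probe was cumulatively acked: the path carries probe_size
+ * packets, so raise the bottom of the search range and resync the MSS.
+ */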
+static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* FIXME: breaks with very large cwnd */
+ tp->prior_ssthresh = tcp_current_ssthresh(sk);
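+ /* Rescale cwnd (counted in segments) so that roughly the same amount
+ * of data stays in flight now that each segment is probe_size bytes.
+ */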
+ tp->snd_cwnd = tp->snd_cwnd *
+ tcp_mss_to_mtu(sk, tp->mss_cache) /
+ icsk->icsk_mtup.probe_size;
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+
+ icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
+ icsk->icsk_mtup.probe_size = 0;
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
return;
}
+ /* MTU probe failure: don't reduce cwnd */
+ if (icsk->icsk_ca_state < TCP_CA_CWR &&
+ icsk->icsk_mtup.probe_size &&
+ tp->snd_una == tp->mtu_probe.probe_seq_start) {
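+ /* snd_una is stuck at the start of the probe, so what was lost is
+ * the oversized probe segment itself.
+ */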
+ tcp_mtup_probe_failed(sk);
+ /* Restores the reduction we did in tcp_mtup_probe() */
+ tp->snd_cwnd++;
+ tcp_simple_retransmit(sk);
+ return;
+ }
+
/* Otherwise enter Recovery state */
if (IsReno(tp))
tp->retrans_stamp = 0;
}
+ /* MTU probing checks */
+ if (icsk->icsk_mtup.probe_size) {
+ if (!after(tp->mtu_probe.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) {
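+ /* probe_seq_end <= end_seq: the acked segment covers the entire probe */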
+ tcp_mtup_probe_success(sk, skb);
+ }
+ }
+
if (sacked) {
if (sacked & TCPCB_RETRANS) {
if(sacked & TCPCB_SACKED_RETRANS)
__tcp_checksum_complete_user(sk, skb);
}
+#ifdef CONFIG_NET_DMA
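+/* Try to hand the copy of this segment's payload into the user iovec to
+ * the async DMA engine.  Returns 1 (and records the DMA cookie) when a
+ * copy was issued, 0 when the caller must use the normal CPU copy path.
+ */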
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int chunk = skb->len - hlen;
+ int dma_cookie;
+ int copied_early = 0;
+
+ if (tp->ucopy.wakeup)
+ return 0;
+
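+ /* Grab a DMA channel lazily, and only once the user buffer pages have
+ * been pinned for DMA.
+ */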
+ if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+ tp->ucopy.dma_chan = get_softnet_dma();
+
+ if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+
+ dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
+ skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+
+ if (dma_cookie < 0)
+ goto out;
+
+ tp->ucopy.dma_cookie = dma_cookie;
+ copied_early = 1;
+
+ tp->ucopy.len -= chunk;
+ tp->copied_seq += chunk;
+ tcp_rcv_space_adjust(sk);
+
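+ /* Wake the receiving task once its iovec is full, the sender set PSH,
+ * or the receive queue has grown past half of sk_rcvbuf.
+ */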
+ if ((tp->ucopy.len == 0) ||
+ (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+ (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
+ tp->ucopy.wakeup = 1;
+ sk->sk_data_ready(sk, 0);
+ }
+ } else if (chunk > 0) {
+ tp->ucopy.wakeup = 1;
+ sk->sk_data_ready(sk, 0);
+ }
+out:
+ return copied_early;
+}
+#endif /* CONFIG_NET_DMA */
+
/*
* TCP receive function for the ESTABLISHED state.
*
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
- tcp_rcv_rtt_measure_ts(sk, skb);
-
/* We know that such packets are checksummed
* on entry.
*/
}
} else {
int eaten = 0;
+ int copied_early = 0;
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len &&
- sock_owned_by_user(sk)) {
- __set_current_state(TASK_RUNNING);
+ if (tp->copied_seq == tp->rcv_nxt &&
+ len - tcp_header_len <= tp->ucopy.len) {
+#ifdef CONFIG_NET_DMA
+ if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ copied_early = 1;
+ eaten = 1;
+ }
+#endif
+ if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+ __set_current_state(TASK_RUNNING);
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
+ if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
+ eaten = 1;
+ }
+ if (eaten) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
- eaten = 1;
}
+ if (copied_early)
+ tcp_cleanup_rbuf(sk, skb->len);
}
if (!eaten) {
if (tcp_checksum_complete_user(sk, skb))
__tcp_ack_snd_check(sk, 0);
no_ack:
+#ifdef CONFIG_NET_DMA
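+ /* skbs handed to the DMA engine cannot be freed until the async copy
+ * completes; park them on sk_async_wait_queue instead.
+ */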
+ if (copied_early)
+ __skb_queue_tail(&sk->sk_async_wait_queue, skb);
+ else
+#endif
if (eaten)
__kfree_skb(skb);
else
if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
tp->rx_opt.sack_ok |= 2;
+ tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
if (tp->ecn_flags&TCP_ECN_OK)
sock_set_flag(sk, SOCK_NO_LARGESEND);
+ tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
*/
tp->lsndtime = tcp_time_stamp;
+ tcp_mtup_init(sk);
tcp_initialize_rcv_mss(sk);
tcp_init_buffer_space(sk);
tcp_fast_path_on(tp);
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
-EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);