X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_output.c;h=b7325e0b406a8486fad7bc6da6b24f5ade426415;hb=dfb4b9dceb35c567a595ae5e9d035cfda044a103;hp=caf2e2cff29365ece4d1075ac2eae72c7bf12816;hpb=1dd465cac8d3ba18a9840d032f6604147269c031;p=linux-2.6-omap-h63xx.git

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index caf2e2cff29..b7325e0b406 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -194,12 +194,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	 * will be satisfied with 2.
 	 */
 	if (mss > (1<<*rcv_wscale)) {
-		int init_cwnd;
-
-		if (mss > 1460)
+		int init_cwnd = 4;
+		if (mss > 1460*3)
 			init_cwnd = 2;
-		else
-			init_cwnd = (mss > 1095) ? 3 : 4;
+		else if (mss > 1460)
+			init_cwnd = 3;
 		if (*rcv_wnd > init_cwnd*mss)
 			*rcv_wnd = init_cwnd*mss;
 	}
@@ -263,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
 {
-	if (skb != NULL) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		struct inet_sock *inet = inet_sk(sk);
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-		int tcp_header_size = tp->tcp_header_len;
-		struct tcphdr *th;
-		int sysctl_flags;
-		int err;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet;
+	struct tcp_sock *tp;
+	struct tcp_skb_cb *tcb;
+	int tcp_header_size;
+	struct tcphdr *th;
+	int sysctl_flags;
+	int err;
+
+	BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+	/* If congestion control is doing timestamping, we must
+	 * take such a timestamp before we potentially clone/copy.
+	 */
+	if (icsk->icsk_ca_ops->rtt_sample)
+		__net_timestamp(skb);
+
+	if (likely(clone_it)) {
+		if (unlikely(skb_cloned(skb)))
+			skb = pskb_copy(skb, gfp_mask);
+		else
+			skb = skb_clone(skb, gfp_mask);
+		if (unlikely(!skb))
+			return -ENOBUFS;
+	}
 
-		BUG_ON(!tcp_skb_pcount(skb));
+	inet = inet_sk(sk);
+	tp = tcp_sk(sk);
+	tcb = TCP_SKB_CB(skb);
+	tcp_header_size = tp->tcp_header_len;
 
 #define SYSCTL_FLAG_TSTAMPS	0x1
 #define SYSCTL_FLAG_WSCALE	0x2
 #define SYSCTL_FLAG_SACK	0x4
 
-		/* If congestion control is doing timestamping */
-		if (icsk->icsk_ca_ops->rtt_sample)
-			__net_timestamp(skb);
-
-		sysctl_flags = 0;
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps) {
-				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-			}
-			if(sysctl_tcp_window_scaling) {
-				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_WSCALE;
-			}
-			if(sysctl_tcp_sack) {
-				sysctl_flags |= SYSCTL_FLAG_SACK;
-				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-			}
-		} else if (tp->rx_opt.eff_sacks) {
-			/* A SACK is 2 pad bytes, a 2 byte header, plus
-			 * 2 32-bit sequence numbers for each SACK block.
-			 */
-			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	sysctl_flags = 0;
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+		if(sysctl_tcp_timestamps) {
+			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
-		
-		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(sk, CA_EVENT_TX_START);
-
-		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-		skb->h.th = th;
-		skb_set_owner_w(skb, sk);
-
-		/* Build TCP header and checksum it. */
-		th->source		= inet->sport;
-		th->dest		= inet->dport;
-		th->seq			= htonl(tcb->seq);
-		th->ack_seq		= htonl(tp->rcv_nxt);
-		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			/* RFC1323: The window in SYN & SYN/ACK segments
-			 * is never scaled.
-			 */
-			th->window	= htons(tp->rcv_wnd);
-		} else {
-			th->window	= htons(tcp_select_window(sk));
+		if (sysctl_tcp_window_scaling) {
+			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_WSCALE;
 		}
-		th->check		= 0;
-		th->urg_ptr		= 0;
-
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
-			th->urg_ptr		= htons(tp->snd_up-tcb->seq);
-			th->urg			= 1;
+		if (sysctl_tcp_sack) {
+			sysctl_flags |= SYSCTL_FLAG_SACK;
+			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 		}
+	} else if (unlikely(tp->rx_opt.eff_sacks)) {
+		/* A SACK is 2 pad bytes, a 2 byte header, plus
+		 * 2 32-bit sequence numbers for each SACK block.
+		 */
+		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+				    (tp->rx_opt.eff_sacks *
+				     TCPOLEN_SACK_PERBLOCK));
+	}
+		
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
+	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb->h.th = th;
+	skb_set_owner_w(skb, sk);
+
+	/* Build TCP header and checksum it. */
+	th->source		= inet->sport;
+	th->dest		= inet->dport;
+	th->seq			= htonl(tcb->seq);
+	th->ack_seq		= htonl(tp->rcv_nxt);
+	*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
+					tcb->flags);
+
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		/* RFC1323: The window in SYN & SYN/ACK segments
+		 * is never scaled.
+		 */
+		th->window	= htons(tp->rcv_wnd);
+	} else {
+		th->window	= htons(tcp_select_window(sk));
+	}
+	th->check		= 0;
+	th->urg_ptr		= 0;
 
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_syn_build_options((__u32 *)(th + 1),
-					      tcp_advertise_mss(sk),
-					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-					      (sysctl_flags & SYSCTL_FLAG_SACK),
-					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rx_opt.rcv_wscale,
-					      tcb->when,
-		      			      tp->rx_opt.ts_recent);
-		} else {
-			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, tcb->when);
+	if (unlikely(tp->urg_mode &&
+		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		th->urg			= 1;
+	}
 
-			TCP_ECN_send(sk, tp, skb, tcp_header_size);
-		}
-		tp->af_specific->send_check(sk, th, skb->len, skb);
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_syn_build_options((__u32 *)(th + 1),
+				      tcp_advertise_mss(sk),
+				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+				      (sysctl_flags & SYSCTL_FLAG_SACK),
+				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+				      tp->rx_opt.rcv_wscale,
+				      tcb->when,
+				      tp->rx_opt.ts_recent);
+	} else {
+		tcp_build_and_update_options((__u32 *)(th + 1),
+					     tp, tcb->when);
+		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+	}
 
-		if (tcb->flags & TCPCB_FLAG_ACK)
-			tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+	tp->af_specific->send_check(sk, th, skb->len, skb);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb, sk);
+	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (skb->len != tcp_header_size)
+		tcp_event_data_sent(tp, skb, sk);
 
-		err = tp->af_specific->queue_xmit(skb, 0);
-		if (err <= 0)
-			return err;
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-		tcp_enter_cwr(sk);
+	err = tp->af_specific->queue_xmit(skb, 0);
+	if (unlikely(err <= 0))
+		return err;
+
+	tcp_enter_cwr(sk);
+
+	/* NET_XMIT_CN is special. It does not guarantee,
+	 * that this packet is lost. It tells that device
+	 * is about to start to drop packets or already
+	 * drops some packets of the same priority and
+	 * invokes us to send less aggressively.
+	 */
+	return err == NET_XMIT_CN ? 0 : err;
 
-		/* NET_XMIT_CN is special. It does not guarantee,
-		 * that this packet is lost. It tells that device
-		 * is about to start to drop packets or already
-		 * drops some packets of the same priority and
-		 * invokes us to send less aggressively.
-		 */
-		return err == NET_XMIT_CN ? 0 : err;
-	}
-	return -ENOBUFS;
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
@@ -436,8 +452,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	int nsize, old_factor;
 	u16 flags;
 
-	BUG_ON(len >= skb->len);
+	BUG_ON(len > skb->len);
 
+ 	clear_all_retrans_hints(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -601,7 +618,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    for TCP options, but includes only bare TCP header.
 
    tp->rx_opt.mss_clamp is mss negotiated at connection setup.
-   It is minumum of user_mss and mss received with SYN.
+   It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
    tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
 			break;
 
 		/* Advance the send_head.  This one is sent out.
@@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
 			update_send_head(sk, tp, skb);
 			tcp_cwnd_validate(sk, tp);
 			return;
@@ -1173,7 +1190,7 @@ u32 __tcp_select_window(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* MSS for the peer's data.  Previous verions used mss_clamp
+	/* MSS for the peer's data.  Previous versions used mss_clamp
 	 * here.  I don't know if the value based on our guesses
 	 * of peer's MSS is better for the performance.  It's more correct
 	 * but may be worse for the performance because of rcv_mss
@@ -1262,7 +1279,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		/* Ok.  We will be able to collapse the packet. */
+		/* changing transmit queue under us so clear hints */
+		clear_all_retrans_hints(tp);
+
+		/* Ok.	We will be able to collapse the packet. */
 		__skb_unlink(next_skb, &sk->sk_write_queue);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1332,6 +1352,8 @@ void tcp_simple_retransmit(struct sock *sk)
 		}
 	}
 
+	clear_all_retrans_hints(tp);
+
 	if (!lost)
 		return;
 
@@ -1363,7 +1385,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	int err;
 
 	/* Do not sent more than we queued. 1/4 is reserved for possible
-	 * copying overhead: frgagmentation, tunneling, mangling etc.
+	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
 	if (atomic_read(&sk->sk_wmem_alloc) >
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1424,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
-				    pskb_copy(skb, GFP_ATOMIC):
-				    skb_clone(skb, GFP_ATOMIC)));
+	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
@@ -1470,13 +1490,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt = tp->lost_out;
+	int packet_cnt;
+
+	if (tp->retransmit_skb_hint) {
+		skb = tp->retransmit_skb_hint;
+		packet_cnt = tp->retransmit_cnt_hint;
+	}else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
 
 	/* First pass: retransmit lost packets. */
-	if (packet_cnt) {
-		sk_stream_for_retrans_queue(skb, sk) {
+	if (tp->lost_out) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			/* we could do better than to assign each time */
+			tp->retransmit_skb_hint = skb;
+			tp->retransmit_cnt_hint = packet_cnt;
+
 			/* Assume this retransmit will generate
 			 * only one packet for congestion window
 			 * calculation purposes.  This works because
@@ -1487,10 +1519,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				return;
 
-			if (sacked&TCPCB_LOST) {
+			if (sacked & TCPCB_LOST) {
 				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-					if (tcp_retransmit_skb(sk, skb))
+					if (tcp_retransmit_skb(sk, skb)) {
+						tp->retransmit_skb_hint = NULL;
 						return;
+					}
 					if (icsk->icsk_ca_state != TCP_CA_Loss)
 						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 					else
@@ -1503,8 +1537,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 									  TCP_RTO_MAX);
 				}
 
-				packet_cnt -= tcp_skb_pcount(skb);
-				if (packet_cnt <= 0)
+				packet_cnt += tcp_skb_pcount(skb);
+				if (packet_cnt >= tp->lost_out)
 					break;
 			}
 		}
@@ -1530,9 +1564,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (tcp_may_send_now(sk, tp))
 		return;
 
-	packet_cnt = 0;
+	if (tp->forward_skb_hint) {
+		skb = tp->forward_skb_hint;
+		packet_cnt = tp->forward_cnt_hint;
+	} else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
+
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		tp->forward_cnt_hint = packet_cnt;
+		tp->forward_skb_hint = skb;
 
-	sk_stream_for_retrans_queue(skb, sk) {
 		/* Similar to the retransmit loop above we
 		 * can pretend that the retransmitted SKB
 		 * we send out here will be composed of one
@@ -1549,8 +1592,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			continue;
 
 		/* Ok, retransmit it. */
-		if (tcp_retransmit_skb(sk, skb))
+		if (tcp_retransmit_skb(sk, skb)) {
+			tp->forward_skb_hint = NULL;
 			break;
+		}
 
 		if (skb == skb_peek(&sk->sk_write_queue))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -1611,7 +1656,7 @@ void tcp_send_fin(struct sock *sk)
  * was unread data in the receive queue.  This behavior is recommended
  * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
  */
-void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1635,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	if (tcp_transmit_skb(sk, skb))
+	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
 
@@ -1670,7 +1715,7 @@ int tcp_send_synack(struct sock *sk)
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
 /*
@@ -1831,7 +1876,7 @@ int tcp_connect(struct sock *sk)
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -1927,7 +1972,7 @@ void tcp_send_ack(struct sock *sk)
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff);
+		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
 }
 
@@ -1967,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb);
+	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
@@ -2000,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk)
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
 				update_send_head(sk, tp, skb);
 			}
@@ -2060,3 +2105,4 @@ EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);