3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/jiffies.h>
36 #include <linux/in6.h>
37 #include <linux/netdevice.h>
38 #include <linux/init.h>
39 #include <linux/jhash.h>
40 #include <linux/ipsec.h>
41 #include <linux/times.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
64 #include <asm/uaccess.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <linux/crypto.h>
70 #include <linux/scatterlist.h>
72 /* Socket used for sending RSTs and ACKs */
73 static struct socket *tcp6_socket;
/* Forward declarations for handlers referenced by the ops tables below. */
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
77 static void tcp_v6_send_check(struct sock *sk, int len,
80 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
/*
 * Per-address-family connection-socket operations:
 * ipv6_mapped is installed on sockets that connect to a v4-mapped
 * address (see tcp_v6_connect); ipv6_specific is the native IPv6 set.
 */
82 static struct inet_connection_sock_af_ops ipv6_mapped;
83 static struct inet_connection_sock_af_ops ipv6_specific;
84 #ifdef CONFIG_TCP_MD5SIG
/* MD5-signature ops, likewise split into native-v6 and v4-mapped flavours. */
85 static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/*
 * Hash the socket into the TCP established/listen tables unless it is
 * closed.  NOTE(review): body is truncated here; the v4-mapped branch
 * presumably delegates to the IPv4 hash routine -- confirm in full source.
 */
89 static void tcp_v6_hash(struct sock *sk)
91 if (sk->sk_state != TCP_CLOSE) {
/* Sockets using the mapped ops were connected to a v4-mapped address. */
92 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
/*
 * Compute the TCP checksum over the IPv6 pseudo-header (saddr, daddr,
 * length, next-header = TCP) folded with @base, the checksum of the
 * TCP header + payload.  Thin wrapper around csum_ipv6_magic().
 */
102 static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
103 struct in6_addr *saddr,
104 struct in6_addr *daddr,
107 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick a secure initial sequence number for a connection answered on
 * @skb, keyed on the 4-tuple taken from the received packet (so the
 * packet's daddr is our local address and saddr the peer's).
 */
110 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
112 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
113 ipv6_hdr(skb)->saddr.s6_addr32,
115 tcp_hdr(skb)->source);
/*
 * Active open (connect()) for an IPv6 TCP socket.
 *
 * Validates the sockaddr, resolves flow labels and scope ids, handles
 * the v4-mapped special case by handing off to tcp_v4_connect(), does
 * the route/xfrm lookup, binds a local port via inet6_hash_connect(),
 * picks a secure ISN and finally sends the SYN with tcp_connect().
 * Returns 0 on success or a negative errno; on failure the socket is
 * moved back to TCP_CLOSE and its route capabilities are cleared.
 */
118 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
121 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
122 struct inet_sock *inet = inet_sk(sk);
123 struct inet_connection_sock *icsk = inet_csk(sk);
124 struct ipv6_pinfo *np = inet6_sk(sk);
125 struct tcp_sock *tp = tcp_sk(sk);
126 struct in6_addr *saddr = NULL, *final_p = NULL, final;
128 struct dst_entry *dst;
/* Reject addresses shorter than the RFC 2133 minimum. */
132 if (addr_len < SIN6_LEN_RFC2133)
135 if (usin->sin6_family != AF_INET6)
136 return(-EAFNOSUPPORT);
138 memset(&fl, 0, sizeof(fl));
/* Honour a flow label supplied in sin6_flowinfo, if the socket owns it. */
141 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
142 IP6_ECN_flow_init(fl.fl6_flowlabel);
143 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
144 struct ip6_flowlabel *flowlabel;
145 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
146 if (flowlabel == NULL)
/* A flow label pins the destination address too. */
148 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
149 fl6_sock_release(flowlabel);
154 * connect() to INADDR_ANY means loopback (BSD'ism).
157 if(ipv6_addr_any(&usin->sin6_addr))
158 usin->sin6_addr.s6_addr[15] = 0x1;
160 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* TCP to a multicast destination makes no sense. */
162 if(addr_type & IPV6_ADDR_MULTICAST)
165 if (addr_type&IPV6_ADDR_LINKLOCAL) {
166 if (addr_len >= sizeof(struct sockaddr_in6) &&
167 usin->sin6_scope_id) {
168 /* If interface is set while binding, indices
171 if (sk->sk_bound_dev_if &&
172 sk->sk_bound_dev_if != usin->sin6_scope_id)
175 sk->sk_bound_dev_if = usin->sin6_scope_id;
178 /* Connect to link-local address requires an interface */
179 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer invalidates cached timestamps. */
183 if (tp->rx_opt.ts_recent_stamp &&
184 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
185 tp->rx_opt.ts_recent = 0;
186 tp->rx_opt.ts_recent_stamp = 0;
190 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
191 np->flow_label = fl.fl6_flowlabel;
/*
 * v4-mapped destination: switch the socket to the mapped ops and let
 * the IPv4 connect path do the work.  On failure the native IPv6 ops
 * are restored below.
 */
197 if (addr_type == IPV6_ADDR_MAPPED) {
198 u32 exthdrlen = icsk->icsk_ext_hdr_len;
199 struct sockaddr_in sin;
201 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
/* IPV6_V6ONLY sockets must not talk to v4-mapped peers. */
203 if (__ipv6_only_sock(sk))
206 sin.sin_family = AF_INET;
207 sin.sin_port = usin->sin6_port;
208 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
210 icsk->icsk_af_ops = &ipv6_mapped;
211 sk->sk_backlog_rcv = tcp_v4_do_rcv;
212 #ifdef CONFIG_TCP_MD5SIG
213 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
216 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* v4 connect failed: undo the ops switch. */
219 icsk->icsk_ext_hdr_len = exthdrlen;
220 icsk->icsk_af_ops = &ipv6_specific;
221 sk->sk_backlog_rcv = tcp_v6_do_rcv;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp->af_specific = &tcp_sock_ipv6_specific;
/* Mirror the v4 addresses as ::ffff:a.b.c.d in the v6 fields. */
227 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
229 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
236 if (!ipv6_addr_any(&np->rcv_saddr))
237 saddr = &np->rcv_saddr;
/* Build the flow for routing: proto/addresses/ports/oif. */
239 fl.proto = IPPROTO_TCP;
240 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
241 ipv6_addr_copy(&fl.fl6_src,
242 (saddr ? saddr : &np->saddr));
243 fl.oif = sk->sk_bound_dev_if;
244 fl.fl_ip_dport = usin->sin6_port;
245 fl.fl_ip_sport = inet->sport;
/* With a type-0 routing header, route to the first hop instead and
 * remember the real final destination in 'final'. */
247 if (np->opt && np->opt->srcrt) {
248 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
249 ipv6_addr_copy(&final, &fl.fl6_dst);
250 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
254 security_sk_classify_flow(sk, &fl);
256 err = ip6_dst_lookup(sk, &dst, &fl);
260 ipv6_addr_copy(&fl.fl6_dst, final_p);
262 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
264 err = ip6_dst_blackhole(sk, &dst, &fl);
271 ipv6_addr_copy(&np->rcv_saddr, saddr);
274 /* set the source address */
275 ipv6_addr_copy(&np->saddr, saddr);
276 inet->rcv_saddr = LOOPBACK4_IPV6;
278 sk->sk_gso_type = SKB_GSO_TCPV6;
279 __ip6_dst_store(sk, dst, NULL, NULL);
281 icsk->icsk_ext_hdr_len = 0;
283 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
/* Clamp MSS so a full segment always fits the IPv6 minimum MTU. */
286 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
288 inet->dport = usin->sin6_port;
290 tcp_set_state(sk, TCP_SYN_SENT);
291 err = inet6_hash_connect(&tcp_death_row, sk);
296 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
301 err = tcp_connect(sk);
/* Failure path: back to CLOSE and drop route capabilities. */
308 tcp_set_state(sk, TCP_CLOSE);
312 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.  Looks up the socket the quoted packet
 * belongs to, validates the quoted sequence number against the send
 * window, handles PKT_TOOBIG by syncing the MSS and retransmitting,
 * and otherwise converts the ICMP type/code to an errno delivered to
 * the socket (or its pending request_sock).
 */
316 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
317 int type, int code, int offset, __be32 info)
319 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
320 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
321 struct ipv6_pinfo *np;
327 sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr,
328 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
/* No matching socket: count the stray ICMP and bail. */
331 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
335 if (sk->sk_state == TCP_TIME_WAIT) {
336 inet_twsk_put(inet_twsk(sk));
/* If the socket is locked by a user context we can only count the drop. */
341 if (sock_owned_by_user(sk))
342 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
344 if (sk->sk_state == TCP_CLOSE)
/* The quoted seq must lie inside [snd_una, snd_nxt] or the ICMP is bogus. */
348 seq = ntohl(th->seq);
349 if (sk->sk_state != TCP_LISTEN &&
350 !between(seq, tp->snd_una, tp->snd_nxt)) {
351 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
/* Path MTU discovery: shrink the MSS and retransmit in-flight data. */
357 if (type == ICMPV6_PKT_TOOBIG) {
358 struct dst_entry *dst = NULL;
360 if (sock_owned_by_user(sk))
362 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
365 /* icmp should have updated the destination cache entry */
366 dst = __sk_dst_check(sk, np->dst_cookie);
369 struct inet_sock *inet = inet_sk(sk);
372 /* BUGGG_FUTURE: Again, it is not clear how
373 to handle rthdr case. Ignore this complexity
376 memset(&fl, 0, sizeof(fl));
377 fl.proto = IPPROTO_TCP;
378 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
379 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
380 fl.oif = sk->sk_bound_dev_if;
381 fl.fl_ip_dport = inet->dport;
382 fl.fl_ip_sport = inet->sport;
383 security_skb_classify_flow(skb, &fl);
385 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
386 sk->sk_err_soft = -err;
390 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
391 sk->sk_err_soft = -err;
/* Only shrink; larger-MTU news is handled by the retransmit timer. */
398 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
399 tcp_sync_mss(sk, dst_mtu(dst));
400 tcp_simple_retransmit(sk);
401 } /* else let the usual retransmit timer handle it */
406 icmpv6_err_convert(type, code, &err);
408 /* Might be for an request_sock */
409 switch (sk->sk_state) {
410 struct request_sock *req, **prev;
412 if (sock_owned_by_user(sk))
415 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
416 &hdr->saddr, inet6_iif(skb));
420 /* ICMPs are not backlogged, hence we cannot get
421 * an established socket here.
423 BUG_TRAP(req->sk == NULL);
425 if (seq != tcp_rsk(req)->snt_isn) {
426 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
/* Fatal error for an embryonic connection: drop the request. */
430 inet_csk_reqsk_queue_drop(sk, req, prev);
434 case TCP_SYN_RECV: /* Cannot happen.
435 It can, it SYNs are crossed. --ANK */
436 if (!sock_owned_by_user(sk)) {
438 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
442 sk->sk_err_soft = err;
446 if (!sock_owned_by_user(sk) && np->recverr) {
448 sk->sk_error_report(sk);
450 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for @req.  Routes the reply with a flow
 * derived from the request (honouring any type-0 routing header in the
 * socket's IPv6 options), checksums the segment and sends it via
 * ip6_xmit().  Returns 0 or a negative errno.
 */
458 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
460 struct inet6_request_sock *treq = inet6_rsk(req);
461 struct ipv6_pinfo *np = inet6_sk(sk);
462 struct sk_buff * skb;
463 struct ipv6_txoptions *opt = NULL;
464 struct in6_addr * final_p = NULL, final;
466 struct dst_entry *dst;
469 memset(&fl, 0, sizeof(fl));
470 fl.proto = IPPROTO_TCP;
471 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
472 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
473 fl.fl6_flowlabel = 0;
475 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
476 fl.fl_ip_sport = inet_sk(sk)->sport;
477 security_req_classify_flow(req, &fl);
/* Source routing: aim the route at the first hop, keep the real
 * destination in 'final' for after the lookup. */
480 if (opt && opt->srcrt) {
481 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
482 ipv6_addr_copy(&final, &fl.fl6_dst);
483 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
487 err = ip6_dst_lookup(sk, &dst, &fl);
491 ipv6_addr_copy(&fl.fl6_dst, final_p);
492 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
495 skb = tcp_make_synack(sk, dst, req);
497 struct tcphdr *th = tcp_hdr(skb);
/* Fill in the TCP checksum over the pseudo-header + segment. */
499 th->check = tcp_v6_check(th, skb->len,
500 &treq->loc_addr, &treq->rmt_addr,
501 csum_partial((char *)th, skb->len, skb->csum));
503 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
504 err = ip6_xmit(sk, skb, &fl, opt, 0);
505 err = net_xmit_eval(err);
/* Free options only if they were allocated for this call. */
509 if (opt && opt != np->opt)
510 sock_kfree_s(sk, opt, opt->tot_len);
/* Release the pktoptions skb pinned by a request_sock, if any. */
515 static void tcp_v6_reqsk_destructor(struct request_sock *req)
517 if (inet6_rsk(req)->pktopts)
518 kfree_skb(inet6_rsk(req)->pktopts)
521 #ifdef CONFIG_TCP_MD5SIG
/*
 * Linear scan of the socket's IPv6 MD5 key list for @addr.
 * Returns the matching key or (presumably, in the elided tail) NULL.
 */
522 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
523 struct in6_addr *addr)
525 struct tcp_sock *tp = tcp_sk(sk);
530 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
533 for (i = 0; i < tp->md5sig_info->entries6; i++) {
534 if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, addr) == 0)
535 return &tp->md5sig_info->keys6[i].base;
/* Look up the MD5 key for the peer of @addr_sk (its daddr). */
540 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
541 struct sock *addr_sk)
543 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/* Look up the MD5 key for the remote address of a pending request. */
546 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
547 struct request_sock *req)
549 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/*
 * Add (or replace) the MD5 key for peer @peer.  Takes ownership of
 * @newkey.  Allocates the md5sig_info container and grows the keys6
 * array by one slot when it is full.  GFP_ATOMIC throughout since this
 * can run in softirq context.
 */
552 static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
553 char *newkey, u8 newkeylen)
555 /* Add key to the list */
556 struct tcp_md5sig_key *key;
557 struct tcp_sock *tp = tcp_sk(sk);
558 struct tcp6_md5sig_key *keys;
560 key = tcp_v6_md5_do_lookup(sk, peer);
562 /* modify existing entry - just update that one */
565 key->keylen = newkeylen;
567 /* reallocate new list if current one is full. */
568 if (!tp->md5sig_info) {
569 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
570 if (!tp->md5sig_info) {
/* MD5 option precludes GSO: segments must be signed individually. */
574 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
576 if (tcp_alloc_md5sig_pool() == NULL) {
580 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
581 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
582 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
585 tcp_free_md5sig_pool();
/* Copy old entries into the enlarged array, then swap it in. */
590 if (tp->md5sig_info->entries6)
591 memmove(keys, tp->md5sig_info->keys6,
592 (sizeof (tp->md5sig_info->keys6[0]) *
593 tp->md5sig_info->entries6));
595 kfree(tp->md5sig_info->keys6);
596 tp->md5sig_info->keys6 = keys;
597 tp->md5sig_info->alloced6++;
/* Append the new key at the tail. */
600 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
602 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
603 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
605 tp->md5sig_info->entries6++;
/* af_ops adapter: add a key for the peer of @addr_sk. */
610 static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
611 u8 *newkey, __u8 newkeylen)
613 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
/*
 * Delete the MD5 key for @peer: free the key material, shrink the
 * array by shifting the tail down one slot, and free the whole list
 * when the last entry goes away.
 */
617 static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
619 struct tcp_sock *tp = tcp_sk(sk);
622 for (i = 0; i < tp->md5sig_info->entries6; i++) {
623 if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, peer) == 0) {
625 kfree(tp->md5sig_info->keys6[i].base.key);
626 tp->md5sig_info->entries6--;
628 if (tp->md5sig_info->entries6 == 0) {
629 kfree(tp->md5sig_info->keys6);
630 tp->md5sig_info->keys6 = NULL;
631 tp->md5sig_info->alloced6 = 0;
633 /* shrink the database */
634 if (tp->md5sig_info->entries6 != i)
635 memmove(&tp->md5sig_info->keys6[i],
636 &tp->md5sig_info->keys6[i+1],
637 (tp->md5sig_info->entries6 - i)
638 * sizeof (tp->md5sig_info->keys6[0]));
640 tcp_free_md5sig_pool();
/*
 * Free every MD5 key on the socket -- both the IPv6 (keys6) and the
 * IPv4 (keys4, used for v4-mapped peers) lists -- and release the
 * signature pool reference per list.
 */
647 static void tcp_v6_clear_md5_list (struct sock *sk)
649 struct tcp_sock *tp = tcp_sk(sk);
652 if (tp->md5sig_info->entries6) {
653 for (i = 0; i < tp->md5sig_info->entries6; i++)
654 kfree(tp->md5sig_info->keys6[i].base.key);
655 tp->md5sig_info->entries6 = 0;
656 tcp_free_md5sig_pool();
659 kfree(tp->md5sig_info->keys6);
660 tp->md5sig_info->keys6 = NULL;
661 tp->md5sig_info->alloced6 = 0;
663 if (tp->md5sig_info->entries4) {
664 for (i = 0; i < tp->md5sig_info->entries4; i++)
665 kfree(tp->md5sig_info->keys4[i].base.key);
666 tp->md5sig_info->entries4 = 0;
667 tcp_free_md5sig_pool();
670 kfree(tp->md5sig_info->keys4);
671 tp->md5sig_info->keys4 = NULL;
672 tp->md5sig_info->alloced4 = 0;
/*
 * setsockopt(TCP_MD5SIG) handler: copy the tcp_md5sig request from
 * userspace and add or delete the key.  v4-mapped peer addresses are
 * routed to the IPv4 add/del helpers.  An empty tcpm_keylen means
 * "delete".  Returns 0 or a negative errno.
 */
675 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
678 struct tcp_md5sig cmd;
679 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
682 if (optlen < sizeof(cmd))
685 if (copy_from_user(&cmd, optval, sizeof(cmd)))
688 if (sin6->sin6_family != AF_INET6)
/* Zero key length == delete the key for this peer. */
691 if (!cmd.tcpm_keylen) {
692 if (!tcp_sk(sk)->md5sig_info)
694 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
695 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
696 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
699 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
/* First key on this socket: allocate the md5sig_info container. */
702 if (!tcp_sk(sk)->md5sig_info) {
703 struct tcp_sock *tp = tcp_sk(sk);
704 struct tcp_md5sig_info *p;
706 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
/* Signed segments are incompatible with GSO. */
711 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
/* Duplicate the key; the add helpers take ownership of 'newkey'. */
714 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
717 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
718 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
719 newkey, cmd.tcpm_keylen);
721 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
/*
 * Compute the RFC 2385 TCP-MD5 signature into @md5_hash (16 bytes).
 * The digest covers, in order: an IPv6 pseudo-header, the TCP header
 * (options excluded), the segment payload if any, and the shared key.
 * Uses the per-CPU md5sig pool; on any crypto failure the output is
 * zeroed.  Returns 0 on success (presumed; success tail is elided).
 */
724 static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
725 struct in6_addr *saddr,
726 struct in6_addr *daddr,
727 struct tcphdr *th, int protocol,
730 struct scatterlist sg[4];
734 struct tcp_md5sig_pool *hp;
735 struct tcp6_pseudohdr *bp;
736 struct hash_desc *desc;
738 unsigned int nbytes = 0;
740 hp = tcp_get_md5sig_pool();
742 printk(KERN_WARNING "%s(): hash pool not found...\n", __FUNCTION__);
743 goto clear_hash_noput;
745 bp = &hp->md5_blk.ip6;
746 desc = &hp->md5_desc;
748 /* 1. TCP pseudo-header (RFC2460) */
749 ipv6_addr_copy(&bp->saddr, saddr);
750 ipv6_addr_copy(&bp->daddr, daddr);
751 bp->len = htonl(tcplen);
752 bp->protocol = htonl(protocol);
754 sg_init_table(sg, 4);
756 sg_set_buf(&sg[block++], bp, sizeof(*bp));
757 nbytes += sizeof(*bp);
759 /* 2. TCP header, excluding options */
762 sg_set_buf(&sg[block++], th, sizeof(*th));
763 nbytes += sizeof(*th);
765 /* 3. TCP segment data (if any) */
766 data_len = tcplen - (th->doff << 2);
768 u8 *data = (u8 *)th + (th->doff << 2);
769 sg_set_buf(&sg[block++], data, data_len);
/* 4. Shared secret key, appended last per RFC 2385. */
774 sg_set_buf(&sg[block++], key->key, key->keylen);
775 nbytes += key->keylen;
777 sg_mark_end(&sg[block - 1]);
779 /* Now store the hash into the packet */
780 err = crypto_hash_init(desc);
782 printk(KERN_WARNING "%s(): hash_init failed\n", __FUNCTION__);
785 err = crypto_hash_update(desc, sg, nbytes);
787 printk(KERN_WARNING "%s(): hash_update failed\n", __FUNCTION__);
790 err = crypto_hash_final(desc, md5_hash);
792 printk(KERN_WARNING "%s(): hash_final failed\n", __FUNCTION__);
796 /* Reset header, and free up the crypto */
797 tcp_put_md5sig_pool();
/* Error path: return the pool and zero the output digest. */
802 tcp_put_md5sig_pool();
804 memset(md5_hash, 0, 16);
/*
 * Wrapper choosing the address pair for the signature: from the full
 * socket when available, otherwise from the request_sock, then
 * delegates to tcp_v6_do_calc_md5_hash().
 */
808 static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
810 struct dst_entry *dst,
811 struct request_sock *req,
812 struct tcphdr *th, int protocol,
815 struct in6_addr *saddr, *daddr;
818 saddr = &inet6_sk(sk)->saddr;
819 daddr = &inet6_sk(sk)->daddr;
821 saddr = &inet6_rsk(req)->loc_addr;
822 daddr = &inet6_rsk(req)->rmt_addr;
824 return tcp_v6_do_calc_md5_hash(md5_hash, key,
826 th, protocol, tcplen);
/*
 * Verify the MD5 signature on an inbound segment.  Four outcomes:
 * no key expected and none present -> accept; expected-but-missing or
 * unexpected-but-present -> drop (rate-limited log); present and
 * expected -> recompute and compare the 16-byte digest.
 * Returns nonzero to drop the segment.
 */
829 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
831 __u8 *hash_location = NULL;
832 struct tcp_md5sig_key *hash_expected;
833 struct ipv6hdr *ip6h = ipv6_hdr(skb);
834 struct tcphdr *th = tcp_hdr(skb);
835 int length = (th->doff << 2) - sizeof (*th);
840 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
842 /* If the TCP option is too short, we can short cut */
843 if (length < TCPOLEN_MD5SIG)
844 return hash_expected ? 1 : 0;
/* Walk the TCP options looking for TCPOPT_MD5SIG. */
860 if (opsize < 2 || opsize > length)
862 if (opcode == TCPOPT_MD5SIG) {
872 /* do we have a hash as expected? */
873 if (!hash_expected) {
876 if (net_ratelimit()) {
877 printk(KERN_INFO "MD5 Hash NOT expected but found "
878 "(" NIP6_FMT ", %u)->"
879 "(" NIP6_FMT ", %u)\n",
880 NIP6(ip6h->saddr), ntohs(th->source),
881 NIP6(ip6h->daddr), ntohs(th->dest));
886 if (!hash_location) {
887 if (net_ratelimit()) {
888 printk(KERN_INFO "MD5 Hash expected but NOT found "
889 "(" NIP6_FMT ", %u)->"
890 "(" NIP6_FMT ", %u)\n",
891 NIP6(ip6h->saddr), ntohs(th->source),
892 NIP6(ip6h->daddr), ntohs(th->dest));
897 /* check the signature */
898 genhash = tcp_v6_do_calc_md5_hash(newhash,
900 &ip6h->saddr, &ip6h->daddr,
/* Drop on computation failure or digest mismatch. */
903 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
904 if (net_ratelimit()) {
905 printk(KERN_INFO "MD5 Hash %s for "
906 "(" NIP6_FMT ", %u)->"
907 "(" NIP6_FMT ", %u)\n",
908 genhash ? "failed" : "mismatch",
909 NIP6(ip6h->saddr), ntohs(th->source),
910 NIP6(ip6h->daddr), ntohs(th->dest));
/* Request-sock operations for embryonic IPv6 TCP connections. */
918 static struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
920 .obj_size = sizeof(struct tcp6_request_sock),
921 .rtx_syn_ack = tcp_v6_send_synack,
922 .send_ack = tcp_v6_reqsk_send_ack,
923 .destructor = tcp_v6_reqsk_destructor,
924 .send_reset = tcp_v6_send_reset
927 #ifdef CONFIG_TCP_MD5SIG
/* MD5 key lookup hook used while a connection is still a request_sock. */
928 static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
929 .md5_lookup = tcp_v6_reqsk_md5_lookup,
/* TIME-WAIT socket operations for IPv6 TCP. */
933 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
934 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
935 .twsk_unique = tcp_twsk_unique,
936 .twsk_destructor= tcp_twsk_destructor,
/*
 * Fill in the TCP checksum for an outgoing segment of @len bytes.
 * With CHECKSUM_PARTIAL, store the pseudo-header complement and let
 * the NIC finish; otherwise compute the full checksum in software.
 */
939 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
941 struct ipv6_pinfo *np = inet6_sk(sk);
942 struct tcphdr *th = tcp_hdr(skb);
944 if (skb->ip_summed == CHECKSUM_PARTIAL) {
945 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
946 skb->csum_start = skb_transport_header(skb) - skb->head;
947 skb->csum_offset = offsetof(struct tcphdr, check);
949 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
950 csum_partial((char *)th, th->doff<<2,
/*
 * GSO path: seed th->check with the pseudo-header complement taken
 * from the packet's own IPv6 header and mark the skb for hardware
 * checksum completion.
 */
955 static int tcp_v6_gso_send_check(struct sk_buff *skb)
957 struct ipv6hdr *ipv6h;
960 if (!pskb_may_pull(skb, sizeof(*th)))
963 ipv6h = ipv6_hdr(skb);
967 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
969 skb->csum_start = skb_transport_header(skb) - skb->head;
970 skb->csum_offset = offsetof(struct tcphdr, check);
971 skb->ip_summed = CHECKSUM_PARTIAL;
/*
 * Send a RST in reply to @skb.  Builds a minimal TCP header with
 * source/destination swapped, optionally appends an MD5 signature
 * option, routes by reversing the received packet's addresses and
 * transmits on the private tcp6_socket.  @sk may be NULL.
 */
975 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
977 struct tcphdr *th = tcp_hdr(skb), *t1;
978 struct sk_buff *buff;
980 unsigned int tot_len = sizeof(*th);
981 #ifdef CONFIG_TCP_MD5SIG
982 struct tcp_md5sig_key *key;
/* Never reset a non-unicast destination. */
988 if (!ipv6_unicast_destination(skb))
991 #ifdef CONFIG_TCP_MD5SIG
993 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
/* Reserve room for the aligned MD5 option when a key matches. */
998 tot_len += TCPOLEN_MD5SIG_ALIGNED;
1002 * We need to grab some memory, and put together an RST,
1003 * and then put it into the queue to be sent.
1006 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1011 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1013 t1 = (struct tcphdr *) skb_push(buff, tot_len);
1015 /* Swap the send and the receive. */
1016 memset(t1, 0, sizeof(*t1));
1017 t1->dest = th->source;
1018 t1->source = th->dest;
1019 t1->doff = tot_len / 4;
/* RFC 793: if the incoming segment had an ACK, echo its ack as our seq;
 * otherwise ACK everything the segment consumed. */
1023 t1->seq = th->ack_seq;
1026 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1027 + skb->len - (th->doff<<2));
1030 #ifdef CONFIG_TCP_MD5SIG
1032 __be32 *opt = (__be32*)(t1 + 1);
1033 opt[0] = htonl((TCPOPT_NOP << 24) |
1034 (TCPOPT_NOP << 16) |
1035 (TCPOPT_MD5SIG << 8) |
1037 tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
1038 &ipv6_hdr(skb)->daddr,
1039 &ipv6_hdr(skb)->saddr,
1040 t1, IPPROTO_TCP, tot_len);
1044 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1046 memset(&fl, 0, sizeof(fl));
/* Reply flow: swap the received packet's src/dst. */
1047 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1048 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1050 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1051 sizeof(*t1), IPPROTO_TCP,
1054 fl.proto = IPPROTO_TCP;
1055 fl.oif = inet6_iif(skb);
1056 fl.fl_ip_dport = t1->dest;
1057 fl.fl_ip_sport = t1->source;
1058 security_skb_classify_flow(skb, &fl);
1060 /* sk = NULL, but it is safe for now. RST socket required. */
1061 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1063 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1064 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
1065 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1066 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * Send a bare ACK (no data) carrying @seq/@ack/@win and, when @ts is
 * set, a timestamp option.  Used for TIME-WAIT and request_sock ACKs,
 * hence it works from @tw/@skb rather than a full socket.  May also
 * append an MD5 signature option.  Sent on the private tcp6_socket.
 */
1074 static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1075 struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1077 struct tcphdr *th = tcp_hdr(skb), *t1;
1078 struct sk_buff *buff;
1080 unsigned int tot_len = sizeof(struct tcphdr);
1082 #ifdef CONFIG_TCP_MD5SIG
1083 struct tcp_md5sig_key *key;
1084 struct tcp_md5sig_key tw_key;
1087 #ifdef CONFIG_TCP_MD5SIG
/* Key source: the listening socket, or the copy saved in the
 * timewait sock (built on the stack as tw_key). */
1088 if (!tw && skb->sk) {
1089 key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
1090 } else if (tw && tw->tw_md5_keylen) {
1091 tw_key.key = tw->tw_md5_key;
1092 tw_key.keylen = tw->tw_md5_keylen;
/* Account for the option space before allocating. */
1100 tot_len += TCPOLEN_TSTAMP_ALIGNED;
1101 #ifdef CONFIG_TCP_MD5SIG
1103 tot_len += TCPOLEN_MD5SIG_ALIGNED;
1106 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1111 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1113 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1115 /* Swap the send and the receive. */
1116 memset(t1, 0, sizeof(*t1));
1117 t1->dest = th->source;
1118 t1->source = th->dest;
1119 t1->doff = tot_len/4;
1120 t1->seq = htonl(seq);
1121 t1->ack_seq = htonl(ack);
1123 t1->window = htons(win);
1125 topt = (__be32 *)(t1 + 1);
/* Timestamp option: NOP NOP TS, then our clock and the echoed ts. */
1128 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1129 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1130 *topt++ = htonl(tcp_time_stamp);
1134 #ifdef CONFIG_TCP_MD5SIG
1136 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1137 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1138 tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
1139 &ipv6_hdr(skb)->daddr,
1140 &ipv6_hdr(skb)->saddr,
1141 t1, IPPROTO_TCP, tot_len);
1145 buff->csum = csum_partial((char *)t1, tot_len, 0);
1147 memset(&fl, 0, sizeof(fl));
/* Reply flow: swap the received packet's src/dst. */
1148 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1149 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1151 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1152 tot_len, IPPROTO_TCP,
1155 fl.proto = IPPROTO_TCP;
1156 fl.oif = inet6_iif(skb);
1157 fl.fl_ip_dport = t1->dest;
1158 fl.fl_ip_sport = t1->source;
1159 security_skb_classify_flow(skb, &fl);
1161 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1162 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1163 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
1164 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME-WAIT socket, using its saved snd/rcv state. */
1172 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1174 struct inet_timewait_sock *tw = inet_twsk(sk);
1175 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1177 tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1178 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1179 tcptw->tw_ts_recent);
/* ACK a retransmitted SYN for a pending request (isn+1 / rcv_isn+1). */
1184 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1186 tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
/*
 * For a segment arriving on a listening socket, find the half-open
 * request or an already-established/TIME-WAIT socket it belongs to.
 * Falls through to (disabled) SYN-cookie checking otherwise.
 */
1190 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1192 struct request_sock *req, **prev;
1193 const struct tcphdr *th = tcp_hdr(skb);
1196 /* Find possible connection requests. */
1197 req = inet6_csk_search_req(sk, &prev, th->source,
1198 &ipv6_hdr(skb)->saddr,
1199 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1201 return tcp_check_req(sk, skb, req, prev);
1203 nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo,
1204 &ipv6_hdr(skb)->saddr, th->source,
1205 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1208 if (nsk->sk_state != TCP_TIME_WAIT) {
1212 inet_twsk_put(inet_twsk(nsk));
/* IPv6 SYN cookies were not wired up at this point. */
1216 #if 0 /*def CONFIG_SYN_COOKIES*/
1217 if (!th->rst && !th->syn && th->ack)
1218 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1223 /* FIXME: this is substantially similar to the ipv4 code.
1224 * Can some kind of merge be done? -- erics
/*
 * Passive open: handle an incoming SYN on a listening socket.
 * Allocates and fills a request_sock, parses TCP options, records the
 * peer/local addresses and inbound interface, picks an ISN, sends the
 * SYN-ACK and queues the request.  Returns 0 (never sends a reset).
 */
1226 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1228 struct inet6_request_sock *treq;
1229 struct ipv6_pinfo *np = inet6_sk(sk);
1230 struct tcp_options_received tmp_opt;
1231 struct tcp_sock *tp = tcp_sk(sk);
1232 struct request_sock *req = NULL;
1233 __u32 isn = TCP_SKB_CB(skb)->when;
/* v4-mapped traffic on a dual-stack listener goes to the v4 path. */
1235 if (skb->protocol == htons(ETH_P_IP))
1236 return tcp_v4_conn_request(sk, skb);
1238 if (!ipv6_unicast_destination(skb))
1242 * There are no SYN attacks on IPv6, yet...
1244 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1245 if (net_ratelimit())
1246 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
/* Accept queue full and young-request pressure: drop the SYN. */
1250 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1253 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1257 #ifdef CONFIG_TCP_MD5SIG
1258 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1261 tcp_clear_options(&tmp_opt);
/* Same IPv6-min-MTU MSS clamp as the connect path. */
1262 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1263 tmp_opt.user_mss = tp->rx_opt.user_mss;
1265 tcp_parse_options(skb, &tmp_opt, 0);
1267 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1268 tcp_openreq_init(req, &tmp_opt, skb);
1270 treq = inet6_rsk(req);
1271 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1272 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1273 TCP_ECN_create_request(req, tcp_hdr(skb));
1274 treq->pktopts = NULL;
/* Keep the SYN skb alive if the socket wants its ancillary data. */
1275 if (ipv6_opt_accepted(sk, skb) ||
1276 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1277 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1278 atomic_inc(&skb->users);
1279 treq->pktopts = skb;
1281 treq->iif = sk->sk_bound_dev_if;
1283 /* So that link locals have meaning */
1284 if (!sk->sk_bound_dev_if &&
1285 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1286 treq->iif = inet6_iif(skb);
1289 isn = tcp_v6_init_sequence(skb);
1291 tcp_rsk(req)->snt_isn = isn;
1293 security_inet_conn_request(sk, skb, req);
1295 if (tcp_v6_send_synack(sk, req))
1298 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1305 return 0; /* don't send reset */
/*
 * Create the child socket once the 3-way handshake completes.
 * The v4-mapped branch delegates to tcp_v4_syn_recv_sock() and then
 * dresses the child in v4-mapped IPv6 addresses and the mapped ops.
 * The native branch routes (with optional source routing), clones the
 * child with tcp_create_openreq_child(), copies addresses, pktoptions,
 * IPv6 txoptions and any MD5 key, then hashes the child and inherits
 * the listener's port.  Returns the new socket or NULL on failure.
 */
1308 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1309 struct request_sock *req,
1310 struct dst_entry *dst)
1312 struct inet6_request_sock *treq = inet6_rsk(req);
1313 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1314 struct tcp6_sock *newtcp6sk;
1315 struct inet_sock *newinet;
1316 struct tcp_sock *newtp;
1318 struct ipv6_txoptions *opt;
1319 #ifdef CONFIG_TCP_MD5SIG
1320 struct tcp_md5sig_key *key;
/* v4-mapped case: let IPv4 build the child, then re-skin it as v6. */
1323 if (skb->protocol == htons(ETH_P_IP)) {
1328 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1333 newtcp6sk = (struct tcp6_sock *)newsk;
1334 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1336 newinet = inet_sk(newsk);
1337 newnp = inet6_sk(newsk);
1338 newtp = tcp_sk(newsk);
1340 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Express the v4 endpoints as ::ffff:a.b.c.d. */
1342 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1345 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1348 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1350 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1351 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1352 #ifdef CONFIG_TCP_MD5SIG
1353 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1356 newnp->pktoptions = NULL;
1358 newnp->mcast_oif = inet6_iif(skb);
1359 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1362 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1363 * here, tcp_create_openreq_child now does this for us, see the comment in
1364 * that function for the gory details. -acme
1367 /* It is tricky place. Until this moment IPv4 tcp
1368 worked with IPv6 icsk.icsk_af_ops.
1371 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1378 if (sk_acceptq_is_full(sk))
/* Native IPv6 path: route the child's flow if no dst was supplied. */
1382 struct in6_addr *final_p = NULL, final;
1385 memset(&fl, 0, sizeof(fl));
1386 fl.proto = IPPROTO_TCP;
1387 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1388 if (opt && opt->srcrt) {
1389 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1390 ipv6_addr_copy(&final, &fl.fl6_dst);
1391 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1394 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1395 fl.oif = sk->sk_bound_dev_if;
1396 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1397 fl.fl_ip_sport = inet_sk(sk)->sport;
1398 security_req_classify_flow(req, &fl);
1400 if (ip6_dst_lookup(sk, &dst, &fl))
1404 ipv6_addr_copy(&fl.fl6_dst, final_p);
1406 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1410 newsk = tcp_create_openreq_child(sk, req, skb);
1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1416 * count here, tcp_create_openreq_child now does this for us, see the
1417 * comment in that function for the gory details. -acme
1420 newsk->sk_gso_type = SKB_GSO_TCPV6;
1421 __ip6_dst_store(newsk, dst, NULL, NULL);
1423 newtcp6sk = (struct tcp6_sock *)newsk;
1424 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1426 newtp = tcp_sk(newsk);
1427 newinet = inet_sk(newsk);
1428 newnp = inet6_sk(newsk);
1430 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1432 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1433 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1434 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1435 newsk->sk_bound_dev_if = treq->iif;
1437 /* Now IPv6 options...
1439 First: no IPv4 options.
1441 newinet->opt = NULL;
1442 newnp->ipv6_fl_list = NULL;
1445 newnp->rxopt.all = np->rxopt.all;
1447 /* Clone pktoptions received with SYN */
1448 newnp->pktoptions = NULL;
1449 if (treq->pktopts != NULL) {
1450 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1451 kfree_skb(treq->pktopts);
1452 treq->pktopts = NULL;
1453 if (newnp->pktoptions)
1454 skb_set_owner_r(newnp->pktoptions, newsk);
1457 newnp->mcast_oif = inet6_iif(skb);
1458 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1460 /* Clone native IPv6 options from listening socket (if any)
1462 Yes, keeping reference count would be much more clever,
1463 but we make one more one thing there: reattach optmem
1467 newnp->opt = ipv6_dup_options(newsk, opt);
1469 sock_kfree_s(sk, opt, opt->tot_len);
1472 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1474 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1475 newnp->opt->opt_flen);
1477 tcp_mtup_init(newsk);
1478 tcp_sync_mss(newsk, dst_mtu(dst));
1479 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1480 tcp_initialize_rcv_mss(newsk);
1482 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1484 #ifdef CONFIG_TCP_MD5SIG
1485 /* Copy over the MD5 key from the original socket */
1486 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1487 /* We're using one, so create a matching key
1488 * on the newsk structure. If we fail to get
1489 * memory, then we end up not copying the key
1492 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1494 tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr,
1495 newkey, key->keylen);
1499 __inet6_hash(newsk);
1500 inet_inherit_port(sk, newsk);
/* Error labels: count the overflow/drop and free local options. */
1505 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1507 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1508 if (opt && opt != np->opt)
1509 sock_kfree_s(sk, opt, opt->tot_len);
1514 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
/* Validate or seed the TCP checksum of an incoming IPv6 segment.
 * Non-zero return means the checksum is known bad; zero means it is
 * either already verified or deferred for later completion.
 * NOTE(review): interior lines of this listing are elided, so the
 * exact brace/return structure is not fully visible here. */
/* Hardware delivered a full packet checksum (CHECKSUM_COMPLETE):
 * fold in the TCP/IPv6 pseudo-header; if it verifies, mark the skb
 * CHECKSUM_UNNECESSARY so nobody re-checks it downstream. */
1516 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1517 if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
1518 &ipv6_hdr(skb)->daddr, skb->csum)) {
1519 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* No (usable) hardware checksum: seed skb->csum with the inverted
 * pseudo-header sum so the payload checksum can be completed later. */
1524 skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
1525 &ipv6_hdr(skb)->saddr,
1526 &ipv6_hdr(skb)->daddr, 0));
/* Short packets are cheap enough to verify in software right away
 * rather than deferring the check. */
1528 if (skb->len <= 76) {
1529 return __skb_checksum_complete(skb);
1534 /* The socket must have its spinlock held when we get
1537  * We have a potential double-lock case here, so even when
1538  * doing backlog processing we use the BH locking scheme.
1539  * This is because we cannot sleep with the original spinlock
1542 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
/* Per-socket receive path for an already-looked-up TCP/IPv6 socket.
 * Called with the socket lock held (directly or via backlog). */
1544 struct ipv6_pinfo *np = inet6_sk(sk);
1545 struct tcp_sock *tp;
1546 struct sk_buff *opt_skb = NULL;
1548 /* Imagine: socket is IPv6. IPv4 packet arrives,
1549    goes to IPv4 receive handler and backlogged.
1550    From backlog it always goes here. Kerboom...
1551    Fortunately, tcp_rcv_established and rcv_established
1552    handle them correctly, but it is not case with
1553    tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* A v4-mapped packet on an IPv6 socket is handed to the IPv4 path. */
1556 if (skb->protocol == htons(ETH_P_IP))
1557 return tcp_v4_do_rcv(sk, skb);
/* Segments failing the TCP-MD5 signature check are dropped
 * (drop target elided from this listing). */
1559 #ifdef CONFIG_TCP_MD5SIG
1560 if (tcp_v6_inbound_md5_hash (sk, skb))
/* Socket (BPF) filter may reject the segment. */
1564 if (sk_filter(sk, skb))
1568  * socket locking is here for SMP purposes as backlog rcv
1569  * is currently called with bh processing disabled.
1572 /* Do Stevens' IPV6_PKTOPTIONS.
1574    Yes, guys, it is the only place in our code, where we
1575    may make it not affecting IPv4.
1576    The rest of code is protocol independent,
1577    and I do not like idea to uglify IPv4.
1579    Actually, all the idea behind IPV6_PKTOPTIONS
1580    looks not very well thought. For now we latch
1581    options, received in the last packet, enqueued
1582    by tcp. Feel free to propose better solution.
/* Keep a clone so ancillary data can be delivered via
 * IPV6_PKTOPTIONS after the segment is consumed (see label below). */
1586 opt_skb = skb_clone(skb, GFP_ATOMIC);
/* Fast path: fully established connection. */
1588 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1589 TCP_CHECK_TIMER(sk);
1590 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1592 TCP_CHECK_TIMER(sk);
1594 goto ipv6_pktoptions;
/* Header-length / checksum sanity (failure target elided). */
1598 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
/* Listening socket: may create (or find) a request/child socket. */
1601 if (sk->sk_state == TCP_LISTEN) {
1602 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1607  * Queue it on the new socket if the new socket is active,
1608  * otherwise we just shortcircuit this and continue with
1612 if (tcp_child_process(sk, nsk, skb))
1615 __kfree_skb(opt_skb);
/* All other states go through the generic state machine. */
1620 TCP_CHECK_TIMER(sk);
1621 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1623 TCP_CHECK_TIMER(sk);
1625 goto ipv6_pktoptions;
/* Error path (label elided): answer bogus segments with a RST. */
1629 tcp_v6_send_reset(sk, skb);
1632 __kfree_skb(opt_skb);
1636 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1641 /* Do you ask, what is it?
1643    1. skb was enqueued by tcp.
1644    2. skb is added to tail of read queue, rather than out of order.
1645    3. socket is not in passive state.
1646    4. Finally, it really contains options, which user wants to receive.
/* ipv6_pktoptions target (label line elided): latch the cloned skb's
 * ancillary data for the user if all four conditions above hold. */
1649 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1650 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1651 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1652 np->mcast_oif = inet6_iif(opt_skb);
1653 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1654 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1655 if (ipv6_opt_accepted(sk, opt_skb)) {
1656 skb_set_owner_r(opt_skb, sk);
/* Swap in the new pktoptions; old one (now in opt_skb) is freed. */
1657 opt_skb = xchg(&np->pktoptions, opt_skb);
1659 __kfree_skb(opt_skb);
1660 opt_skb = xchg(&np->pktoptions, NULL);
1669 static int tcp_v6_rcv(struct sk_buff *skb)
/* Protocol entry point for every incoming TCP/IPv6 segment (installed
 * as tcpv6_protocol.handler). Validates headers, looks up the owning
 * socket and dispatches to it, handling TIME_WAIT specially.
 * NOTE(review): many lines (labels, braces, declarations) are elided
 * from this listing. */
1675 if (skb->pkt_type != PACKET_HOST)
1679  * Count it even if it's bad.
1681 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
/* Make sure at least the fixed TCP header is in the linear area. */
1683 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* doff is in 32-bit words; reject headers shorter than the minimum,
 * then pull the full header including options. */
1688 if (th->doff < sizeof(struct tcphdr)/4)
1690 if (!pskb_may_pull(skb, th->doff*4))
1693 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
/* Precompute sequence bookkeeping in the skb control block;
 * SYN and FIN each consume one sequence number. */
1697 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1698 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1699 skb->len - th->doff*4);
1700 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1701 TCP_SKB_CB(skb)->when = 0;
1702 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1703 TCP_SKB_CB(skb)->sacked = 0;
/* Established/listener hash lookup by 4-tuple (+ iif, elided). */
1705 sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo,
1706 &ipv6_hdr(skb)->saddr, th->source,
1707 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
1714 if (sk->sk_state == TCP_TIME_WAIT)
1717 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1718 goto discard_and_relse;
1720 if (sk_filter(sk, skb))
1721 goto discard_and_relse;
/* BH-level lock; _nested because a timewait-recycle path may already
 * hold a listener lock — see bh_lock_sock_nested() users. */
1725 bh_lock_sock_nested(sk);
1727 if (!sock_owned_by_user(sk)) {
1728 #ifdef CONFIG_NET_DMA
/* Opportunistically grab a DMA copy channel for receive offload. */
1729 struct tcp_sock *tp = tcp_sk(sk);
1730 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1731 tp->ucopy.dma_chan = get_softnet_dma();
1732 if (tp->ucopy.dma_chan)
1733 ret = tcp_v6_do_rcv(sk, skb);
/* Not prequeued for the user task -> process in softirq context. */
1737 if (!tcp_prequeue(sk, skb))
1738 ret = tcp_v6_do_rcv(sk, skb);
/* Socket busy in process context: defer to the backlog queue. */
1741 sk_add_backlog(sk, skb);
1745 return ret ? -1 : 0;
/* no_tcp_socket path (label elided): policy check, then RST. */
1748 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1751 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1753 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1755 tcp_v6_send_reset(NULL, skb);
/* do_time_wait path (label elided): segment hit a TIME_WAIT bucket. */
1772 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1773 inet_twsk_put(inet_twsk(sk));
1777 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1778 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1779 inet_twsk_put(inet_twsk(sk));
1783 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN (case label elided): a new SYN may revive the port if a
 * listener exists — kill the timewait sock and retry lookup. */
1788 sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo,
1789 &ipv6_hdr(skb)->daddr,
1790 ntohs(th->dest), inet6_iif(skb));
1792 struct inet_timewait_sock *tw = inet_twsk(sk);
1793 inet_twsk_deschedule(tw, &tcp_death_row);
1798 /* Fall through to ACK */
1801 tcp_v6_timewait_ack(sk, skb);
1805 case TCP_TW_SUCCESS:;
/* remember_stamp hook for ipv6_specific: peer timestamp caching is
 * not implemented for IPv6 (the body visible here is only the stub
 * comment; the return statement is elided from this listing). */
1810 static int tcp_v6_remember_stamp(struct sock *sk)
1812 /* Alas, not yet... */
/* AF-specific connection ops for native IPv6 TCP sockets; installed
 * as icsk->icsk_af_ops in tcp_v6_init_sock(). */
1816 static struct inet_connection_sock_af_ops ipv6_specific = {
1817 .queue_xmit = inet6_csk_xmit,
1818 .send_check = tcp_v6_send_check,
1819 .rebuild_header = inet6_sk_rebuild_header,
1820 .conn_request = tcp_v6_conn_request,
1821 .syn_recv_sock = tcp_v6_syn_recv_sock,
1822 .remember_stamp = tcp_v6_remember_stamp,
1823 .net_header_len = sizeof(struct ipv6hdr),
1824 .setsockopt = ipv6_setsockopt,
1825 .getsockopt = ipv6_getsockopt,
1826 .addr2sockaddr = inet6_csk_addr2sockaddr,
1827 .sockaddr_len = sizeof(struct sockaddr_in6),
1828 .bind_conflict = inet6_csk_bind_conflict,
1829 #ifdef CONFIG_COMPAT
1830 .compat_setsockopt = compat_ipv6_setsockopt,
1831 .compat_getsockopt = compat_ipv6_getsockopt,
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1836 static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1837 .md5_lookup = tcp_v6_md5_lookup,
1838 .calc_md5_hash = tcp_v6_calc_md5_hash,
1839 .md5_add = tcp_v6_md5_add_func,
1840 .md5_parse = tcp_v6_parse_md5_keys,
1845  * TCP over IPv4 via INET6 API
/* AF ops for an AF_INET6 socket that is actually talking IPv4
 * (v4-mapped address): transmit/checksum/header handling are the
 * IPv4 routines, while sockopt and sockaddr handling stay IPv6. */
1848 static struct inet_connection_sock_af_ops ipv6_mapped = {
1849 .queue_xmit = ip_queue_xmit,
1850 .send_check = tcp_v4_send_check,
1851 .rebuild_header = inet_sk_rebuild_header,
1852 .conn_request = tcp_v6_conn_request,
1853 .syn_recv_sock = tcp_v6_syn_recv_sock,
1854 .remember_stamp = tcp_v4_remember_stamp,
1855 .net_header_len = sizeof(struct iphdr),
1856 .setsockopt = ipv6_setsockopt,
1857 .getsockopt = ipv6_getsockopt,
1858 .addr2sockaddr = inet6_csk_addr2sockaddr,
1859 .sockaddr_len = sizeof(struct sockaddr_in6),
1860 .bind_conflict = inet6_csk_bind_conflict,
1861 #ifdef CONFIG_COMPAT
1862 .compat_setsockopt = compat_ipv6_setsockopt,
1863 .compat_getsockopt = compat_ipv6_getsockopt,
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 ops for v4-mapped sockets: hashing follows the IPv4 wire
 * format, key add/parse remain the IPv6 entry points. */
1868 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1869 .md5_lookup = tcp_v4_md5_lookup,
1870 .calc_md5_hash = tcp_v4_calc_md5_hash,
1871 .md5_add = tcp_v6_md5_add_func,
1872 .md5_parse = tcp_v6_parse_md5_keys,
1876 /* NOTE: A lot of things set to zero explicitly by call to
1877  * sk_alloc() so need not be done here.
/* Per-socket initializer (tcpv6_prot.init): sets up queues, timers
 * and the initial TCP state for a new AF_INET6 TCP socket. */
1879 static int tcp_v6_init_sock(struct sock *sk)
1881 struct inet_connection_sock *icsk = inet_csk(sk);
1882 struct tcp_sock *tp = tcp_sk(sk);
1884 skb_queue_head_init(&tp->out_of_order_queue);
1885 tcp_init_xmit_timers(sk);
1886 tcp_prequeue_init(tp);
/* Initial RTO and smoothed mean deviation before any RTT sample. */
1888 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1889 tp->mdev = TCP_TIMEOUT_INIT;
1891 /* So many TCP implementations out there (incorrectly) count the
1892  * initial SYN frame in their delayed-ACK and congestion control
1893  * algorithms that we must have the following bandaid to talk
1894  * efficiently to them. -DaveM
1898 /* See draft-stevens-tcpca-spec-01 for discussion of the
1899  * initialization of these values.
1901 tp->snd_ssthresh = 0x7fffffff;
1902 tp->snd_cwnd_clamp = ~0;
/* 536 = conservative default MSS until the path MTU is known. */
1903 tp->mss_cache = 536;
1905 tp->reordering = sysctl_tcp_reordering;
1907 sk->sk_state = TCP_CLOSE;
/* Native IPv6 ops by default; switched to ipv6_mapped elsewhere if
 * the socket ends up talking to a v4-mapped peer. */
1909 icsk->icsk_af_ops = &ipv6_specific;
1910 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1911 icsk->icsk_sync_mss = tcp_sync_mss;
1912 sk->sk_write_space = sk_stream_write_space;
1913 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1915 #ifdef CONFIG_TCP_MD5SIG
1916 tp->af_specific = &tcp_sock_ipv6_specific;
/* Default buffer sizes come from the tcp_{w,r}mem sysctls. */
1919 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1920 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1922 atomic_inc(&tcp_sockets_allocated);
/* Per-socket destructor (tcpv6_prot.destroy): tears down MD5 state,
 * runs the shared IPv4 destructor, then the IPv6-specific one. */
1927 static int tcp_v6_destroy_sock(struct sock *sk)
1929 #ifdef CONFIG_TCP_MD5SIG
1930 /* Clean up the MD5 key list */
1931 if (tcp_sk(sk)->md5sig_info)
1932 tcp_v6_clear_md5_list(sk);
1934 tcp_v4_destroy_sock(sk);
1935 return inet6_destroy_sock(sk);
1938 #ifdef CONFIG_PROC_FS
1939 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 row for a pending open request (SYN_RECV)
 * hanging off listener sk. ttd = remaining time-to-drop in jiffies. */
1940 static void get_openreq6(struct seq_file *seq,
1941 struct sock *sk, struct request_sock *req, int i, int uid)
1943 int ttd = req->expires - jiffies;
1944 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1945 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
/* Fixed-width format shared with established/timewait rows so user
 * space can parse all rows uniformly. */
1951 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1952 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1954 src->s6_addr32[0], src->s6_addr32[1],
1955 src->s6_addr32[2], src->s6_addr32[3],
1956 ntohs(inet_sk(sk)->sport),
1957 dest->s6_addr32[0], dest->s6_addr32[1],
1958 dest->s6_addr32[2], dest->s6_addr32[3],
1959 ntohs(inet_rsk(req)->rmt_port),
1961 0,0, /* could print option size, but that is af dependent. */
1962 1, /* timers active (only the expire timer) */
1963 jiffies_to_clock_t(ttd),
1966 0, /* non standard timer */
1967 0, /* open_requests have no inode */
/* Emit one /proc/net/tcp6 row for a full socket (listener or
 * established). NOTE(review): the line assigning `dest` is elided
 * from this listing; only `src = &np->rcv_saddr` is visible. */
1971 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1973 struct in6_addr *dest, *src;
1976 unsigned long timer_expires;
1977 struct inet_sock *inet = inet_sk(sp);
1978 struct tcp_sock *tp = tcp_sk(sp);
1979 const struct inet_connection_sock *icsk = inet_csk(sp);
1980 struct ipv6_pinfo *np = inet6_sk(sp);
1983 src = &np->rcv_saddr;
1984 destp = ntohs(inet->dport);
1985 srcp = ntohs(inet->sport);
/* Pick whichever timer is pending (retransmit, zero-window probe,
 * or keepalive) to report its expiry; fall back to "now". */
1987 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1989 timer_expires = icsk->icsk_timeout;
1990 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1992 timer_expires = icsk->icsk_timeout;
1993 } else if (timer_pending(&sp->sk_timer)) {
1995 timer_expires = sp->sk_timer.expires;
1998 timer_expires = jiffies;
2002 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2003 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2005 src->s6_addr32[0], src->s6_addr32[1],
2006 src->s6_addr32[2], src->s6_addr32[3], srcp,
2007 dest->s6_addr32[0], dest->s6_addr32[1],
2008 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx queue = unacked bytes; rx queue = backlog for listeners,
 * unread bytes otherwise. */
2010 tp->write_seq-tp->snd_una,
2011 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
2013 jiffies_to_clock_t(timer_expires - jiffies),
2014 icsk->icsk_retransmits,
2016 icsk->icsk_probes_out,
2018 atomic_read(&sp->sk_refcnt), sp,
2021 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* ssthresh >= 0xFFFF is reported as -1 ("no slow-start threshold"). */
2022 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/* Emit one /proc/net/tcp6 row for a TIME_WAIT mini-socket.
 * ttd = jiffies remaining until the timewait bucket expires. */
2026 static void get_timewait6_sock(struct seq_file *seq,
2027 struct inet_timewait_sock *tw, int i)
2029 struct in6_addr *dest, *src;
2031 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
2032 int ttd = tw->tw_ttd - jiffies;
2037 dest = &tw6->tw_v6_daddr;
2038 src = &tw6->tw_v6_rcv_saddr;
2039 destp = ntohs(tw->tw_dport);
2040 srcp = ntohs(tw->tw_sport);
2043 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2044 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2046 src->s6_addr32[0], src->s6_addr32[1],
2047 src->s6_addr32[2], src->s6_addr32[3], srcp,
2048 dest->s6_addr32[0], dest->s6_addr32[1],
2049 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* Most columns are fixed: timer type 3 (timewait), no uid/inode. */
2050 tw->tw_substate, 0, 0,
2051 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2052 atomic_read(&tw->tw_refcnt), tw);
/* seq_file show callback: print the header row for the start token,
 * otherwise dispatch on the iterator state to the matching printer. */
2055 static int tcp6_seq_show(struct seq_file *seq, void *v)
2057 struct tcp_iter_state *st;
2059 if (v == SEQ_START_TOKEN) {
2064 "st tx_queue rx_queue tr tm->when retrnsmt"
2065 " uid timeout inode\n");
2070 switch (st->state) {
2071 case TCP_SEQ_STATE_LISTENING:
2072 case TCP_SEQ_STATE_ESTABLISHED:
2073 get_tcp6_sock(seq, v, st->num);
2075 case TCP_SEQ_STATE_OPENREQ:
2076 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2078 case TCP_SEQ_STATE_TIME_WAIT:
2079 get_timewait6_sock(seq, v, st->num);
/* seq_file glue for /proc/net/tcp6; fops filled in by
 * tcp_proc_register() at init time. */
2086 static struct file_operations tcp6_seq_fops;
2087 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2088 .owner = THIS_MODULE,
2091 .seq_show = tcp6_seq_show,
2092 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 seq_file entry; returns 0 or -errno. */
2095 int __init tcp6_proc_init(void)
2097 return tcp_proc_register(&tcp6_seq_afinfo);
/* Remove the /proc/net/tcp6 entry registered by tcp6_proc_init(). */
2100 void tcp6_proc_exit(void)
2102 tcp_proc_unregister(&tcp6_seq_afinfo);
/* Per-CPU "sockets in use" counter plumbing for this proto. */
2106 DEFINE_PROTO_INUSE(tcpv6)
/* Transport protocol descriptor for AF_INET6/SOCK_STREAM sockets;
 * hooked up via tcpv6_protosw below. Most operations are shared with
 * IPv4 TCP; only init/destroy/backlog_rcv/hash are IPv6-specific. */
2108 struct proto tcpv6_prot = {
2110 .owner = THIS_MODULE,
2112 .connect = tcp_v6_connect,
2113 .disconnect = tcp_disconnect,
2114 .accept = inet_csk_accept,
2116 .init = tcp_v6_init_sock,
2117 .destroy = tcp_v6_destroy_sock,
2118 .shutdown = tcp_shutdown,
2119 .setsockopt = tcp_setsockopt,
2120 .getsockopt = tcp_getsockopt,
2121 .recvmsg = tcp_recvmsg,
2122 .backlog_rcv = tcp_v6_do_rcv,
2123 .hash = tcp_v6_hash,
2124 .unhash = inet_unhash,
2125 .get_port = inet_csk_get_port,
2126 .enter_memory_pressure = tcp_enter_memory_pressure,
2127 .sockets_allocated = &tcp_sockets_allocated,
2128 .memory_allocated = &tcp_memory_allocated,
2129 .memory_pressure = &tcp_memory_pressure,
2130 .orphan_count = &tcp_orphan_count,
2131 .sysctl_mem = sysctl_tcp_mem,
2132 .sysctl_wmem = sysctl_tcp_wmem,
2133 .sysctl_rmem = sysctl_tcp_rmem,
2134 .max_header = MAX_TCP_HEADER,
2135 .obj_size = sizeof(struct tcp6_sock),
2136 .twsk_prot = &tcp6_timewait_sock_ops,
2137 .rsk_prot = &tcp6_request_sock_ops,
2138 .hashinfo = &tcp_hashinfo,
2139 #ifdef CONFIG_COMPAT
2140 .compat_setsockopt = compat_tcp_setsockopt,
2141 .compat_getsockopt = compat_tcp_getsockopt,
REF_PROTO_INUSE(tcpv6)
/* inet6 layer registration: delivers IPPROTO_TCP packets to
 * tcp_v6_rcv and wires up GSO and ICMPv6 error handling. */
2146 static struct inet6_protocol tcpv6_protocol = {
2147 .handler = tcp_v6_rcv,
2148 .err_handler = tcp_v6_err,
2149 .gso_send_check = tcp_v6_gso_send_check,
2150 .gso_segment = tcp_tso_segment,
2151 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* socket() switch entry binding AF_INET6/SOCK_STREAM/IPPROTO_TCP to
 * tcpv6_prot and the generic inet6 stream ops. */
2154 static struct inet_protosw tcpv6_protosw = {
2155 .type = SOCK_STREAM,
2156 .protocol = IPPROTO_TCP,
2157 .prot = &tcpv6_prot,
2158 .ops = &inet6_stream_ops,
2161 .flags = INET_PROTOSW_PERMANENT |
/* Module init: register the inet6 protocol handler, the protosw
 * entry, and create the control socket used for sending RSTs/ACKs.
 * Error paths unwind registrations in reverse order (goto labels,
 * partially elided in this listing). */
2165 int __init tcpv6_init(void)
2169 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2173 /* register inet6 protocol */
2174 ret = inet6_register_protosw(&tcpv6_protosw);
2176 goto out_tcpv6_protocol;
/* Kernel-internal socket for tcp_v6_send_reset()/timewait ACKs. */
2178 ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6,
2179 SOCK_RAW, IPPROTO_TCP);
2181 goto out_tcpv6_protosw;
/* out_tcpv6_protocol / out_tcpv6_protosw unwind labels follow. */
2186 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2188 inet6_unregister_protosw(&tcpv6_protosw);
2192 void tcpv6_exit(void)
2194 sock_release(tcp6_socket);
2195 inet6_unregister_protosw(&tcpv6_protosw);
2196 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);