2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/* Global per-CPU ICMPv6 SNMP statistics, exported so other IPv6 modules
 * can update the counters.  __read_mostly places the pointers in the
 * read-mostly cache section. */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
72 EXPORT_SYMBOL(icmpv6_statistics);
/* Per-message-type ICMPv6 counters (separate MIB from the aggregate one). */
73 DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly;
74 EXPORT_SYMBOL(icmpv6msg_statistics);
77 * The ICMP socket(s). This is the most convenient way to flow control
78 * our ICMP output as well as maintain a clean interface throughout
79 * all layers. All Socketless IP sends will soon be gone.
81 * On SMP we have one ICMP socket per-cpu.
/* One kernel-internal raw ICMPv6 socket per CPU; the icmpv6_socket macro
 * resolves to the current CPU's socket (see the surrounding comment about
 * SMP flow control). */
83 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
84 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration of the input handler registered just below. */
86 static int icmpv6_rcv(struct sk_buff *skb);
/* Protocol descriptor registered for IPPROTO_ICMPV6 in icmpv6_init():
 * NOPOLICY skips the generic xfrm policy check (icmpv6_rcv does its own),
 * FINAL marks this as a terminal (non-extension-header) protocol.
 * NOTE(review): the initializer's closing line is elided in this fragment. */
88 static struct inet6_protocol icmpv6_protocol = {
89 .handler = icmpv6_rcv,
90 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Try to take the current CPU's ICMPv6 socket lock without sleeping.
 * trylock (not plain lock) because this path can be re-entered via
 * dst_link_failure() from the output side, per the comment below.
 * NOTE(review): the body's return paths are elided in this fragment. */
93 static __inline__ int icmpv6_xmit_lock(void)
97 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
98 /* This can happen if the output path (f.e. SIT or
99 * ip6ip6 tunnel) signals dst_link_failure() for an
100 * outgoing ICMP6 packet.
/* Release the per-CPU ICMPv6 socket lock taken by icmpv6_xmit_lock()
 * and re-enable bottom halves. */
108 static __inline__ void icmpv6_xmit_unlock(void)
110 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
114 * Slightly more convenient version of icmpv6_send.
/* Send an ICMPv6 Parameter Problem (given code, pointer offset 'pos')
 * in response to the offending packet 'skb'. */
116 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
118 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
123 * Figure out, may we reply to this packet with icmp error.
125 * We do not reply, if:
126 * - it was icmp error message.
127 * - it is truncated, so that it is known, that protocol is ICMPV6
128 * (i.e. in the middle of some exthdr)
/* Decide whether we must NOT reply to this packet with an ICMPv6 error
 * (see the eligibility rules in the comment above): the inner protocol
 * is walked past extension headers; if it is ICMPv6 and the type is an
 * error (high bit of icmp6_type clear), the packet is ineligible.
 * NOTE(review): several body lines (return statements, _type declaration)
 * are elided in this fragment. */
133 static int is_ineligible(struct sk_buff *skb)
135 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
136 int len = skb->len - ptr;
137 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Skip over any IPv6 extension headers to find the upper-layer protocol. */
142 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
145 if (nexthdr == IPPROTO_ICMPV6) {
/* Pull the single icmp6_type byte, which may live in a fragment. */
147 tp = skb_header_pointer(skb,
148 ptr+offsetof(struct icmp6hdr, icmp6_type),
149 sizeof(_type), &_type);
151 !(*tp & ICMPV6_INFOMSG_MASK))
/* ICMPv6 error rate-limit interval in jiffies (default 1 second);
 * exposed via the "ratelimit" sysctl table at the bottom of this file. */
157 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
160 * Check the ICMP output rate limit
/* Token-bucket style rate limiting for outgoing ICMPv6 messages.
 * Informational messages and PKT_TOOBIG (needed for PMTU discovery) are
 * never limited; otherwise the route's bucket is consulted via
 * xrlim_allow() with a timeout scaled down for wider destination prefixes.
 * NOTE(review): parameter list tail, loopback branch body and return
 * statements are elided in this fragment. */
162 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
165 struct dst_entry *dst;
168 /* Informational messages are not limited. */
169 if (type & ICMPV6_INFOMSG_MASK)
172 /* Do not limit pmtu discovery, it would break it. */
173 if (type == ICMPV6_PKT_TOOBIG)
177 * Look up the output route.
178 * XXX: perhaps the expire for routing entries cloned by
179 * this lookup should be more aggressive (not longer than timeout).
181 dst = ip6_route_output(sk, fl);
/* Route lookup failed: count it, but (per original logic, elided here)
 * presumably still allow the send -- TODO confirm against full source. */
183 IP6_INC_STATS(ip6_dst_idev(dst),
184 IPSTATS_MIB_OUTNOROUTES);
185 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 int tmo = sysctl_icmpv6_time;
191 /* Give more bandwidth to wider prefixes. */
192 if (rt->rt6i_dst.plen < 128)
193 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
195 res = xrlim_allow(dst, tmo);
202 * an inline helper for the "simple" if statement below
203 * checks if parameter problem report is caused by an
204 * unrecognized IPv6 option that has the Option Type
205 * highest-order two bits set to 10
/* Return nonzero if the option byte at 'offset' (relative to the network
 * header) has its two high-order bits set to 10 -- the "unrecognized
 * option" class described in the comment above.
 * NOTE(review): op/_optval declarations and the NULL-check on op are
 * elided in this fragment. */
208 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
212 offset += skb_network_offset(skb);
213 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
216 return (*op & 0xC0) == 0x80;
/* Finalize an ICMPv6 message built with ip6_append_data(): copy the
 * prepared header 'thdr' into the first queued skb, compute the ICMPv6
 * checksum over payload length 'len' (single-skb fast path vs. a walk
 * summing per-skb checksums), then transmit via ip6_push_pending_frames().
 * NOTE(review): csum_ipv6_magic() argument tails, tmp_csum declaration
 * and the return are elided in this fragment. */
219 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
222 struct icmp6hdr *icmp6h;
225 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
228 icmp6h = icmp6_hdr(skb);
229 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
230 icmp6h->icmp6_cksum = 0;
/* Fast path: the whole message fits in a single skb. */
232 if (skb_queue_len(&sk->sk_write_queue) == 1) {
233 skb->csum = csum_partial((char *)icmp6h,
234 sizeof(struct icmp6hdr), skb->csum);
235 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Slow path: fold the partial checksums of every queued fragment. */
242 skb_queue_walk(&sk->sk_write_queue, skb) {
243 tmp_csum = csum_add(tmp_csum, skb->csum);
246 tmp_csum = csum_partial((char *)icmp6h,
247 sizeof(struct icmp6hdr), tmp_csum);
248 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
253 ip6_push_pending_frames(sk);
/* ip6_append_data() fragment-copy callback: copies 'len' bytes of the
 * original (offending) packet into the outgoing skb while accumulating
 * the checksum.  For error messages (not informational ones) the
 * conntrack entry of the original skb is attached to the reply.
 * NOTE(review): csum declaration, copy length argument and return are
 * elided in this fragment. */
264 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
266 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
267 struct sk_buff *org_skb = msg->skb;
270 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
272 skb->csum = csum_block_add(skb->csum, csum, odd);
273 if (!(msg->type & ICMPV6_INFOMSG_MASK))
274 nf_ct_attach(skb, org_skb);
/* Mobile IPv6 support: if the packet carries a Home Address destination
 * option (HAO), swap the header's source address with the home address
 * so the ICMPv6 error is addressed correctly.  Compiled out (empty stub
 * below) when MIP6 is not configured.
 * NOTE(review): 'tmp' declaration, dsthao guard and the #else/#endif
 * lines are elided in this fragment. */
278 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
279 static void mip6_addr_swap(struct sk_buff *skb)
281 struct ipv6hdr *iph = ipv6_hdr(skb);
282 struct inet6_skb_parm *opt = IP6CB(skb);
283 struct ipv6_destopt_hao *hao;
/* Locate the HAO TLV inside the destination options header. */
288 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
289 if (likely(off >= 0)) {
290 hao = (struct ipv6_destopt_hao *)
291 (skb_network_header(skb) + off);
/* Three-way swap: saddr <-> hao->addr via tmp. */
292 ipv6_addr_copy(&tmp, &iph->saddr);
293 ipv6_addr_copy(&iph->saddr, &hao->addr);
294 ipv6_addr_copy(&hao->addr, &tmp);
/* No-op stub when Mobile IPv6 is not built in. */
299 static inline void mip6_addr_swap(struct sk_buff *skb) {}
303 * Send an ICMP message in response to a packet in error
/* Build and transmit an ICMPv6 error message (type/code/info) in response
 * to the erroneous packet 'skb'.  Enforces the RFC 2463 rules on when an
 * error may be generated (no replies to multicast/anycast/unspecified
 * sources, never reply to an ICMP error, rate limiting), then constructs
 * the reply with ip6_append_data()/icmpv6_push_pending_frames().
 * NOTE(review): this fragment is heavily elided -- variable declarations
 * (fl, sk, err, iif, addr_type, hlimit, tclass, len), several gotos,
 * label definitions and cleanup lines are missing. */
305 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
306 struct net_device *dev)
308 struct inet6_dev *idev = NULL;
309 struct ipv6hdr *hdr = ipv6_hdr(skb);
311 struct ipv6_pinfo *np;
312 struct in6_addr *saddr = NULL;
313 struct dst_entry *dst;
314 struct dst_entry *dst2;
315 struct icmp6hdr tmp_hdr;
318 struct icmpv6_msg msg;
/* Sanity check: the IPv6 header must lie fully inside the skb data. */
325 if ((u8 *)hdr < skb->head ||
326 (skb->network_header + sizeof(*hdr)) > skb->tail)
330 * Make sure we respect the rules
331 * i.e. RFC 1885 2.4(e)
332 * Rule (e.1) is enforced by not using icmpv6_send
333 * in any code that processes icmp errors.
335 addr_type = ipv6_addr_type(&hdr->daddr);
337 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
/* Errors for multicast/non-host packets are only allowed for PKT_TOOBIG
 * and for Parameter Problem on an option whose type demands it. */
344 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
345 if (type != ICMPV6_PKT_TOOBIG &&
346 !(type == ICMPV6_PARAMPROB &&
347 code == ICMPV6_UNK_OPTION &&
348 (opt_unrec(skb, info))))
354 addr_type = ipv6_addr_type(&hdr->saddr);
360 if (addr_type & IPV6_ADDR_LINKLOCAL)
361 iif = skb->dev->ifindex;
364 * Must not send error if the source does not uniquely
365 * identify a single node (RFC2463 Section 2.4).
366 * We check unspecified / multicast addresses here,
367 * and anycast addresses will be checked later.
369 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
370 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
375 * Never answer to an ICMP packet.
377 if (is_ineligible(skb)) {
378 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Build the flow back toward the offending packet's source. */
384 memset(&fl, 0, sizeof(fl));
385 fl.proto = IPPROTO_ICMPV6;
386 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
388 ipv6_addr_copy(&fl.fl6_src, saddr);
390 fl.fl_icmp_type = type;
391 fl.fl_icmp_code = code;
392 security_skb_classify_flow(skb, &fl);
394 if (icmpv6_xmit_lock())
397 sk = icmpv6_socket->sk;
400 if (!icmpv6_xrlim_allow(sk, type, &fl))
403 tmp_hdr.icmp6_type = type;
404 tmp_hdr.icmp6_code = code;
405 tmp_hdr.icmp6_cksum = 0;
406 tmp_hdr.icmp6_pointer = htonl(info);
408 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
409 fl.oif = np->mcast_oif;
411 err = ip6_dst_lookup(sk, &dst, &fl);
416 * We won't send icmp if the destination is known
/* Anycast sources must not receive errors (deferred check, see above). */
419 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
420 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
421 goto out_dst_release;
424 /* No need to clone since we're just using its address. */
/* IPsec: transform lookup for the outgoing error; a second reverse
 * lookup (dst2/fl2) handles the ICMP-over-xfrm case. */
427 err = xfrm_lookup(&dst, &fl, sk, 0);
440 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
443 if (ip6_dst_lookup(sk, &dst2, &fl))
446 err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
447 if (err == -ENOENT) {
/* Pick a hop limit: socket setting, route metric, or device default. */
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops;
463 hlimit = np->hop_limit;
465 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
467 hlimit = ipv6_get_hoplimit(dst->dev);
474 msg.offset = skb_network_offset(skb);
/* Clamp the quoted payload so the error fits in the IPv6 minimum MTU. */
477 len = skb->len - msg.offset;
478 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
480 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
481 goto out_dst_release;
484 idev = in6_dev_get(skb->dev);
486 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
487 len + sizeof(struct icmp6hdr),
488 sizeof(struct icmp6hdr),
489 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
/* On append failure, drop whatever was queued; otherwise checksum+send. */
492 ip6_flush_pending_frames(sk);
495 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
498 if (likely(idev != NULL))
503 icmpv6_xmit_unlock();
506 EXPORT_SYMBOL(icmpv6_send);
/* Answer an ICMPv6 Echo Request: copy the request header, flip the type
 * to ECHO_REPLY, swap src/dst into the reply flow, and send via
 * ip6_append_data()/icmpv6_push_pending_frames().  Replies only to
 * unicast destinations.
 * NOTE(review): declarations (sk, fl, err, hlimit, tclass), gotos and
 * label/cleanup lines are elided in this fragment. */
508 static void icmpv6_echo_reply(struct sk_buff *skb)
511 struct inet6_dev *idev;
512 struct ipv6_pinfo *np;
513 struct in6_addr *saddr = NULL;
514 struct icmp6hdr *icmph = icmp6_hdr(skb);
515 struct icmp6hdr tmp_hdr;
517 struct icmpv6_msg msg;
518 struct dst_entry *dst;
/* Reply from the address the request was sent to. */
523 saddr = &ipv6_hdr(skb)->daddr;
525 if (!ipv6_unicast_destination(skb))
528 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
529 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
531 memset(&fl, 0, sizeof(fl));
532 fl.proto = IPPROTO_ICMPV6;
533 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
535 ipv6_addr_copy(&fl.fl6_src, saddr);
536 fl.oif = skb->dev->ifindex;
537 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
538 security_skb_classify_flow(skb, &fl);
540 if (icmpv6_xmit_lock())
543 sk = icmpv6_socket->sk;
546 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
547 fl.oif = np->mcast_oif;
549 err = ip6_dst_lookup(sk, &dst, &fl);
552 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit selection mirrors icmpv6_send(). */
555 if (ipv6_addr_is_multicast(&fl.fl6_dst))
556 hlimit = np->mcast_hops;
558 hlimit = np->hop_limit;
560 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
562 hlimit = ipv6_get_hoplimit(dst->dev);
568 idev = in6_dev_get(skb->dev);
572 msg.type = ICMPV6_ECHO_REPLY;
574 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
575 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
576 (struct rt6_info*)dst, MSG_DONTWAIT);
/* On append failure, drop queued data; otherwise checksum and transmit. */
579 ip6_flush_pending_frames(sk);
582 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
585 if (likely(idev != NULL))
589 icmpv6_xmit_unlock();
/* Deliver a received ICMPv6 error to the upper-layer protocol it refers
 * to: skip the quoted packet's extension headers, ensure 8 bytes of the
 * inner transport header are present, then invoke the registered
 * protocol's err_handler and notify matching raw sockets.
 * NOTE(review): declarations (nexthdr, inner_offset, hash), rcu lock
 * pairing and the out/return lines are elided in this fragment. */
592 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
594 struct inet6_protocol *ipprot;
599 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
602 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
603 if (ipv6_ext_hdr(nexthdr)) {
604 /* now skip over extension headers */
605 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
609 inner_offset = sizeof(struct ipv6hdr);
612 /* Check that the header plus 8 bytes of the inner protocol header are present. */
613 if (!pskb_may_pull(skb, inner_offset+8))
616 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
617 Without this we will not able f.e. to make source routed
619 Corresponding argument (opt) to notifiers is already added.
/* Protocol table is hashed by the low bits of the protocol number. */
623 hash = nexthdr & (MAX_INET_PROTOS - 1);
626 ipprot = rcu_dereference(inet6_protos[hash]);
627 if (ipprot && ipprot->err_handler)
628 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
/* Also notify any raw sockets bound to the inner protocol. */
631 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
635 * Handle icmp messages
/* ICMPv6 input handler (registered via icmpv6_protocol).  Performs xfrm
 * policy checks, verifies the ICMPv6 checksum, updates MIB counters,
 * then dispatches on the message type: echo handling, PMTU discovery,
 * error propagation via icmpv6_notify(), MLD, and neighbour discovery.
 * NOTE(review): declarations (type, nh), gotos, break statements, the
 * ndisc_rcv() call for NDISC types, and return lines are elided in this
 * fragment. */
638 static int icmpv6_rcv(struct sk_buff *skb)
640 struct net_device *dev = skb->dev;
641 struct inet6_dev *idev = __in6_dev_get(dev);
642 struct in6_addr *saddr, *daddr;
643 struct ipv6hdr *orig_hdr;
644 struct icmp6hdr *hdr;
647 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
650 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
654 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
/* Temporarily advance the network header for the reverse policy check,
 * then restore it. */
657 nh = skb_network_offset(skb);
658 skb_set_network_header(skb, sizeof(*hdr));
660 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
663 skb_set_network_header(skb, nh);
666 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
668 saddr = &ipv6_hdr(skb)->saddr;
669 daddr = &ipv6_hdr(skb)->daddr;
671 /* Perform checksum. */
672 switch (skb->ip_summed) {
673 case CHECKSUM_COMPLETE:
674 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
/* Software checksum fallback when hardware did not verify it. */
679 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
681 if (__skb_checksum_complete(skb)) {
682 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
683 NIP6(*saddr), NIP6(*daddr));
688 __skb_pull(skb, sizeof(*hdr));
690 hdr = icmp6_hdr(skb);
692 type = hdr->icmp6_type;
694 ICMP6MSGIN_INC_STATS_BH(idev, type);
697 case ICMPV6_ECHO_REQUEST:
698 icmpv6_echo_reply(skb);
701 case ICMPV6_ECHO_REPLY:
702 /* we couldn't care less */
705 case ICMPV6_PKT_TOOBIG:
706 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
707 standard destination cache. Seems, only "advanced"
708 destination cache will allow to solve this problem
/* Re-validate and re-read the header: pskb_may_pull may reallocate. */
711 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
713 hdr = icmp6_hdr(skb);
714 orig_hdr = (struct ipv6hdr *) (hdr + 1);
715 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
716 ntohl(hdr->icmp6_mtu));
719 * Drop through to notify
722 case ICMPV6_DEST_UNREACH:
723 case ICMPV6_TIME_EXCEED:
724 case ICMPV6_PARAMPROB:
725 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
/* Neighbour discovery types -- handler call is elided in this fragment
 * (presumably ndisc_rcv(); confirm against full source). */
728 case NDISC_ROUTER_SOLICITATION:
729 case NDISC_ROUTER_ADVERTISEMENT:
730 case NDISC_NEIGHBOUR_SOLICITATION:
731 case NDISC_NEIGHBOUR_ADVERTISEMENT:
736 case ICMPV6_MGM_QUERY:
737 igmp6_event_query(skb);
740 case ICMPV6_MGM_REPORT:
741 igmp6_event_report(skb);
/* Known types with no local processing. */
744 case ICMPV6_MGM_REDUCTION:
745 case ICMPV6_NI_QUERY:
746 case ICMPV6_NI_REPLY:
747 case ICMPV6_MLD2_REPORT:
748 case ICMPV6_DHAAD_REQUEST:
749 case ICMPV6_DHAAD_REPLY:
750 case ICMPV6_MOBILE_PREFIX_SOL:
751 case ICMPV6_MOBILE_PREFIX_ADV:
755 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
758 if (type & ICMPV6_INFOMSG_MASK)
762 * error of unknown type.
763 * must pass to upper level
766 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
773 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
780 * Special lock-class for __icmpv6_socket:
/* Lockdep class key; applied to each per-CPU socket's sk_dst_lock in
 * icmpv6_init() -- see the rationale comment there. */
782 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/* Module init: create one kernel raw ICMPv6 socket per possible CPU,
 * configure each for atomic allocation with a dedicated lockdep class
 * and a large send buffer, then register the ICMPv6 protocol handler.
 * On socket-creation failure, releases the sockets created so far.
 * NOTE(review): declarations (i, j, err, sk), gotos, error-path labels
 * and returns are elided in this fragment. */
784 int __init icmpv6_init(struct net_proto_family *ops)
789 for_each_possible_cpu(i) {
790 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
791 &per_cpu(__icmpv6_socket, i));
794 "Failed to initialize the ICMP6 control socket "
/* Kernel-internal socket: must never sleep on allocation. */
800 sk = per_cpu(__icmpv6_socket, i)->sk;
801 sk->sk_allocation = GFP_ATOMIC;
803 * Split off their lock-class, because sk->sk_dst_lock
804 * gets used from softirqs, which is safe for
805 * __icmpv6_socket (because those never get directly used
806 * via userspace syscalls), but unsafe for normal sockets.
808 lockdep_set_class(&sk->sk_dst_lock,
809 &icmpv6_socket_sk_dst_lock_key);
811 /* Enough space for 2 64K ICMP packets, including
812 * sk_buff struct overhead.
815 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
/* Keep the kernel socket out of the protocol hash tables. */
817 sk->sk_prot->unhash(sk);
821 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
822 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
/* Error path: release only the sockets created before the failure. */
830 for (j = 0; j < i; j++) {
831 if (!cpu_possible(j))
833 sock_release(per_cpu(__icmpv6_socket, j));
/* Module teardown: release every per-CPU ICMPv6 socket and unregister
 * the protocol handler.
 * NOTE(review): loop variable declaration is elided in this fragment. */
839 void icmpv6_cleanup(void)
843 for_each_possible_cpu(i) {
844 sock_release(per_cpu(__icmpv6_socket, i));
846 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/* Table mapping ICMPV6_DEST_UNREACH codes to errno values and a fatal
 * flag; indexed by code in icmpv6_err_convert() below.
 * NOTE(review): struct member declarations and most initializer entries
 * are elided in this fragment. */
849 static const struct icmp6_err {
857 { /* ADM_PROHIBITED */
861 { /* Was NOT_NEIGHBOUR, now reserved */
/* Translate an ICMPv6 error (type, code) into an errno value stored in
 * *err, and return whether the error is fatal for the connection.
 * DEST_UNREACH codes are looked up in tab_unreach above.
 * NOTE(review): declarations (fatal), default errno assignment, most
 * case bodies and the return statement are elided in this fragment. */
875 int icmpv6_err_convert(int type, int code, int *err)
882 case ICMPV6_DEST_UNREACH:
884 if (code <= ICMPV6_PORT_UNREACH) {
885 *err = tab_unreach[code].err;
886 fatal = tab_unreach[code].fatal;
890 case ICMPV6_PKT_TOOBIG:
894 case ICMPV6_PARAMPROB:
899 case ICMPV6_TIME_EXCEED:
907 EXPORT_SYMBOL(icmpv6_err_convert);
910 ctl_table ipv6_icmp_table[] = {
912 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
913 .procname = "ratelimit",
914 .data = &sysctl_icmpv6_time,
915 .maxlen = sizeof(int),
917 .proc_handler = &proc_dointvec