2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/*
 * ICMPv6 SNMP counters. Two MIB instances are defined and exported here:
 * icmpv6_statistics (struct icmpv6_mib) and icmpv6msg_statistics
 * (struct icmpv6msg_mib). Both are tagged __read_mostly.
 */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
72 EXPORT_SYMBOL(icmpv6_statistics);
73 DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly;
74 EXPORT_SYMBOL(icmpv6msg_statistics);
77 * The ICMP socket(s). This is the most convenient way to flow control
78 * our ICMP output as well as maintain a clean interface throughout
79 * all layers. All Socketless IP sends will soon be gone.
81 * On SMP we have one ICMP socket per-cpu.
/*
 * icmpv6_sk - return the ICMPv6 control socket of the running CPU.
 * @net: network namespace whose icmp_sk[] array is indexed.
 *
 * The array slot is chosen with smp_processor_id().
 * NOTE(review): that index is only stable while the caller cannot be
 * migrated to another CPU - confirm that every call site runs in such a
 * context.
 */
83 static inline struct sock *icmpv6_sk(struct net *net)
85 return net->ipv6.icmp_sk[smp_processor_id()];
/*
 * Protocol descriptor for ICMPv6. icmpv6_rcv() is forward-declared because
 * it is defined further down but is needed as the .handler here. The
 * descriptor carries the INET6_PROTO_NOPOLICY and INET6_PROTO_FINAL flags
 * and is registered for IPPROTO_ICMPV6 by icmpv6_init().
 */
88 static int icmpv6_rcv(struct sk_buff *skb);
90 static struct inet6_protocol icmpv6_protocol = {
91 .handler = icmpv6_rcv,
92 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the transmit lock of an ICMPv6 socket.
 * @sk: ICMPv6 control socket.
 *
 * The lock is sk->sk_lock.slock and it is taken with spin_trylock(), so
 * this never spins; the contended case is marked unlikely().
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers test the result as "if (icmpv6_xmit_lock(sk))", so a non-zero
 * return presumably means the lock was NOT acquired - confirm.
 */
95 static __inline__ int icmpv6_xmit_lock(struct sock *sk)
99 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
100 /* This can happen if the output path (f.e. SIT or
101 * ip6ip6 tunnel) signals dst_link_failure() for an
102 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock taken by icmpv6_xmit_lock().
 * @sk: ICMPv6 control socket.
 *
 * Drops sk->sk_lock.slock with spin_unlock_bh(), i.e. the _bh variant of
 * the unlock primitive.
 */
110 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
112 spin_unlock_bh(&sk->sk_lock.slock);
116 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - send an ICMPv6 Parameter Problem for a packet.
 * @skb:  packet that triggered the error.
 * @code: Parameter Problem code.
 * @pos:  value passed as the "info" argument of icmpv6_send().
 *
 * Thin wrapper that calls icmpv6_send() with type ICMPV6_PARAMPROB and
 * skb->dev as the device argument.
 */
118 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
120 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
125 * Figure out whether we may reply to this packet with an ICMP error.
127 * We do not reply, if:
128 * - it was icmp error message.
129 * - it is truncated, so that it is known, that protocol is ICMPV6
130 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - check whether a packet must not be answered with an
 * ICMPv6 error.
 * @skb: packet under consideration.
 *
 * Starts right after the fixed IPv6 header, skips extension headers with
 * ipv6_skip_exthdr() and, if the resulting next-header value is
 * IPPROTO_ICMPV6, fetches the ICMPv6 type byte with skb_header_pointer()
 * and tests whether ICMPV6_INFOMSG_MASK is clear in it.
 *
 * NOTE(review): the return statements and part of the final condition are
 * missing from this listing. icmpv6_send() treats a non-zero result as
 * "no reply to icmp error" - confirm the exact return values.
 */
135 static int is_ineligible(struct sk_buff *skb)
137 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
138 int len = skb->len - ptr;
139 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
144 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
147 if (nexthdr == IPPROTO_ICMPV6) {
149 tp = skb_header_pointer(skb,
150 ptr+offsetof(struct icmp6hdr, icmp6_type),
151 sizeof(_type), &_type);
153 !(*tp & ICMPV6_INFOMSG_MASK))
160 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - decide whether an outgoing ICMPv6 message passes
 * the rate limiter.
 * @sk:   ICMPv6 control socket used for the route lookup.
 * @type: ICMPv6 type of the message about to be sent.
 *
 * A further parameter named "fl" is passed to ip6_route_output() below;
 * its declaration is on a line missing from this listing.
 *
 * Visible policy:
 *  - types with ICMPV6_INFOMSG_MASK set are not limited;
 *  - ICMPV6_PKT_TOOBIG is not limited;
 *  - otherwise the output route is looked up; one branch accounts
 *    IPSTATS_MIB_OUTNOROUTES, one branch handles loopback devices, and the
 *    remaining branch asks xrlim_allow() with a timeout derived from the
 *    icmpv6_time sysctl.
 *
 * NOTE(review): the timeout is read from init_net, not from a namespace
 * derived from @sk.
 */
162 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
165 struct dst_entry *dst;
168 /* Informational messages are not limited. */
169 if (type & ICMPV6_INFOMSG_MASK)
172 /* Do not limit pmtu discovery, it would break it. */
173 if (type == ICMPV6_PKT_TOOBIG)
177 * Look up the output route.
178 * XXX: perhaps the expire for routing entries cloned by
179 * this lookup should be more aggressive (not longer than timeout).
181 dst = ip6_route_output(sk, fl);
183 IP6_INC_STATS(ip6_dst_idev(dst),
184 IPSTATS_MIB_OUTNOROUTES);
185 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 int tmo = init_net.ipv6.sysctl.icmpv6_time;
191 /* Give more bandwidth to wider prefixes. */
/*
 * (128 - plen) >> 5 counts the whole 32-bit words by which the prefix is
 * shorter than 128 bits (0..4); the timeout is halved once per word.
 */
192 if (rt->rt6i_dst.plen < 128)
193 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
195 res = xrlim_allow(dst, tmo);
202 * an inline helper for the "simple" if statement below
203 * checks if parameter problem report is caused by an
204 * unrecognized IPv6 option that has the Option Type
205 * highest-order two bits set to 10
/*
 * opt_unrec - test the two highest-order bits of an IPv6 option type.
 * @skb:    packet containing the option.
 * @offset: offset of the option type byte, relative to the network header
 *          (skb_network_offset() is added before the byte is fetched).
 *
 * Reads the byte with skb_header_pointer() and returns non-zero when its
 * top two bits are binary 10, i.e. (byte & 0xC0) == 0x80.
 * NOTE(review): the handling of a failed skb_header_pointer() call is on a
 * line missing from this listing.
 */
208 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
212 offset += skb_network_offset(skb);
213 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
216 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finalize the ICMPv6 header and checksum of
 * the queued message and push it out.
 * @sk:   socket whose sk_write_queue holds the pending fragment(s).
 * @fl:   flow; fl->fl6_src is fed to csum_ipv6_magic().
 * @thdr: template ICMPv6 header, copied into the first queued skb.
 * @len:  NOTE(review): its use is on lines missing from this listing;
 *        presumably the ICMPv6 length for the pseudo-header checksum.
 *
 * Steps visible here:
 *  1. peek the first skb on the write queue (the empty-queue branch is not
 *     visible);
 *  2. copy @thdr over its ICMPv6 header and zero icmp6_cksum;
 *  3. if the queue holds exactly one skb, fold the header into skb->csum
 *     and compute the final checksum with csum_ipv6_magic();
 *     otherwise sum skb->csum over every queued skb, add the header and
 *     compute the final checksum the same way;
 *  4. call ip6_push_pending_frames().
 */
219 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
222 struct icmp6hdr *icmp6h;
225 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
228 icmp6h = icmp6_hdr(skb);
229 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
230 icmp6h->icmp6_cksum = 0;
232 if (skb_queue_len(&sk->sk_write_queue) == 1) {
233 skb->csum = csum_partial((char *)icmp6h,
234 sizeof(struct icmp6hdr), skb->csum);
235 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
242 skb_queue_walk(&sk->sk_write_queue, skb) {
243 tmp_csum = csum_add(tmp_csum, skb->csum);
246 tmp_csum = csum_partial((char *)icmp6h,
247 sizeof(struct icmp6hdr), tmp_csum);
248 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
253 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment-fill callback handed to ip6_append_data().
 * @from:   opaque cookie; cast to struct icmpv6_msg.
 * @to:     destination buffer (used on a line not fully visible here).
 * @offset: offset within the payload; added to msg->offset to find the
 *          source position in the original skb.
 * @len:    number of bytes requested (use not visible in this listing).
 * @odd:    passed to csum_block_add() when accumulating the checksum.
 * @skb:    skb being built; its csum field accumulates the payload sum.
 *
 * Copies payload from the original packet with skb_copy_and_csum_bits()
 * and folds the partial checksum into skb->csum. When the message type does
 * not have ICMPV6_INFOMSG_MASK set, nf_ct_attach() is called with the new
 * and the original skb.
 * NOTE(review): the return statement is missing from this listing.
 */
264 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
266 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
267 struct sk_buff *org_skb = msg->skb;
270 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
272 skb->csum = csum_block_add(skb->csum, csum, odd);
273 if (!(msg->type & ICMPV6_INFOMSG_MASK))
274 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - exchange the IPv6 source address with the address held
 * in the Home Address destination option.
 * @skb: packet whose headers are modified in place.
 *
 * With CONFIG_IPV6_MIP6 (built-in or module): locates the IPV6_TLV_HAO
 * option with ipv6_find_tlv(), starting from IP6CB(skb)->dsthao, and if it
 * is found (off >= 0) swaps iph->saddr and hao->addr through a temporary.
 * Without it: the last line of this block is an empty inline stub.
 * NOTE(review): the preprocessor else/endif lines and any guard around the
 * lookup are missing from this listing.
 */
278 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
279 static void mip6_addr_swap(struct sk_buff *skb)
281 struct ipv6hdr *iph = ipv6_hdr(skb);
282 struct inet6_skb_parm *opt = IP6CB(skb);
283 struct ipv6_destopt_hao *hao;
288 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
289 if (likely(off >= 0)) {
290 hao = (struct ipv6_destopt_hao *)
291 (skb_network_header(skb) + off);
292 ipv6_addr_copy(&tmp, &iph->saddr);
293 ipv6_addr_copy(&iph->saddr, &hao->addr);
294 ipv6_addr_copy(&hao->addr, &tmp);
299 static inline void mip6_addr_swap(struct sk_buff *skb) {}
303 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 error for an offending packet.
 * @skb:  packet that caused the error; its IPv6 header supplies the
 *        addresses and its contents become the payload of the error.
 * @type: ICMPv6 type of the error.
 * @code: ICMPv6 code of the error.
 * @info: type-specific 32-bit value; stored in the header as htonl(info).
 * @dev:  NOTE(review): no use of this parameter is visible in this listing.
 *
 * Outline of the steps visible here (many lines, including every return
 * and most goto statements, are missing from this listing):
 *  1. Sanity-check that the IPv6 header lies inside the skb buffer.
 *  2. Classify the destination address. If the destination is multicast or
 *     the packet was not addressed to this host (pkt_type != PACKET_HOST),
 *     only ICMPV6_PKT_TOOBIG, or ICMPV6_PARAMPROB with ICMPV6_UNK_OPTION
 *     where opt_unrec() holds, get past the check.
 *  3. Classify the source address: link-local sources pin the interface
 *     index; unspecified or multicast sources are logged as
 *     "addr_any/mcast source".
 *  4. Packets for which is_ineligible() is true are logged as
 *     "no reply to icmp error".
 *  5. Fill the flow (proto, destination = original source, ICMP type and
 *     code), run security_skb_classify_flow(), pick the per-CPU socket of
 *     init_net, take the transmit lock and consult icmpv6_xrlim_allow().
 *  6. Prepare the template header, resolve the route with
 *     ip6_dst_lookup(), log "acast source" when the route has RTF_ANYCAST,
 *     and run xfrm_lookup(), including a second, reverse-decoded lookup
 *     with XFRM_LOOKUP_ICMP.
 *  7. Choose a hop limit from the socket options, the route metric
 *     RTAX_HOPLIMIT or ipv6_get_hoplimit().
 *  8. Cap the quoted payload at
 *     IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr),
 *     queue it with ip6_append_data() using icmpv6_getfrag(), flush the
 *     queue on one path and call icmpv6_push_pending_frames() on another.
 *  9. Release the transmit lock with icmpv6_xmit_unlock().
 *
 * NOTE(review): both the ipv6_chk_addr() call and the socket lookup use
 * init_net rather than a namespace derived from the packet.
 */
305 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
306 struct net_device *dev)
308 struct inet6_dev *idev = NULL;
309 struct ipv6hdr *hdr = ipv6_hdr(skb);
311 struct ipv6_pinfo *np;
312 struct in6_addr *saddr = NULL;
313 struct dst_entry *dst;
314 struct dst_entry *dst2;
315 struct icmp6hdr tmp_hdr;
318 struct icmpv6_msg msg;
325 if ((u8 *)hdr < skb->head ||
326 (skb->network_header + sizeof(*hdr)) > skb->tail)
330 * Make sure we respect the rules
331 * i.e. RFC 1885 2.4(e)
332 * Rule (e.1) is enforced by not using icmpv6_send
333 * in any code that processes icmp errors.
335 addr_type = ipv6_addr_type(&hdr->daddr);
337 if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
344 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
345 if (type != ICMPV6_PKT_TOOBIG &&
346 !(type == ICMPV6_PARAMPROB &&
347 code == ICMPV6_UNK_OPTION &&
348 (opt_unrec(skb, info))))
354 addr_type = ipv6_addr_type(&hdr->saddr);
360 if (addr_type & IPV6_ADDR_LINKLOCAL)
361 iif = skb->dev->ifindex;
364 * Must not send error if the source does not uniquely
365 * identify a single node (RFC2463 Section 2.4).
366 * We check unspecified / multicast addresses here,
367 * and anycast addresses will be checked later.
369 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
370 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
375 * Never answer to a ICMP packet.
377 if (is_ineligible(skb)) {
378 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
384 memset(&fl, 0, sizeof(fl));
385 fl.proto = IPPROTO_ICMPV6;
386 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
388 ipv6_addr_copy(&fl.fl6_src, saddr);
390 fl.fl_icmp_type = type;
391 fl.fl_icmp_code = code;
392 security_skb_classify_flow(skb, &fl);
394 sk = icmpv6_sk(&init_net);
397 if (icmpv6_xmit_lock(sk))
400 if (!icmpv6_xrlim_allow(sk, type, &fl))
403 tmp_hdr.icmp6_type = type;
404 tmp_hdr.icmp6_code = code;
405 tmp_hdr.icmp6_cksum = 0;
406 tmp_hdr.icmp6_pointer = htonl(info);
408 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
409 fl.oif = np->mcast_oif;
411 err = ip6_dst_lookup(sk, &dst, &fl);
416 * We won't send icmp if the destination is known
419 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
420 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
421 goto out_dst_release;
424 /* No need to clone since we're just using its address. */
427 err = xfrm_lookup(&dst, &fl, sk, 0);
440 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
443 if (ip6_dst_lookup(sk, &dst2, &fl))
446 err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
447 if (err == -ENOENT) {
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops;
463 hlimit = np->hop_limit;
465 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
467 hlimit = ipv6_get_hoplimit(dst->dev);
474 msg.offset = skb_network_offset(skb);
477 len = skb->len - msg.offset;
478 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
480 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
481 goto out_dst_release;
484 idev = in6_dev_get(skb->dev);
486 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
487 len + sizeof(struct icmp6hdr),
488 sizeof(struct icmp6hdr),
489 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
492 ip6_flush_pending_frames(sk);
495 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
498 if (likely(idev != NULL))
503 icmpv6_xmit_unlock(sk);
506 EXPORT_SYMBOL(icmpv6_send);
/*
 * icmpv6_echo_reply - answer a received ICMPv6 Echo Request.
 * @skb: the Echo Request; at this point skb data starts at the ICMPv6
 *       header (icmpv6_rcv() pulls nothing more before calling here).
 *
 * Steps visible in this listing (return/goto lines are missing):
 *  1. Take the request destination as the candidate reply source; the
 *     ipv6_unicast_destination() test guards a line that is not visible.
 *  2. Copy the request ICMPv6 header into a template and change its type
 *     to ICMPV6_ECHO_REPLY.
 *  3. Fill the flow: destination = request source, oif = receiving
 *     interface, ICMP type = echo reply; run security_skb_classify_flow().
 *  4. Pick the per-CPU socket of init_net and take the transmit lock.
 *  5. Resolve the route (ip6_dst_lookup() then xfrm_lookup()) and choose a
 *     hop limit from the socket options, the RTAX_HOPLIMIT metric or
 *     ipv6_get_hoplimit().
 *  6. Queue skb->len + sizeof(struct icmp6hdr) bytes with
 *     ip6_append_data() (MSG_DONTWAIT) through icmpv6_getfrag(), flush on
 *     one path, push with icmpv6_push_pending_frames() on another.
 *  7. Release the transmit lock.
 *
 * NOTE(review): unlike icmpv6_send(), no call to icmpv6_xrlim_allow() is
 * visible here.
 */
508 static void icmpv6_echo_reply(struct sk_buff *skb)
511 struct inet6_dev *idev;
512 struct ipv6_pinfo *np;
513 struct in6_addr *saddr = NULL;
514 struct icmp6hdr *icmph = icmp6_hdr(skb);
515 struct icmp6hdr tmp_hdr;
517 struct icmpv6_msg msg;
518 struct dst_entry *dst;
523 saddr = &ipv6_hdr(skb)->daddr;
525 if (!ipv6_unicast_destination(skb))
528 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
529 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
531 memset(&fl, 0, sizeof(fl));
532 fl.proto = IPPROTO_ICMPV6;
533 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
535 ipv6_addr_copy(&fl.fl6_src, saddr);
536 fl.oif = skb->dev->ifindex;
537 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
538 security_skb_classify_flow(skb, &fl);
540 sk = icmpv6_sk(&init_net);
543 if (icmpv6_xmit_lock(sk))
546 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
547 fl.oif = np->mcast_oif;
549 err = ip6_dst_lookup(sk, &dst, &fl);
552 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
555 if (ipv6_addr_is_multicast(&fl.fl6_dst))
556 hlimit = np->mcast_hops;
558 hlimit = np->hop_limit;
560 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
562 hlimit = ipv6_get_hoplimit(dst->dev);
568 idev = in6_dev_get(skb->dev);
572 msg.type = ICMPV6_ECHO_REPLY;
574 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
575 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
576 (struct rt6_info*)dst, MSG_DONTWAIT);
579 ip6_flush_pending_frames(sk);
582 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
585 if (likely(idev != NULL))
589 icmpv6_xmit_unlock(sk);
/*
 * icmpv6_notify - deliver a received ICMPv6 error to the upper layer that
 * owns the packet quoted inside it.
 * @skb:  error message; skb->data points at the quoted (inner) IPv6 header.
 * @type: ICMPv6 type of the error.
 * @code: ICMPv6 code of the error.
 * @info: 32-bit type-specific field, passed through unchanged (__be32).
 *
 * Steps visible here:
 *  1. Ensure the inner IPv6 header is linear (pskb_may_pull()).
 *  2. Read its next-header value; if that is an extension header, skip the
 *     chain with ipv6_skip_exthdr(), otherwise the inner payload starts
 *     right after the fixed header.
 *  3. Ensure 8 bytes of the inner transport header are linear.
 *  4. Look up inet6_protos[nexthdr & (MAX_INET_PROTOS - 1)] with
 *     rcu_dereference() and, if the protocol has an err_handler, call it.
 *  5. Also hand the error to raw6_icmp_error().
 * NOTE(review): the failure branches and the RCU lock/unlock calls are on
 * lines missing from this listing.
 */
592 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
594 struct inet6_protocol *ipprot;
599 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
602 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
603 if (ipv6_ext_hdr(nexthdr)) {
604 /* now skip over extension headers */
605 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
609 inner_offset = sizeof(struct ipv6hdr);
612 /* Checkin header including 8 bytes of inner protocol header. */
613 if (!pskb_may_pull(skb, inner_offset+8))
616 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
617 Without this we will not able f.e. to make source routed
619 Corresponding argument (opt) to notifiers is already added.
623 hash = nexthdr & (MAX_INET_PROTOS - 1);
626 ipprot = rcu_dereference(inet6_protos[hash]);
627 if (ipprot && ipprot->err_handler)
628 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
631 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
635 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler for ICMPv6 (the .handler of
 * icmpv6_protocol).
 * @skb: received packet.
 *
 * Steps visible in this listing (return/goto/break lines and several
 * continuation lines are missing):
 *  1. IPsec policy: if xfrm6_policy_check() fails, a secondary path
 *     inspects the flags of the last state in skb->sp, makes sure the
 *     ICMPv6 header plus an inner IPv6 header are linear, temporarily moves
 *     the network header to the inner header for
 *     xfrm6_policy_check_reverse() and then restores it.
 *  2. Count the message (ICMP6_MIB_INMSGS).
 *  3. Verify the checksum over the IPv6 pseudo-header, either from a
 *     CHECKSUM_COMPLETE value or via __skb_checksum_complete(); a failure
 *     is logged with both addresses.
 *  4. Pull the ICMPv6 header, read the type and count it per type.
 *  5. Dispatch on the type:
 *     - ICMPV6_ECHO_REQUEST: icmpv6_echo_reply();
 *     - ICMPV6_ECHO_REPLY: nothing;
 *     - ICMPV6_PKT_TOOBIG: rt6_pmtu_discovery() using the quoted header
 *       addresses and ntohl(icmp6_mtu), then falls through to the notify
 *       case (per the in-line "Drop through to notify" remark);
 *     - ICMPV6_DEST_UNREACH, ICMPV6_TIME_EXCEED, ICMPV6_PARAMPROB:
 *       icmpv6_notify();
 *     - the four NDISC_* types: handler call not visible here;
 *     - ICMPV6_MGM_QUERY / ICMPV6_MGM_REPORT: igmp6_event_query() /
 *       igmp6_event_report();
 *     - a list of further known types whose handling is not visible;
 *     - anything else: logged as "msg of unknown type"; after a test of
 *       ICMPV6_INFOMSG_MASK, unknown errors are passed to icmpv6_notify().
 *  6. ICMP6_MIB_INERRORS is incremented on a path whose label is not
 *     visible (presumably the discard path - confirm).
 */
638 static int icmpv6_rcv(struct sk_buff *skb)
640 struct net_device *dev = skb->dev;
641 struct inet6_dev *idev = __in6_dev_get(dev);
642 struct in6_addr *saddr, *daddr;
643 struct ipv6hdr *orig_hdr;
644 struct icmp6hdr *hdr;
647 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
650 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
654 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
657 nh = skb_network_offset(skb);
658 skb_set_network_header(skb, sizeof(*hdr));
660 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
663 skb_set_network_header(skb, nh);
666 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
668 saddr = &ipv6_hdr(skb)->saddr;
669 daddr = &ipv6_hdr(skb)->daddr;
671 /* Perform checksum. */
672 switch (skb->ip_summed) {
673 case CHECKSUM_COMPLETE:
674 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
679 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
681 if (__skb_checksum_complete(skb)) {
682 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
683 NIP6(*saddr), NIP6(*daddr));
688 if (!pskb_pull(skb, sizeof(*hdr)))
691 hdr = icmp6_hdr(skb);
693 type = hdr->icmp6_type;
695 ICMP6MSGIN_INC_STATS_BH(idev, type);
698 case ICMPV6_ECHO_REQUEST:
699 icmpv6_echo_reply(skb);
702 case ICMPV6_ECHO_REPLY:
703 /* we couldn't care less */
706 case ICMPV6_PKT_TOOBIG:
707 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
708 standard destination cache. Seems, only "advanced"
709 destination cache will allow to solve this problem
712 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
714 hdr = icmp6_hdr(skb);
715 orig_hdr = (struct ipv6hdr *) (hdr + 1);
716 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
717 ntohl(hdr->icmp6_mtu));
720 * Drop through to notify
723 case ICMPV6_DEST_UNREACH:
724 case ICMPV6_TIME_EXCEED:
725 case ICMPV6_PARAMPROB:
726 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
729 case NDISC_ROUTER_SOLICITATION:
730 case NDISC_ROUTER_ADVERTISEMENT:
731 case NDISC_NEIGHBOUR_SOLICITATION:
732 case NDISC_NEIGHBOUR_ADVERTISEMENT:
737 case ICMPV6_MGM_QUERY:
738 igmp6_event_query(skb);
741 case ICMPV6_MGM_REPORT:
742 igmp6_event_report(skb);
745 case ICMPV6_MGM_REDUCTION:
746 case ICMPV6_NI_QUERY:
747 case ICMPV6_NI_REPLY:
748 case ICMPV6_MLD2_REPORT:
749 case ICMPV6_DHAAD_REQUEST:
750 case ICMPV6_DHAAD_REPLY:
751 case ICMPV6_MOBILE_PREFIX_SOL:
752 case ICMPV6_MOBILE_PREFIX_ADV:
756 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
759 if (type & ICMPV6_INFOMSG_MASK)
763 * error of unknown type.
764 * must pass to upper level
767 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
774 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
781 * Special lock-class for __icmpv6_sk:
/*
 * Lock-class key that icmpv6_sk_init() assigns to sk->sk_dst_lock of every
 * ICMPv6 control socket via lockdep_set_class().
 */
783 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * icmpv6_sk_init - per-namespace setup of the ICMPv6 control sockets.
 * @net: network namespace being initialised.
 *
 * Steps visible in this listing:
 *  1. Allocate a zeroed array of nr_cpu_ids socket pointers for
 *     net->ipv6.icmp_sk (the assignment itself is on a missing line) and
 *     check it for NULL.
 *  2. For every possible CPU create a raw PF_INET6 / IPPROTO_ICMPV6 kernel
 *     socket, store it in the array, move it to @net with sk_change_net(),
 *     make its allocations GFP_ATOMIC, give sk_dst_lock its own lockdep
 *     class, size a buffer for two 64K packets plus sk_buff overhead (the
 *     assigned field is on a missing line) and unhash the socket.
 *  3. On failure, release the sockets created so far and free the array.
 *
 * NOTE(review): the failure path releases icmp_sk[j] for every j < i. If
 * possible CPU ids are not contiguous, some of those slots were never
 * filled and are still NULL from kzalloc() - confirm that
 * sk_release_kernel() is never reached with such a slot.
 * NOTE(review): nr_cpu_ids * sizeof(struct sock *) is an open-coded size
 * multiplication.
 */
785 static int __net_init icmpv6_sk_init(struct net *net)
791 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
792 if (net->ipv6.icmp_sk == NULL)
795 for_each_possible_cpu(i) {
797 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
801 "Failed to initialize the ICMP6 control socket "
807 net->ipv6.icmp_sk[i] = sk = sock->sk;
808 sk_change_net(sk, net);
810 sk->sk_allocation = GFP_ATOMIC;
812 * Split off their lock-class, because sk->sk_dst_lock
813 * gets used from softirqs, which is safe for
814 * __icmpv6_sk (because those never get directly used
815 * via userspace syscalls), but unsafe for normal sockets.
817 lockdep_set_class(&sk->sk_dst_lock,
818 &icmpv6_socket_sk_dst_lock_key);
820 /* Enough space for 2 64K ICMP packets, including
821 * sk_buff struct overhead.
824 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
826 sk->sk_prot->unhash(sk);
831 for (j = 0; j < i; j++)
832 sk_release_kernel(net->ipv6.icmp_sk[j]);
833 kfree(net->ipv6.icmp_sk);
/*
 * icmpv6_sk_exit - per-namespace teardown of the ICMPv6 control sockets.
 * @net: network namespace being destroyed.
 *
 * Releases the socket of every possible CPU with sk_release_kernel() and
 * then frees the net->ipv6.icmp_sk pointer array.
 */
837 static void __net_exit icmpv6_sk_exit(struct net *net)
841 for_each_possible_cpu(i) {
842 sk_release_kernel(net->ipv6.icmp_sk[i]);
844 kfree(net->ipv6.icmp_sk);
/*
 * Per-network-namespace hooks: icmpv6_sk_init() as .init and
 * icmpv6_sk_exit() as .exit. Registered by icmpv6_init() and unregistered
 * by icmpv6_cleanup().
 */
847 static struct pernet_operations icmpv6_sk_ops = {
848 .init = icmpv6_sk_init,
849 .exit = icmpv6_sk_exit,
/*
 * icmpv6_init - register the ICMPv6 layer.
 *
 * Registers the per-namespace socket operations first and then adds
 * icmpv6_protocol as the handler for IPPROTO_ICMPV6. On the failure path
 * visible here an error is printed and the per-namespace operations are
 * unregistered again.
 * NOTE(review): the return statements and the error label are on lines
 * missing from this listing.
 */
852 int __init icmpv6_init(void)
856 err = register_pernet_subsys(&icmpv6_sk_ops);
861 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
866 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
867 unregister_pernet_subsys(&icmpv6_sk_ops);
/*
 * icmpv6_cleanup - undo icmpv6_init().
 *
 * Unregisters the per-namespace socket operations and then removes
 * icmpv6_protocol from the IPPROTO_ICMPV6 slot.
 */
871 void icmpv6_cleanup(void)
873 unregister_pernet_subsys(&icmpv6_sk_ops);
874 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Constant table of struct icmp6_err entries. Only two entry markers
 * (ADM_PROHIBITED and the reserved former NOT_NEIGHBOUR slot) are visible
 * in this listing.
 * NOTE(review): presumably this is the tab_unreach[] table whose .err and
 * .fatal members icmpv6_err_convert() reads, indexed by the Destination
 * Unreachable code - the declarator line is missing, so confirm.
 */
878 static const struct icmp6_err {
886 { /* ADM_PROHIBITED */
890 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno value.
 * @type: ICMPv6 type.
 * @code: ICMPv6 code.
 * @err:  output; receives the errno value.
 *
 * For ICMPV6_DEST_UNREACH with code <= ICMPV6_PORT_UNREACH the result comes
 * from tab_unreach[code] (.err is stored through @err, .fatal is kept in a
 * local). The bodies of the ICMPV6_PKT_TOOBIG, ICMPV6_PARAMPROB and
 * ICMPV6_TIME_EXCEED cases, the default handling and the return statement
 * are on lines missing from this listing.
 *
 * NOTE(review): @code is a signed int and only its upper bound is checked
 * in the visible code; a negative value would index before the start of
 * tab_unreach[] - confirm that callers never pass one.
 */
904 int icmpv6_err_convert(int type, int code, int *err)
911 case ICMPV6_DEST_UNREACH:
913 if (code <= ICMPV6_PORT_UNREACH) {
914 *err = tab_unreach[code].err;
915 fatal = tab_unreach[code].fatal;
919 case ICMPV6_PKT_TOOBIG:
923 case ICMPV6_PARAMPROB:
928 case ICMPV6_TIME_EXCEED:
936 EXPORT_SYMBOL(icmpv6_err_convert);
/*
 * Template sysctl table for ICMPv6. The visible entry is "ratelimit"
 * (NET_IPV6_ICMP_RATELIMIT): an int handled by proc_dointvec and backed by
 * init_net.ipv6.sysctl.icmpv6_time, the value icmpv6_xrlim_allow() uses as
 * its base timeout. ipv6_icmp_sysctl_init() duplicates this template with
 * kmemdup().
 * NOTE(review): the .mode line and the table terminator are missing from
 * this listing.
 */
939 ctl_table ipv6_icmp_table_template[] = {
941 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
942 .procname = "ratelimit",
943 .data = &init_net.ipv6.sysctl.icmpv6_time,
944 .maxlen = sizeof(int),
946 .proc_handler = &proc_dointvec
951 struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
953 struct ctl_table *table;
955 table = kmemdup(ipv6_icmp_table_template,
956 sizeof(ipv6_icmp_table_template),