2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
56 #include <linux/sysctl.h>
58 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
/*
 * Built-in devconf values. devinet_init_net() uses this object as the "all"
 * configuration: init_net points at it directly, other namespaces get a
 * kmemdup() copy. Entries are indexed by NET_IPV4_CONF_* - 1; the redirect
 * and shared-media knobs listed here start out as 1.
 */
67 static struct ipv4_devconf ipv4_devconf = {
69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
/*
 * Built-in "default" devconf values. devinet_init_net() installs this object
 * (or a kmemdup() copy for namespaces other than init_net) as
 * net->ipv4.devconf_dflt, and inetdev_init() copies that into every new
 * in_device. Same entries as ipv4_devconf plus accept_source_route = 1.
 */
76 static struct ipv4_devconf ipv4_devconf_dflt = {
78 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Access attribute 'attr' of the namespace's default devconf
 * (net->ipv4.devconf_dflt) through IPV4_DEVCONF(). Used as an lvalue in
 * inet_forward_change() and by address in devinet_sysctl_forward().
 */
86 #define IPV4_DEVCONF_DFLT(net, attr) \
87 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr(): the four address attributes are 32-bit values and
 * IFA_LABEL is a string of at most IFNAMSIZ - 1 bytes.
 */
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_ANYCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down, plus inetaddr_chain,
 * the blocking notifier chain on which address add/remove events
 * (NETDEV_UP / NETDEV_DOWN) are published.
 * NOTE(review): the two 'static inline' sysctl variants presumably belong to
 * a build without CONFIG_SYSCTL; the surrounding preprocessor lines are not
 * visible here, so confirm before relying on that.
 */
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
103 static void devinet_sysctl_register(struct in_device *idev);
104 static void devinet_sysctl_unregister(struct in_device *idev);
106 static inline void devinet_sysctl_register(struct in_device *idev)
109 static inline void devinet_sysctl_unregister(struct in_device *idev)
114 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled in_ifaddr with kzalloc(GFP_KERNEL) and initialise
 * its embedded RCU head. Callers in this file check the result against NULL.
 */
116 static struct in_ifaddr *inet_alloc_ifa(void)
118 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
121 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr from its
 * embedded rcu_head and releases the in_device it points at via in_dev_put().
 */
127 static void inet_rcu_free_ifa(struct rcu_head *head)
129 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
131 in_dev_put(ifa->ifa_dev);
/*
 * Defer release of 'ifa' through call_rcu(); inet_rcu_free_ifa() does the
 * actual work once the callback runs.
 */
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
137 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown step for an in_device (exported symbol). Asserts with
 * BUG_TRAP() that no addresses and no multicast entries remain, emits a
 * debug line when NET_REFCNT_DEBUG is defined, and has a diagnostic
 * "Freeing alive in_device" message for an unexpected state.
 * NOTE(review): the condition guarding that message is not visible here.
 */
140 void in_dev_finish_destroy(struct in_device *idev)
142 struct net_device *dev = idev->dev;
144 BUG_TRAP(!idev->ifa_list);
145 BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148 idev, dev ? dev->name : "NIL");
152 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 per-device state for 'dev'.
 *
 * Allocates a zeroed in_device, seeds its configuration from the namespace's
 * default devconf (with the sysctl pointer cleared), allocates ARP neighbour
 * parameters, registers the per-device sysctls and initialises multicast
 * state. dev->ip_ptr is published with rcu_assign_pointer() as the very last
 * step because packets can be received as soon as it is set.
 *
 * Returns the new in_device; inetdev_event() reports -ENOMEM when this fails.
 */
158 static struct in_device *inetdev_init(struct net_device *dev)
160 struct in_device *in_dev;
164 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
167 INIT_RCU_HEAD(&in_dev->rcu_head);
168 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
169 sizeof(in_dev->cnf));
170 in_dev->cnf.sysctl = NULL;
172 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
174 /* Reference in_dev->dev */
176 /* Account for reference dev->ip_ptr (below) */
179 devinet_sysctl_register(in_dev);
180 ip_mc_init_dev(in_dev);
181 if (dev->flags & IFF_UP)
184 /* we can receive as soon as ip_ptr is set -- do this last */
185 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(); recovers the in_device from its
 * embedded rcu_head.
 * NOTE(review): by its name it presumably drops a reference on idev, but the
 * statement doing so is not visible here.
 */
194 static void in_dev_rcu_put(struct rcu_head *head)
196 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device: destroy its multicast state, delete
 * every address still on ifa_list (always removing the list head), unregister
 * the per-device sysctls, release the ARP neighbour parameters and finally
 * hand the in_device to in_dev_rcu_put() through call_rcu().
 */
200 static void inetdev_destroy(struct in_device *in_dev)
202 struct in_ifaddr *ifa;
203 struct net_device *dev;
211 ip_mc_destroy_dev(in_dev);
213 while ((ifa = in_dev->ifa_list) != NULL) {
214 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
220 devinet_sysctl_unregister(in_dev);
221 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
224 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of 'in_dev' looking for one whose subnet
 * contains 'a' and, unless 'b' is zero, also contains 'b'.
 * NOTE(review): the return statements are not visible here; presumably
 * non-zero on a match and zero otherwise.
 */
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
230 for_primary_ifa(in_dev) {
231 if (inet_ifa_match(a, ifa)) {
232 if (!b || inet_ifa_match(b, ifa)) {
237 } endfor_ifa(in_dev);
/*
 * Unlink the address *ifap from in_dev's address list and announce it.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link that currently points at the address to remove
 * @destroy: passed in by callers as 0 or 1; its use is not visible here
 * @nlh/@pid: originating netlink request, forwarded to rtmsg_ifa()
 *
 * When the address is primary, the entries after it are scanned: secondaries
 * with the same mask and subnet are unlinked and announced (RTM_DELADDR plus
 * a notifier call), while do_promote (IN_DEV_PROMOTE_SECONDARIES) selects the
 * alias-promotion path instead. The address itself is then unlinked and
 * announced, netlink message first and NETDEV_DOWN notifier second. On
 * promotion the chosen secondary is re-linked after the last primary, loses
 * IFA_F_SECONDARY and is announced with RTM_NEWADDR.
 * NOTE(review): several branch and loop lines are missing from this view;
 * treat the description of the promotion path as approximate.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243 int destroy, struct nlmsghdr *nlh, u32 pid)
245 struct in_ifaddr *promote = NULL;
246 struct in_ifaddr *ifa, *ifa1 = *ifap;
247 struct in_ifaddr *last_prim = in_dev->ifa_list;
248 struct in_ifaddr *prev_prom = NULL;
249 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
253 /* 1. Deleting primary ifaddr forces deletion all secondaries
254 * unless alias promotion is set
257 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
260 while ((ifa = *ifap1) != NULL) {
261 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262 ifa1->ifa_scope <= ifa->ifa_scope)
265 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266 ifa1->ifa_mask != ifa->ifa_mask ||
267 !inet_ifa_match(ifa1->ifa_address, ifa)) {
268 ifap1 = &ifa->ifa_next;
274 *ifap1 = ifa->ifa_next;
276 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277 blocking_notifier_call_chain(&inetaddr_chain,
289 *ifap = ifa1->ifa_next;
291 /* 3. Announce address deletion */
293 /* Send message first, then call notifier.
294 At first sight, FIB update triggered by notifier
295 will refer to already deleted ifaddr, that could confuse
296 netlink listeners. It is not true: look, gated sees
297 that route deleted and if it still thinks that ifaddr
298 is valid, it will try to restore deleted routes... Grr.
299 So that, this order is correct.
301 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
307 prev_prom->ifa_next = promote->ifa_next;
308 promote->ifa_next = last_prim->ifa_next;
309 last_prim->ifa_next = promote;
312 promote->ifa_flags &= ~IFA_F_SECONDARY;
313 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314 blocking_notifier_call_chain(&inetaddr_chain,
316 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317 if (ifa1->ifa_mask != ifa->ifa_mask ||
318 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers without an
 * originating netlink request: passes a NULL header and pid 0.
 */
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
331 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link 'ifa' into the address list of ifa->ifa_dev and announce it.
 *
 * The new entry starts as primary (IFA_F_SECONDARY cleared). While walking
 * the list, last_primary tracks the link after the last primary address whose
 * scope is not narrower than the new one. An existing entry with the same
 * mask and subnet gets special handling: an identical local address and a
 * differing scope each have their own branch (outcomes not visible here),
 * otherwise the new address is marked secondary. A primary address also
 * reseeds the network random generator from its local address.
 *
 * The netlink RTM_NEWADDR message goes out before the NETDEV_UP notifier so
 * that listeners know the address before the FIB update it triggers.
 * NOTE(review): return values and the early-exit paths are not visible.
 */
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
337 struct in_device *in_dev = ifa->ifa_dev;
338 struct in_ifaddr *ifa1, **ifap, **last_primary;
342 if (!ifa->ifa_local) {
347 ifa->ifa_flags &= ~IFA_F_SECONDARY;
348 last_primary = &in_dev->ifa_list;
350 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351 ifap = &ifa1->ifa_next) {
352 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353 ifa->ifa_scope <= ifa1->ifa_scope)
354 last_primary = &ifa1->ifa_next;
355 if (ifa1->ifa_mask == ifa->ifa_mask &&
356 inet_ifa_match(ifa1->ifa_address, ifa)) {
357 if (ifa1->ifa_local == ifa->ifa_local) {
361 if (ifa1->ifa_scope != ifa->ifa_scope) {
365 ifa->ifa_flags |= IFA_F_SECONDARY;
369 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370 net_srandom(ifa->ifa_local);
374 ifa->ifa_next = *ifap;
377 /* Send message first, then call notifier.
378 Notifier will trigger FIB update, so that
379 listeners of netlink will know about new ifaddr */
380 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Insert 'ifa' without an originating netlink request; returns whatever
 * __inet_insert_ifa() returns for a NULL header and pid 0.
 */
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
388 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach 'ifa' to 'dev' and insert it.
 *
 * Looks up the device's in_device, marks all of its devconf entries as set
 * (ipv4_devconf_setall), binds ifa->ifa_dev to it when not already bound
 * (an address bound to a different in_device trips BUG_TRAP), forces host
 * scope for loopback addresses and returns the result of inet_insert_ifa().
 * NOTE(review): handling of a missing in_device is not visible here.
 */
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
393 struct in_device *in_dev = __in_dev_get_rtnl(dev);
401 ipv4_devconf_setall(in_dev);
402 if (ifa->ifa_dev != in_dev) {
403 BUG_TRAP(!ifa->ifa_dev);
405 ifa->ifa_dev = in_dev;
407 if (ipv4_is_loopback(ifa->ifa_local))
408 ifa->ifa_scope = RT_SCOPE_HOST;
409 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index 'ifindex' in namespace
 * 'net' (exported symbol). The device lookup and in_dev_get() both run under
 * read_lock(&dev_base_lock). in_dev starts as NULL, which is what the caller
 * in inet_rtm_deladdr() tests for.
 * NOTE(review): in_dev_get() presumably takes a reference that the caller
 * must drop; its definition is not visible here.
 */
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
414 struct net_device *dev;
415 struct in_device *in_dev = NULL;
416 read_lock(&dev_base_lock);
417 dev = __dev_get_by_index(net, ifindex);
419 in_dev = in_dev_get(dev);
420 read_unlock(&dev_base_lock);
424 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of 'in_dev' for one whose netmask equals
 * 'mask' and whose subnet contains 'prefix'.
 * NOTE(review): the return statements are not visible; presumably the
 * matching in_ifaddr, or NULL when nothing matches.
 */
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
431 for_primary_ifa(in_dev) {
432 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
434 } endfor_ifa(in_dev);
/*
 * rtnetlink handler for RTM_DELADDR (registered in devinet_init()).
 *
 * Parses the request against ifa_ipv4_policy, resolves the in_device from
 * ifm->ifa_index and scans its address list. Each optional attribute narrows
 * the match: IFA_LOCAL must equal the local address, IFA_LABEL must equal the
 * label, and IFA_ADDRESS must lie in the entry's subnet with an identical
 * prefix length. The first matching entry is removed via __inet_del_ifa()
 * with the request header and sender pid so the notification can reference
 * them. -EADDRNOTAVAIL is the error when no entry matches.
 * NOTE(review): __in_dev_put() presumably drops the reference taken by
 * inetdev_by_index(); confirm against its definition.
 */
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
440 struct net *net = sock_net(skb->sk);
441 struct nlattr *tb[IFA_MAX+1];
442 struct in_device *in_dev;
443 struct ifaddrmsg *ifm;
444 struct in_ifaddr *ifa, **ifap;
449 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 ifm = nlmsg_data(nlh);
454 in_dev = inetdev_by_index(net, ifm->ifa_index);
455 if (in_dev == NULL) {
460 __in_dev_put(in_dev);
462 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463 ifap = &ifa->ifa_next) {
465 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
468 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
471 if (tb[IFA_ADDRESS] &&
472 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
476 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an RTM_NEWADDR request.
 *
 * The message is parsed against ifa_ipv4_policy; a prefix length above 32 or
 * a missing IFA_LOCAL is rejected. The target device is found by
 * ifm->ifa_index and its in_device fetched under RTNL. A missing IFA_ADDRESS
 * defaults to IFA_LOCAL. Prefix length, mask, flags, scope, local/peer
 * address and the optional broadcast/anycast attributes are copied over; the
 * label comes from IFA_LABEL when supplied, otherwise from the device name.
 * NOTE(review): the error paths and return statements are not visible here;
 * the caller is inet_rtm_newaddr().
 */
485 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
487 struct nlattr *tb[IFA_MAX+1];
488 struct in_ifaddr *ifa;
489 struct ifaddrmsg *ifm;
490 struct net_device *dev;
491 struct in_device *in_dev;
494 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 ifm = nlmsg_data(nlh);
500 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
503 dev = __dev_get_by_index(net, ifm->ifa_index);
508 in_dev = __in_dev_get_rtnl(dev);
513 ifa = inet_alloc_ifa();
516 * A potential indev allocation can be left alive, it stays
517 * assigned to its device and is destroy with it.
521 ipv4_devconf_setall(in_dev);
524 if (tb[IFA_ADDRESS] == NULL)
525 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
527 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
528 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
529 ifa->ifa_flags = ifm->ifa_flags;
530 ifa->ifa_scope = ifm->ifa_scope;
531 ifa->ifa_dev = in_dev;
533 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
534 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
536 if (tb[IFA_BROADCAST])
537 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
540 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
543 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
545 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink handler for RTM_NEWADDR (registered in devinet_init()): converts
 * the request into an in_ifaddr with rtm_to_ifaddr() and inserts it, passing
 * the request header and sender pid on to the notification.
 */
553 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
555 struct net *net = sock_net(skb->sk);
556 struct in_ifaddr *ifa;
560 ifa = rtm_to_ifaddr(net, nlh);
564 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
568 * Determine a default network mask, based on the IP address.
/*
 * Classful prefix length for 'addr' (network byte order). The result starts
 * at -1 for addresses that fit no class; the zero network and classes A, B
 * and C each have their own branch. devinet_ioctl() treats a negative result
 * as an invalid address and otherwise uses it as ifa_prefixlen.
 * NOTE(review): the value assigned in each branch is not visible here.
 */
571 static __inline__ int inet_abc_len(__be32 addr)
573 int rc = -1; /* Something else, probably a multicast. */
575 if (ipv4_is_zeronet(addr))
578 __u32 haddr = ntohl(addr);
580 if (IN_CLASSA(haddr))
582 else if (IN_CLASSB(haddr))
584 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls (SIOCGIF* and SIOCSIF* for ADDR,
 * BRDADDR, DSTADDR, NETMASK, plus SIOCSIFFLAGS) on behalf of user space.
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: ioctl number
 * @arg: user pointer to a struct ifreq (read, and written back for "get")
 *
 * Outline of the visible flow:
 *  - copy the ifreq in, force NUL termination of the name, keep a copy of
 *    the caller's sockaddr and locate a ':' alias separator in the name;
 *  - "get" commands remember whether the caller supplied an AF_INET address
 *    (tryaddrmatch) and then clear the sockaddr for the reply; "set"
 *    commands require CAP_NET_ADMIN and an AF_INET address;
 *  - resolve the device by name and search its address list, first by
 *    label and original address (4.4BSD alias style), then by label only;
 *  - without a matching address only SIOCSIFADDR and SIOCSIFFLAGS proceed,
 *    everything else fails with -EADDRNOTAVAIL;
 *  - "get" commands fill sin_addr from the matching field, "set" commands
 *    unlink the address, modify it and insert it again so notifications are
 *    regenerated; SIOCSIFADDR may allocate a fresh in_ifaddr and derives a
 *    classful prefix unless the device is point-to-point;
 *  - the reply is copied back with copy_to_user(), -EFAULT on failure.
 * NOTE(review): locking, several guards and all goto targets are missing
 * from this view; do not treat this outline as a complete contract.
 */
592 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
595 struct sockaddr_in sin_orig;
596 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
597 struct in_device *in_dev;
598 struct in_ifaddr **ifap = NULL;
599 struct in_ifaddr *ifa = NULL;
600 struct net_device *dev;
603 int tryaddrmatch = 0;
606 * Fetch the caller's info block into kernel space
609 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
611 ifr.ifr_name[IFNAMSIZ - 1] = 0;
613 /* save original address for comparison */
614 memcpy(&sin_orig, sin, sizeof(*sin));
616 colon = strchr(ifr.ifr_name, ':');
621 dev_load(net, ifr.ifr_name);
625 case SIOCGIFADDR: /* Get interface address */
626 case SIOCGIFBRDADDR: /* Get the broadcast address */
627 case SIOCGIFDSTADDR: /* Get the destination address */
628 case SIOCGIFNETMASK: /* Get the netmask for the interface */
629 /* Note that these ioctls will not sleep,
630 so that we do not impose a lock.
631 One day we will be forced to put shlock here (I mean SMP)
633 tryaddrmatch = (sin_orig.sin_family == AF_INET);
634 memset(sin, 0, sizeof(*sin));
635 sin->sin_family = AF_INET;
640 if (!capable(CAP_NET_ADMIN))
643 case SIOCSIFADDR: /* Set interface address (and family) */
644 case SIOCSIFBRDADDR: /* Set the broadcast address */
645 case SIOCSIFDSTADDR: /* Set the destination address */
646 case SIOCSIFNETMASK: /* Set the netmask for the interface */
648 if (!capable(CAP_NET_ADMIN))
651 if (sin->sin_family != AF_INET)
662 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
668 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
670 /* Matthias Andree */
671 /* compare label and address (4.4BSD style) */
672 /* note: we only do this for a limited set of ioctls
673 and only if the original address family was AF_INET.
674 This is checked above. */
675 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676 ifap = &ifa->ifa_next) {
677 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
678 sin_orig.sin_addr.s_addr ==
684 /* we didn't get a match, maybe the application is
685 4.3BSD-style and passed in junk so we fall back to
686 comparing just the label */
688 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
689 ifap = &ifa->ifa_next)
690 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
695 ret = -EADDRNOTAVAIL;
696 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
700 case SIOCGIFADDR: /* Get interface address */
701 sin->sin_addr.s_addr = ifa->ifa_local;
704 case SIOCGIFBRDADDR: /* Get the broadcast address */
705 sin->sin_addr.s_addr = ifa->ifa_broadcast;
708 case SIOCGIFDSTADDR: /* Get the destination address */
709 sin->sin_addr.s_addr = ifa->ifa_address;
712 case SIOCGIFNETMASK: /* Get the netmask for the interface */
713 sin->sin_addr.s_addr = ifa->ifa_mask;
718 ret = -EADDRNOTAVAIL;
722 if (!(ifr.ifr_flags & IFF_UP))
723 inet_del_ifa(in_dev, ifap, 1);
726 ret = dev_change_flags(dev, ifr.ifr_flags);
729 case SIOCSIFADDR: /* Set interface address (and family) */
731 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
736 if ((ifa = inet_alloc_ifa()) == NULL)
739 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
741 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
744 if (ifa->ifa_local == sin->sin_addr.s_addr)
746 inet_del_ifa(in_dev, ifap, 0);
747 ifa->ifa_broadcast = 0;
748 ifa->ifa_anycast = 0;
752 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
754 if (!(dev->flags & IFF_POINTOPOINT)) {
755 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
756 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
757 if ((dev->flags & IFF_BROADCAST) &&
758 ifa->ifa_prefixlen < 31)
759 ifa->ifa_broadcast = ifa->ifa_address |
762 ifa->ifa_prefixlen = 32;
763 ifa->ifa_mask = inet_make_mask(32);
765 ret = inet_set_ifa(dev, ifa);
768 case SIOCSIFBRDADDR: /* Set the broadcast address */
770 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
771 inet_del_ifa(in_dev, ifap, 0);
772 ifa->ifa_broadcast = sin->sin_addr.s_addr;
773 inet_insert_ifa(ifa);
777 case SIOCSIFDSTADDR: /* Set the destination address */
779 if (ifa->ifa_address == sin->sin_addr.s_addr)
782 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
785 inet_del_ifa(in_dev, ifap, 0);
786 ifa->ifa_address = sin->sin_addr.s_addr;
787 inet_insert_ifa(ifa);
790 case SIOCSIFNETMASK: /* Set the netmask for the interface */
793 * The mask we set must be legal.
796 if (bad_mask(sin->sin_addr.s_addr, 0))
799 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
800 __be32 old_mask = ifa->ifa_mask;
801 inet_del_ifa(in_dev, ifap, 0);
802 ifa->ifa_mask = sin->sin_addr.s_addr;
803 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
805 /* See if current broadcast address matches
806 * with current netmask, then recalculate
807 * the broadcast address. Otherwise it's a
808 * funny address, so don't touch it since
809 * the user seems to know what (s)he's doing...
811 if ((dev->flags & IFF_BROADCAST) &&
812 (ifa->ifa_prefixlen < 31) &&
813 (ifa->ifa_broadcast ==
814 (ifa->ifa_local|~old_mask))) {
815 ifa->ifa_broadcast = (ifa->ifa_local |
816 ~sin->sin_addr.s_addr);
818 inet_insert_ifa(ifa);
828 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Interface-configuration callback for PF_INET (installed with
 * register_gifconf() in devinet_init()).
 *
 * Emits one struct ifreq per address of 'dev' into the user buffer 'buf'
 * while at least sizeof(struct ifreq) bytes of 'len' remain. Each record is
 * zeroed, named after either the address label or the device name, and
 * carries an AF_INET sockaddr. 'done' accumulates the bytes produced.
 * NOTE(review): which name is chosen when, the value stored in sin_addr and
 * the return statements are not visible here.
 */
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
834 struct in_device *in_dev = __in_dev_get_rtnl(dev);
835 struct in_ifaddr *ifa;
839 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
842 for (; ifa; ifa = ifa->ifa_next) {
847 if (len < (int) sizeof(ifr))
849 memset(&ifr, 0, sizeof(struct ifreq));
851 strcpy(ifr.ifr_name, ifa->ifa_label);
853 strcpy(ifr.ifr_name, dev->name);
855 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
859 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
863 buf += sizeof(struct ifreq);
864 len -= sizeof(struct ifreq);
865 done += sizeof(struct ifreq);
/*
 * Choose a local source address for traffic leaving 'dev' towards 'dst'
 * (exported symbol).
 *
 * @dev:   outgoing device
 * @dst:   destination address, 0 for "any"
 * @scope: widest acceptable scope value for the chosen address
 *
 * First pass: the primary addresses of 'dev', skipping any whose scope value
 * exceeds 'scope'; an address whose subnet contains 'dst' (or any address
 * when dst is 0) is taken, and another assignment keeps a fallback.
 * Second pass: every device of the namespace is scanned under
 * dev_base_lock for a primary address that is not link scope and whose
 * scope is within 'scope'.
 * NOTE(review): the RCU locking calls and the conditions between the two
 * passes are not visible here.
 */
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
874 struct in_device *in_dev;
875 struct net *net = dev_net(dev);
878 in_dev = __in_dev_get_rcu(dev);
882 for_primary_ifa(in_dev) {
883 if (ifa->ifa_scope > scope)
885 if (!dst || inet_ifa_match(dst, ifa)) {
886 addr = ifa->ifa_local;
890 addr = ifa->ifa_local;
891 } endfor_ifa(in_dev);
898 /* Not loopback addresses on loopback should be preferred
899 in this case. It is importnat that lo is the first interface
902 read_lock(&dev_base_lock);
904 for_each_netdev(net, dev) {
905 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
908 for_primary_ifa(in_dev) {
909 if (ifa->ifa_scope != RT_SCOPE_LINK &&
910 ifa->ifa_scope <= scope) {
911 addr = ifa->ifa_local;
912 goto out_unlock_both;
914 } endfor_ifa(in_dev);
917 read_unlock(&dev_base_lock);
/*
 * Per-device worker for inet_confirm_addr(): scans the addresses of 'in_dev'
 * for a local address that satisfies the 'local' / 'dst' / 'scope'
 * wildcards. 'addr' holds the current candidate and 'same' records whether a
 * subnet containing both 'local' and 'dst' (each optional) was seen.
 * Returns the candidate when 'same' is set, otherwise 0.
 * NOTE(review): much of the loop body is missing from this view.
 */
923 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
924 __be32 local, int scope)
931 (local == ifa->ifa_local || !local) &&
932 ifa->ifa_scope <= scope) {
933 addr = ifa->ifa_local;
938 same = (!local || inet_ifa_match(local, ifa)) &&
939 (!dst || inet_ifa_match(dst, ifa));
943 /* Is the selected addr into dst subnet? */
944 if (inet_ifa_match(addr, ifa))
946 /* No, then can we use new local src? */
947 if (ifa->ifa_scope <= scope) {
948 addr = ifa->ifa_local;
951 /* search for large dst subnet for addr */
955 } endfor_ifa(in_dev);
957 return same? addr : 0;
961 * Confirm that local IP address exists using wildcards:
962 * - in_dev: only on this interface, 0=any interface
963 * - dst: only in the same subnet as dst, 0=any dst
964 * - local: address, 0=autoselect the local address
965 * - scope: maximum allowed scope value for the local address
/*
 * For any scope other than RT_SCOPE_LINK the check is delegated directly to
 * confirm_addr_indev() on the supplied in_dev. For link scope, every device
 * in the namespace of in_dev->dev is tried in turn under dev_base_lock.
 * NOTE(review): the loop exit condition and the return value of the
 * link-scope path are not visible here.
 */
967 __be32 inet_confirm_addr(struct in_device *in_dev,
968 __be32 dst, __be32 local, int scope)
971 struct net_device *dev;
974 if (scope != RT_SCOPE_LINK)
975 return confirm_addr_indev(in_dev, dst, local, scope);
977 net = dev_net(in_dev->dev);
978 read_lock(&dev_base_lock);
980 for_each_netdev(net, dev) {
981 if ((in_dev = __in_dev_get_rcu(dev))) {
982 addr = confirm_addr_indev(in_dev, dst, local, scope);
988 read_unlock(&dev_base_lock);
/*
 * Subscribe 'nb' to IPv4 address events (exported symbol). Returns the
 * result of blocking_notifier_chain_register() on inetaddr_chain.
 */
997 int register_inetaddr_notifier(struct notifier_block *nb)
999 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove 'nb' from the IPv4 address event chain (exported symbol). Returns
 * the result of blocking_notifier_chain_unregister() on inetaddr_chain.
 */
1002 int unregister_inetaddr_notifier(struct notifier_block *nb)
1004 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
/*
 * Mechanics of the relabelling done below: for every address the old label
 * is saved, the new device name is copied into the label, and an alias
 * suffix is then appended. The suffix is the ':' part of the old label, or a
 * generated ":<n>" built with sprintf(). If name plus suffix would not fit
 * in IFNAMSIZ, the suffix overwrites the tail of the label so that the
 * result still ends within the buffer.
 * NOTE(review): the conditions that choose between the old suffix, the
 * generated one and no suffix at all are not visible here.
 */
1007 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1008 * alias numbering and to create unique labels if possible.
1010 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1012 struct in_ifaddr *ifa;
1015 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1016 char old[IFNAMSIZ], *dot;
1018 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1019 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1022 dot = strchr(old, ':');
1024 sprintf(old, ":%d", named);
1027 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1028 strcat(ifa->ifa_label, dot);
1030 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1035 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * On NETDEV_REGISTER the IPv4 state is created with inetdev_init(); failure
 * is reported as -ENOMEM, and loopback devices get NOXFRM and NOPOLICY set.
 * The later switch handles the remaining events:
 *  - NETDEV_REGISTER reaching the switch is logged as a bug;
 *  - a loopback device gets 127.0.0.1/8 with host scope, labelled with the
 *    device name;
 *  - NETDEV_UNREGISTER destroys the IPv4 state;
 *  - NETDEV_CHANGENAME relabels the addresses and re-registers the
 *    per-device sysctls under the new name.
 * NOTE(review): several case labels and guards are missing from this view,
 * including the event under which the loopback address is added and the MTU
 * test that lets NETDEV_CHANGEMTU fall into the unregister path.
 */
1037 static int inetdev_event(struct notifier_block *this, unsigned long event,
1040 struct net_device *dev = ptr;
1041 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1046 if (event == NETDEV_REGISTER) {
1047 in_dev = inetdev_init(dev);
1049 return notifier_from_errno(-ENOMEM);
1050 if (dev->flags & IFF_LOOPBACK) {
1051 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1052 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1059 case NETDEV_REGISTER:
1060 printk(KERN_DEBUG "inetdev_event: bug\n");
1066 if (dev->flags & IFF_LOOPBACK) {
1067 struct in_ifaddr *ifa;
1068 if ((ifa = inet_alloc_ifa()) != NULL) {
1070 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1071 ifa->ifa_prefixlen = 8;
1072 ifa->ifa_mask = inet_make_mask(8);
1073 in_dev_hold(in_dev);
1074 ifa->ifa_dev = in_dev;
1075 ifa->ifa_scope = RT_SCOPE_HOST;
1076 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1077 inet_insert_ifa(ifa);
1085 case NETDEV_CHANGEMTU:
1088 /* MTU fell below 68, disable IP */
1089 case NETDEV_UNREGISTER:
1090 inetdev_destroy(in_dev);
1092 case NETDEV_CHANGENAME:
1093 /* Do not notify about label change, this event is
1094 * not interesting to applications using netlink.
1096 inetdev_changename(dev, in_dev);
1098 devinet_sysctl_unregister(in_dev);
1099 devinet_sysctl_register(in_dev);
/*
 * Notifier block that routes netdevice events to inetdev_event();
 * registered from devinet_init().
 */
1106 static struct notifier_block ip_netdev_notifier = {
1107 .notifier_call =inetdev_event,
/*
 * Upper bound on the size of one address notification: the aligned
 * ifaddrmsg header, four 4-byte attributes and an IFNAMSIZ label.
 * rtmsg_ifa() sizes its skb with this, so it must cover every attribute
 * that inet_fill_ifaddr() can emit.
 */
1110 static inline size_t inet_nlmsg_size(void)
1112 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1113 + nla_total_size(4) /* IFA_ADDRESS */
1114 + nla_total_size(4) /* IFA_LOCAL */
1115 + nla_total_size(4) /* IFA_BROADCAST */
1116 + nla_total_size(4) /* IFA_ANYCAST */
1117 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise 'ifa' into 'skb' as one netlink address message.
 *
 * @pid/@seq: destination pid and sequence number for the header
 * @event:    message type (RTM_NEWADDR or RTM_DELADDR at the call sites)
 * @flags:    netlink header flags, e.g. NLM_F_MULTI for dumps
 *
 * The ifaddrmsg header reports AF_INET, the prefix length, the address flags
 * with IFA_F_PERMANENT always or-ed in, the scope and the interface index.
 * Address, broadcast, anycast and label attributes are added only when
 * non-zero / non-empty. Returns the result of nlmsg_end() on success; the
 * failure path cancels the partially built message with nlmsg_cancel().
 * NOTE(review): the guard on IFA_LOCAL and the error return value are not
 * visible here.
 */
1120 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1121 u32 pid, u32 seq, int event, unsigned int flags)
1123 struct ifaddrmsg *ifm;
1124 struct nlmsghdr *nlh;
1126 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1130 ifm = nlmsg_data(nlh);
1131 ifm->ifa_family = AF_INET;
1132 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1133 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1134 ifm->ifa_scope = ifa->ifa_scope;
1135 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1137 if (ifa->ifa_address)
1138 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1141 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1143 if (ifa->ifa_broadcast)
1144 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1146 if (ifa->ifa_anycast)
1147 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1149 if (ifa->ifa_label[0])
1150 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1152 return nlmsg_end(skb, nlh);
1155 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink dump callback for RTM_GETADDR (registered in devinet_init()).
 *
 * Iterates over the devices of the requesting socket's namespace and over
 * each device's address list, emitting one RTM_NEWADDR message per address
 * with NLM_F_MULTI. cb->args[0] and cb->args[1] hold the device and address
 * positions to resume from; addresses with an index below the saved one are
 * skipped. A non-positive result from inet_fill_ifaddr() ends the pass, and
 * the current address index is stored back into cb->args[1].
 * NOTE(review): the device-index bookkeeping and the return value are not
 * visible here.
 */
1159 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1161 struct net *net = sock_net(skb->sk);
1163 struct net_device *dev;
1164 struct in_device *in_dev;
1165 struct in_ifaddr *ifa;
1166 int s_ip_idx, s_idx = cb->args[0];
1168 s_ip_idx = ip_idx = cb->args[1];
1170 for_each_netdev(net, dev) {
1175 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1178 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1179 ifa = ifa->ifa_next, ip_idx++) {
1180 if (ip_idx < s_ip_idx)
1182 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1184 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1193 cb->args[1] = ip_idx;
/*
 * Broadcast an address event to RTNLGRP_IPV4_IFADDR listeners.
 *
 * @event: RTM_NEWADDR or RTM_DELADDR
 * @ifa:   address the event is about
 * @nlh:   originating request, may be NULL (sequence number 0 is used then)
 * @pid:   pid of the requester
 *
 * Allocates an skb of inet_nlmsg_size() bytes with GFP_KERNEL, fills it with
 * inet_fill_ifaddr() and sends it with rtnl_notify(). An -EMSGSIZE result
 * from the fill step would mean inet_nlmsg_size() is wrong, hence the
 * WARN_ON. Errors are reported to the group via rtnl_set_sk_err().
 * NOTE(review): the error-path control flow is not visible here.
 */
1198 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1201 struct sk_buff *skb;
1202 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1206 net = dev_net(ifa->ifa_dev->dev);
1207 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1211 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1213 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1214 WARN_ON(err == -EMSGSIZE);
1218 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1221 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1224 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry 'i' of the namespace's default devconf to every device
 * whose own entry has not been explicitly set, i.e. whose bit 'i' in
 * cnf.state is clear. The device list is walked under dev_base_lock.
 */
1226 static void devinet_copy_dflt_conf(struct net *net, int i)
1228 struct net_device *dev;
1230 read_lock(&dev_base_lock);
1231 for_each_netdev(net, dev) {
1232 struct in_device *in_dev;
1234 in_dev = __in_dev_get_rcu(dev);
1235 if (in_dev && !test_bit(i, in_dev->cnf.state))
1236 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1239 read_unlock(&dev_base_lock);
/*
 * Apply a change of the namespace-wide "all" forwarding switch: the "all"
 * accept_redirects value becomes the inverse of forwarding, the default
 * forwarding value follows it, and FORWARDING is set on the devices of the
 * namespace while holding dev_base_lock.
 * NOTE(review): the guard in front of IN_DEV_CONF_SET() is not visible
 * here; presumably devices without an in_device are skipped.
 */
1242 static void inet_forward_change(struct net *net)
1244 struct net_device *dev;
1245 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1247 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1248 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1250 read_lock(&dev_base_lock);
1251 for_each_netdev(net, dev) {
1252 struct in_device *in_dev;
1254 in_dev = __in_dev_get_rcu(dev);
1256 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1259 read_unlock(&dev_base_lock);
/*
 * /proc handler for ordinary devconf integers. After proc_dointvec() has
 * done the transfer, the entry's index is derived from the distance between
 * ctl->data and the start of cnf->data, its bit in cnf->state is set to
 * record an explicit setting, and a change to the namespace default is
 * pushed to the devices with devinet_copy_dflt_conf().
 * ctl->extra1 carries the ipv4_devconf, ctl->extra2 the namespace.
 * NOTE(review): the condition guarding the bookkeeping (presumably
 * "write") and the return statement are not visible here.
 */
1264 static int devinet_conf_proc(ctl_table *ctl, int write,
1265 struct file* filp, void __user *buffer,
1266 size_t *lenp, loff_t *ppos)
1268 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1271 struct ipv4_devconf *cnf = ctl->extra1;
1272 struct net *net = ctl->extra2;
1273 int i = (int *)ctl->data - cnf->data;
1275 set_bit(i, cnf->state);
1277 if (cnf == net->ipv4.devconf_dflt)
1278 devinet_copy_dflt_conf(net, i);
/*
 * Binary sysctl strategy routine for devconf integers.
 *
 * A new value must be present and exactly sizeof(int) bytes long; it is
 * fetched with get_user(). When the caller asked for the old value, at most
 * table->maxlen bytes of the current value are copied out and the length is
 * written back. Afterwards the entry is marked as explicitly set in
 * cnf->state and a change to the namespace default is propagated with
 * devinet_copy_dflt_conf(), mirroring devinet_conf_proc().
 * NOTE(review): the error codes, the store of the new value and the return
 * statements are not visible here.
 */
1284 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1285 void __user *oldval, size_t __user *oldlenp,
1286 void __user *newval, size_t newlen)
1288 struct ipv4_devconf *cnf;
1290 int *valp = table->data;
1294 if (!newval || !newlen)
1297 if (newlen != sizeof(int))
1300 if (get_user(new, (int __user *)newval))
1306 if (oldval && oldlenp) {
1309 if (get_user(len, oldlenp))
1313 if (len > table->maxlen)
1314 len = table->maxlen;
1315 if (copy_to_user(oldval, valp, len))
1317 if (put_user(len, oldlenp))
1324 cnf = table->extra1;
1325 net = table->extra2;
1326 i = (int *)table->data - cnf->data;
1328 set_bit(i, cnf->state);
1330 if (cnf == net->ipv4.devconf_dflt)
1331 devinet_copy_dflt_conf(net, i);
/*
 * /proc handler for the forwarding knobs. Runs proc_dointvec() and, when a
 * write actually changed the value, distinguishes three cases by the address
 * of the variable: the namespace-wide "all" entry triggers
 * inet_forward_change(), the "default" entry needs nothing further, and any
 * other (per-device) entry takes the remaining branch.
 * NOTE(review): the body of that last branch and the return statement are
 * not visible here.
 */
1336 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1337 struct file* filp, void __user *buffer,
1338 size_t *lenp, loff_t *ppos)
1340 int *valp = ctl->data;
1342 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1344 if (write && *valp != val) {
1345 struct net *net = ctl->extra2;
1347 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1348 inet_forward_change(net);
1349 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
/*
 * /proc handler used by the DEVINET_SYSCTL_FLUSHING_ENTRY knobs: performs
 * the integer transfer with proc_dointvec() and then acts only when a write
 * changed the stored value.
 * NOTE(review): the action taken on change is not visible here; going by
 * the function name it is presumably a cache flush - confirm.
 */
1356 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1357 struct file* filp, void __user *buffer,
1358 size_t *lenp, loff_t *ppos)
1360 int *valp = ctl->data;
1362 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1364 if (write && *valp != val)
/*
 * Binary sysctl counterpart of ipv4_doint_and_flush(): delegates the value
 * handling to devinet_conf_sysctl().
 * NOTE(review): what happens with 'ret' afterwards is not visible here.
 */
1370 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1371 void __user *oldval, size_t __user *oldlenp,
1372 void __user *newval, size_t newlen)
1374 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
/*
 * Helpers that build one ctl_table initialiser per devconf attribute.
 *
 * DEVINET_SYSCTL_ENTRY is the general form: the entry is numbered
 * NET_IPV4_CONF_<attr>, points at slot <attr> - 1 of the static
 * ipv4_devconf.data array, is sizeof(int) wide and carries &ipv4_devconf in
 * extra1. __devinet_sysctl_register() later rebases these pointers onto the
 * devconf instance being registered.
 *
 * The wrappers only choose mode and handlers:
 *   RW_ENTRY       0644, devinet_conf_proc / devinet_conf_sysctl
 *   RO_ENTRY       0444, same handlers
 *   COMPLEX_ENTRY  0644, caller-supplied handlers
 *   FLUSHING_ENTRY 0644, ipv4_doint_and_flush / ipv4_doint_and_flush_strategy
 */
1384 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1386 .ctl_name = NET_IPV4_CONF_ ## attr, \
1388 .data = ipv4_devconf.data + \
1389 NET_IPV4_CONF_ ## attr - 1, \
1390 .maxlen = sizeof(int), \
1392 .proc_handler = proc, \
1393 .strategy = sysctl, \
1394 .extra1 = &ipv4_devconf, \
1397 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1398 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1399 devinet_conf_sysctl)
1401 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1402 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1403 devinet_conf_sysctl)
1405 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1406 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1408 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1409 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1410 ipv4_doint_and_flush_strategy)
/*
 * Template for the per-devconf sysctl directory. It is never registered
 * itself: __devinet_sysctl_register() duplicates it with kmemdup() and
 * rebases the copy onto the target devconf. Forwarding uses its own /proc
 * handler, mc_forwarding is read-only, and the last four entries use the
 * flushing handlers; everything else is a plain read-write integer.
 */
1412 static struct devinet_sysctl_table {
1413 struct ctl_table_header *sysctl_header;
1414 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1416 } devinet_sysctl = {
1418 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1419 devinet_sysctl_forward,
1420 devinet_conf_sysctl),
1421 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1423 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1424 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1425 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1426 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1427 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1428 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1429 "accept_source_route"),
1430 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1431 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1432 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1433 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1434 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1435 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1436 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1437 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1438 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1440 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1441 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1442 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1443 "force_igmp_version"),
1444 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1445 "promote_secondaries"),
/*
 * Register the sysctl directory net/ipv4/conf/<dev_name> for devconf 'p'.
 *
 * @net:      namespace the table is registered in
 * @dev_name: directory name ("all", "default" or an interface name)
 * @ctl_name: binary sysctl id for that directory
 * @p:        devconf instance the entries should operate on
 *
 * The devinet_sysctl template is duplicated, every entry except the last
 * array slot has its data pointer shifted by the byte offset between 'p'
 * and the static ipv4_devconf, and extra1/extra2 are pointed at 'p' and
 * 'net'. The directory name is a private kstrdup() copy because sysctl
 * treats procname as constant while interface names can change.
 * NOTE(review): allocation-failure handling, the store into p->sysctl and
 * the return values are not visible here.
 */
1449 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1450 int ctl_name, struct ipv4_devconf *p)
1453 struct devinet_sysctl_table *t;
1455 #define DEVINET_CTL_PATH_DEV 3
1457 struct ctl_path devinet_ctl_path[] = {
1458 { .procname = "net", .ctl_name = CTL_NET, },
1459 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1460 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1461 { /* to be set */ },
1465 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1469 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1470 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1471 t->devinet_vars[i].extra1 = p;
1472 t->devinet_vars[i].extra2 = net;
1476 * Make a copy of dev_name, because '.procname' is regarded as const
1477 * by sysctl and we wouldn't want anyone to change it under our feet
1478 * (see SIOCSIFNAME).
1480 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1484 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1485 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1487 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1489 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for 'cnf': fetch the table stored in
 * cnf->sysctl and unregister its sysctl header.
 * NOTE(review): the NULL check and the freeing of the table and its name
 * copy are not visible here.
 */
1503 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1505 struct devinet_sysctl_table *t = cnf->sysctl;
1511 unregister_sysctl_table(t->sysctl_header);
/*
 * Register both sysctl trees of a device: the ARP neighbour parameters under
 * "ipv4", then the devconf directory named after the device with the
 * interface index as its binary id. The result of
 * __devinet_sysctl_register() is not checked.
 */
1516 static void devinet_sysctl_register(struct in_device *idev)
1518 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1519 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1520 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1521 idev->dev->ifindex, &idev->cnf);
/*
 * Remove both sysctl trees of a device, in the reverse order of
 * devinet_sysctl_register(): devconf directory first, neighbour parameters
 * second.
 */
1524 static void devinet_sysctl_unregister(struct in_device *idev)
1526 __devinet_sysctl_unregister(&idev->cnf);
1527 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Table for the "ip_forward" sysctl, an alias of the "all" forwarding
 * switch: it points at the FORWARDING slot of the static ipv4_devconf and is
 * bound to init_net. devinet_init_net() duplicates and rebases it for every
 * other namespace.
 */
1530 static struct ctl_table ctl_forward_entry[] = {
1532 .ctl_name = NET_IPV4_FORWARD,
1533 .procname = "ip_forward",
1534 .data = &ipv4_devconf.data[
1535 NET_IPV4_CONF_FORWARDING - 1],
1536 .maxlen = sizeof(int),
1538 .proc_handler = devinet_sysctl_forward,
1539 .strategy = devinet_conf_sysctl,
1540 .extra1 = &ipv4_devconf,
1541 .extra2 = &init_net,
/*
 * Sysctl path "net/ipv4" under which devinet_init_net() registers the
 * ip_forward table.
 */
1546 static __net_initdata struct ctl_path net_ipv4_path[] = {
1547 { .procname = "net", .ctl_name = CTL_NET, },
1548 { .procname = "ipv4", .ctl_name = NET_IPV4, },
/*
 * Per-namespace initialisation (pernet .init hook).
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects and the
 * static ip_forward table directly. Every other namespace gets kmemdup()
 * copies of all three, with the forward table's data, extra1 and extra2
 * rebased onto the copy and the namespace. With CONFIG_SYSCTL the "all" and
 * "default" devconf directories and the ip_forward table are registered and
 * the forward header is remembered in net->ipv4.forw_hdr. Finally the two
 * devconf pointers are published in net->ipv4.
 *
 * The tail of the function is the error unwinding: registrations are undone
 * in reverse order and only objects that are not the static originals are
 * released.
 * NOTE(review): the labels, kfree() calls and return values of the
 * unwinding are not visible here.
 */
1553 static __net_init int devinet_init_net(struct net *net)
1556 struct ipv4_devconf *all, *dflt;
1557 #ifdef CONFIG_SYSCTL
1558 struct ctl_table *tbl = ctl_forward_entry;
1559 struct ctl_table_header *forw_hdr;
1563 all = &ipv4_devconf;
1564 dflt = &ipv4_devconf_dflt;
1566 if (net != &init_net) {
1567 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1571 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1573 goto err_alloc_dflt;
1575 #ifdef CONFIG_SYSCTL
1576 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1580 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1581 tbl[0].extra1 = all;
1582 tbl[0].extra2 = net;
1586 #ifdef CONFIG_SYSCTL
1587 err = __devinet_sysctl_register(net, "all",
1588 NET_PROTO_CONF_ALL, all);
1592 err = __devinet_sysctl_register(net, "default",
1593 NET_PROTO_CONF_DEFAULT, dflt);
1598 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1599 if (forw_hdr == NULL)
1601 net->ipv4.forw_hdr = forw_hdr;
1604 net->ipv4.devconf_all = all;
1605 net->ipv4.devconf_dflt = dflt;
1608 #ifdef CONFIG_SYSCTL
1610 __devinet_sysctl_unregister(dflt);
1612 __devinet_sysctl_unregister(all);
1614 if (tbl != ctl_forward_entry)
1618 if (dflt != &ipv4_devconf_dflt)
1621 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown (pernet .exit hook). With CONFIG_SYSCTL the
 * ip_forward table pointer is read from the header before the header is
 * unregistered, then the "default" and "all" devconf directories are
 * removed. Finally the namespace's devconf objects are freed.
 * NOTE(review): the release of 'tbl' is not visible here.
 */
1627 static __net_exit void devinet_exit_net(struct net *net)
1629 #ifdef CONFIG_SYSCTL
1630 struct ctl_table *tbl;
1632 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1633 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1634 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1635 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1638 kfree(net->ipv4.devconf_dflt);
1639 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace hooks for this file; registered with
 * register_pernet_subsys() from devinet_init().
 */
1642 static __net_initdata struct pernet_operations devinet_ops = {
1643 .init = devinet_init_net,
1644 .exit = devinet_exit_net,
/*
 * Boot-time initialisation of the IPv4 device layer: registers the
 * per-namespace hooks, the PF_INET interface-configuration callback, the
 * netdevice notifier and the three rtnetlink address handlers (new and
 * delete as request handlers, get as a dump handler).
 */
1647 void __init devinet_init(void)
1649 register_pernet_subsys(&devinet_ops);
1651 register_gifconf(PF_INET, inet_gifconf);
1652 register_netdevice_notifier(&ip_netdev_notifier);
1654 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1655 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1656 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1659 EXPORT_SYMBOL(in_dev_finish_destroy);
1660 EXPORT_SYMBOL(inet_select_addr);
1661 EXPORT_SYMBOL(inetdev_by_index);
1662 EXPORT_SYMBOL(register_inetaddr_notifier);
1663 EXPORT_SYMBOL(unregister_inetaddr_notifier);