2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_addr.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/notifier.h>
55 #include <linux/inetdevice.h>
56 #include <linux/igmp.h>
58 #include <linux/sysctl.h>
60 #include <linux/kmod.h>
64 #include <net/route.h>
65 #include <net/ip_fib.h>
66 #include <net/netlink.h>
/*
 * Global IPv4 device configuration. Its members back the sysctl entries
 * of devinet_sysctl (each entry's .data points at an ipv4_devconf field).
 * The initializers shown enable accept_redirects and secure_redirects.
 */
68 struct ipv4_devconf ipv4_devconf = {
69 .accept_redirects = 1,
71 .secure_redirects = 1,
/*
 * Default configuration: inetdev_init() memcpy()s this into the cnf of
 * each new in_device, and devinet_init() hands it to
 * devinet_sysctl_register() with a NULL in_device (the "default" entry).
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 .accept_redirects = 1,
78 .secure_redirects = 1,
80 .accept_source_route = 1,
/*
 * Netlink attribute policy for IFA_* attributes, passed to nlmsg_parse()
 * by inet_rtm_deladdr() and rtm_to_ifaddr(). The four address attributes
 * are declared as NLA_U32; IFA_LABEL is a string of at most
 * IFNAMSIZ - 1 bytes.
 */
83 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
84 [IFA_LOCAL] = { .type = NLA_U32 },
85 [IFA_ADDRESS] = { .type = NLA_U32 },
86 [IFA_BROADCAST] = { .type = NLA_U32 },
87 [IFA_ANYCAST] = { .type = NLA_U32 },
88 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down in this file,
 * together with inetaddr_chain, the blocking notifier chain on which
 * address additions and removals are announced (see the
 * blocking_notifier_call_chain() calls on it).
 */
91 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
93 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
94 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
97 static void devinet_sysctl_register(struct in_device *in_dev,
98 struct ipv4_devconf *p);
99 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
102 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled in_ifaddr with kzalloc(GFP_KERNEL) and
 * initialise its embedded RCU head.
 * Callers in this file compare the result against NULL.
 */
104 static struct in_ifaddr *inet_alloc_ifa(void)
106 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
109 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(): recovers the in_ifaddr that
 * embeds @head and drops its reference on ifa->ifa_dev.
 * NOTE(review): presumably also frees the in_ifaddr itself - confirm.
 */
115 static void inet_rcu_free_ifa(struct rcu_head *head)
117 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
119 in_dev_put(ifa->ifa_dev);
/*
 * Defer the release of @ifa: queues inet_rcu_free_ifa() on the address's
 * rcu_head via call_rcu().
 */
123 static inline void inet_free_ifa(struct in_ifaddr *ifa)
125 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Last-stage destruction of an in_device (exported symbol).
 *
 * Traps (BUG_TRAP) if the address list or the multicast list is still
 * populated and, when NET_REFCNT_DEBUG is defined, logs the in_device
 * pointer and the device name ("NIL" when there is no device).
 * "Freeing alive in_device" is a diagnostic message.
 * NOTE(review): the condition guarding that message is not shown here -
 * confirm.
 */
128 void in_dev_finish_destroy(struct in_device *idev)
130 struct net_device *dev = idev->dev;
132 BUG_TRAP(!idev->ifa_list);
133 BUG_TRAP(!idev->mc_list);
134 #ifdef NET_REFCNT_DEBUG
135 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
136 idev, dev ? dev->name : "NIL");
140 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 state (in_device) for a network device.
 *
 * Steps visible here: zero-allocate the in_device, initialise its RCU
 * head, seed cnf from ipv4_devconf_dflt (with cnf.sysctl cleared),
 * allocate ARP neighbour parameters from arp_tbl, register the neighbour
 * and devinet sysctl entries, initialise multicast state, and finally
 * publish the in_device through dev->ip_ptr with rcu_assign_pointer().
 * Publishing comes last because packets can be received as soon as
 * ip_ptr is set.
 *
 * Returns the new in_device; callers in this file treat NULL as failure.
 */
146 struct in_device *inetdev_init(struct net_device *dev)
148 struct in_device *in_dev;
152 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
155 INIT_RCU_HEAD(&in_dev->rcu_head);
156 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
157 in_dev->cnf.sysctl = NULL;
159 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
161 /* Reference in_dev->dev */
164 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
165 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
168 /* Account for reference dev->ip_ptr (below) */
172 devinet_sysctl_register(in_dev, &in_dev->cnf);
174 ip_mc_init_dev(in_dev);
175 if (dev->flags & IFF_UP)
178 /* we can receive as soon as ip_ptr is set -- do this last */
179 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(): recovers the in_device that
 * embeds @head.
 * NOTE(review): presumably drops a reference on that in_device - confirm.
 */
187 static void in_dev_rcu_put(struct rcu_head *head)
189 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down a device's IPv4 state: destroy the multicast state, delete
 * every address still on ifa_list through inet_del_ifa(), unregister the
 * devinet and neighbour sysctl entries, release the ARP neighbour
 * parameters, and queue in_dev_rcu_put() with call_rcu().
 *
 * The loopback device is tested for near the top.
 * NOTE(review): what happens for loopback is not shown here - confirm.
 */
193 static void inetdev_destroy(struct in_device *in_dev)
195 struct in_ifaddr *ifa;
196 struct net_device *dev;
201 if (dev == &loopback_dev)
206 ip_mc_destroy_dev(in_dev);
208 while ((ifa = in_dev->ifa_list) != NULL) {
209 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 devinet_sysctl_unregister(&in_dev->cnf);
220 neigh_sysctl_unregister(in_dev->arp_parms);
222 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
225 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the addresses visited by for_primary_ifa() on @in_dev, looking for
 * one that matches @a (inet_ifa_match) and, when @b is non-zero, also
 * matches @b.
 * NOTE(review): presumably returns non-zero when such an address exists
 * and 0 otherwise - confirm against the return statements.
 */
228 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
231 for_primary_ifa(in_dev) {
232 if (inet_ifa_match(a, ifa)) {
233 if (!b || inet_ifa_match(b, ifa)) {
238 } endfor_ifa(in_dev);
/*
 * Remove the address referenced by *ifap from in_dev's address list and
 * announce the removal.
 *
 * in_dev:   device state that owns the list
 * ifap:     link (pointer to the list pointer) referring to the address
 * destroy:  NOTE(review): presumably requests that the removed address be
 *           freed - confirm
 * nlh, pid: netlink request context forwarded to rtmsg_ifa()
 *
 * Behaviour visible in this function:
 *  - If the address is primary (IFA_F_SECONDARY clear), the entries
 *    after it are scanned. Secondaries with the same mask that match
 *    its address are unlinked and announced (RTM_DELADDR plus the
 *    notifier chain) unless secondary promotion
 *    (IN_DEV_PROMOTE_SECONDARIES) is in effect.
 *  - The address itself is unlinked, then announced with RTM_DELADDR
 *    followed by a NETDEV_DOWN notification, netlink message first.
 *  - A promoted secondary is re-linked after last_prim, loses
 *    IFA_F_SECONDARY and is announced with RTM_NEWADDR; the entries
 *    after it are then compared against the deleted address's
 *    mask and prefix.
 *  - inetdev_destroy() is invoked when ifa_list is empty.
 *    NOTE(review): check the enclosing condition.
 */
243 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
244 int destroy, struct nlmsghdr *nlh, u32 pid)
246 struct in_ifaddr *promote = NULL;
247 struct in_ifaddr *ifa, *ifa1 = *ifap;
248 struct in_ifaddr *last_prim = in_dev->ifa_list;
249 struct in_ifaddr *prev_prom = NULL;
250 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
254 /* 1. Deleting primary ifaddr forces deletion all secondaries
255 * unless alias promotion is set
258 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
259 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
261 while ((ifa = *ifap1) != NULL) {
262 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
263 ifa1->ifa_scope <= ifa->ifa_scope)
266 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
267 ifa1->ifa_mask != ifa->ifa_mask ||
268 !inet_ifa_match(ifa1->ifa_address, ifa)) {
269 ifap1 = &ifa->ifa_next;
275 *ifap1 = ifa->ifa_next;
277 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
278 blocking_notifier_call_chain(&inetaddr_chain,
290 *ifap = ifa1->ifa_next;
292 /* 3. Announce address deletion */
294 /* Send message first, then call notifier.
295 At first sight, FIB update triggered by notifier
296 will refer to already deleted ifaddr, that could confuse
297 netlink listeners. It is not true: look, gated sees
298 that route deleted and if it still thinks that ifaddr
299 is valid, it will try to restore deleted routes... Grr.
300 So that, this order is correct.
302 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
303 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
308 prev_prom->ifa_next = promote->ifa_next;
309 promote->ifa_next = last_prim->ifa_next;
310 last_prim->ifa_next = promote;
313 promote->ifa_flags &= ~IFA_F_SECONDARY;
314 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
315 blocking_notifier_call_chain(&inetaddr_chain,
317 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
318 if (ifa1->ifa_mask != ifa->ifa_mask ||
319 !inet_ifa_match(ifa1->ifa_address, ifa))
328 if (!in_dev->ifa_list)
329 inetdev_destroy(in_dev);
/*
 * Convenience wrapper around __inet_del_ifa() for callers without a
 * netlink request: passes a NULL nlmsghdr and pid 0.
 */
333 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
336 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * An address without ifa_local is special-cased first.
 * NOTE(review): the outcome of that case is not shown here - confirm.
 * Otherwise the address starts as primary (IFA_F_SECONDARY cleared) and
 * the list is walked:
 *  - last_primary follows the link after each primary entry for which
 *    ifa->ifa_scope <= entry scope;
 *  - an entry with the same mask that matches the new address makes the
 *    new address secondary (IFA_F_SECONDARY), after checks for an
 *    identical ifa_local and for a differing scope.
 *    NOTE(review): those two cases presumably reject the insert -
 *    confirm.
 * For a primary address net_srandom() is fed with ifa_local. The address
 * is linked at *ifap, then announced with RTM_NEWADDR followed by a
 * NETDEV_UP notification (netlink message first, so listeners know the
 * address before the notifier-triggered FIB update).
 *
 * nlh and pid are forwarded to rtmsg_ifa(). Returns an int status.
 */
339 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
342 struct in_device *in_dev = ifa->ifa_dev;
343 struct in_ifaddr *ifa1, **ifap, **last_primary;
347 if (!ifa->ifa_local) {
352 ifa->ifa_flags &= ~IFA_F_SECONDARY;
353 last_primary = &in_dev->ifa_list;
355 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
356 ifap = &ifa1->ifa_next) {
357 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
358 ifa->ifa_scope <= ifa1->ifa_scope)
359 last_primary = &ifa1->ifa_next;
360 if (ifa1->ifa_mask == ifa->ifa_mask &&
361 inet_ifa_match(ifa1->ifa_address, ifa)) {
362 if (ifa1->ifa_local == ifa->ifa_local) {
366 if (ifa1->ifa_scope != ifa->ifa_scope) {
370 ifa->ifa_flags |= IFA_F_SECONDARY;
374 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
375 net_srandom(ifa->ifa_local);
379 ifa->ifa_next = *ifap;
382 /* Send message first, then call notifier.
383 Notifier will trigger FIB update, so that
384 listeners of netlink will know about new ifaddr */
385 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
386 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers without a
 * netlink request: passes a NULL nlmsghdr and pid 0 and returns its
 * result unchanged.
 */
391 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach @ifa to @dev and insert it into the device's address list.
 *
 * The device's in_device is looked up with __in_dev_get_rtnl() and can
 * be created through inetdev_init(); presumably that happens only when
 * the lookup finds none. If ifa->ifa_dev is not already that in_device
 * it must be unset (BUG_TRAP) and is pointed at it. A loopback
 * ifa_local forces RT_SCOPE_HOST.
 *
 * Returns the result of inet_insert_ifa().
 */
396 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 struct in_device *in_dev = __in_dev_get_rtnl(dev);
403 in_dev = inetdev_init(dev);
409 if (ifa->ifa_dev != in_dev) {
410 BUG_TRAP(!ifa->ifa_dev);
412 ifa->ifa_dev = in_dev;
414 if (LOOPBACK(ifa->ifa_local))
415 ifa->ifa_scope = RT_SCOPE_HOST;
416 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the network device with index @ifindex
 * (exported symbol). The device lookup and in_dev_get() both happen
 * under read_lock(&dev_base_lock). in_dev starts as NULL, which is
 * presumably what is returned when no device is found.
 * NOTE(review): in_dev_get() presumably takes a reference that the
 * caller must drop (inet_rtm_deladdr() calls __in_dev_put()) - confirm.
 */
419 struct in_device *inetdev_by_index(int ifindex)
421 struct net_device *dev;
422 struct in_device *in_dev = NULL;
423 read_lock(&dev_base_lock);
424 dev = __dev_get_by_index(ifindex);
426 in_dev = in_dev_get(dev);
427 read_unlock(&dev_base_lock);
431 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the addresses visited by for_primary_ifa() on @in_dev for one
 * whose ifa_mask equals mask and which matches @prefix (inet_ifa_match).
 * NOTE(review): presumably returns the matching in_ifaddr, or NULL when
 * there is none - confirm against the return statements.
 */
433 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
438 for_primary_ifa(in_dev) {
439 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
441 } endfor_ifa(in_dev);
/*
 * rtnetlink RTM_DELADDR handler (installed in inet_rtnetlink_table).
 *
 * Parses the request against ifa_ipv4_policy and resolves the in_device
 * from ifm->ifa_index. It then walks the address list and filters
 * entries on IFA_LOCAL, on IFA_LABEL and on IFA_ADDRESS together with
 * the prefix length; the label and address filters apply only when the
 * attribute is present. An entry that passes is removed with
 * __inet_del_ifa(), with destroy set to 1 and the sender's netlink pid.
 * err is set to -EADDRNOTAVAIL after the loop, i.e. on the no-match path.
 */
445 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
447 struct nlattr *tb[IFA_MAX+1];
448 struct in_device *in_dev;
449 struct ifaddrmsg *ifm;
450 struct in_ifaddr *ifa, **ifap;
455 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
459 ifm = nlmsg_data(nlh);
460 in_dev = inetdev_by_index(ifm->ifa_index);
461 if (in_dev == NULL) {
466 __in_dev_put(in_dev);
468 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469 ifap = &ifa->ifa_next) {
471 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
474 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
477 if (tb[IFA_ADDRESS] &&
478 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
479 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
482 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
486 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an rtnetlink address message.
 *
 * Visible steps: parse the attributes against ifa_ipv4_policy; check
 * that the prefix length is at most 32 and that IFA_LOCAL is present;
 * find the device by ifm->ifa_index; obtain its in_device, creating one
 * with inetdev_init() when the device has none; allocate the address.
 * IFA_ADDRESS defaults to IFA_LOCAL when absent. Prefix length, mask,
 * flags, scope, owning in_device, local, address, broadcast and anycast
 * are then filled in from the message; the label comes from IFA_LABEL or
 * from dev->name.
 *
 * An in_device created here may outlive a failed allocation: it stays
 * assigned to its device and is destroyed together with it.
 *
 * NOTE(review): how failures are reported to the caller is not shown
 * here - confirm before relying on a NULL check.
 */
491 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
493 struct nlattr *tb[IFA_MAX+1];
494 struct in_ifaddr *ifa;
495 struct ifaddrmsg *ifm;
496 struct net_device *dev;
497 struct in_device *in_dev;
500 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
504 ifm = nlmsg_data(nlh);
505 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
508 dev = __dev_get_by_index(ifm->ifa_index);
514 in_dev = __in_dev_get_rtnl(dev);
515 if (in_dev == NULL) {
516 in_dev = inetdev_init(dev);
517 if (in_dev == NULL) {
523 ifa = inet_alloc_ifa();
526 * A potential indev allocation can be left alive, it stays
527 * assigned to its device and is destroy with it.
535 if (tb[IFA_ADDRESS] == NULL)
536 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
538 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
539 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
540 ifa->ifa_flags = ifm->ifa_flags;
541 ifa->ifa_scope = ifm->ifa_scope;
542 ifa->ifa_dev = in_dev;
544 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
545 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
547 if (tb[IFA_BROADCAST])
548 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
551 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
554 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
556 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink RTM_NEWADDR handler (installed in inet_rtnetlink_table):
 * converts the message to an in_ifaddr with rtm_to_ifaddr() and inserts
 * it with __inet_insert_ifa(), passing along the request header and the
 * sender's netlink pid. Returns the insert result.
 */
564 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
566 struct in_ifaddr *ifa;
570 ifa = rtm_to_ifaddr(nlh);
574 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
578 * Determine a default network mask, based on the IP address.
/*
 * Classful default prefix length for @addr (network byte order).
 *
 * rc starts at -1, the "something else, probably a multicast" answer.
 * The address is converted to host order and tested with IN_CLASSA,
 * IN_CLASSB and IN_CLASSC in turn. devinet_ioctl() treats a negative
 * result as an invalid address and otherwise uses the value as
 * ifa_prefixlen.
 * NOTE(review): the per-class values are presumably 8, 16 and 24 -
 * confirm against the assignments.
 */
581 static __inline__ int inet_abc_len(__be32 addr)
583 int rc = -1; /* Something else, probably a multicast. */
588 __u32 haddr = ntohl(addr);
590 if (IN_CLASSA(haddr))
592 else if (IN_CLASSB(haddr))
594 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * cmd: ioctl number (the SIOCGIF... getters, the SIOCSIF... setters and
 *      SIOCSIFFLAGS appear below)
 * arg: user-space pointer to a struct ifreq
 *
 * Outline of what is visible here:
 *  - The ifreq is copied in, ifr_name is NUL-terminated, the caller's
 *    original sockaddr is saved in sin_orig, the name is searched for
 *    ':' (alias label) and dev_load() is called with the name.
 *  - Getters: tryaddrmatch records whether the caller passed an AF_INET
 *    address; the reply sockaddr is zeroed and set to AF_INET.
 *  - Setters require CAP_NET_ADMIN; the address setters also check that
 *    sin_family is AF_INET.
 *  - The device is found by name. The target address is selected first
 *    by label plus original address (4.4BSD alias style), then by label
 *    alone (4.3BSD style).
 *  - With no address selected, only SIOCSIFADDR and SIOCSIFFLAGS go on;
 *    ret has been set to -EADDRNOTAVAIL beforehand.
 *  - Getters answer from ifa_local, ifa_broadcast, ifa_address or
 *    ifa_mask.
 *  - Flag changes: the address is deleted when IFF_UP is not requested,
 *    and dev_change_flags() applies the flags.
 *    NOTE(review): the branch structure between the two is not shown -
 *    confirm.
 *  - SIOCSIFADDR validates the address with inet_abc_len(), allocates
 *    or reuses an in_ifaddr, derives the classful prefix, mask and
 *    broadcast (a 32-bit prefix is also set; presumably that is the
 *    IFF_POINTOPOINT branch - confirm) and calls inet_set_ifa().
 *  - SIOCSIFBRDADDR, SIOCSIFDSTADDR and SIOCSIFNETMASK unlink the
 *    address without destroying it, modify it and re-insert it. The
 *    netmask case recomputes the broadcast address only when the old one
 *    was the default for the old mask.
 *  - The result is copied back with copy_to_user(); a failed copy
 *    yields -EFAULT.
 */
602 int devinet_ioctl(unsigned int cmd, void __user *arg)
605 struct sockaddr_in sin_orig;
606 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
607 struct in_device *in_dev;
608 struct in_ifaddr **ifap = NULL;
609 struct in_ifaddr *ifa = NULL;
610 struct net_device *dev;
613 int tryaddrmatch = 0;
616 * Fetch the caller's info block into kernel space
619 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
621 ifr.ifr_name[IFNAMSIZ - 1] = 0;
623 /* save original address for comparison */
624 memcpy(&sin_orig, sin, sizeof(*sin));
626 colon = strchr(ifr.ifr_name, ':');
631 dev_load(ifr.ifr_name);
635 case SIOCGIFADDR: /* Get interface address */
636 case SIOCGIFBRDADDR: /* Get the broadcast address */
637 case SIOCGIFDSTADDR: /* Get the destination address */
638 case SIOCGIFNETMASK: /* Get the netmask for the interface */
639 /* Note that these ioctls will not sleep,
640 so that we do not impose a lock.
641 One day we will be forced to put shlock here (I mean SMP)
643 tryaddrmatch = (sin_orig.sin_family == AF_INET);
644 memset(sin, 0, sizeof(*sin));
645 sin->sin_family = AF_INET;
650 if (!capable(CAP_NET_ADMIN))
653 case SIOCSIFADDR: /* Set interface address (and family) */
654 case SIOCSIFBRDADDR: /* Set the broadcast address */
655 case SIOCSIFDSTADDR: /* Set the destination address */
656 case SIOCSIFNETMASK: /* Set the netmask for the interface */
658 if (!capable(CAP_NET_ADMIN))
661 if (sin->sin_family != AF_INET)
672 if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
678 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
680 /* Matthias Andree */
681 /* compare label and address (4.4BSD style) */
682 /* note: we only do this for a limited set of ioctls
683 and only if the original address family was AF_INET.
684 This is checked above. */
685 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
686 ifap = &ifa->ifa_next) {
687 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
688 sin_orig.sin_addr.s_addr ==
694 /* we didn't get a match, maybe the application is
695 4.3BSD-style and passed in junk so we fall back to
696 comparing just the label */
698 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
699 ifap = &ifa->ifa_next)
700 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
705 ret = -EADDRNOTAVAIL;
706 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
710 case SIOCGIFADDR: /* Get interface address */
711 sin->sin_addr.s_addr = ifa->ifa_local;
714 case SIOCGIFBRDADDR: /* Get the broadcast address */
715 sin->sin_addr.s_addr = ifa->ifa_broadcast;
718 case SIOCGIFDSTADDR: /* Get the destination address */
719 sin->sin_addr.s_addr = ifa->ifa_address;
722 case SIOCGIFNETMASK: /* Get the netmask for the interface */
723 sin->sin_addr.s_addr = ifa->ifa_mask;
728 ret = -EADDRNOTAVAIL;
732 if (!(ifr.ifr_flags & IFF_UP))
733 inet_del_ifa(in_dev, ifap, 1);
736 ret = dev_change_flags(dev, ifr.ifr_flags);
739 case SIOCSIFADDR: /* Set interface address (and family) */
741 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
746 if ((ifa = inet_alloc_ifa()) == NULL)
749 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
751 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
754 if (ifa->ifa_local == sin->sin_addr.s_addr)
756 inet_del_ifa(in_dev, ifap, 0);
757 ifa->ifa_broadcast = 0;
758 ifa->ifa_anycast = 0;
761 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
763 if (!(dev->flags & IFF_POINTOPOINT)) {
764 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
765 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
766 if ((dev->flags & IFF_BROADCAST) &&
767 ifa->ifa_prefixlen < 31)
768 ifa->ifa_broadcast = ifa->ifa_address |
771 ifa->ifa_prefixlen = 32;
772 ifa->ifa_mask = inet_make_mask(32);
774 ret = inet_set_ifa(dev, ifa);
777 case SIOCSIFBRDADDR: /* Set the broadcast address */
779 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
780 inet_del_ifa(in_dev, ifap, 0);
781 ifa->ifa_broadcast = sin->sin_addr.s_addr;
782 inet_insert_ifa(ifa);
786 case SIOCSIFDSTADDR: /* Set the destination address */
788 if (ifa->ifa_address == sin->sin_addr.s_addr)
791 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
794 inet_del_ifa(in_dev, ifap, 0);
795 ifa->ifa_address = sin->sin_addr.s_addr;
796 inet_insert_ifa(ifa);
799 case SIOCSIFNETMASK: /* Set the netmask for the interface */
802 * The mask we set must be legal.
805 if (bad_mask(sin->sin_addr.s_addr, 0))
808 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
809 __be32 old_mask = ifa->ifa_mask;
810 inet_del_ifa(in_dev, ifap, 0);
811 ifa->ifa_mask = sin->sin_addr.s_addr;
812 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
814 /* See if current broadcast address matches
815 * with current netmask, then recalculate
816 * the broadcast address. Otherwise it's a
817 * funny address, so don't touch it since
818 * the user seems to know what (s)he's doing...
820 if ((dev->flags & IFF_BROADCAST) &&
821 (ifa->ifa_prefixlen < 31) &&
822 (ifa->ifa_broadcast ==
823 (ifa->ifa_local|~old_mask))) {
824 ifa->ifa_broadcast = (ifa->ifa_local |
825 ~sin->sin_addr.s_addr);
827 inet_insert_ifa(ifa);
837 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Address enumeration callback registered with
 * register_gifconf(PF_INET, ...) in devinet_init().
 *
 * dev: device whose IPv4 addresses are reported
 * buf: user-space buffer receiving one struct ifreq per address
 * len: space remaining in buf, in bytes
 *
 * A device without an in_device or without addresses is special-cased.
 * For each address the remaining length is compared with
 * sizeof(struct ifreq). The ifreq is then zeroed, named from the
 * address label or the device name, given an AF_INET address and copied
 * to user space. buf, len and done advance by sizeof(struct ifreq) per
 * entry.
 * NOTE(review): presumably returns done (bytes produced) or an error
 * when copy_to_user() fails - confirm against the return statements.
 */
841 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843 struct in_device *in_dev = __in_dev_get_rtnl(dev);
844 struct in_ifaddr *ifa;
848 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
851 for (; ifa; ifa = ifa->ifa_next) {
856 if (len < (int) sizeof(ifr))
858 memset(&ifr, 0, sizeof(struct ifreq));
860 strcpy(ifr.ifr_name, ifa->ifa_label);
862 strcpy(ifr.ifr_name, dev->name);
864 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
865 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
868 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
872 buf += sizeof(struct ifreq);
873 len -= sizeof(struct ifreq);
874 done += sizeof(struct ifreq);
/*
 * Choose a local address for talking to dst (exported symbol).
 *
 * dev:   preferred device
 * dst:   destination address, or 0 for no subnet preference
 * scope: largest ifa_scope value an address may have to be eligible
 *
 * First pass: the addresses visited by for_primary_ifa() on dev's own
 * in_device. Entries with ifa_scope > scope are tested for and
 * presumably skipped; an entry is taken when dst is 0 or matches it,
 * and ifa_local of a non-matching entry can also be recorded in addr.
 * NOTE(review): the condition on that fallback assignment is not
 * shown - confirm.
 *
 * Second pass: every device on dev_base is scanned under
 * read_lock(&dev_base_lock) and the first address whose scope is not
 * RT_SCOPE_LINK and is <= scope is used.
 */
880 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
883 struct in_device *in_dev;
886 in_dev = __in_dev_get_rcu(dev);
890 for_primary_ifa(in_dev) {
891 if (ifa->ifa_scope > scope)
893 if (!dst || inet_ifa_match(dst, ifa)) {
894 addr = ifa->ifa_local;
898 addr = ifa->ifa_local;
899 } endfor_ifa(in_dev);
906 /* Not loopback addresses on loopback should be preferred
907 in this case. It is importnat that lo is the first interface
910 read_lock(&dev_base_lock);
912 for (dev = dev_base; dev; dev = dev->next) {
913 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
916 for_primary_ifa(in_dev) {
917 if (ifa->ifa_scope != RT_SCOPE_LINK &&
918 ifa->ifa_scope <= scope) {
919 addr = ifa->ifa_local;
920 goto out_unlock_both;
922 } endfor_ifa(in_dev);
925 read_unlock(&dev_base_lock);
/*
 * Per-device helper for inet_confirm_addr(): scans the addresses of
 * in_dev for a local address acceptable under the local, dst and scope
 * constraints.
 *
 * A candidate is taken from an entry whose ifa_local equals local (or
 * when local is 0) and whose scope is <= scope. "same" records whether
 * an entry matched both local and dst, with 0 acting as a wildcard for
 * either. When the chosen address does not match an entry, that entry's
 * ifa_local can replace it if its scope is <= scope.
 *
 * Returns the chosen address when same is set, otherwise 0.
 */
931 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
932 __be32 local, int scope)
939 (local == ifa->ifa_local || !local) &&
940 ifa->ifa_scope <= scope) {
941 addr = ifa->ifa_local;
946 same = (!local || inet_ifa_match(local, ifa)) &&
947 (!dst || inet_ifa_match(dst, ifa));
951 /* Is the selected addr into dst subnet? */
952 if (inet_ifa_match(addr, ifa))
954 /* No, then can we use new local src? */
955 if (ifa->ifa_scope <= scope) {
956 addr = ifa->ifa_local;
959 /* search for large dst subnet for addr */
963 } endfor_ifa(in_dev);
965 return same? addr : 0;
969 * Confirm that local IP address exists using wildcards:
970 * - dev: only on this interface, 0=any interface
971 * - dst: only in the same subnet as dst, 0=any dst
972 * - local: address, 0=autoselect the local address
973 * - scope: maximum allowed scope value for the local address
/*
 * Check for a usable local address with confirm_addr_indev(), first on
 * the in_device of dev and then on each device of dev_base, the latter
 * under read_lock(&dev_base_lock).
 * NOTE(review): presumably the first form is used when dev is given and
 * the scan over all devices when it is not, stopping at the first
 * non-zero answer - confirm against the conditions around the two
 * calls.
 */
975 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
978 struct in_device *in_dev;
982 if ((in_dev = __in_dev_get_rcu(dev)))
983 addr = confirm_addr_indev(in_dev, dst, local, scope);
989 read_lock(&dev_base_lock);
991 for (dev = dev_base; dev; dev = dev->next) {
992 if ((in_dev = __in_dev_get_rcu(dev))) {
993 addr = confirm_addr_indev(in_dev, dst, local, scope);
999 read_unlock(&dev_base_lock);
/*
 * Subscribe nb to IPv4 address events (exported symbol): adds it to
 * inetaddr_chain and returns the result of
 * blocking_notifier_chain_register().
 */
1008 int register_inetaddr_notifier(struct notifier_block *nb)
1010 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove nb from inetaddr_chain (exported symbol) and return the result
 * of blocking_notifier_chain_unregister().
 */
1013 int unregister_inetaddr_notifier(struct notifier_block *nb)
1015 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1018 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1019 * alias numbering and to create unique labels if possible.
/*
 * Relabel every address of in_dev after dev has been renamed.
 *
 * For each address the previous label is saved in old and the label is
 * overwritten with dev->name. A ":suffix" is then appended with
 * strcat() when it fits within IFNAMSIZ; otherwise it is copied over
 * the tail of the label so that the label still ends with the suffix.
 * A ":<named>" suffix can be generated with sprintf().
 *
 * NOTE(review): strchr() looks for ':' in ifa->ifa_label after that
 * buffer has been overwritten with dev->name, so an alias suffix that
 * existed in the old label (saved in old) is never consulted. If the
 * intent is to preserve existing alias numbering, the search probably
 * belongs on old - confirm.
 */
1021 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1023 struct in_ifaddr *ifa;
1026 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1027 char old[IFNAMSIZ], *dot;
1029 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1030 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1033 dot = strchr(ifa->ifa_label, ':');
1035 sprintf(old, ":%d", named);
1038 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1039 strcat(ifa->ifa_label, dot);
1041 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1046 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (wired up through ip_netdev_notifier).
 *
 * this:  the notifier block
 * event: NETDEV_* event code
 * ptr arrives as the struct net_device the event refers to.
 *
 * Behaviour visible here:
 *  - NETDEV_REGISTER of the loopback device creates its in_device, with
 *    a panic() on the failure path, and sets cnf.no_xfrm and
 *    cnf.no_policy on it.
 *  - A NETDEV_REGISTER case in the switch only logs
 *    "inetdev_event: bug".
 *  - For the loopback device an address is allocated with
 *    INADDR_LOOPBACK, prefix length 8 and RT_SCOPE_HOST, labelled with
 *    the device name and inserted.
 *    NOTE(review): the case label this sits under is not shown -
 *    confirm.
 *  - NETDEV_CHANGEMTU is listed directly before NETDEV_UNREGISTER, which
 *    destroys the in_device.
 *  - NETDEV_CHANGENAME relabels the addresses and, with CONFIG_SYSCTL,
 *    unregisters and re-registers the devinet and neighbour sysctl
 *    entries.
 */
1048 static int inetdev_event(struct notifier_block *this, unsigned long event,
1051 struct net_device *dev = ptr;
1052 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1057 if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1058 in_dev = inetdev_init(dev);
1060 panic("devinet: Failed to create loopback\n");
1061 in_dev->cnf.no_xfrm = 1;
1062 in_dev->cnf.no_policy = 1;
1068 case NETDEV_REGISTER:
1069 printk(KERN_DEBUG "inetdev_event: bug\n");
1075 if (dev == &loopback_dev) {
1076 struct in_ifaddr *ifa;
1077 if ((ifa = inet_alloc_ifa()) != NULL) {
1079 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1080 ifa->ifa_prefixlen = 8;
1081 ifa->ifa_mask = inet_make_mask(8);
1082 in_dev_hold(in_dev);
1083 ifa->ifa_dev = in_dev;
1084 ifa->ifa_scope = RT_SCOPE_HOST;
1085 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1086 inet_insert_ifa(ifa);
1094 case NETDEV_CHANGEMTU:
1097 /* MTU fell below 68, disable IP */
1098 case NETDEV_UNREGISTER:
1099 inetdev_destroy(in_dev);
1101 case NETDEV_CHANGENAME:
1102 /* Do not notify about label change, this event is
1103 * not interesting to applications using netlink.
1105 inetdev_changename(dev, in_dev);
1107 #ifdef CONFIG_SYSCTL
1108 devinet_sysctl_unregister(&in_dev->cnf);
1109 neigh_sysctl_unregister(in_dev->arp_parms);
1110 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1111 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1112 devinet_sysctl_register(in_dev, &in_dev->cnf);
/*
 * Notifier block that routes netdevice events to inetdev_event();
 * registered by devinet_init() via register_netdevice_notifier().
 */
1120 static struct notifier_block ip_netdev_notifier = {
1121 .notifier_call =inetdev_event,
/*
 * Payload size needed for one address notification: the aligned
 * ifaddrmsg header plus four 4-byte attributes (address, local,
 * broadcast, anycast) and a label attribute of IFNAMSIZ bytes.
 * rtmsg_ifa() passes the result to nlmsg_new().
 */
1124 static inline size_t inet_nlmsg_size(void)
1126 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1127 + nla_total_size(4) /* IFA_ADDRESS */
1128 + nla_total_size(4) /* IFA_LOCAL */
1129 + nla_total_size(4) /* IFA_BROADCAST */
1130 + nla_total_size(4) /* IFA_ANYCAST */
1131 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Append one address message describing ifa to skb.
 *
 * skb:      buffer being filled
 * ifa:      address to describe
 * pid, seq: netlink pid and sequence number for the message header
 * event:    message type (callers pass RTM_NEWADDR or RTM_DELADDR)
 * flags:    netlink message flags (e.g. NLM_F_MULTI from the dump path)
 *
 * The header is filled with AF_INET, the prefix length, the address
 * flags with IFA_F_PERMANENT always OR-ed in, the scope and the ifindex
 * of the owning device. IFA_ADDRESS, IFA_BROADCAST and IFA_ANYCAST are
 * emitted only when non-zero, and IFA_LABEL only when the label is
 * non-empty. IFA_LOCAL is emitted as well.
 * NOTE(review): the guard on IFA_LOCAL is not shown - confirm.
 *
 * Returns the result of nlmsg_end() on success and of nlmsg_cancel() on
 * the failure path.
 */
1134 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1135 u32 pid, u32 seq, int event, unsigned int flags)
1137 struct ifaddrmsg *ifm;
1138 struct nlmsghdr *nlh;
1140 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1144 ifm = nlmsg_data(nlh);
1145 ifm->ifa_family = AF_INET;
1146 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1147 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1148 ifm->ifa_scope = ifa->ifa_scope;
1149 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1151 if (ifa->ifa_address)
1152 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1155 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1157 if (ifa->ifa_broadcast)
1158 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1160 if (ifa->ifa_anycast)
1161 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1163 if (ifa->ifa_label[0])
1164 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166 return nlmsg_end(skb, nlh);
1169 return nlmsg_cancel(skb, nlh);
/*
 * RTM_GETADDR dump callback (installed in inet_rtnetlink_table).
 *
 * The dump is resumable: cb->args[0] and cb->args[1] hold the device
 * index and the per-device address index to restart from, and
 * cb->args[1] is written back before returning. Devices are walked
 * under read_lock(&dev_base_lock). Per device, entries with
 * ip_idx < s_ip_idx are tested for and presumably skipped; the others
 * are emitted with inet_fill_ifaddr() as RTM_NEWADDR messages flagged
 * NLM_F_MULTI. A fill result <= 0 is treated specially.
 * NOTE(review): that presumably ends the pass because the skb is full -
 * confirm.
 */
1172 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1175 struct net_device *dev;
1176 struct in_device *in_dev;
1177 struct in_ifaddr *ifa;
1178 int s_ip_idx, s_idx = cb->args[0];
1180 s_ip_idx = ip_idx = cb->args[1];
1181 read_lock(&dev_base_lock);
1182 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1188 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1193 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1194 ifa = ifa->ifa_next, ip_idx++) {
1195 if (ip_idx < s_ip_idx)
1197 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1208 read_unlock(&dev_base_lock);
1210 cb->args[1] = ip_idx;
/*
 * Send an address notification for ifa to the RTNLGRP_IPV4_IFADDR
 * netlink group.
 *
 * event: message type (RTM_NEWADDR or RTM_DELADDR at the call sites)
 * ifa:   address the message describes
 * nlh:   request that triggered the change, or NULL; supplies the
 *        sequence number (0 when NULL) and is handed to rtnl_notify()
 *
 * A buffer of inet_nlmsg_size() bytes is allocated with GFP_KERNEL and
 * filled by inet_fill_ifaddr(); a fill failure would mean
 * inet_nlmsg_size() is wrong. rtnl_set_sk_err() is called with err for
 * the same group.
 * NOTE(review): the condition guarding rtnl_set_sk_err() is not shown,
 * presumably err < 0 - confirm.
 */
1215 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1218 struct sk_buff *skb;
1219 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1222 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1226 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1227 /* failure implies BUG in inet_nlmsg_size() */
1230 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
/*
 * rtnetlink dispatch table for PF_INET, installed by devinet_init() in
 * rtnetlink_links[PF_INET]. Entries are indexed by message type minus
 * RTM_BASE; .doit handles a single request and .dumpit a dump request.
 * The address handlers live in this file; the route and rule handlers
 * are defined elsewhere.
 */
1236 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1237 [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
1238 [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
1239 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
1240 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
1241 [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
1242 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1243 .dumpit = inet_dump_fib, },
1244 #ifdef CONFIG_IP_MULTIPLE_TABLES
1245 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1249 #ifdef CONFIG_SYSCTL
/*
 * Propagate a change of the global forwarding setting.
 *
 * Reads ipv4_devconf.forwarding, sets the global accept_redirects to its
 * logical negation, copies the value into the default configuration,
 * and then walks every device under read_lock(&dev_base_lock), storing
 * the value in the forwarding field of each in_device.
 */
1251 void inet_forward_change(void)
1253 struct net_device *dev;
1254 int on = ipv4_devconf.forwarding;
1256 ipv4_devconf.accept_redirects = !on;
1257 ipv4_devconf_dflt.forwarding = on;
1259 read_lock(&dev_base_lock);
1260 for (dev = dev_base; dev; dev = dev->next) {
1261 struct in_device *in_dev;
1263 in_dev = __in_dev_get_rcu(dev);
1265 in_dev->cnf.forwarding = on;
1268 read_unlock(&dev_base_lock);
/*
 * proc handler for the "forwarding" sysctl entries.
 *
 * Delegates the read or write to proc_dointvec(). After a write, when
 * *valp differs from val, a change to the global ipv4_devconf.forwarding
 * triggers inet_forward_change(). A change to any entry other than the
 * default configuration's takes a separate branch.
 * NOTE(review): val is presumably the value before the write, and that
 * branch's action is not shown - confirm both.
 */
1273 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1274 struct file* filp, void __user *buffer,
1275 size_t *lenp, loff_t *ppos)
1277 int *valp = ctl->data;
1279 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1281 if (write && *valp != val) {
1282 if (valp == &ipv4_devconf.forwarding)
1283 inet_forward_change();
1284 else if (valp != &ipv4_devconf_dflt.forwarding)
/*
 * proc handler used by the disable_xfrm, disable_policy,
 * force_igmp_version and promote_secondaries entries.
 *
 * Delegates to proc_dointvec() and then, for a write after which *valp
 * differs from val, performs an extra action.
 * NOTE(review): val is presumably the value before the write, and the
 * action is not shown; the name suggests a cache flush - confirm.
 */
1291 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1292 struct file* filp, void __user *buffer,
1293 size_t *lenp, loff_t *ppos)
1295 int *valp = ctl->data;
1297 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1299 if (write && *valp != val)
/*
 * sysctl strategy routine paired with ipv4_doint_and_flush.
 *
 * Checks for a missing or empty new value and for a new length other
 * than sizeof(int), then fetches the new integer from user space with
 * get_user(). When the caller asked for the old value (oldval and
 * oldlenp both set), the requested length is read from oldlenp, capped
 * at table->maxlen, that many bytes of the current value are copied out
 * and the length is written back.
 * NOTE(review): the return codes and the final store or flush are not
 * shown - confirm.
 */
1305 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1306 void __user *oldval, size_t __user *oldlenp,
1307 void __user *newval, size_t newlen)
1309 int *valp = table->data;
1312 if (!newval || !newlen)
1315 if (newlen != sizeof(int))
1318 if (get_user(new, (int __user *)newval))
1324 if (oldval && oldlenp) {
1327 if (get_user(len, oldlenp))
1331 if (len > table->maxlen)
1332 len = table->maxlen;
1333 if (copy_to_user(oldval, valp, len))
1335 if (put_user(len, oldlenp))
/*
 * Template sysctl tree for the IPv4 per-device settings. The levels are
 * chained through .child:
 *   devinet_root_dir (CTL_NET) -> devinet_proto_dir (NET_IPV4) ->
 *   devinet_conf_dir (NET_IPV4_CONF) -> devinet_dev
 *   (NET_PROTO_CONF_ALL) -> devinet_vars (one entry per setting).
 *
 * Every .data pointer refers to a member of the global ipv4_devconf;
 * devinet_sysctl_register() duplicates this template and rebases those
 * pointers onto another ipv4_devconf. The forwarding entry is handled by
 * devinet_sysctl_forward. The last four entries (disable_xfrm,
 * disable_policy, force_igmp_version, promote_secondaries) use
 * ipv4_doint_and_flush together with ipv4_doint_and_flush_strategy.
 * All remaining entries use proc_dointvec.
 */
1346 static struct devinet_sysctl_table {
1347 struct ctl_table_header *sysctl_header;
1348 ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1349 ctl_table devinet_dev[2];
1350 ctl_table devinet_conf_dir[2];
1351 ctl_table devinet_proto_dir[2];
1352 ctl_table devinet_root_dir[2];
1353 } devinet_sysctl = {
1356 .ctl_name = NET_IPV4_CONF_FORWARDING,
1357 .procname = "forwarding",
1358 .data = &ipv4_devconf.forwarding,
1359 .maxlen = sizeof(int),
1361 .proc_handler = &devinet_sysctl_forward,
1364 .ctl_name = NET_IPV4_CONF_MC_FORWARDING,
1365 .procname = "mc_forwarding",
1366 .data = &ipv4_devconf.mc_forwarding,
1367 .maxlen = sizeof(int),
1369 .proc_handler = &proc_dointvec,
1372 .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1373 .procname = "accept_redirects",
1374 .data = &ipv4_devconf.accept_redirects,
1375 .maxlen = sizeof(int),
1377 .proc_handler = &proc_dointvec,
1380 .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS,
1381 .procname = "secure_redirects",
1382 .data = &ipv4_devconf.secure_redirects,
1383 .maxlen = sizeof(int),
1385 .proc_handler = &proc_dointvec,
1388 .ctl_name = NET_IPV4_CONF_SHARED_MEDIA,
1389 .procname = "shared_media",
1390 .data = &ipv4_devconf.shared_media,
1391 .maxlen = sizeof(int),
1393 .proc_handler = &proc_dointvec,
1396 .ctl_name = NET_IPV4_CONF_RP_FILTER,
1397 .procname = "rp_filter",
1398 .data = &ipv4_devconf.rp_filter,
1399 .maxlen = sizeof(int),
1401 .proc_handler = &proc_dointvec,
1404 .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS,
1405 .procname = "send_redirects",
1406 .data = &ipv4_devconf.send_redirects,
1407 .maxlen = sizeof(int),
1409 .proc_handler = &proc_dointvec,
1412 .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1413 .procname = "accept_source_route",
1414 .data = &ipv4_devconf.accept_source_route,
1415 .maxlen = sizeof(int),
1417 .proc_handler = &proc_dointvec,
1420 .ctl_name = NET_IPV4_CONF_PROXY_ARP,
1421 .procname = "proxy_arp",
1422 .data = &ipv4_devconf.proxy_arp,
1423 .maxlen = sizeof(int),
1425 .proc_handler = &proc_dointvec,
1428 .ctl_name = NET_IPV4_CONF_MEDIUM_ID,
1429 .procname = "medium_id",
1430 .data = &ipv4_devconf.medium_id,
1431 .maxlen = sizeof(int),
1433 .proc_handler = &proc_dointvec,
1436 .ctl_name = NET_IPV4_CONF_BOOTP_RELAY,
1437 .procname = "bootp_relay",
1438 .data = &ipv4_devconf.bootp_relay,
1439 .maxlen = sizeof(int),
1441 .proc_handler = &proc_dointvec,
1444 .ctl_name = NET_IPV4_CONF_LOG_MARTIANS,
1445 .procname = "log_martians",
1446 .data = &ipv4_devconf.log_martians,
1447 .maxlen = sizeof(int),
1449 .proc_handler = &proc_dointvec,
1452 .ctl_name = NET_IPV4_CONF_TAG,
1454 .data = &ipv4_devconf.tag,
1455 .maxlen = sizeof(int),
1457 .proc_handler = &proc_dointvec,
1460 .ctl_name = NET_IPV4_CONF_ARPFILTER,
1461 .procname = "arp_filter",
1462 .data = &ipv4_devconf.arp_filter,
1463 .maxlen = sizeof(int),
1465 .proc_handler = &proc_dointvec,
1468 .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE,
1469 .procname = "arp_announce",
1470 .data = &ipv4_devconf.arp_announce,
1471 .maxlen = sizeof(int),
1473 .proc_handler = &proc_dointvec,
1476 .ctl_name = NET_IPV4_CONF_ARP_IGNORE,
1477 .procname = "arp_ignore",
1478 .data = &ipv4_devconf.arp_ignore,
1479 .maxlen = sizeof(int),
1481 .proc_handler = &proc_dointvec,
1484 .ctl_name = NET_IPV4_CONF_ARP_ACCEPT,
1485 .procname = "arp_accept",
1486 .data = &ipv4_devconf.arp_accept,
1487 .maxlen = sizeof(int),
1489 .proc_handler = &proc_dointvec,
1492 .ctl_name = NET_IPV4_CONF_NOXFRM,
1493 .procname = "disable_xfrm",
1494 .data = &ipv4_devconf.no_xfrm,
1495 .maxlen = sizeof(int),
1497 .proc_handler = &ipv4_doint_and_flush,
1498 .strategy = &ipv4_doint_and_flush_strategy,
1501 .ctl_name = NET_IPV4_CONF_NOPOLICY,
1502 .procname = "disable_policy",
1503 .data = &ipv4_devconf.no_policy,
1504 .maxlen = sizeof(int),
1506 .proc_handler = &ipv4_doint_and_flush,
1507 .strategy = &ipv4_doint_and_flush_strategy,
1510 .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1511 .procname = "force_igmp_version",
1512 .data = &ipv4_devconf.force_igmp_version,
1513 .maxlen = sizeof(int),
1515 .proc_handler = &ipv4_doint_and_flush,
1516 .strategy = &ipv4_doint_and_flush_strategy,
1519 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1520 .procname = "promote_secondaries",
1521 .data = &ipv4_devconf.promote_secondaries,
1522 .maxlen = sizeof(int),
1524 .proc_handler = &ipv4_doint_and_flush,
1525 .strategy = &ipv4_doint_and_flush_strategy,
1530 .ctl_name = NET_PROTO_CONF_ALL,
1533 .child = devinet_sysctl.devinet_vars,
1536 .devinet_conf_dir = {
1538 .ctl_name = NET_IPV4_CONF,
1541 .child = devinet_sysctl.devinet_dev,
1544 .devinet_proto_dir = {
1546 .ctl_name = NET_IPV4,
1549 .child = devinet_sysctl.devinet_conf_dir,
1552 .devinet_root_dir = {
1554 .ctl_name = CTL_NET,
1557 .child = devinet_sysctl.devinet_proto_dir,
/*
 * Register a private copy of the devinet_sysctl template for one
 * ipv4_devconf.
 *
 * in_dev: device state the entries belong to, or NULL for the "default"
 *         entry (ctl_name NET_PROTO_CONF_DEFAULT)
 * p:      configuration block the entries should read and write
 *
 * The template is duplicated with kmemdup(). Each devinet_vars .data
 * pointer is shifted by the offset of p from the global ipv4_devconf and
 * the cached .de pointers are cleared. The directory entry is named
 * after the device (with ctl_name = ifindex) or "default", and the
 * .child links are re-pointed at the copy before
 * register_sysctl_table() is called. The name is kstrdup()ed because
 * sysctl treats .procname as constant while a device name can change
 * (SIOCSIFNAME).
 */
1562 static void devinet_sysctl_register(struct in_device *in_dev,
1563 struct ipv4_devconf *p)
1566 struct net_device *dev = in_dev ? in_dev->dev : NULL;
1567 struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1569 char *dev_name = NULL;
1573 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1574 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1575 t->devinet_vars[i].de = NULL;
1579 dev_name = dev->name;
1580 t->devinet_dev[0].ctl_name = dev->ifindex;
1582 dev_name = "default";
1583 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1587 * Make a copy of dev_name, because '.procname' is regarded as const
1588 * by sysctl and we wouldn't want anyone to change it under our feet
1589 * (see SIOCSIFNAME).
1591 dev_name = kstrdup(dev_name, GFP_KERNEL);
1595 t->devinet_dev[0].procname = dev_name;
1596 t->devinet_dev[0].child = t->devinet_vars;
1597 t->devinet_dev[0].de = NULL;
1598 t->devinet_conf_dir[0].child = t->devinet_dev;
1599 t->devinet_conf_dir[0].de = NULL;
1600 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1601 t->devinet_proto_dir[0].de = NULL;
1602 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1603 t->devinet_root_dir[0].de = NULL;
1605 t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1606 if (!t->sysctl_header)
/*
 * Undo devinet_sysctl_register() for the configuration block p:
 * unregisters the table recorded in p->sysctl and frees the directory
 * name that was duplicated with kstrdup() at registration time.
 * NOTE(review): presumably also frees the table copy and clears
 * p->sysctl - confirm.
 */
1620 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1623 struct devinet_sysctl_table *t = p->sysctl;
1625 unregister_sysctl_table(t->sysctl_header);
1626 kfree(t->devinet_dev[0].procname);
/*
 * Boot-time initialisation of the IPv4 device layer: registers the
 * PF_INET gifconf callback, the netdevice notifier and the PF_INET
 * rtnetlink dispatch table. With CONFIG_SYSCTL it also registers the
 * template sysctl tree itself (the entries backed by the global
 * ipv4_devconf) and a "default" copy backed by ipv4_devconf_dflt.
 */
1632 void __init devinet_init(void)
1634 register_gifconf(PF_INET, inet_gifconf);
1635 register_netdevice_notifier(&ip_netdev_notifier);
1636 rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1637 #ifdef CONFIG_SYSCTL
1638 devinet_sysctl.sysctl_header =
1639 register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
1640 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
/* Functions from this file that are exported with EXPORT_SYMBOL(). */
1644 EXPORT_SYMBOL(in_dev_finish_destroy);
1645 EXPORT_SYMBOL(inet_select_addr);
1646 EXPORT_SYMBOL(inetdev_by_index);
1647 EXPORT_SYMBOL(register_inetaddr_notifier);
1648 EXPORT_SYMBOL(unregister_inetaddr_notifier);