/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/init.h>

#include <net/arp.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>

#include "fib_lookup.h"

static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)

static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
	for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
	for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope that gcc will optimize it to get rid of the dummy loop */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
	for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
	for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }

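/*
 * The macros above open a compound statement that endfor_nexthops() closes,
 * giving the enclosed body a local "nhsel" index and "nh" cursor, e.g.:
 *
 *	change_nexthops(fi) {
 *		if (nh->nh_flags & RTNH_F_DEAD)
 *			continue;
 *		...
 *	} endfor_nexthops(fi)
 *
 * Without CONFIG_IP_ROUTE_MULTIPATH a fib_info carries exactly one nexthop,
 * so the "loop" degenerates to a single iteration.
 */
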
static const struct
{
	int	error;
	u8	scope;
} fib_props[RTN_MAX + 1] = {
	{ .error = 0,		.scope = RT_SCOPE_NOWHERE },	/* RTN_UNSPEC */
	{ .error = 0,		.scope = RT_SCOPE_UNIVERSE },	/* RTN_UNICAST */
	{ .error = 0,		.scope = RT_SCOPE_HOST },	/* RTN_LOCAL */
	{ .error = 0,		.scope = RT_SCOPE_LINK },	/* RTN_BROADCAST */
	{ .error = 0,		.scope = RT_SCOPE_LINK },	/* RTN_ANYCAST */
	{ .error = 0,		.scope = RT_SCOPE_UNIVERSE },	/* RTN_MULTICAST */
	{ .error = -EINVAL,	.scope = RT_SCOPE_UNIVERSE },	/* RTN_BLACKHOLE */
	{ .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },	/* RTN_UNREACHABLE */
	{ .error = -EACCES,	.scope = RT_SCOPE_UNIVERSE },	/* RTN_PROHIBIT */
	{ .error = -EAGAIN,	.scope = RT_SCOPE_UNIVERSE },	/* RTN_THROW */
	{ .error = -EINVAL,	.scope = RT_SCOPE_NOWHERE },	/* RTN_NAT */
	{ .error = -EINVAL,	.scope = RT_SCOPE_NOWHERE },	/* RTN_XRESOLVE */
};

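/*
 * The table above is indexed by route type (RTN_*).  fib_create_info() uses
 * the scope column as a sanity bound on the scope requested for a route of
 * that type, and fib_semantic_match() returns the error column directly for
 * the reject-style types (blackhole, unreachable, prohibit, ...).
 */
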
/* Release a nexthop info record */

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
		return;
	}
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}

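/*
 * A fib_info carries two reference counts: fib_treeref counts the routes in
 * the FIB trees that share it and is manipulated under fib_info_lock (see
 * fib_release_info() below), while fib_clntref is an atomic count of lookup
 * results handed out and is dropped again via fib_info_put().
 */
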
void fib_release_info(struct fib_info *fi)
{
	spin_lock_bh(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	spin_unlock_bh(&fib_info_lock);
}

static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}

static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= fi->fib_protocol;
	val ^= (__force u32)fi->fib_prefsrc;
	val ^= fi->fib_priority;

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}

static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}

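/*
 * fib_find_info() lets fib_create_info() share one fib_info between routes
 * whose semantics (protocol, priority, prefsrc, metrics, flags and nexthops)
 * are identical, instead of keeping a duplicate record per route.
 */
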
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}

/* Check that the gateway is already configured.
 * Used only by the redirect accept routine.
 */

int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	spin_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags&RTNH_F_DEAD)) {
			spin_unlock(&fib_info_lock);
			return 0;
		}
	}

	spin_unlock(&fib_info_lock);

	return -1;
}

static inline size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4); /* RTA_PREFSRC */

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->fib_nhs) {
		/* Also handles the special case fib_nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size(fi->fib_nhs * nhsize);
	}

	return payload;
}

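/*
 * fib_nlmsg_size() is a worst-case estimate: rtmsg_fib() sizes its skb with
 * it so that building the route message cannot run out of space; a later
 * -EMSGSIZE from fib_dump_info() is therefore treated as a bug, not as a
 * normal error.
 */
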
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
			    fa->fa_type, fa->fa_scope, key, dst_len,
			    fa->fa_tos, fa->fa_info, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
			  info->nlh, GFP_KERNEL);
errout:
	if (err < 0)
		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}

/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
{
	if (fah) {
		struct fib_alias *fa;
		list_for_each_entry(fa, fah, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}

int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state==NUD_REACHABLE)
		return 0;
	if ((state&NUD_VALID) && order != dflt)
		return 0;
	if ((state&NUD_VALID) ||
	    (*last_idx<0 && order > dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}

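/*
 * fib_detect_death() supports default-route selection: the ARP/neighbour
 * state of a candidate gateway (looked up in arp_tbl above) decides whether
 * the route of the given order is still usable, or should merely be
 * remembered in *last_resort as a fallback.
 */
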
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}

static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg)
{
	change_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		nh->nh_oif = rtnh->rtnh_ifindex;
		nh->nh_weight = rtnh->rtnh_hops + 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nh->nh_gw = nla ? nla_get_be32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	return 0;
}

#endif

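/*
 * RTA_MULTIPATH carries a stream of struct rtnexthop headers, each followed
 * by its own nested attributes (RTA_GATEWAY, and RTA_FLOW when routing
 * realms are compiled in).  rtnh_ok() checks that a complete entry is left
 * in the remaining length and rtnh_next() advances to the following one;
 * both fib_get_nhs() above and fib_nh_match() below walk the stream this way.
 */
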
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *rtnh;
	int remaining;
#endif

	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
		return 1;

	if (cfg->fc_oif || cfg->fc_gw) {
		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
		    (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp == NULL)
		return 0;

	rtnh = cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	for_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
			return 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla && nla_get_be32(nla) != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
				return 1;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);
#endif

	return 0;
}

/*
   The semantics of nexthops are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr but by a direct route.
   c) if both gateway and interface are specified, they should not
      contradict.
   d) if we use tunnel routes, the gateway may not be an on-link address.

   Attempting to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size of the code
   does not increase practically, but it becomes much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes to coexist
   in peace.

   Normally it looks like this:

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */

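/*
 * Example (hypothetical addresses): "ip route add 10.0.0.0/8 via 192.168.7.1"
 * makes fib_check_nh() below look up 192.168.7.1 itself; the gateway has to
 * resolve with link or host scope (e.g. via a directly connected subnet),
 * otherwise the route is rejected.  Adding "onlink" skips that lookup and
 * simply trusts that 192.168.7.1 is reachable on the given device.
 */
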
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			struct net_device *dev;

			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
					   nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
						      nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						.daddr = nh->nh_gw,
						.scope = cfg->fc_scope + 1,
					},
				},
				.oif = nh->nh_oif,
			};

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}

static inline unsigned int fib_laddr_hashfn(__be32 val)
{
	unsigned int mask = (fib_hash_size - 1);

	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
}

static struct hlist_head *fib_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kzalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
}

static void fib_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}

static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	spin_lock_bh(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	spin_unlock_bh(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}

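/*
 * The info and laddr hashes are grown together: fib_create_info() doubles
 * fib_hash_size once fib_info_cnt reaches it, allocates the two new tables
 * and calls fib_hash_move() to rehash every fib_info under fib_info_lock.
 */
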
struct fib_info *fib_create_info(struct fib_config *cfg)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
	int nhs = 1;

	/* Fast check to catch the weirdest cases */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else
			fib_hash_move(new_info_hash, new_laddrhash, new_size);

		if (!fib_hash_size)
			goto failure;
	}

	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;

	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
		if (err != 0)
			goto failure;
		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
			goto err_inval;
		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;

		nh->nh_oif = cfg->fc_oif;
		nh->nh_gw = cfg->fc_gw;
		nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
		nh->nh_tclassid = cfg->fc_flow;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
			goto err_inval;
		goto link_it;
	}

	if (cfg->fc_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
					      fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
		    fi->fib_prefsrc != cfg->fc_dst)
			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
					   fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	spin_lock_bh(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}

/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __be32 zone, __be32 mask,
		       int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1)
					goto out_fill_res;
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
				       fa->fa_type);
				return -EINVAL;
			}
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}

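/*
 * On success fib_semantic_match() fills *res and takes a fib_clntref
 * reference on the matched fib_info (the atomic_inc above), which the caller
 * is expected to drop with fib_info_put() once it is done with the result.
 * Reject-style aliases short-circuit with the error taken from fib_props.
 */
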
/* Find an appropriate source address for this destination */

__be32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}

int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
		  struct fib_info *fi, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	rtm->rtm_protocol = fi->fib_protocol;

	if (rtm->rtm_dst_len)
		NLA_PUT_BE32(skb, RTA_DST, dst);

	if (fi->fib_priority)
		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);

	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc)
		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);

	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);

		if (fi->fib_nh->nh_oif)
			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
#ifdef CONFIG_NET_CLS_ROUTE
		if (fi->fib_nh[0].nh_tclassid)
			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (mp == NULL)
			goto nla_put_failure;

		for_nexthops(fi) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (rtnh == NULL)
				goto nla_put_failure;

			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
			rtnh->rtnh_hops = nh->nh_weight - 1;
			rtnh->rtnh_ifindex = nh->nh_oif;

			if (nh->nh_gw)
				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
			if (nh->nh_tclassid)
				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif
			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
		} endfor_nexthops(fi);

		nla_nest_end(skb, mp);
	}
#endif
	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

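/*
 * The NLA_PUT* macros used above jump to nla_put_failure when the skb runs
 * out of tailroom; fib_dump_info() then cancels the partially built message
 * and returns -EMSGSIZE, which rtmsg_fib() treats as a bug because the skb
 * was already sized with fib_nlmsg_size().
 */
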
/*
   - local address disappeared -> we must delete all the entries
     referring to it.
   - device went down -> we must shutdown all nexthops going via it.
 */

int fib_sync_down(__be32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
   Dead device goes up. We wake up dead nexthops.
   It makes sense only for multipath routes.
 */

int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}

/*
   The algorithm is suboptimal, but it provides really
   fair weighted route distribution.
 */

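/*
 * Sketch of the weighting (hypothetical numbers): with two alive nexthops of
 * weight 3 and 1, fib_power is (re)charged to 4 and each nexthop's nh_power
 * to its weight; every selection decrements fib_power and the chosen
 * nexthop's nh_power, so over any window of 4 selections the first nexthop
 * is picked roughly 3 times and the second once.
 */
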
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/* w should be a random number in [0..fi->fib_power-1];
	   this is a pretty bad approximation.
	 */
	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif