[linux-2.6-omap-h63xx.git] net/ipv4/fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
47
48 #include "fib_lookup.h"
49
50 #define FSprintk(a...)
51
52 static DEFINE_SPINLOCK(fib_info_lock);
53 static struct hlist_head *fib_info_hash;
54 static struct hlist_head *fib_info_laddrhash;
55 static unsigned int fib_hash_size;
56 static unsigned int fib_info_cnt;
57
58 #define DEVINDEX_HASHBITS 8
59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
62 #ifdef CONFIG_IP_ROUTE_MULTIPATH
63
64 static DEFINE_SPINLOCK(fib_multipath_lock);
65
66 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
67 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
68
69 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
70 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
71
72 #else /* CONFIG_IP_ROUTE_MULTIPATH */
73
74 /* Hope that gcc optimizes this dummy loop away */
75
76 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
77 for (nhsel=0; nhsel < 1; nhsel++)
78
79 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
80 for (nhsel=0; nhsel < 1; nhsel++)
81
82 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
83
84 #define endfor_nexthops(fi) }
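
/*
 * Usage sketch for the iterator macros above (illustrative only; it
 * mirrors how the rest of this file uses them):
 *
 *	change_nexthops(fi) {
 *		nh->nh_flags &= ~RTNH_F_DEAD;
 *	} endfor_nexthops(fi)
 *
 * for_nexthops() provides a read-only "const struct fib_nh *nh" plus
 * the index "nhsel"; change_nexthops() provides a writable nh.  Both
 * open a block that must be closed with endfor_nexthops().  Without
 * CONFIG_IP_ROUTE_MULTIPATH the loop degenerates to a single pass
 * over fi->fib_nh[0].
 */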
85
86
87 static const struct
88 {
89         int     error;
90         u8      scope;
91 } fib_props[RTN_MAX + 1] = {
92         {
93                 .error  = 0,
94                 .scope  = RT_SCOPE_NOWHERE,
95         },      /* RTN_UNSPEC */
96         {
97                 .error  = 0,
98                 .scope  = RT_SCOPE_UNIVERSE,
99         },      /* RTN_UNICAST */
100         {
101                 .error  = 0,
102                 .scope  = RT_SCOPE_HOST,
103         },      /* RTN_LOCAL */
104         {
105                 .error  = 0,
106                 .scope  = RT_SCOPE_LINK,
107         },      /* RTN_BROADCAST */
108         {
109                 .error  = 0,
110                 .scope  = RT_SCOPE_LINK,
111         },      /* RTN_ANYCAST */
112         {
113                 .error  = 0,
114                 .scope  = RT_SCOPE_UNIVERSE,
115         },      /* RTN_MULTICAST */
116         {
117                 .error  = -EINVAL,
118                 .scope  = RT_SCOPE_UNIVERSE,
119         },      /* RTN_BLACKHOLE */
120         {
121                 .error  = -EHOSTUNREACH,
122                 .scope  = RT_SCOPE_UNIVERSE,
123         },      /* RTN_UNREACHABLE */
124         {
125                 .error  = -EACCES,
126                 .scope  = RT_SCOPE_UNIVERSE,
127         },      /* RTN_PROHIBIT */
128         {
129                 .error  = -EAGAIN,
130                 .scope  = RT_SCOPE_UNIVERSE,
131         },      /* RTN_THROW */
132         {
133                 .error  = -EINVAL,
134                 .scope  = RT_SCOPE_NOWHERE,
135         },      /* RTN_NAT */
136         {
137                 .error  = -EINVAL,
138                 .scope  = RT_SCOPE_NOWHERE,
139         },      /* RTN_XRESOLVE */
140 };
141
142
143 /* Release a nexthop info record */
144
145 void free_fib_info(struct fib_info *fi)
146 {
147         if (fi->fib_dead == 0) {
148                 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149                 return;
150         }
151         change_nexthops(fi) {
152                 if (nh->nh_dev)
153                         dev_put(nh->nh_dev);
154                 nh->nh_dev = NULL;
155         } endfor_nexthops(fi);
156         fib_info_cnt--;
157         kfree(fi);
158 }
159
160 void fib_release_info(struct fib_info *fi)
161 {
162         spin_lock_bh(&fib_info_lock);
163         if (fi && --fi->fib_treeref == 0) {
164                 hlist_del(&fi->fib_hash);
165                 if (fi->fib_prefsrc)
166                         hlist_del(&fi->fib_lhash);
167                 change_nexthops(fi) {
168                         if (!nh->nh_dev)
169                                 continue;
170                         hlist_del(&nh->nh_hash);
171                 } endfor_nexthops(fi)
172                 fi->fib_dead = 1;
173                 fib_info_put(fi);
174         }
175         spin_unlock_bh(&fib_info_lock);
176 }
177
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179 {
180         const struct fib_nh *onh = ofi->fib_nh;
181
182         for_nexthops(fi) {
183                 if (nh->nh_oif != onh->nh_oif ||
184                     nh->nh_gw  != onh->nh_gw ||
185                     nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187                     nh->nh_weight != onh->nh_weight ||
188 #endif
189 #ifdef CONFIG_NET_CLS_ROUTE
190                     nh->nh_tclassid != onh->nh_tclassid ||
191 #endif
192                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193                         return -1;
194                 onh++;
195         } endfor_nexthops(fi);
196         return 0;
197 }
198
199 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200 {
201         unsigned int mask = (fib_hash_size - 1);
202         unsigned int val = fi->fib_nhs;
203
204         val ^= fi->fib_protocol;
205         val ^= (__force u32)fi->fib_prefsrc;
206         val ^= fi->fib_priority;
207
208         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209 }
210
211 static struct fib_info *fib_find_info(const struct fib_info *nfi)
212 {
213         struct hlist_head *head;
214         struct hlist_node *node;
215         struct fib_info *fi;
216         unsigned int hash;
217
218         hash = fib_info_hashfn(nfi);
219         head = &fib_info_hash[hash];
220
221         hlist_for_each_entry(fi, node, head, fib_hash) {
222                 if (fi->fib_nhs != nfi->fib_nhs)
223                         continue;
224                 if (nfi->fib_protocol == fi->fib_protocol &&
225                     nfi->fib_prefsrc == fi->fib_prefsrc &&
226                     nfi->fib_priority == fi->fib_priority &&
227                     memcmp(nfi->fib_metrics, fi->fib_metrics,
228                            sizeof(fi->fib_metrics)) == 0 &&
229                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231                         return fi;
232         }
233
234         return NULL;
235 }
236
237 static inline unsigned int fib_devindex_hashfn(unsigned int val)
238 {
239         unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241         return (val ^
242                 (val >> DEVINDEX_HASHBITS) ^
243                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244 }
245
246 /* Check that the gateway is already configured.
247    Used only by the redirect accept routine.
248  */
249
250 int ip_fib_check_default(__be32 gw, struct net_device *dev)
251 {
252         struct hlist_head *head;
253         struct hlist_node *node;
254         struct fib_nh *nh;
255         unsigned int hash;
256
257         spin_lock(&fib_info_lock);
258
259         hash = fib_devindex_hashfn(dev->ifindex);
260         head = &fib_info_devhash[hash];
261         hlist_for_each_entry(nh, node, head, nh_hash) {
262                 if (nh->nh_dev == dev &&
263                     nh->nh_gw == gw &&
264                     !(nh->nh_flags&RTNH_F_DEAD)) {
265                         spin_unlock(&fib_info_lock);
266                         return 0;
267                 }
268         }
269
270         spin_unlock(&fib_info_lock);
271
272         return -1;
273 }
274
275 static inline size_t fib_nlmsg_size(struct fib_info *fi)
276 {
277         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
278                          + nla_total_size(4) /* RTA_TABLE */
279                          + nla_total_size(4) /* RTA_DST */
280                          + nla_total_size(4) /* RTA_PRIORITY */
281                          + nla_total_size(4); /* RTA_PREFSRC */
282
283         /* space for nested metrics */
284         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
285
286         if (fi->fib_nhs) {
287                 /* Also handles the special case fib_nhs == 1 */
288
289                 /* each nexthop is packed in an attribute */
290                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
291
292                 /* may contain flow and gateway attribute */
293                 nhsize += 2 * nla_total_size(4);
294
295                 /* all nexthops are packed in a nested attribute */
296                 payload += nla_total_size(fi->fib_nhs * nhsize);
297         }
298
299         return payload;
300 }
301
302 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
303                int dst_len, u32 tb_id, struct nl_info *info,
304                unsigned int nlm_flags)
305 {
306         struct sk_buff *skb;
307         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
308         int err = -ENOBUFS;
309
310         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
311         if (skb == NULL)
312                 goto errout;
313
314         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
315                             fa->fa_type, fa->fa_scope, key, dst_len,
316                             fa->fa_tos, fa->fa_info, nlm_flags);
317         if (err < 0) {
318                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319                 WARN_ON(err == -EMSGSIZE);
320                 kfree_skb(skb);
321                 goto errout;
322         }
323         err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
324                           info->nlh, GFP_KERNEL);
325 errout:
326         if (err < 0)
327                 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
328 }
329
330 /* Return the first fib alias matching TOS with
331  * priority less than or equal to PRIO.
332  */
333 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334 {
335         if (fah) {
336                 struct fib_alias *fa;
337                 list_for_each_entry(fa, fah, fa_list) {
338                         if (fa->fa_tos > tos)
339                                 continue;
340                         if (fa->fa_info->fib_priority >= prio ||
341                             fa->fa_tos < tos)
342                                 return fa;
343                 }
344         }
345         return NULL;
346 }
347
348 int fib_detect_death(struct fib_info *fi, int order,
349                      struct fib_info **last_resort, int *last_idx, int dflt)
350 {
351         struct neighbour *n;
352         int state = NUD_NONE;
353
354         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355         if (n) {
356                 state = n->nud_state;
357                 neigh_release(n);
358         }
359         if (state==NUD_REACHABLE)
360                 return 0;
361         if ((state&NUD_VALID) && order != dflt)
362                 return 0;
363         if ((state&NUD_VALID) ||
364             (*last_idx<0 && order > dflt)) {
365                 *last_resort = fi;
366                 *last_idx = order;
367         }
368         return 1;
369 }
370
371 #ifdef CONFIG_IP_ROUTE_MULTIPATH
372
373 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
374 {
375         int nhs = 0;
376
377         while (rtnh_ok(rtnh, remaining)) {
378                 nhs++;
379                 rtnh = rtnh_next(rtnh, &remaining);
380         }
381
382         /* leftover implies invalid nexthop configuration, discard it */
383         return remaining > 0 ? 0 : nhs;
384 }
385
386 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387                        int remaining, struct fib_config *cfg)
388 {
389         change_nexthops(fi) {
390                 int attrlen;
391
392                 if (!rtnh_ok(rtnh, remaining))
393                         return -EINVAL;
394
395                 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396                 nh->nh_oif = rtnh->rtnh_ifindex;
397                 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399                 attrlen = rtnh_attrlen(rtnh);
400                 if (attrlen > 0) {
401                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
404                         nh->nh_gw = nla ? nla_get_be32(nla) : 0;
405 #ifdef CONFIG_NET_CLS_ROUTE
406                         nla = nla_find(attrs, attrlen, RTA_FLOW);
407                         nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
408 #endif
409                 }
410
411                 rtnh = rtnh_next(rtnh, &remaining);
412         } endfor_nexthops(fi);
413
414         return 0;
415 }
416
417 #endif
418
419 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
420 {
421 #ifdef CONFIG_IP_ROUTE_MULTIPATH
422         struct rtnexthop *rtnh;
423         int remaining;
424 #endif
425
426         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
427                 return 1;
428
429         if (cfg->fc_oif || cfg->fc_gw) {
430                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
432                         return 0;
433                 return 1;
434         }
435
436 #ifdef CONFIG_IP_ROUTE_MULTIPATH
437         if (cfg->fc_mp == NULL)
438                 return 0;
439
440         rtnh = cfg->fc_mp;
441         remaining = cfg->fc_mp_len;
442
443         for_nexthops(fi) {
444                 int attrlen;
445
446                 if (!rtnh_ok(rtnh, remaining))
447                         return -EINVAL;
448
449                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
450                         return 1;
451
452                 attrlen = rtnh_attrlen(rtnh);
453                 if (attrlen > 0) {
454                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
457                         if (nla && nla_get_be32(nla) != nh->nh_gw)
458                                 return 1;
459 #ifdef CONFIG_NET_CLS_ROUTE
460                         nla = nla_find(attrs, attrlen, RTA_FLOW);
461                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
462                                 return 1;
463 #endif
464                 }
465
466                 rtnh = rtnh_next(rtnh, &remaining);
467         } endfor_nexthops(fi);
468 #endif
469         return 0;
470 }
471
472
473 /*
474    Picture
475    -------
476
477    Nexthop semantics are very messy for historical reasons.
478    We have to take into account that:
479    a) the gateway can actually be a local interface address,
480       so that a gatewayed route is direct.
481    b) the gateway must be an on-link address, possibly
482       described not by an ifaddr, but also by a direct route.
483    c) if both gateway and interface are specified, they must not
484       contradict each other.
485    d) if we use tunnel routes, the gateway may not be on-link.
486
487    Attempting to reconcile all of these (alas, self-contradictory)
488    conditions results in pretty ugly and hairy code with obscure logic.
489
490    I chose to generalize it instead, so that the amount of code barely
491    grows, but the result becomes much more general.
492
493    Every prefix is assigned a "scope" value: "host" is a local address,
494    "link" is a direct route,
495    [ ... "site" ... "interior" ... ]
496    and "universe" is a true gateway route with global meaning.
497
498    Every prefix refers to a set of "nexthop"s (gw, oif),
499    where gw must have a narrower scope. This recursion stops
500    when gw has LOCAL scope or when the "nexthop" is declared ONLINK,
501    which forces gw to be on link.
502
503    The code is still hairy, but now it is apparently logically
504    consistent and very flexible. For example, as a by-product it
505    allows independent exterior and interior routing processes to
506    coexist in peace.
507
508    Normally it looks like this:
509
510    {universe prefix}  -> (gw, oif) [scope link]
511                           |
512                           |-> {link prefix} -> (gw, oif) [scope local]
513                                                 |
514                                                 |-> {local prefix} (terminal node)
515  */
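
/*
 * A concrete example of the picture above (addresses are hypothetical):
 *
 *	10.0.0.0/8 via 192.168.1.1 dev eth0	[scope universe]
 *	192.168.1.0/24 dev eth0			[scope link]
 *	local 192.168.1.2 dev eth0		[scope host]
 *
 * The universe-scope prefix's gateway 192.168.1.1 is resolved against
 * the link-scope prefix 192.168.1.0/24, whose nexthop in turn has host
 * scope (the local address 192.168.1.2), where the recursion ends.
 * fib_check_nh() below performs one step of this walk by calling
 * fib_lookup() on the gateway with a narrower scope.
 */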
516
517 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518                         struct fib_nh *nh)
519 {
520         int err;
521
522         if (nh->nh_gw) {
523                 struct fib_result res;
524
525 #ifdef CONFIG_IP_ROUTE_PERVASIVE
526                 if (nh->nh_flags&RTNH_F_PERVASIVE)
527                         return 0;
528 #endif
529                 if (nh->nh_flags&RTNH_F_ONLINK) {
530                         struct net_device *dev;
531
532                         if (cfg->fc_scope >= RT_SCOPE_LINK)
533                                 return -EINVAL;
534                         if (inet_addr_type(cfg->fc_nlinfo.nl_net,
535                                            nh->nh_gw) != RTN_UNICAST)
536                                 return -EINVAL;
537                         if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
538                                                       nh->nh_oif)) == NULL)
539                                 return -ENODEV;
540                         if (!(dev->flags&IFF_UP))
541                                 return -ENETDOWN;
542                         nh->nh_dev = dev;
543                         dev_hold(dev);
544                         nh->nh_scope = RT_SCOPE_LINK;
545                         return 0;
546                 }
547                 {
548                         struct flowi fl = {
549                                 .nl_u = {
550                                         .ip4_u = {
551                                                 .daddr = nh->nh_gw,
552                                                 .scope = cfg->fc_scope + 1,
553                                         },
554                                 },
555                                 .oif = nh->nh_oif,
556                         };
557
558                         /* It is not necessary, but requires a bit of thinking */
559                         if (fl.fl4_scope < RT_SCOPE_LINK)
560                                 fl.fl4_scope = RT_SCOPE_LINK;
561                         if ((err = fib_lookup(&fl, &res)) != 0)
562                                 return err;
563                 }
564                 err = -EINVAL;
565                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
566                         goto out;
567                 nh->nh_scope = res.scope;
568                 nh->nh_oif = FIB_RES_OIF(res);
569                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
570                         goto out;
571                 dev_hold(nh->nh_dev);
572                 err = -ENETDOWN;
573                 if (!(nh->nh_dev->flags & IFF_UP))
574                         goto out;
575                 err = 0;
576 out:
577                 fib_res_put(&res);
578                 return err;
579         } else {
580                 struct in_device *in_dev;
581
582                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
583                         return -EINVAL;
584
585                 in_dev = inetdev_by_index(nh->nh_oif);
586                 if (in_dev == NULL)
587                         return -ENODEV;
588                 if (!(in_dev->dev->flags&IFF_UP)) {
589                         in_dev_put(in_dev);
590                         return -ENETDOWN;
591                 }
592                 nh->nh_dev = in_dev->dev;
593                 dev_hold(nh->nh_dev);
594                 nh->nh_scope = RT_SCOPE_HOST;
595                 in_dev_put(in_dev);
596         }
597         return 0;
598 }
599
600 static inline unsigned int fib_laddr_hashfn(__be32 val)
601 {
602         unsigned int mask = (fib_hash_size - 1);
603
604         return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
605 }
606
607 static struct hlist_head *fib_hash_alloc(int bytes)
608 {
609         if (bytes <= PAGE_SIZE)
610                 return kzalloc(bytes, GFP_KERNEL);
611         else
612                 return (struct hlist_head *)
613                         __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
614 }
615
616 static void fib_hash_free(struct hlist_head *hash, int bytes)
617 {
618         if (!hash)
619                 return;
620
621         if (bytes <= PAGE_SIZE)
622                 kfree(hash);
623         else
624                 free_pages((unsigned long) hash, get_order(bytes));
625 }
626
627 static void fib_hash_move(struct hlist_head *new_info_hash,
628                           struct hlist_head *new_laddrhash,
629                           unsigned int new_size)
630 {
631         struct hlist_head *old_info_hash, *old_laddrhash;
632         unsigned int old_size = fib_hash_size;
633         unsigned int i, bytes;
634
635         spin_lock_bh(&fib_info_lock);
636         old_info_hash = fib_info_hash;
637         old_laddrhash = fib_info_laddrhash;
638         fib_hash_size = new_size;
639
640         for (i = 0; i < old_size; i++) {
641                 struct hlist_head *head = &fib_info_hash[i];
642                 struct hlist_node *node, *n;
643                 struct fib_info *fi;
644
645                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
646                         struct hlist_head *dest;
647                         unsigned int new_hash;
648
649                         hlist_del(&fi->fib_hash);
650
651                         new_hash = fib_info_hashfn(fi);
652                         dest = &new_info_hash[new_hash];
653                         hlist_add_head(&fi->fib_hash, dest);
654                 }
655         }
656         fib_info_hash = new_info_hash;
657
658         for (i = 0; i < old_size; i++) {
659                 struct hlist_head *lhead = &fib_info_laddrhash[i];
660                 struct hlist_node *node, *n;
661                 struct fib_info *fi;
662
663                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
664                         struct hlist_head *ldest;
665                         unsigned int new_hash;
666
667                         hlist_del(&fi->fib_lhash);
668
669                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
670                         ldest = &new_laddrhash[new_hash];
671                         hlist_add_head(&fi->fib_lhash, ldest);
672                 }
673         }
674         fib_info_laddrhash = new_laddrhash;
675
676         spin_unlock_bh(&fib_info_lock);
677
678         bytes = old_size * sizeof(struct hlist_head *);
679         fib_hash_free(old_info_hash, bytes);
680         fib_hash_free(old_laddrhash, bytes);
681 }
682
683 struct fib_info *fib_create_info(struct fib_config *cfg)
684 {
685         int err;
686         struct fib_info *fi = NULL;
687         struct fib_info *ofi;
688         int nhs = 1;
689
690         /* Fast check to catch the weirdest cases */
691         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
692                 goto err_inval;
693
694 #ifdef CONFIG_IP_ROUTE_MULTIPATH
695         if (cfg->fc_mp) {
696                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
697                 if (nhs == 0)
698                         goto err_inval;
699         }
700 #endif
701
702         err = -ENOBUFS;
703         if (fib_info_cnt >= fib_hash_size) {
704                 unsigned int new_size = fib_hash_size << 1;
705                 struct hlist_head *new_info_hash;
706                 struct hlist_head *new_laddrhash;
707                 unsigned int bytes;
708
709                 if (!new_size)
710                         new_size = 1;
711                 bytes = new_size * sizeof(struct hlist_head *);
712                 new_info_hash = fib_hash_alloc(bytes);
713                 new_laddrhash = fib_hash_alloc(bytes);
714                 if (!new_info_hash || !new_laddrhash) {
715                         fib_hash_free(new_info_hash, bytes);
716                         fib_hash_free(new_laddrhash, bytes);
717                 } else
718                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
719
720                 if (!fib_hash_size)
721                         goto failure;
722         }
723
724         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
725         if (fi == NULL)
726                 goto failure;
727         fib_info_cnt++;
728
729         fi->fib_protocol = cfg->fc_protocol;
730         fi->fib_flags = cfg->fc_flags;
731         fi->fib_priority = cfg->fc_priority;
732         fi->fib_prefsrc = cfg->fc_prefsrc;
733
734         fi->fib_nhs = nhs;
735         change_nexthops(fi) {
736                 nh->nh_parent = fi;
737         } endfor_nexthops(fi)
738
739         if (cfg->fc_mx) {
740                 struct nlattr *nla;
741                 int remaining;
742
743                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
744                         int type = nla_type(nla);
745
746                         if (type) {
747                                 if (type > RTAX_MAX)
748                                         goto err_inval;
749                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
750                         }
751                 }
752         }
753
754         if (cfg->fc_mp) {
755 #ifdef CONFIG_IP_ROUTE_MULTIPATH
756                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
757                 if (err != 0)
758                         goto failure;
759                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
760                         goto err_inval;
761                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
762                         goto err_inval;
763 #ifdef CONFIG_NET_CLS_ROUTE
764                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
765                         goto err_inval;
766 #endif
767 #else
768                 goto err_inval;
769 #endif
770         } else {
771                 struct fib_nh *nh = fi->fib_nh;
772
773                 nh->nh_oif = cfg->fc_oif;
774                 nh->nh_gw = cfg->fc_gw;
775                 nh->nh_flags = cfg->fc_flags;
776 #ifdef CONFIG_NET_CLS_ROUTE
777                 nh->nh_tclassid = cfg->fc_flow;
778 #endif
779 #ifdef CONFIG_IP_ROUTE_MULTIPATH
780                 nh->nh_weight = 1;
781 #endif
782         }
783
784         if (fib_props[cfg->fc_type].error) {
785                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
786                         goto err_inval;
787                 goto link_it;
788         }
789
790         if (cfg->fc_scope > RT_SCOPE_HOST)
791                 goto err_inval;
792
793         if (cfg->fc_scope == RT_SCOPE_HOST) {
794                 struct fib_nh *nh = fi->fib_nh;
795
796                 /* Local address is added. */
797                 if (nhs != 1 || nh->nh_gw)
798                         goto err_inval;
799                 nh->nh_scope = RT_SCOPE_NOWHERE;
800                 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
801                                               fi->fib_nh->nh_oif);
802                 err = -ENODEV;
803                 if (nh->nh_dev == NULL)
804                         goto failure;
805         } else {
806                 change_nexthops(fi) {
807                         if ((err = fib_check_nh(cfg, fi, nh)) != 0)
808                                 goto failure;
809                 } endfor_nexthops(fi)
810         }
811
812         if (fi->fib_prefsrc) {
813                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
814                     fi->fib_prefsrc != cfg->fc_dst)
815                         if (inet_addr_type(cfg->fc_nlinfo.nl_net,
816                                            fi->fib_prefsrc) != RTN_LOCAL)
817                                 goto err_inval;
818         }
819
820 link_it:
821         if ((ofi = fib_find_info(fi)) != NULL) {
822                 fi->fib_dead = 1;
823                 free_fib_info(fi);
824                 ofi->fib_treeref++;
825                 return ofi;
826         }
827
828         fi->fib_treeref++;
829         atomic_inc(&fi->fib_clntref);
830         spin_lock_bh(&fib_info_lock);
831         hlist_add_head(&fi->fib_hash,
832                        &fib_info_hash[fib_info_hashfn(fi)]);
833         if (fi->fib_prefsrc) {
834                 struct hlist_head *head;
835
836                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
837                 hlist_add_head(&fi->fib_lhash, head);
838         }
839         change_nexthops(fi) {
840                 struct hlist_head *head;
841                 unsigned int hash;
842
843                 if (!nh->nh_dev)
844                         continue;
845                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
846                 head = &fib_info_devhash[hash];
847                 hlist_add_head(&nh->nh_hash, head);
848         } endfor_nexthops(fi)
849         spin_unlock_bh(&fib_info_lock);
850         return fi;
851
852 err_inval:
853         err = -EINVAL;
854
855 failure:
856         if (fi) {
857                 fi->fib_dead = 1;
858                 free_fib_info(fi);
859         }
860
861         return ERR_PTR(err);
862 }
863
864 /* Note! fib_semantic_match intentionally uses RCU list functions. */
865 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
866                        struct fib_result *res, __be32 zone, __be32 mask,
867                         int prefixlen)
868 {
869         struct fib_alias *fa;
870         int nh_sel = 0;
871
872         list_for_each_entry_rcu(fa, head, fa_list) {
873                 int err;
874
875                 if (fa->fa_tos &&
876                     fa->fa_tos != flp->fl4_tos)
877                         continue;
878
879                 if (fa->fa_scope < flp->fl4_scope)
880                         continue;
881
882                 fa->fa_state |= FA_S_ACCESSED;
883
884                 err = fib_props[fa->fa_type].error;
885                 if (err == 0) {
886                         struct fib_info *fi = fa->fa_info;
887
888                         if (fi->fib_flags & RTNH_F_DEAD)
889                                 continue;
890
891                         switch (fa->fa_type) {
892                         case RTN_UNICAST:
893                         case RTN_LOCAL:
894                         case RTN_BROADCAST:
895                         case RTN_ANYCAST:
896                         case RTN_MULTICAST:
897                                 for_nexthops(fi) {
898                                         if (nh->nh_flags&RTNH_F_DEAD)
899                                                 continue;
900                                         if (!flp->oif || flp->oif == nh->nh_oif)
901                                                 break;
902                                 }
903 #ifdef CONFIG_IP_ROUTE_MULTIPATH
904                                 if (nhsel < fi->fib_nhs) {
905                                         nh_sel = nhsel;
906                                         goto out_fill_res;
907                                 }
908 #else
909                                 if (nhsel < 1) {
910                                         goto out_fill_res;
911                                 }
912 #endif
913                                 endfor_nexthops(fi);
914                                 continue;
915
916                         default:
917                                 printk(KERN_DEBUG "impossible 102\n");
918                                 return -EINVAL;
919                         }
920                 }
921                 return err;
922         }
923         return 1;
924
925 out_fill_res:
926         res->prefixlen = prefixlen;
927         res->nh_sel = nh_sel;
928         res->type = fa->fa_type;
929         res->scope = fa->fa_scope;
930         res->fi = fa->fa_info;
931         atomic_inc(&res->fi->fib_clntref);
932         return 0;
933 }
934
935 /* Find an appropriate source address for this destination */
936
937 __be32 __fib_res_prefsrc(struct fib_result *res)
938 {
939         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940 }
941
942 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
943                   u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
944                   struct fib_info *fi, unsigned int flags)
945 {
946         struct nlmsghdr *nlh;
947         struct rtmsg *rtm;
948
949         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
950         if (nlh == NULL)
951                 return -EMSGSIZE;
952
953         rtm = nlmsg_data(nlh);
954         rtm->rtm_family = AF_INET;
955         rtm->rtm_dst_len = dst_len;
956         rtm->rtm_src_len = 0;
957         rtm->rtm_tos = tos;
958         rtm->rtm_table = tb_id;
959         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
960         rtm->rtm_type = type;
961         rtm->rtm_flags = fi->fib_flags;
962         rtm->rtm_scope = scope;
963         rtm->rtm_protocol = fi->fib_protocol;
964
965         if (rtm->rtm_dst_len)
966                 NLA_PUT_BE32(skb, RTA_DST, dst);
967
968         if (fi->fib_priority)
969                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
970
971         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
972                 goto nla_put_failure;
973
974         if (fi->fib_prefsrc)
975                 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
976
977         if (fi->fib_nhs == 1) {
978                 if (fi->fib_nh->nh_gw)
979                         NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
980
981                 if (fi->fib_nh->nh_oif)
982                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
983 #ifdef CONFIG_NET_CLS_ROUTE
984                 if (fi->fib_nh[0].nh_tclassid)
985                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
986 #endif
987         }
988 #ifdef CONFIG_IP_ROUTE_MULTIPATH
989         if (fi->fib_nhs > 1) {
990                 struct rtnexthop *rtnh;
991                 struct nlattr *mp;
992
993                 mp = nla_nest_start(skb, RTA_MULTIPATH);
994                 if (mp == NULL)
995                         goto nla_put_failure;
996
997                 for_nexthops(fi) {
998                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
999                         if (rtnh == NULL)
1000                                 goto nla_put_failure;
1001
1002                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1003                         rtnh->rtnh_hops = nh->nh_weight - 1;
1004                         rtnh->rtnh_ifindex = nh->nh_oif;
1005
1006                         if (nh->nh_gw)
1007                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1008 #ifdef CONFIG_NET_CLS_ROUTE
1009                         if (nh->nh_tclassid)
1010                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1011 #endif
1012                         /* length of rtnetlink header + attributes */
1013                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1014                 } endfor_nexthops(fi);
1015
1016                 nla_nest_end(skb, mp);
1017         }
1018 #endif
1019         return nlmsg_end(skb, nlh);
1020
1021 nla_put_failure:
1022         nlmsg_cancel(skb, nlh);
1023         return -EMSGSIZE;
1024 }
1025
1026 /*
1027    Update the FIB if:
1028    - a local address disappeared -> we must delete all the entries
1029      referring to it.
1030    - a device went down -> we must shut down all nexthops going via it.
1031  */
1032
1033 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1034 {
1035         int ret = 0;
1036         int scope = RT_SCOPE_NOWHERE;
1037
1038         if (force)
1039                 scope = -1;
1040
1041         if (local && fib_info_laddrhash) {
1042                 unsigned int hash = fib_laddr_hashfn(local);
1043                 struct hlist_head *head = &fib_info_laddrhash[hash];
1044                 struct hlist_node *node;
1045                 struct fib_info *fi;
1046
1047                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1048                         if (fi->fib_prefsrc == local) {
1049                                 fi->fib_flags |= RTNH_F_DEAD;
1050                                 ret++;
1051                         }
1052                 }
1053         }
1054
1055         if (dev) {
1056                 struct fib_info *prev_fi = NULL;
1057                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1058                 struct hlist_head *head = &fib_info_devhash[hash];
1059                 struct hlist_node *node;
1060                 struct fib_nh *nh;
1061
1062                 hlist_for_each_entry(nh, node, head, nh_hash) {
1063                         struct fib_info *fi = nh->nh_parent;
1064                         int dead;
1065
1066                         BUG_ON(!fi->fib_nhs);
1067                         if (nh->nh_dev != dev || fi == prev_fi)
1068                                 continue;
1069                         prev_fi = fi;
1070                         dead = 0;
1071                         change_nexthops(fi) {
1072                                 if (nh->nh_flags&RTNH_F_DEAD)
1073                                         dead++;
1074                                 else if (nh->nh_dev == dev &&
1075                                          nh->nh_scope != scope) {
1076                                         nh->nh_flags |= RTNH_F_DEAD;
1077 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1078                                         spin_lock_bh(&fib_multipath_lock);
1079                                         fi->fib_power -= nh->nh_power;
1080                                         nh->nh_power = 0;
1081                                         spin_unlock_bh(&fib_multipath_lock);
1082 #endif
1083                                         dead++;
1084                                 }
1085 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1086                                 if (force > 1 && nh->nh_dev == dev) {
1087                                         dead = fi->fib_nhs;
1088                                         break;
1089                                 }
1090 #endif
1091                         } endfor_nexthops(fi)
1092                         if (dead == fi->fib_nhs) {
1093                                 fi->fib_flags |= RTNH_F_DEAD;
1094                                 ret++;
1095                         }
1096                 }
1097         }
1098
1099         return ret;
1100 }
1101
1102 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1103
1104 /*
1105    A dead device goes up. We wake up its dead nexthops.
1106    This makes sense only for multipath routes.
1107  */
1108
1109 int fib_sync_up(struct net_device *dev)
1110 {
1111         struct fib_info *prev_fi;
1112         unsigned int hash;
1113         struct hlist_head *head;
1114         struct hlist_node *node;
1115         struct fib_nh *nh;
1116         int ret;
1117
1118         if (!(dev->flags&IFF_UP))
1119                 return 0;
1120
1121         prev_fi = NULL;
1122         hash = fib_devindex_hashfn(dev->ifindex);
1123         head = &fib_info_devhash[hash];
1124         ret = 0;
1125
1126         hlist_for_each_entry(nh, node, head, nh_hash) {
1127                 struct fib_info *fi = nh->nh_parent;
1128                 int alive;
1129
1130                 BUG_ON(!fi->fib_nhs);
1131                 if (nh->nh_dev != dev || fi == prev_fi)
1132                         continue;
1133
1134                 prev_fi = fi;
1135                 alive = 0;
1136                 change_nexthops(fi) {
1137                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1138                                 alive++;
1139                                 continue;
1140                         }
1141                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1142                                 continue;
1143                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1144                                 continue;
1145                         alive++;
1146                         spin_lock_bh(&fib_multipath_lock);
1147                         nh->nh_power = 0;
1148                         nh->nh_flags &= ~RTNH_F_DEAD;
1149                         spin_unlock_bh(&fib_multipath_lock);
1150                 } endfor_nexthops(fi)
1151
1152                 if (alive > 0) {
1153                         fi->fib_flags &= ~RTNH_F_DEAD;
1154                         ret++;
1155                 }
1156         }
1157
1158         return ret;
1159 }
1160
1161 /*
1162    The algorithm is suboptimal, but it provides really
1163    fair weighted route distribution.
1164  */
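
/*
 * Worked example (hypothetical weights): with two alive nexthops of
 * weight 2 and 1, a refill sets fib_power = 3 and nh_power = {2, 1}.
 * Each selection draws w in [0..fib_power-1] (approximated with
 * jiffies below), subtracts nh_power values from w until it drops to
 * <= 0, then charges the chosen nexthop by decrementing its nh_power
 * and fib_power.  Within one refill cycle of three selections the
 * first nexthop is therefore picked exactly twice and the second
 * exactly once, i.e. traffic splits 2:1.
 */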
1165
1166 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1167 {
1168         struct fib_info *fi = res->fi;
1169         int w;
1170
1171         spin_lock_bh(&fib_multipath_lock);
1172         if (fi->fib_power <= 0) {
1173                 int power = 0;
1174                 change_nexthops(fi) {
1175                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1176                                 power += nh->nh_weight;
1177                                 nh->nh_power = nh->nh_weight;
1178                         }
1179                 } endfor_nexthops(fi);
1180                 fi->fib_power = power;
1181                 if (power <= 0) {
1182                         spin_unlock_bh(&fib_multipath_lock);
1183                         /* Race condition: route has just become dead. */
1184                         res->nh_sel = 0;
1185                         return;
1186                 }
1187         }
1188
1189
1190         /* w should be a random number in [0..fi->fib_power-1];
1191            using jiffies below is a pretty bad approximation.
1192          */
1193
1194         w = jiffies % fi->fib_power;
1195
1196         change_nexthops(fi) {
1197                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1198                         if ((w -= nh->nh_power) <= 0) {
1199                                 nh->nh_power--;
1200                                 fi->fib_power--;
1201                                 res->nh_sel = nhsel;
1202                                 spin_unlock_bh(&fib_multipath_lock);
1203                                 return;
1204                         }
1205                 }
1206         } endfor_nexthops(fi);
1207
1208         /* Race condition: route has just become dead. */
1209         res->nh_sel = 0;
1210         spin_unlock_bh(&fib_multipath_lock);
1211 }
1212 #endif