]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv6/route.c
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
[linux-2.6-omap-h63xx.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/* Debug verbosity: RDBG() and RT6_TRACE() only emit output when this is >= 3. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing compiled out: both macros expand to nothing observable. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already have a nexthop
 * (via rt6_alloc_clone()); when zero such routes are returned as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124 {
125 }
126
127 static struct dst_ops ip6_dst_blackhole_ops = {
128         .family                 =       AF_INET6,
129         .protocol               =       __constant_htons(ETH_P_IPV6),
130         .destroy                =       ip6_dst_destroy,
131         .check                  =       ip6_dst_check,
132         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
133         .entry_size             =       sizeof(struct rt6_info),
134 };
135
136 struct rt6_info ip6_null_entry = {
137         .u = {
138                 .dst = {
139                         .__refcnt       = ATOMIC_INIT(1),
140                         .__use          = 1,
141                         .obsolete       = -1,
142                         .error          = -ENETUNREACH,
143                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
144                         .input          = ip6_pkt_discard,
145                         .output         = ip6_pkt_discard_out,
146                         .ops            = &ip6_dst_ops,
147                         .path           = (struct dst_entry*)&ip6_null_entry,
148                 }
149         },
150         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
151         .rt6i_metric    = ~(u32) 0,
152         .rt6i_ref       = ATOMIC_INIT(1),
153 };
154
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static reject route whose dst carries -EACCES; packets are handed to
 * ip6_pkt_prohibit()/ip6_pkt_prohibit_out().
 */
struct rt6_info ip6_prohibit_entry = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.ops		= &ip6_dst_ops,
		.path		= (struct dst_entry *)&ip6_prohibit_entry,
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static reject route whose dst carries -EINVAL; the same handler,
 * ip6_pkt_blk_hole(), is used for both input and output.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_blk_hole,
		.output		= ip6_pkt_blk_hole,
		.ops		= &ip6_dst_ops,
		.path		= (struct dst_entry *)&ip6_blk_hole_entry,
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
200
201 /* allocate dst with ip6_dst_ops */
202 static __inline__ struct rt6_info *ip6_dst_alloc(void)
203 {
204         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
205 }
206
207 static void ip6_dst_destroy(struct dst_entry *dst)
208 {
209         struct rt6_info *rt = (struct rt6_info *)dst;
210         struct inet6_dev *idev = rt->rt6i_idev;
211
212         if (idev != NULL) {
213                 rt->rt6i_idev = NULL;
214                 in6_dev_put(idev);
215         }
216 }
217
218 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219                            int how)
220 {
221         struct rt6_info *rt = (struct rt6_info *)dst;
222         struct inet6_dev *idev = rt->rt6i_idev;
223
224         if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
225                 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
226                 if (loopback_idev != NULL) {
227                         rt->rt6i_idev = loopback_idev;
228                         in6_dev_put(idev);
229                 }
230         }
231 }
232
233 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 {
235         return (rt->rt6i_flags & RTF_EXPIRES &&
236                 time_after(jiffies, rt->rt6i_expires));
237 }
238
239 static inline int rt6_need_strict(struct in6_addr *daddr)
240 {
241         return (ipv6_addr_type(daddr) &
242                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
243 }
244
245 /*
246  *      Route lookup. Any table->tb6_lock is implied.
247  */
248
249 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
250                                                     int oif,
251                                                     int strict)
252 {
253         struct rt6_info *local = NULL;
254         struct rt6_info *sprt;
255
256         if (oif) {
257                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
258                         struct net_device *dev = sprt->rt6i_dev;
259                         if (dev->ifindex == oif)
260                                 return sprt;
261                         if (dev->flags & IFF_LOOPBACK) {
262                                 if (sprt->rt6i_idev == NULL ||
263                                     sprt->rt6i_idev->dev->ifindex != oif) {
264                                         if (strict && oif)
265                                                 continue;
266                                         if (local && (!oif ||
267                                                       local->rt6i_idev->dev->ifindex == oif))
268                                                 continue;
269                                 }
270                                 local = sprt;
271                         }
272                 }
273
274                 if (local)
275                         return local;
276
277                 if (strict)
278                         return &ip6_null_entry;
279         }
280         return rt;
281 }
282
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in a
 * NUD_VALID state and the device's rtr_probe_interval has elapsed since
 * neigh->updated, stamp neigh->updated and send a Neighbour Solicitation
 * to the solicited-node multicast address of the neighbour.
 *
 * Does nothing for a NULL @rt or a route without a nexthop.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr *target;
	struct in6_addr mcaddr;
	int due;

	neigh = rt ? rt->rt6i_nexthop : NULL;
	/* cheap unlocked test first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation is sent after the lock has been dropped */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
318
319 /*
320  * Default Router Selection (RFC 2461 6.3.6)
321  */
322 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
323 {
324         struct net_device *dev = rt->rt6i_dev;
325         if (!oif || dev->ifindex == oif)
326                 return 2;
327         if ((dev->flags & IFF_LOOPBACK) &&
328             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329                 return 1;
330         return 0;
331 }
332
333 static inline int rt6_check_neigh(struct rt6_info *rt)
334 {
335         struct neighbour *neigh = rt->rt6i_nexthop;
336         int m = 0;
337         if (rt->rt6i_flags & RTF_NONEXTHOP ||
338             !(rt->rt6i_flags & RTF_GATEWAY))
339                 m = 1;
340         else if (neigh) {
341                 read_lock_bh(&neigh->lock);
342                 if (neigh->nud_state & NUD_VALID)
343                         m = 2;
344                 else if (!(neigh->nud_state & NUD_FAILED))
345                         m = 1;
346                 read_unlock_bh(&neigh->lock);
347         }
348         return m;
349 }
350
351 static int rt6_score_route(struct rt6_info *rt, int oif,
352                            int strict)
353 {
354         int m, n;
355
356         m = rt6_check_dev(rt, oif);
357         if (!m && (strict & RT6_LOOKUP_F_IFACE))
358                 return -1;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 #endif
362         n = rt6_check_neigh(rt);
363         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
364                 return -1;
365         return m;
366 }
367
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369                                    int *mpri, struct rt6_info *match)
370 {
371         int m;
372
373         if (rt6_check_expired(rt))
374                 goto out;
375
376         m = rt6_score_route(rt, oif, strict);
377         if (m < 0)
378                 goto out;
379
380         if (m > *mpri) {
381                 if (strict & RT6_LOOKUP_F_REACHABLE)
382                         rt6_probe(match);
383                 *mpri = m;
384                 match = rt;
385         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386                 rt6_probe(rt);
387         }
388
389 out:
390         return match;
391 }
392
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394                                      struct rt6_info *rr_head,
395                                      u32 metric, int oif, int strict)
396 {
397         struct rt6_info *rt, *match;
398         int mpri = -1;
399
400         match = NULL;
401         for (rt = rr_head; rt && rt->rt6i_metric == metric;
402              rt = rt->u.dst.rt6_next)
403                 match = find_match(rt, oif, strict, &mpri, match);
404         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405              rt = rt->u.dst.rt6_next)
406                 match = find_match(rt, oif, strict, &mpri, match);
407
408         return match;
409 }
410
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 {
413         struct rt6_info *match, *rt0;
414
415         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416                   __FUNCTION__, fn->leaf, oif);
417
418         rt0 = fn->rr_ptr;
419         if (!rt0)
420                 fn->rr_ptr = rt0 = fn->leaf;
421
422         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
423
424         if (!match &&
425             (strict & RT6_LOOKUP_F_REACHABLE)) {
426                 struct rt6_info *next = rt0->u.dst.rt6_next;
427
428                 /* no entries matched; do round-robin */
429                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430                         next = fn->leaf;
431
432                 if (next != rt0)
433                         fn->rr_ptr = next;
434         }
435
436         RT6_TRACE("%s() => %p\n",
437                   __FUNCTION__, match);
438
439         return (match ? match : &ip6_null_entry);
440 }
441
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information Option (RFC 4191).
 *
 * @dev:    device the option arrived on; its ifindex identifies the route
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * A lifetime of 0 deletes an existing matching route, 0xffffffff makes the
 * route permanent (RTF_EXPIRES cleared), anything else (re)arms the expiry
 * timer.  A missing route is created when the lifetime is non-zero.
 *
 * Returns -EINVAL when the option is too short, its length/prefix_len
 * combination is inconsistent, or it carries the reserved preference
 * value; 0 otherwise (including when the route could not be added).
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, section 2.3: if the Reserved (10) preference value is
	 * received, the Route Information Option MUST be ignored.  Do not
	 * silently treat it as a medium-preference route.
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* full 128-bit prefix present: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* existing route: refresh the preference bits only */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* drop the reference obtained from the get/add helper */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
519
/*
 * BACKTRACK(saddr) - climb the fib tree after a failed selection.
 *
 * Must be expanded in a function that has local variables `rt` and `fn`
 * and labels `out` and `restart`.  When `rt` is &ip6_null_entry it walks
 * from `fn` towards the root: at each step it moves to the parent, or, if
 * the parent has a subtree other than `fn`, to the result of looking
 * @saddr up in that subtree.  It jumps to `restart` at the first node
 * flagged RTN_RTINFO and to `out` when a node flagged RTN_TL_ROOT is
 * reached.  When `rt` is anything else the macro does nothing.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
537
538 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539                                              struct flowi *fl, int flags)
540 {
541         struct fib6_node *fn;
542         struct rt6_info *rt;
543
544         read_lock_bh(&table->tb6_lock);
545         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546 restart:
547         rt = fn->leaf;
548         rt = rt6_device_match(rt, fl->oif, flags);
549         BACKTRACK(&fl->fl6_src);
550 out:
551         dst_hold(&rt->u.dst);
552         read_unlock_bh(&table->tb6_lock);
553
554         rt->u.dst.lastuse = jiffies;
555         rt->u.dst.__use++;
556
557         return rt;
558
559 }
560
561 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562                             int oif, int strict)
563 {
564         struct flowi fl = {
565                 .oif = oif,
566                 .nl_u = {
567                         .ip6_u = {
568                                 .daddr = *daddr,
569                         },
570                 },
571         };
572         struct dst_entry *dst;
573         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574
575         if (saddr) {
576                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577                 flags |= RT6_LOOKUP_F_HAS_SADDR;
578         }
579
580         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581         if (dst->error == 0)
582                 return (struct rt6_info *) dst;
583
584         dst_release(dst);
585
586         return NULL;
587 }
588
589 EXPORT_SYMBOL(rt6_lookup);
590
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592    It takes new route entry, the addition fails by any reason the
593    route is freed. In any case, if caller does not hold it, it may
594    be destroyed.
595  */
596
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599         int err;
600         struct fib6_table *table;
601
602         table = rt->rt6i_table;
603         write_lock_bh(&table->tb6_lock);
604         err = fib6_add(&table->tb6_root, rt, info);
605         write_unlock_bh(&table->tb6_lock);
606
607         return err;
608 }
609
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612         return __ip6_ins_rt(rt, NULL);
613 }
614
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616                                       struct in6_addr *saddr)
617 {
618         struct rt6_info *rt;
619
620         /*
621          *      Clone the route.
622          */
623
624         rt = ip6_rt_copy(ort);
625
626         if (rt) {
627                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628                         if (rt->rt6i_dst.plen != 128 &&
629                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630                                 rt->rt6i_flags |= RTF_ANYCAST;
631                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632                 }
633
634                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635                 rt->rt6i_dst.plen = 128;
636                 rt->rt6i_flags |= RTF_CACHE;
637                 rt->u.dst.flags |= DST_HOST;
638
639 #ifdef CONFIG_IPV6_SUBTREES
640                 if (rt->rt6i_src.plen && saddr) {
641                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642                         rt->rt6i_src.plen = 128;
643                 }
644 #endif
645
646                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647
648         }
649
650         return rt;
651 }
652
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655         struct rt6_info *rt = ip6_rt_copy(ort);
656         if (rt) {
657                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658                 rt->rt6i_dst.plen = 128;
659                 rt->rt6i_flags |= RTF_CACHE;
660                 rt->u.dst.flags |= DST_HOST;
661                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662         }
663         return rt;
664 }
665
666 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
667                                             struct flowi *fl, int flags)
668 {
669         struct fib6_node *fn;
670         struct rt6_info *rt, *nrt;
671         int strict = 0;
672         int attempts = 3;
673         int err;
674         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675
676         strict |= flags & RT6_LOOKUP_F_IFACE;
677
678 relookup:
679         read_lock_bh(&table->tb6_lock);
680
681 restart_2:
682         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683
684 restart:
685         rt = rt6_select(fn, oif, strict | reachable);
686         BACKTRACK(&fl->fl6_src);
687         if (rt == &ip6_null_entry ||
688             rt->rt6i_flags & RTF_CACHE)
689                 goto out;
690
691         dst_hold(&rt->u.dst);
692         read_unlock_bh(&table->tb6_lock);
693
694         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
695                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
696         else {
697 #if CLONE_OFFLINK_ROUTE
698                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
699 #else
700                 goto out2;
701 #endif
702         }
703
704         dst_release(&rt->u.dst);
705         rt = nrt ? : &ip6_null_entry;
706
707         dst_hold(&rt->u.dst);
708         if (nrt) {
709                 err = ip6_ins_rt(nrt);
710                 if (!err)
711                         goto out2;
712         }
713
714         if (--attempts <= 0)
715                 goto out2;
716
717         /*
718          * Race condition! In the gap, when table->tb6_lock was
719          * released someone could insert this route.  Relookup.
720          */
721         dst_release(&rt->u.dst);
722         goto relookup;
723
724 out:
725         if (reachable) {
726                 reachable = 0;
727                 goto restart_2;
728         }
729         dst_hold(&rt->u.dst);
730         read_unlock_bh(&table->tb6_lock);
731 out2:
732         rt->u.dst.lastuse = jiffies;
733         rt->u.dst.__use++;
734
735         return rt;
736 }
737
738 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
739                                             struct flowi *fl, int flags)
740 {
741         return ip6_pol_route(table, fl->iif, fl, flags);
742 }
743
744 void ip6_route_input(struct sk_buff *skb)
745 {
746         struct ipv6hdr *iph = ipv6_hdr(skb);
747         int flags = RT6_LOOKUP_F_HAS_SADDR;
748         struct flowi fl = {
749                 .iif = skb->dev->ifindex,
750                 .nl_u = {
751                         .ip6_u = {
752                                 .daddr = iph->daddr,
753                                 .saddr = iph->saddr,
754                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
755                         },
756                 },
757                 .mark = skb->mark,
758                 .proto = iph->nexthdr,
759         };
760
761         if (rt6_need_strict(&iph->daddr))
762                 flags |= RT6_LOOKUP_F_IFACE;
763
764         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
765 }
766
767 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
768                                              struct flowi *fl, int flags)
769 {
770         return ip6_pol_route(table, fl->oif, fl, flags);
771 }
772
773 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
774 {
775         int flags = 0;
776
777         if (rt6_need_strict(&fl->fl6_dst))
778                 flags |= RT6_LOOKUP_F_IFACE;
779
780         if (!ipv6_addr_any(&fl->fl6_src))
781                 flags |= RT6_LOOKUP_F_HAS_SADDR;
782
783         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
784 }
785
786 EXPORT_SYMBOL(ip6_route_output);
787
/*
 * Input/output handler installed by ip6_dst_blackhole(): free the packet
 * and report success.
 */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
793
794 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795 {
796         struct rt6_info *ort = (struct rt6_info *) *dstp;
797         struct rt6_info *rt = (struct rt6_info *)
798                 dst_alloc(&ip6_dst_blackhole_ops);
799         struct dst_entry *new = NULL;
800
801         if (rt) {
802                 new = &rt->u.dst;
803
804                 atomic_set(&new->__refcnt, 1);
805                 new->__use = 1;
806                 new->input = ip6_blackhole_output;
807                 new->output = ip6_blackhole_output;
808
809                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810                 new->dev = ort->u.dst.dev;
811                 if (new->dev)
812                         dev_hold(new->dev);
813                 rt->rt6i_idev = ort->rt6i_idev;
814                 if (rt->rt6i_idev)
815                         in6_dev_hold(rt->rt6i_idev);
816                 rt->rt6i_expires = 0;
817
818                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
819                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
820                 rt->rt6i_metric = 0;
821
822                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
823 #ifdef CONFIG_IPV6_SUBTREES
824                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
825 #endif
826
827                 dst_free(new);
828         }
829
830         dst_release(*dstp);
831         *dstp = new;
832         return (new ? 0 : -ENOMEM);
833 }
834 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
835
836 /*
837  *      Destination cache support functions
838  */
839
840 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841 {
842         struct rt6_info *rt;
843
844         rt = (struct rt6_info *) dst;
845
846         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
847                 return dst;
848
849         return NULL;
850 }
851
852 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853 {
854         struct rt6_info *rt = (struct rt6_info *) dst;
855
856         if (rt) {
857                 if (rt->rt6i_flags & RTF_CACHE)
858                         ip6_del_rt(rt);
859                 else
860                         dst_release(dst);
861         }
862         return NULL;
863 }
864
865 static void ip6_link_failure(struct sk_buff *skb)
866 {
867         struct rt6_info *rt;
868
869         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870
871         rt = (struct rt6_info *) skb->dst;
872         if (rt) {
873                 if (rt->rt6i_flags&RTF_CACHE) {
874                         dst_set_expires(&rt->u.dst, 0);
875                         rt->rt6i_flags |= RTF_EXPIRES;
876                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877                         rt->rt6i_node->fn_sernum = -1;
878         }
879 }
880
881 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882 {
883         struct rt6_info *rt6 = (struct rt6_info*)dst;
884
885         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886                 rt6->rt6i_flags |= RTF_MODIFIED;
887                 if (mtu < IPV6_MIN_MTU) {
888                         mtu = IPV6_MIN_MTU;
889                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890                 }
891                 dst->metrics[RTAX_MTU-1] = mtu;
892                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
893         }
894 }
895
896 static int ipv6_get_mtu(struct net_device *dev);
897
898 static inline unsigned int ipv6_advmss(unsigned int mtu)
899 {
900         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901
902         if (mtu < ip6_rt_min_advmss)
903                 mtu = ip6_rt_min_advmss;
904
905         /*
906          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908          * IPV6_MAXPLEN is also valid and means: "any MSS,
909          * rely only on pmtu discovery"
910          */
911         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
912                 mtu = IPV6_MAXPLEN;
913         return mtu;
914 }
915
916 static struct dst_entry *ndisc_dst_gc_list;
917 static DEFINE_SPINLOCK(ndisc_lock);
918
919 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
920                                   struct neighbour *neigh,
921                                   struct in6_addr *addr,
922                                   int (*output)(struct sk_buff *))
923 {
924         struct rt6_info *rt;
925         struct inet6_dev *idev = in6_dev_get(dev);
926
927         if (unlikely(idev == NULL))
928                 return NULL;
929
930         rt = ip6_dst_alloc();
931         if (unlikely(rt == NULL)) {
932                 in6_dev_put(idev);
933                 goto out;
934         }
935
936         dev_hold(dev);
937         if (neigh)
938                 neigh_hold(neigh);
939         else
940                 neigh = ndisc_get_neigh(dev, addr);
941
942         rt->rt6i_dev      = dev;
943         rt->rt6i_idev     = idev;
944         rt->rt6i_nexthop  = neigh;
945         atomic_set(&rt->u.dst.__refcnt, 1);
946         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
948         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
949         rt->u.dst.output  = output;
950
951 #if 0   /* there's no chance to use these for ndisc */
952         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
953                                 ? DST_HOST
954                                 : 0;
955         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956         rt->rt6i_dst.plen = 128;
957 #endif
958
959         spin_lock_bh(&ndisc_lock);
960         rt->u.dst.next = ndisc_dst_gc_list;
961         ndisc_dst_gc_list = &rt->u.dst;
962         spin_unlock_bh(&ndisc_lock);
963
964         fib6_force_start_gc();
965
966 out:
967         return &rt->u.dst;
968 }
969
970 int ndisc_dst_gc(int *more)
971 {
972         struct dst_entry *dst, *next, **pprev;
973         int freed;
974
975         next = NULL;
976         freed = 0;
977
978         spin_lock_bh(&ndisc_lock);
979         pprev = &ndisc_dst_gc_list;
980
981         while ((dst = *pprev) != NULL) {
982                 if (!atomic_read(&dst->__refcnt)) {
983                         *pprev = dst->next;
984                         dst_free(dst);
985                         freed++;
986                 } else {
987                         pprev = &dst->next;
988                         (*more)++;
989                 }
990         }
991
992         spin_unlock_bh(&ndisc_lock);
993
994         return freed;
995 }
996
997 static int ip6_dst_gc(void)
998 {
999         static unsigned expire = 30*HZ;
1000         static unsigned long last_gc;
1001         unsigned long now = jiffies;
1002
1003         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1004             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1005                 goto out;
1006
1007         expire++;
1008         fib6_run_gc(expire);
1009         last_gc = now;
1010         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1011                 expire = ip6_rt_gc_timeout>>1;
1012
1013 out:
1014         expire -= expire>>ip6_rt_gc_elasticity;
1015         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1016 }
1017
1018 /* Clean host part of a prefix. Not necessary in radix tree,
1019    but results in cleaner routing tables.
1020
1021    Remove it only when all the things will work!
1022  */
1023
1024 static int ipv6_get_mtu(struct net_device *dev)
1025 {
1026         int mtu = IPV6_MIN_MTU;
1027         struct inet6_dev *idev;
1028
1029         idev = in6_dev_get(dev);
1030         if (idev) {
1031                 mtu = idev->cnf.mtu6;
1032                 in6_dev_put(idev);
1033         }
1034         return mtu;
1035 }
1036
1037 int ipv6_get_hoplimit(struct net_device *dev)
1038 {
1039         int hoplimit = ipv6_devconf.hop_limit;
1040         struct inet6_dev *idev;
1041
1042         idev = in6_dev_get(dev);
1043         if (idev) {
1044                 hoplimit = idev->cnf.hop_limit;
1045                 in6_dev_put(idev);
1046         }
1047         return hoplimit;
1048 }
1049
1050 /*
1051  *
1052  */
1053
1054 int ip6_route_add(struct fib6_config *cfg)
1055 {
1056         int err;
1057         struct rt6_info *rt = NULL;
1058         struct net_device *dev = NULL;
1059         struct inet6_dev *idev = NULL;
1060         struct fib6_table *table;
1061         int addr_type;
1062
1063         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1064                 return -EINVAL;
1065 #ifndef CONFIG_IPV6_SUBTREES
1066         if (cfg->fc_src_len)
1067                 return -EINVAL;
1068 #endif
1069         if (cfg->fc_ifindex) {
1070                 err = -ENODEV;
1071                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1072                 if (!dev)
1073                         goto out;
1074                 idev = in6_dev_get(dev);
1075                 if (!idev)
1076                         goto out;
1077         }
1078
1079         if (cfg->fc_metric == 0)
1080                 cfg->fc_metric = IP6_RT_PRIO_USER;
1081
1082         table = fib6_new_table(cfg->fc_table);
1083         if (table == NULL) {
1084                 err = -ENOBUFS;
1085                 goto out;
1086         }
1087
1088         rt = ip6_dst_alloc();
1089
1090         if (rt == NULL) {
1091                 err = -ENOMEM;
1092                 goto out;
1093         }
1094
1095         rt->u.dst.obsolete = -1;
1096         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1097
1098         if (cfg->fc_protocol == RTPROT_UNSPEC)
1099                 cfg->fc_protocol = RTPROT_BOOT;
1100         rt->rt6i_protocol = cfg->fc_protocol;
1101
1102         addr_type = ipv6_addr_type(&cfg->fc_dst);
1103
1104         if (addr_type & IPV6_ADDR_MULTICAST)
1105                 rt->u.dst.input = ip6_mc_input;
1106         else
1107                 rt->u.dst.input = ip6_forward;
1108
1109         rt->u.dst.output = ip6_output;
1110
1111         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112         rt->rt6i_dst.plen = cfg->fc_dst_len;
1113         if (rt->rt6i_dst.plen == 128)
1114                rt->u.dst.flags = DST_HOST;
1115
1116 #ifdef CONFIG_IPV6_SUBTREES
1117         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118         rt->rt6i_src.plen = cfg->fc_src_len;
1119 #endif
1120
1121         rt->rt6i_metric = cfg->fc_metric;
1122
1123         /* We cannot add true routes via loopback here,
1124            they would result in kernel looping; promote them to reject routes
1125          */
1126         if ((cfg->fc_flags & RTF_REJECT) ||
1127             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128                 /* hold loopback dev/idev if we haven't done so. */
1129                 if (dev != init_net.loopback_dev) {
1130                         if (dev) {
1131                                 dev_put(dev);
1132                                 in6_dev_put(idev);
1133                         }
1134                         dev = init_net.loopback_dev;
1135                         dev_hold(dev);
1136                         idev = in6_dev_get(dev);
1137                         if (!idev) {
1138                                 err = -ENODEV;
1139                                 goto out;
1140                         }
1141                 }
1142                 rt->u.dst.output = ip6_pkt_discard_out;
1143                 rt->u.dst.input = ip6_pkt_discard;
1144                 rt->u.dst.error = -ENETUNREACH;
1145                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146                 goto install_route;
1147         }
1148
1149         if (cfg->fc_flags & RTF_GATEWAY) {
1150                 struct in6_addr *gw_addr;
1151                 int gwa_type;
1152
1153                 gw_addr = &cfg->fc_gateway;
1154                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1155                 gwa_type = ipv6_addr_type(gw_addr);
1156
1157                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158                         struct rt6_info *grt;
1159
1160                         /* IPv6 strictly inhibits using not link-local
1161                            addresses as nexthop address.
1162                            Otherwise, router will not able to send redirects.
1163                            It is very good, but in some (rare!) circumstances
1164                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1165                            some exceptions. --ANK
1166                          */
1167                         err = -EINVAL;
1168                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1169                                 goto out;
1170
1171                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1172
1173                         err = -EHOSTUNREACH;
1174                         if (grt == NULL)
1175                                 goto out;
1176                         if (dev) {
1177                                 if (dev != grt->rt6i_dev) {
1178                                         dst_release(&grt->u.dst);
1179                                         goto out;
1180                                 }
1181                         } else {
1182                                 dev = grt->rt6i_dev;
1183                                 idev = grt->rt6i_idev;
1184                                 dev_hold(dev);
1185                                 in6_dev_hold(grt->rt6i_idev);
1186                         }
1187                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1188                                 err = 0;
1189                         dst_release(&grt->u.dst);
1190
1191                         if (err)
1192                                 goto out;
1193                 }
1194                 err = -EINVAL;
1195                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196                         goto out;
1197         }
1198
1199         err = -ENODEV;
1200         if (dev == NULL)
1201                 goto out;
1202
1203         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1204                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205                 if (IS_ERR(rt->rt6i_nexthop)) {
1206                         err = PTR_ERR(rt->rt6i_nexthop);
1207                         rt->rt6i_nexthop = NULL;
1208                         goto out;
1209                 }
1210         }
1211
1212         rt->rt6i_flags = cfg->fc_flags;
1213
1214 install_route:
1215         if (cfg->fc_mx) {
1216                 struct nlattr *nla;
1217                 int remaining;
1218
1219                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1220                         int type = nla_type(nla);
1221
1222                         if (type) {
1223                                 if (type > RTAX_MAX) {
1224                                         err = -EINVAL;
1225                                         goto out;
1226                                 }
1227
1228                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1229                         }
1230                 }
1231         }
1232
1233         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235         if (!rt->u.dst.metrics[RTAX_MTU-1])
1236                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1238                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1239         rt->u.dst.dev = dev;
1240         rt->rt6i_idev = idev;
1241         rt->rt6i_table = table;
1242         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1243
1244 out:
1245         if (dev)
1246                 dev_put(dev);
1247         if (idev)
1248                 in6_dev_put(idev);
1249         if (rt)
1250                 dst_free(&rt->u.dst);
1251         return err;
1252 }
1253
1254 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1255 {
1256         int err;
1257         struct fib6_table *table;
1258
1259         if (rt == &ip6_null_entry)
1260                 return -ENOENT;
1261
1262         table = rt->rt6i_table;
1263         write_lock_bh(&table->tb6_lock);
1264
1265         err = fib6_del(rt, info);
1266         dst_release(&rt->u.dst);
1267
1268         write_unlock_bh(&table->tb6_lock);
1269
1270         return err;
1271 }
1272
1273 int ip6_del_rt(struct rt6_info *rt)
1274 {
1275         return __ip6_del_rt(rt, NULL);
1276 }
1277
1278 static int ip6_route_del(struct fib6_config *cfg)
1279 {
1280         struct fib6_table *table;
1281         struct fib6_node *fn;
1282         struct rt6_info *rt;
1283         int err = -ESRCH;
1284
1285         table = fib6_get_table(cfg->fc_table);
1286         if (table == NULL)
1287                 return err;
1288
1289         read_lock_bh(&table->tb6_lock);
1290
1291         fn = fib6_locate(&table->tb6_root,
1292                          &cfg->fc_dst, cfg->fc_dst_len,
1293                          &cfg->fc_src, cfg->fc_src_len);
1294
1295         if (fn) {
1296                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1297                         if (cfg->fc_ifindex &&
1298                             (rt->rt6i_dev == NULL ||
1299                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1300                                 continue;
1301                         if (cfg->fc_flags & RTF_GATEWAY &&
1302                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1303                                 continue;
1304                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1305                                 continue;
1306                         dst_hold(&rt->u.dst);
1307                         read_unlock_bh(&table->tb6_lock);
1308
1309                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1310                 }
1311         }
1312         read_unlock_bh(&table->tb6_lock);
1313
1314         return err;
1315 }
1316
1317 /*
1318  *      Handle redirects
1319  */
1320 struct ip6rd_flowi {
1321         struct flowi fl;
1322         struct in6_addr gateway;
1323 };
1324
1325 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1326                                              struct flowi *fl,
1327                                              int flags)
1328 {
1329         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1330         struct rt6_info *rt;
1331         struct fib6_node *fn;
1332
1333         /*
1334          * Get the "current" route for this destination and
1335          * check if the redirect has come from approriate router.
1336          *
1337          * RFC 2461 specifies that redirects should only be
1338          * accepted if they come from the nexthop to the target.
1339          * Due to the way the routes are chosen, this notion
1340          * is a bit fuzzy and one might need to check all possible
1341          * routes.
1342          */
1343
1344         read_lock_bh(&table->tb6_lock);
1345         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1346 restart:
1347         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1348                 /*
1349                  * Current route is on-link; redirect is always invalid.
1350                  *
1351                  * Seems, previous statement is not true. It could
1352                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1353                  * But then router serving it might decide, that we should
1354                  * know truth 8)8) --ANK (980726).
1355                  */
1356                 if (rt6_check_expired(rt))
1357                         continue;
1358                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1359                         continue;
1360                 if (fl->oif != rt->rt6i_dev->ifindex)
1361                         continue;
1362                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1363                         continue;
1364                 break;
1365         }
1366
1367         if (!rt)
1368                 rt = &ip6_null_entry;
1369         BACKTRACK(&fl->fl6_src);
1370 out:
1371         dst_hold(&rt->u.dst);
1372
1373         read_unlock_bh(&table->tb6_lock);
1374
1375         return rt;
1376 };
1377
1378 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1379                                            struct in6_addr *src,
1380                                            struct in6_addr *gateway,
1381                                            struct net_device *dev)
1382 {
1383         int flags = RT6_LOOKUP_F_HAS_SADDR;
1384         struct ip6rd_flowi rdfl = {
1385                 .fl = {
1386                         .oif = dev->ifindex,
1387                         .nl_u = {
1388                                 .ip6_u = {
1389                                         .daddr = *dest,
1390                                         .saddr = *src,
1391                                 },
1392                         },
1393                 },
1394                 .gateway = *gateway,
1395         };
1396
1397         if (rt6_need_strict(dest))
1398                 flags |= RT6_LOOKUP_F_IFACE;
1399
1400         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1401 }
1402
1403 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1404                   struct in6_addr *saddr,
1405                   struct neighbour *neigh, u8 *lladdr, int on_link)
1406 {
1407         struct rt6_info *rt, *nrt = NULL;
1408         struct netevent_redirect netevent;
1409
1410         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1411
1412         if (rt == &ip6_null_entry) {
1413                 if (net_ratelimit())
1414                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1415                                "for redirect target\n");
1416                 goto out;
1417         }
1418
1419         /*
1420          *      We have finally decided to accept it.
1421          */
1422
1423         neigh_update(neigh, lladdr, NUD_STALE,
1424                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1425                      NEIGH_UPDATE_F_OVERRIDE|
1426                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1427                                      NEIGH_UPDATE_F_ISROUTER))
1428                      );
1429
1430         /*
1431          * Redirect received -> path was valid.
1432          * Look, redirects are sent only in response to data packets,
1433          * so that this nexthop apparently is reachable. --ANK
1434          */
1435         dst_confirm(&rt->u.dst);
1436
1437         /* Duplicate redirect: silently ignore. */
1438         if (neigh == rt->u.dst.neighbour)
1439                 goto out;
1440
1441         nrt = ip6_rt_copy(rt);
1442         if (nrt == NULL)
1443                 goto out;
1444
1445         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1446         if (on_link)
1447                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1448
1449         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1450         nrt->rt6i_dst.plen = 128;
1451         nrt->u.dst.flags |= DST_HOST;
1452
1453         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1454         nrt->rt6i_nexthop = neigh_clone(neigh);
1455         /* Reset pmtu, it may be better */
1456         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1457         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1458
1459         if (ip6_ins_rt(nrt))
1460                 goto out;
1461
1462         netevent.old = &rt->u.dst;
1463         netevent.new = &nrt->u.dst;
1464         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1465
1466         if (rt->rt6i_flags&RTF_CACHE) {
1467                 ip6_del_rt(rt);
1468                 return;
1469         }
1470
1471 out:
1472         dst_release(&rt->u.dst);
1473         return;
1474 }
1475
1476 /*
1477  *      Handle ICMP "packet too big" messages
1478  *      i.e. Path MTU discovery
1479  */
1480
1481 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1482                         struct net_device *dev, u32 pmtu)
1483 {
1484         struct rt6_info *rt, *nrt;
1485         int allfrag = 0;
1486
1487         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1488         if (rt == NULL)
1489                 return;
1490
1491         if (pmtu >= dst_mtu(&rt->u.dst))
1492                 goto out;
1493
1494         if (pmtu < IPV6_MIN_MTU) {
1495                 /*
1496                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1497                  * MTU (1280) and a fragment header should always be included
1498                  * after a node receiving Too Big message reporting PMTU is
1499                  * less than the IPv6 Minimum Link MTU.
1500                  */
1501                 pmtu = IPV6_MIN_MTU;
1502                 allfrag = 1;
1503         }
1504
1505         /* New mtu received -> path was valid.
1506            They are sent only in response to data packets,
1507            so that this nexthop apparently is reachable. --ANK
1508          */
1509         dst_confirm(&rt->u.dst);
1510
1511         /* Host route. If it is static, it would be better
1512            not to override it, but add new one, so that
1513            when cache entry will expire old pmtu
1514            would return automatically.
1515          */
1516         if (rt->rt6i_flags & RTF_CACHE) {
1517                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1518                 if (allfrag)
1519                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1520                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1521                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1522                 goto out;
1523         }
1524
1525         /* Network route.
1526            Two cases are possible:
1527            1. It is connected route. Action: COW
1528            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1529          */
1530         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1531                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1532         else
1533                 nrt = rt6_alloc_clone(rt, daddr);
1534
1535         if (nrt) {
1536                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1537                 if (allfrag)
1538                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1539
1540                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1541                  * happened within 5 mins, the recommended timer is 10 mins.
1542                  * Here this route expiration time is set to ip6_rt_mtu_expires
1543                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1544                  * and detecting PMTU increase will be automatically happened.
1545                  */
1546                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1547                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1548
1549                 ip6_ins_rt(nrt);
1550         }
1551 out:
1552         dst_release(&rt->u.dst);
1553 }
1554
1555 /*
1556  *      Misc support functions
1557  */
1558
1559 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1560 {
1561         struct rt6_info *rt = ip6_dst_alloc();
1562
1563         if (rt) {
1564                 rt->u.dst.input = ort->u.dst.input;
1565                 rt->u.dst.output = ort->u.dst.output;
1566
1567                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1568                 rt->u.dst.error = ort->u.dst.error;
1569                 rt->u.dst.dev = ort->u.dst.dev;
1570                 if (rt->u.dst.dev)
1571                         dev_hold(rt->u.dst.dev);
1572                 rt->rt6i_idev = ort->rt6i_idev;
1573                 if (rt->rt6i_idev)
1574                         in6_dev_hold(rt->rt6i_idev);
1575                 rt->u.dst.lastuse = jiffies;
1576                 rt->rt6i_expires = 0;
1577
1578                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1579                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1580                 rt->rt6i_metric = 0;
1581
1582                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1583 #ifdef CONFIG_IPV6_SUBTREES
1584                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1585 #endif
1586                 rt->rt6i_table = ort->rt6i_table;
1587         }
1588         return rt;
1589 }
1590
1591 #ifdef CONFIG_IPV6_ROUTE_INFO
1592 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1593                                            struct in6_addr *gwaddr, int ifindex)
1594 {
1595         struct fib6_node *fn;
1596         struct rt6_info *rt = NULL;
1597         struct fib6_table *table;
1598
1599         table = fib6_get_table(RT6_TABLE_INFO);
1600         if (table == NULL)
1601                 return NULL;
1602
1603         write_lock_bh(&table->tb6_lock);
1604         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1605         if (!fn)
1606                 goto out;
1607
1608         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1609                 if (rt->rt6i_dev->ifindex != ifindex)
1610                         continue;
1611                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1612                         continue;
1613                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1614                         continue;
1615                 dst_hold(&rt->u.dst);
1616                 break;
1617         }
1618 out:
1619         write_unlock_bh(&table->tb6_lock);
1620         return rt;
1621 }
1622
1623 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1624                                            struct in6_addr *gwaddr, int ifindex,
1625                                            unsigned pref)
1626 {
1627         struct fib6_config cfg = {
1628                 .fc_table       = RT6_TABLE_INFO,
1629                 .fc_metric      = 1024,
1630                 .fc_ifindex     = ifindex,
1631                 .fc_dst_len     = prefixlen,
1632                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1633                                   RTF_UP | RTF_PREF(pref),
1634         };
1635
1636         ipv6_addr_copy(&cfg.fc_dst, prefix);
1637         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1638
1639         /* We should treat it as a default route if prefix length is 0. */
1640         if (!prefixlen)
1641                 cfg.fc_flags |= RTF_DEFAULT;
1642
1643         ip6_route_add(&cfg);
1644
1645         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1646 }
1647 #endif
1648
1649 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1650 {
1651         struct rt6_info *rt;
1652         struct fib6_table *table;
1653
1654         table = fib6_get_table(RT6_TABLE_DFLT);
1655         if (table == NULL)
1656                 return NULL;
1657
1658         write_lock_bh(&table->tb6_lock);
1659         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1660                 if (dev == rt->rt6i_dev &&
1661                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1662                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1663                         break;
1664         }
1665         if (rt)
1666                 dst_hold(&rt->u.dst);
1667         write_unlock_bh(&table->tb6_lock);
1668         return rt;
1669 }
1670
1671 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1672                                      struct net_device *dev,
1673                                      unsigned int pref)
1674 {
1675         struct fib6_config cfg = {
1676                 .fc_table       = RT6_TABLE_DFLT,
1677                 .fc_metric      = 1024,
1678                 .fc_ifindex     = dev->ifindex,
1679                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1680                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1681         };
1682
1683         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1684
1685         ip6_route_add(&cfg);
1686
1687         return rt6_get_dflt_router(gwaddr, dev);
1688 }
1689
1690 void rt6_purge_dflt_routers(void)
1691 {
1692         struct rt6_info *rt;
1693         struct fib6_table *table;
1694
1695         /* NOTE: Keep consistent with rt6_get_dflt_router */
1696         table = fib6_get_table(RT6_TABLE_DFLT);
1697         if (table == NULL)
1698                 return;
1699
1700 restart:
1701         read_lock_bh(&table->tb6_lock);
1702         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1703                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1704                         dst_hold(&rt->u.dst);
1705                         read_unlock_bh(&table->tb6_lock);
1706                         ip6_del_rt(rt);
1707                         goto restart;
1708                 }
1709         }
1710         read_unlock_bh(&table->tb6_lock);
1711 }
1712
1713 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1714                                  struct fib6_config *cfg)
1715 {
1716         memset(cfg, 0, sizeof(*cfg));
1717
1718         cfg->fc_table = RT6_TABLE_MAIN;
1719         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1720         cfg->fc_metric = rtmsg->rtmsg_metric;
1721         cfg->fc_expires = rtmsg->rtmsg_info;
1722         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1723         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1724         cfg->fc_flags = rtmsg->rtmsg_flags;
1725
1726         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1727         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1728         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1729 }
1730
1731 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1732 {
1733         struct fib6_config cfg;
1734         struct in6_rtmsg rtmsg;
1735         int err;
1736
1737         switch(cmd) {
1738         case SIOCADDRT:         /* Add a route */
1739         case SIOCDELRT:         /* Delete a route */
1740                 if (!capable(CAP_NET_ADMIN))
1741                         return -EPERM;
1742                 err = copy_from_user(&rtmsg, arg,
1743                                      sizeof(struct in6_rtmsg));
1744                 if (err)
1745                         return -EFAULT;
1746
1747                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1748
1749                 rtnl_lock();
1750                 switch (cmd) {
1751                 case SIOCADDRT:
1752                         err = ip6_route_add(&cfg);
1753                         break;
1754                 case SIOCDELRT:
1755                         err = ip6_route_del(&cfg);
1756                         break;
1757                 default:
1758                         err = -EINVAL;
1759                 }
1760                 rtnl_unlock();
1761
1762                 return err;
1763         }
1764
1765         return -EINVAL;
1766 }
1767
1768 /*
1769  *      Drop the packet on the floor
1770  */
1771
1772 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1773                                int ipstats_mib_noroutes)
1774 {
1775         int type;
1776         switch (ipstats_mib_noroutes) {
1777         case IPSTATS_MIB_INNOROUTES:
1778                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1779                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1780                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1781                         break;
1782                 }
1783                 /* FALLTHROUGH */
1784         case IPSTATS_MIB_OUTNOROUTES:
1785                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1786                 break;
1787         }
1788         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1789         kfree_skb(skb);
1790         return 0;
1791 }
1792
1793 static int ip6_pkt_discard(struct sk_buff *skb)
1794 {
1795         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1796 }
1797
1798 static int ip6_pkt_discard_out(struct sk_buff *skb)
1799 {
1800         skb->dev = skb->dst->dev;
1801         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1802 }
1803
1804 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1805
1806 static int ip6_pkt_prohibit(struct sk_buff *skb)
1807 {
1808         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1809 }
1810
1811 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1812 {
1813         skb->dev = skb->dst->dev;
1814         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1815 }
1816
/*
 * Silently free @skb: no counter is touched and no ICMPv6 error is
 * generated.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1822
1823 #endif
1824
1825 /*
1826  *      Allocate a dst for local (unicast / anycast) address.
1827  */
1828
1829 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1830                                     const struct in6_addr *addr,
1831                                     int anycast)
1832 {
1833         struct rt6_info *rt = ip6_dst_alloc();
1834
1835         if (rt == NULL)
1836                 return ERR_PTR(-ENOMEM);
1837
1838         dev_hold(init_net.loopback_dev);
1839         in6_dev_hold(idev);
1840
1841         rt->u.dst.flags = DST_HOST;
1842         rt->u.dst.input = ip6_input;
1843         rt->u.dst.output = ip6_output;
1844         rt->rt6i_dev = init_net.loopback_dev;
1845         rt->rt6i_idev = idev;
1846         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1847         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1848         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1849         rt->u.dst.obsolete = -1;
1850
1851         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1852         if (anycast)
1853                 rt->rt6i_flags |= RTF_ANYCAST;
1854         else
1855                 rt->rt6i_flags |= RTF_LOCAL;
1856         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1857         if (rt->rt6i_nexthop == NULL) {
1858                 dst_free(&rt->u.dst);
1859                 return ERR_PTR(-ENOMEM);
1860         }
1861
1862         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1863         rt->rt6i_dst.plen = 128;
1864         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1865
1866         atomic_set(&rt->u.dst.__refcnt, 1);
1867
1868         return rt;
1869 }
1870
1871 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1872 {
1873         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1874             rt != &ip6_null_entry) {
1875                 RT6_TRACE("deleted by ifdown %p\n", rt);
1876                 return -1;
1877         }
1878         return 0;
1879 }
1880
1881 void rt6_ifdown(struct net_device *dev)
1882 {
1883         fib6_clean_all(fib6_ifdown, 0, dev);
1884 }
1885
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU of that device */
};
1891
1892 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1893 {
1894         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1895         struct inet6_dev *idev;
1896
1897         /* In IPv6 pmtu discovery is not optional,
1898            so that RTAX_MTU lock cannot disable it.
1899            We still use this lock to block changes
1900            caused by addrconf/ndisc.
1901         */
1902
1903         idev = __in6_dev_get(arg->dev);
1904         if (idev == NULL)
1905                 return 0;
1906
1907         /* For administrative MTU increase, there is no way to discover
1908            IPv6 PMTU increase, so PMTU increase should be updated here.
1909            Since RFC 1981 doesn't include administrative MTU increase
1910            update PMTU increase is a MUST. (i.e. jumbo frame)
1911          */
1912         /*
1913            If new MTU is less than route PMTU, this new MTU will be the
1914            lowest MTU in the path, update the route PMTU to reflect PMTU
1915            decreases; if new MTU is greater than route PMTU, and the
1916            old MTU is the lowest MTU in the path, update the route PMTU
1917            to reflect the increase. In this case if the other nodes' MTU
1918            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1919            PMTU discouvery.
1920          */
1921         if (rt->rt6i_dev == arg->dev &&
1922             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1923             (dst_mtu(&rt->u.dst) > arg->mtu ||
1924              (dst_mtu(&rt->u.dst) < arg->mtu &&
1925               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1926                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1927                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1928         }
1929         return 0;
1930 }
1931
1932 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1933 {
1934         struct rt6_mtu_change_arg arg = {
1935                 .dev = dev,
1936                 .mtu = mtu,
1937         };
1938
1939         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1940 }
1941
1942 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1943         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1944         [RTA_OIF]               = { .type = NLA_U32 },
1945         [RTA_IIF]               = { .type = NLA_U32 },
1946         [RTA_PRIORITY]          = { .type = NLA_U32 },
1947         [RTA_METRICS]           = { .type = NLA_NESTED },
1948 };
1949
1950 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1951                               struct fib6_config *cfg)
1952 {
1953         struct rtmsg *rtm;
1954         struct nlattr *tb[RTA_MAX+1];
1955         int err;
1956
1957         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1958         if (err < 0)
1959                 goto errout;
1960
1961         err = -EINVAL;
1962         rtm = nlmsg_data(nlh);
1963         memset(cfg, 0, sizeof(*cfg));
1964
1965         cfg->fc_table = rtm->rtm_table;
1966         cfg->fc_dst_len = rtm->rtm_dst_len;
1967         cfg->fc_src_len = rtm->rtm_src_len;
1968         cfg->fc_flags = RTF_UP;
1969         cfg->fc_protocol = rtm->rtm_protocol;
1970
1971         if (rtm->rtm_type == RTN_UNREACHABLE)
1972                 cfg->fc_flags |= RTF_REJECT;
1973
1974         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1975         cfg->fc_nlinfo.nlh = nlh;
1976
1977         if (tb[RTA_GATEWAY]) {
1978                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1979                 cfg->fc_flags |= RTF_GATEWAY;
1980         }
1981
1982         if (tb[RTA_DST]) {
1983                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1984
1985                 if (nla_len(tb[RTA_DST]) < plen)
1986                         goto errout;
1987
1988                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1989         }
1990
1991         if (tb[RTA_SRC]) {
1992                 int plen = (rtm->rtm_src_len + 7) >> 3;
1993
1994                 if (nla_len(tb[RTA_SRC]) < plen)
1995                         goto errout;
1996
1997                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1998         }
1999
2000         if (tb[RTA_OIF])
2001                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2002
2003         if (tb[RTA_PRIORITY])
2004                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2005
2006         if (tb[RTA_METRICS]) {
2007                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2008                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2009         }
2010
2011         if (tb[RTA_TABLE])
2012                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2013
2014         err = 0;
2015 errout:
2016         return err;
2017 }
2018
2019 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2020 {
2021         struct fib6_config cfg;
2022         int err;
2023
2024         err = rtm_to_fib6_config(skb, nlh, &cfg);
2025         if (err < 0)
2026                 return err;
2027
2028         return ip6_route_del(&cfg);
2029 }
2030
2031 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2032 {
2033         struct fib6_config cfg;
2034         int err;
2035
2036         err = rtm_to_fib6_config(skb, nlh, &cfg);
2037         if (err < 0)
2038                 return err;
2039
2040         return ip6_route_add(&cfg);
2041 }
2042
2043 static inline size_t rt6_nlmsg_size(void)
2044 {
2045         return NLMSG_ALIGN(sizeof(struct rtmsg))
2046                + nla_total_size(16) /* RTA_SRC */
2047                + nla_total_size(16) /* RTA_DST */
2048                + nla_total_size(16) /* RTA_GATEWAY */
2049                + nla_total_size(16) /* RTA_PREFSRC */
2050                + nla_total_size(4) /* RTA_TABLE */
2051                + nla_total_size(4) /* RTA_IIF */
2052                + nla_total_size(4) /* RTA_OIF */
2053                + nla_total_size(4) /* RTA_PRIORITY */
2054                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2055                + nla_total_size(sizeof(struct rta_cacheinfo));
2056 }
2057
2058 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2059                          struct in6_addr *dst, struct in6_addr *src,
2060                          int iif, int type, u32 pid, u32 seq,
2061                          int prefix, unsigned int flags)
2062 {
2063         struct rtmsg *rtm;
2064         struct nlmsghdr *nlh;
2065         long expires;
2066         u32 table;
2067
2068         if (prefix) {   /* user wants prefix routes only */
2069                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2070                         /* success since this is not a prefix route */
2071                         return 1;
2072                 }
2073         }
2074
2075         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2076         if (nlh == NULL)
2077                 return -EMSGSIZE;
2078
2079         rtm = nlmsg_data(nlh);
2080         rtm->rtm_family = AF_INET6;
2081         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2082         rtm->rtm_src_len = rt->rt6i_src.plen;
2083         rtm->rtm_tos = 0;
2084         if (rt->rt6i_table)
2085                 table = rt->rt6i_table->tb6_id;
2086         else
2087                 table = RT6_TABLE_UNSPEC;
2088         rtm->rtm_table = table;
2089         NLA_PUT_U32(skb, RTA_TABLE, table);
2090         if (rt->rt6i_flags&RTF_REJECT)
2091                 rtm->rtm_type = RTN_UNREACHABLE;
2092         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2093                 rtm->rtm_type = RTN_LOCAL;
2094         else
2095                 rtm->rtm_type = RTN_UNICAST;
2096         rtm->rtm_flags = 0;
2097         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2098         rtm->rtm_protocol = rt->rt6i_protocol;
2099         if (rt->rt6i_flags&RTF_DYNAMIC)
2100                 rtm->rtm_protocol = RTPROT_REDIRECT;
2101         else if (rt->rt6i_flags & RTF_ADDRCONF)
2102                 rtm->rtm_protocol = RTPROT_KERNEL;
2103         else if (rt->rt6i_flags&RTF_DEFAULT)
2104                 rtm->rtm_protocol = RTPROT_RA;
2105
2106         if (rt->rt6i_flags&RTF_CACHE)
2107                 rtm->rtm_flags |= RTM_F_CLONED;
2108
2109         if (dst) {
2110                 NLA_PUT(skb, RTA_DST, 16, dst);
2111                 rtm->rtm_dst_len = 128;
2112         } else if (rtm->rtm_dst_len)
2113                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2114 #ifdef CONFIG_IPV6_SUBTREES
2115         if (src) {
2116                 NLA_PUT(skb, RTA_SRC, 16, src);
2117                 rtm->rtm_src_len = 128;
2118         } else if (rtm->rtm_src_len)
2119                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2120 #endif
2121         if (iif)
2122                 NLA_PUT_U32(skb, RTA_IIF, iif);
2123         else if (dst) {
2124                 struct in6_addr saddr_buf;
2125                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2126                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2127         }
2128
2129         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2130                 goto nla_put_failure;
2131
2132         if (rt->u.dst.neighbour)
2133                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2134
2135         if (rt->u.dst.dev)
2136                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2137
2138         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2139
2140         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2141         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2142                                expires, rt->u.dst.error) < 0)
2143                 goto nla_put_failure;
2144
2145         return nlmsg_end(skb, nlh);
2146
2147 nla_put_failure:
2148         nlmsg_cancel(skb, nlh);
2149         return -EMSGSIZE;
2150 }
2151
2152 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2153 {
2154         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2155         int prefix;
2156
2157         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2158                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2159                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2160         } else
2161                 prefix = 0;
2162
2163         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2164                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2165                      prefix, NLM_F_MULTI);
2166 }
2167
2168 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2169 {
2170         struct nlattr *tb[RTA_MAX+1];
2171         struct rt6_info *rt;
2172         struct sk_buff *skb;
2173         struct rtmsg *rtm;
2174         struct flowi fl;
2175         int err, iif = 0;
2176
2177         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2178         if (err < 0)
2179                 goto errout;
2180
2181         err = -EINVAL;
2182         memset(&fl, 0, sizeof(fl));
2183
2184         if (tb[RTA_SRC]) {
2185                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2186                         goto errout;
2187
2188                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2189         }
2190
2191         if (tb[RTA_DST]) {
2192                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2193                         goto errout;
2194
2195                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2196         }
2197
2198         if (tb[RTA_IIF])
2199                 iif = nla_get_u32(tb[RTA_IIF]);
2200
2201         if (tb[RTA_OIF])
2202                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2203
2204         if (iif) {
2205                 struct net_device *dev;
2206                 dev = __dev_get_by_index(&init_net, iif);
2207                 if (!dev) {
2208                         err = -ENODEV;
2209                         goto errout;
2210                 }
2211         }
2212
2213         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2214         if (skb == NULL) {
2215                 err = -ENOBUFS;
2216                 goto errout;
2217         }
2218
2219         /* Reserve room for dummy headers, this skb can pass
2220            through good chunk of routing engine.
2221          */
2222         skb_reset_mac_header(skb);
2223         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2224
2225         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2226         skb->dst = &rt->u.dst;
2227
2228         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2229                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2230                             nlh->nlmsg_seq, 0, 0);
2231         if (err < 0) {
2232                 kfree_skb(skb);
2233                 goto errout;
2234         }
2235
2236         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2237 errout:
2238         return err;
2239 }
2240
2241 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2242 {
2243         struct sk_buff *skb;
2244         u32 pid = 0, seq = 0;
2245         struct nlmsghdr *nlh = NULL;
2246         int err = -ENOBUFS;
2247
2248         if (info) {
2249                 pid = info->pid;
2250                 nlh = info->nlh;
2251                 if (nlh)
2252                         seq = nlh->nlmsg_seq;
2253         }
2254
2255         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2256         if (skb == NULL)
2257                 goto errout;
2258
2259         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2260         if (err < 0) {
2261                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2262                 WARN_ON(err == -EMSGSIZE);
2263                 kfree_skb(skb);
2264                 goto errout;
2265         }
2266         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2267 errout:
2268         if (err < 0)
2269                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2270 }
2271
2272 /*
2273  *      /proc
2274  */
2275
2276 #ifdef CONFIG_PROC_FS
2277
/* Record size assumed per route when mapping a read offset to records. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2288
2289 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2290 {
2291         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2292
2293         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2294                 arg->skip++;
2295                 return 0;
2296         }
2297
2298         if (arg->len >= arg->length)
2299                 return 0;
2300
2301         arg->len += sprintf(arg->buffer + arg->len,
2302                             NIP6_SEQFMT " %02x ",
2303                             NIP6(rt->rt6i_dst.addr),
2304                             rt->rt6i_dst.plen);
2305
2306 #ifdef CONFIG_IPV6_SUBTREES
2307         arg->len += sprintf(arg->buffer + arg->len,
2308                             NIP6_SEQFMT " %02x ",
2309                             NIP6(rt->rt6i_src.addr),
2310                             rt->rt6i_src.plen);
2311 #else
2312         arg->len += sprintf(arg->buffer + arg->len,
2313                             "00000000000000000000000000000000 00 ");
2314 #endif
2315
2316         if (rt->rt6i_nexthop) {
2317                 arg->len += sprintf(arg->buffer + arg->len,
2318                                     NIP6_SEQFMT,
2319                                     NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2320         } else {
2321                 arg->len += sprintf(arg->buffer + arg->len,
2322                                     "00000000000000000000000000000000");
2323         }
2324         arg->len += sprintf(arg->buffer + arg->len,
2325                             " %08x %08x %08x %08x %8s\n",
2326                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2327                             rt->u.dst.__use, rt->rt6i_flags,
2328                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2329         return 0;
2330 }
2331
2332 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2333 {
2334         struct rt6_proc_arg arg = {
2335                 .buffer = buffer,
2336                 .offset = offset,
2337                 .length = length,
2338         };
2339
2340         fib6_clean_all(rt6_info_route, 0, &arg);
2341
2342         *start = buffer;
2343         if (offset)
2344                 *start += offset % RT6_INFO_LEN;
2345
2346         arg.len -= offset % RT6_INFO_LEN;
2347
2348         if (arg.len > length)
2349                 arg.len = length;
2350         if (arg.len < 0)
2351                 arg.len = 0;
2352
2353         return arg.len;
2354 }
2355
2356 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2357 {
2358         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2359                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2360                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2361                       rt6_stats.fib_rt_cache,
2362                       atomic_read(&ip6_dst_ops.entries),
2363                       rt6_stats.fib_discarded_routes);
2364
2365         return 0;
2366 }
2367
2368 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2369 {
2370         return single_open(file, rt6_stats_seq_show, NULL);
2371 }
2372
2373 static const struct file_operations rt6_stats_seq_fops = {
2374         .owner   = THIS_MODULE,
2375         .open    = rt6_stats_seq_open,
2376         .read    = seq_read,
2377         .llseek  = seq_lseek,
2378         .release = single_release,
2379 };
2380 #endif  /* CONFIG_PROC_FS */
2381
2382 #ifdef CONFIG_SYSCTL
2383
2384 static int flush_delay;
2385
2386 static
2387 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2388                               void __user *buffer, size_t *lenp, loff_t *ppos)
2389 {
2390         if (write) {
2391                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2392                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2393                 return 0;
2394         } else
2395                 return -EINVAL;
2396 }
2397
2398 ctl_table ipv6_route_table[] = {
2399         {
2400                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH,
2401                 .procname       =       "flush",
2402                 .data           =       &flush_delay,
2403                 .maxlen         =       sizeof(int),
2404                 .mode           =       0200,
2405                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2406         },
2407         {
2408                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2409                 .procname       =       "gc_thresh",
2410                 .data           =       &ip6_dst_ops.gc_thresh,
2411                 .maxlen         =       sizeof(int),
2412                 .mode           =       0644,
2413                 .proc_handler   =       &proc_dointvec,
2414         },
2415         {
2416                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2417                 .procname       =       "max_size",
2418                 .data           =       &ip6_rt_max_size,
2419                 .maxlen         =       sizeof(int),
2420                 .mode           =       0644,
2421                 .proc_handler   =       &proc_dointvec,
2422         },
2423         {
2424                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2425                 .procname       =       "gc_min_interval",
2426                 .data           =       &ip6_rt_gc_min_interval,
2427                 .maxlen         =       sizeof(int),
2428                 .mode           =       0644,
2429                 .proc_handler   =       &proc_dointvec_jiffies,
2430                 .strategy       =       &sysctl_jiffies,
2431         },
2432         {
2433                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2434                 .procname       =       "gc_timeout",
2435                 .data           =       &ip6_rt_gc_timeout,
2436                 .maxlen         =       sizeof(int),
2437                 .mode           =       0644,
2438                 .proc_handler   =       &proc_dointvec_jiffies,
2439                 .strategy       =       &sysctl_jiffies,
2440         },
2441         {
2442                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2443                 .procname       =       "gc_interval",
2444                 .data           =       &ip6_rt_gc_interval,
2445                 .maxlen         =       sizeof(int),
2446                 .mode           =       0644,
2447                 .proc_handler   =       &proc_dointvec_jiffies,
2448                 .strategy       =       &sysctl_jiffies,
2449         },
2450         {
2451                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2452                 .procname       =       "gc_elasticity",
2453                 .data           =       &ip6_rt_gc_elasticity,
2454                 .maxlen         =       sizeof(int),
2455                 .mode           =       0644,
2456                 .proc_handler   =       &proc_dointvec_jiffies,
2457                 .strategy       =       &sysctl_jiffies,
2458         },
2459         {
2460                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2461                 .procname       =       "mtu_expires",
2462                 .data           =       &ip6_rt_mtu_expires,
2463                 .maxlen         =       sizeof(int),
2464                 .mode           =       0644,
2465                 .proc_handler   =       &proc_dointvec_jiffies,
2466                 .strategy       =       &sysctl_jiffies,
2467         },
2468         {
2469                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2470                 .procname       =       "min_adv_mss",
2471                 .data           =       &ip6_rt_min_advmss,
2472                 .maxlen         =       sizeof(int),
2473                 .mode           =       0644,
2474                 .proc_handler   =       &proc_dointvec_jiffies,
2475                 .strategy       =       &sysctl_jiffies,
2476         },
2477         {
2478                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2479                 .procname       =       "gc_min_interval_ms",
2480                 .data           =       &ip6_rt_gc_min_interval,
2481                 .maxlen         =       sizeof(int),
2482                 .mode           =       0644,
2483                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2484                 .strategy       =       &sysctl_ms_jiffies,
2485         },
2486         { .ctl_name = 0 }
2487 };
2488
2489 #endif
2490
2491 void __init ip6_route_init(void)
2492 {
2493 #ifdef  CONFIG_PROC_FS
2494         struct proc_dir_entry *p;
2495 #endif
2496         ip6_dst_ops.kmem_cachep =
2497                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2498                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2499         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2500
2501         fib6_init();
2502 #ifdef  CONFIG_PROC_FS
2503         p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
2504         if (p)
2505                 p->owner = THIS_MODULE;
2506
2507         proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2508 #endif
2509 #ifdef CONFIG_XFRM
2510         xfrm6_init();
2511 #endif
2512 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2513         fib6_rules_init();
2514 #endif
2515
2516         __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2517         __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2518         __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2519 }
2520
2521 void ip6_route_cleanup(void)
2522 {
2523 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2524         fib6_rules_cleanup();
2525 #endif
2526 #ifdef CONFIG_PROC_FS
2527         proc_net_remove(&init_net, "ipv6_route");
2528         proc_net_remove(&init_net, "rt6_stats");
2529 #endif
2530 #ifdef CONFIG_XFRM
2531         xfrm6_fini();
2532 #endif
2533         rt6_ifdown(NULL);
2534         fib6_gc_cleanup();
2535         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2536 }