]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv6/sit.c
b0c5080420a86b2d0b01ca5600c9214e235fbfb1
[linux-2.6-omap-h63xx.git] / net / ipv6 / sit.c
1 /*
2  *      IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  *
14  *      Changes:
15  * Roger Venning <r.venning@telstra.com>:       6to4 support
16  * Nate Thompson <nate@thebog.net>:             6to4 support
17  * Fred Templin <fred.l.templin@boeing.com>:    isatap support
18  */
19
20 #include <linux/module.h>
21 #include <linux/capability.h>
22 #include <linux/errno.h>
23 #include <linux/types.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
26 #include <linux/net.h>
27 #include <linux/in6.h>
28 #include <linux/netdevice.h>
29 #include <linux/if_arp.h>
30 #include <linux/icmp.h>
31 #include <asm/uaccess.h>
32 #include <linux/init.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/snmp.h>
38
39 #include <net/ipv6.h>
40 #include <net/protocol.h>
41 #include <net/transp_v6.h>
42 #include <net/ip6_fib.h>
43 #include <net/ip6_route.h>
44 #include <net/ndisc.h>
45 #include <net/addrconf.h>
46 #include <net/ip.h>
47 #include <net/udp.h>
48 #include <net/icmp.h>
49 #include <net/ipip.h>
50 #include <net/inet_ecn.h>
51 #include <net/xfrm.h>
52 #include <net/dsfield.h>
53 #include <net/net_namespace.h>
54 #include <net/netns/generic.h>
55
56 /*
   This version of net/ipv6/sit.c is a clone of net/ipv4/ip_gre.c
58
59    For comments look at net/ipv4/ip_gre.c --ANK
60  */
61
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that compound expressions such as
 * HASH(a | b) hash the whole expression; it is still evaluated twice, so it
 * must not have side effects.  HASH_SIZE must stay a power of two for the
 * mask to be valid.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
64
/* Forward declarations of device callbacks defined further down the file. */
static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);

/* Key handed to net_generic() to fetch this module's struct sit_net. */
static int sit_net_id;
/*
 * SIT state obtained per struct net via net_generic(net, sit_net_id).
 * Tunnels are chained through ip_tunnel->next in one of four tables,
 * selected by which of the remote/local IPv4 addresses are configured.
 */
struct sit_net {
        struct ip_tunnel *tunnels_r_l[HASH_SIZE];  /* remote and local set; bucket is HASH(remote)^HASH(local) */
        struct ip_tunnel *tunnels_r[HASH_SIZE];    /* only remote set; bucket is HASH(remote) */
        struct ip_tunnel *tunnels_l[HASH_SIZE];    /* only local set; bucket is HASH(local) */
        struct ip_tunnel *tunnels_wc[1];           /* wildcard slot; cleared when the fallback device is uninitialised */
        struct ip_tunnel **tunnels[4];             /* indexed by the prio computed in __ipip6_bucket(); presumably points at the
                                                    * four tables above - it is initialised outside this view, TODO confirm */

        struct net_device *fb_tunnel_dev;          /* the fallback tunnel device for this namespace */
};

/* Reader/writer lock taken around tunnel-chain updates, PRL access and the receive/error lookups. */
static DEFINE_RWLOCK(ipip6_lock);
81
82 static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
83                 __be32 remote, __be32 local)
84 {
85         unsigned h0 = HASH(remote);
86         unsigned h1 = HASH(local);
87         struct ip_tunnel *t;
88         struct sit_net *sitn = net_generic(net, sit_net_id);
89
90         for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
91                 if (local == t->parms.iph.saddr &&
92                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
93                         return t;
94         }
95         for (t = sitn->tunnels_r[h0]; t; t = t->next) {
96                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
97                         return t;
98         }
99         for (t = sitn->tunnels_l[h1]; t; t = t->next) {
100                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
101                         return t;
102         }
103         if ((t = sitn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
104                 return t;
105         return NULL;
106 }
107
108 static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
109                 struct ip_tunnel_parm *parms)
110 {
111         __be32 remote = parms->iph.daddr;
112         __be32 local = parms->iph.saddr;
113         unsigned h = 0;
114         int prio = 0;
115
116         if (remote) {
117                 prio |= 2;
118                 h ^= HASH(remote);
119         }
120         if (local) {
121                 prio |= 1;
122                 h ^= HASH(local);
123         }
124         return &sitn->tunnels[prio][h];
125 }
126
127 static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
128                 struct ip_tunnel *t)
129 {
130         return __ipip6_bucket(sitn, &t->parms);
131 }
132
133 static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
134 {
135         struct ip_tunnel **tp;
136
137         for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
138                 if (t == *tp) {
139                         write_lock_bh(&ipip6_lock);
140                         *tp = t->next;
141                         write_unlock_bh(&ipip6_lock);
142                         break;
143                 }
144         }
145 }
146
147 static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
148 {
149         struct ip_tunnel **tp = ipip6_bucket(sitn, t);
150
151         t->next = *tp;
152         write_lock_bh(&ipip6_lock);
153         *tp = t;
154         write_unlock_bh(&ipip6_lock);
155 }
156
157 static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
158                 struct ip_tunnel_parm *parms, int create)
159 {
160         __be32 remote = parms->iph.daddr;
161         __be32 local = parms->iph.saddr;
162         struct ip_tunnel *t, **tp, *nt;
163         struct net_device *dev;
164         char name[IFNAMSIZ];
165         struct sit_net *sitn = net_generic(net, sit_net_id);
166
167         for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) {
168                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
169                         return t;
170         }
171         if (!create)
172                 goto failed;
173
174         if (parms->name[0])
175                 strlcpy(name, parms->name, IFNAMSIZ);
176         else
177                 sprintf(name, "sit%%d");
178
179         dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
180         if (dev == NULL)
181                 return NULL;
182
183         dev_net_set(dev, net);
184
185         if (strchr(name, '%')) {
186                 if (dev_alloc_name(dev, name) < 0)
187                         goto failed_free;
188         }
189
190         nt = netdev_priv(dev);
191         dev->init = ipip6_tunnel_init;
192         nt->parms = *parms;
193
194         if (parms->i_flags & SIT_ISATAP)
195                 dev->priv_flags |= IFF_ISATAP;
196
197         if (register_netdevice(dev) < 0)
198                 goto failed_free;
199
200         dev_hold(dev);
201
202         ipip6_tunnel_link(sitn, nt);
203         return nt;
204
205 failed_free:
206         free_netdev(dev);
207 failed:
208         return NULL;
209 }
210
211 static struct ip_tunnel_prl_entry *
212 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
213 {
214         struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL;
215
216         for (p = t->prl; p; p = p->next)
217                 if (p->addr == addr)
218                         break;
219         return p;
220
221 }
222
223 static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
224 {
225         struct ip_tunnel_prl *kp;
226         struct ip_tunnel_prl_entry *prl;
227         unsigned int cmax, c = 0, ca, len;
228         int ret = 0;
229
230         cmax = a->datalen / sizeof(*a);
231         if (cmax > 1 && a->addr != htonl(INADDR_ANY))
232                 cmax = 1;
233
234         /* For simple GET or for root users,
235          * we try harder to allocate.
236          */
237         kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
238                 kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
239                 NULL;
240
241         read_lock(&ipip6_lock);
242
243         ca = t->prl_count < cmax ? t->prl_count : cmax;
244
245         if (!kp) {
246                 /* We don't try hard to allocate much memory for
247                  * non-root users.
248                  * For root users, retry allocating enough memory for
249                  * the answer.
250                  */
251                 kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
252                 if (!kp) {
253                         ret = -ENOMEM;
254                         goto out;
255                 }
256         }
257
258         c = 0;
259         for (prl = t->prl; prl; prl = prl->next) {
260                 if (c > cmax)
261                         break;
262                 if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr)
263                         continue;
264                 kp[c].addr = prl->addr;
265                 kp[c].flags = prl->flags;
266                 c++;
267                 if (a->addr != htonl(INADDR_ANY))
268                         break;
269         }
270 out:
271         read_unlock(&ipip6_lock);
272
273         len = sizeof(*kp) * c;
274         ret = len ? copy_to_user(a->data, kp, len) : 0;
275
276         kfree(kp);
277         if (ret)
278                 return -EFAULT;
279
280         a->datalen = len;
281         return 0;
282 }
283
284 static int
285 ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
286 {
287         struct ip_tunnel_prl_entry *p;
288         int err = 0;
289
290         if (a->addr == htonl(INADDR_ANY))
291                 return -EINVAL;
292
293         write_lock(&ipip6_lock);
294
295         for (p = t->prl; p; p = p->next) {
296                 if (p->addr == a->addr) {
297                         if (chg)
298                                 goto update;
299                         err = -EEXIST;
300                         goto out;
301                 }
302         }
303
304         if (chg) {
305                 err = -ENXIO;
306                 goto out;
307         }
308
309         p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
310         if (!p) {
311                 err = -ENOBUFS;
312                 goto out;
313         }
314
315         p->next = t->prl;
316         t->prl = p;
317         t->prl_count++;
318 update:
319         p->addr = a->addr;
320         p->flags = a->flags;
321 out:
322         write_unlock(&ipip6_lock);
323         return err;
324 }
325
326 static int
327 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
328 {
329         struct ip_tunnel_prl_entry *x, **p;
330         int err = 0;
331
332         write_lock(&ipip6_lock);
333
334         if (a && a->addr != htonl(INADDR_ANY)) {
335                 for (p = &t->prl; *p; p = &(*p)->next) {
336                         if ((*p)->addr == a->addr) {
337                                 x = *p;
338                                 *p = x->next;
339                                 kfree(x);
340                                 t->prl_count--;
341                                 goto out;
342                         }
343                 }
344                 err = -ENXIO;
345         } else {
346                 while (t->prl) {
347                         x = t->prl;
348                         t->prl = t->prl->next;
349                         kfree(x);
350                         t->prl_count--;
351                 }
352         }
353 out:
354         write_unlock(&ipip6_lock);
355         return 0;
356 }
357
358 static int
359 isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
360 {
361         struct ip_tunnel_prl_entry *p;
362         int ok = 1;
363
364         read_lock(&ipip6_lock);
365         p = __ipip6_tunnel_locate_prl(t, iph->saddr);
366         if (p) {
367                 if (p->flags & PRL_DEFAULT)
368                         skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
369                 else
370                         skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
371         } else {
372                 struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
373                 if (ipv6_addr_is_isatap(addr6) &&
374                     (addr6->s6_addr32[3] == iph->saddr) &&
375                     ipv6_chk_prefix(addr6, t->dev))
376                         skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
377                 else
378                         ok = 0;
379         }
380         read_unlock(&ipip6_lock);
381         return ok;
382 }
383
384 static void ipip6_tunnel_uninit(struct net_device *dev)
385 {
386         struct net *net = dev_net(dev);
387         struct sit_net *sitn = net_generic(net, sit_net_id);
388
389         if (dev == sitn->fb_tunnel_dev) {
390                 write_lock_bh(&ipip6_lock);
391                 sitn->tunnels_wc[0] = NULL;
392                 write_unlock_bh(&ipip6_lock);
393                 dev_put(dev);
394         } else {
395                 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
396                 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
397                 dev_put(dev);
398         }
399 }
400
401
402 static int ipip6_err(struct sk_buff *skb, u32 info)
403 {
404
405 /* All the routers (except for Linux) return only
406    8 bytes of packet payload. It means, that precise relaying of
407    ICMP in the real Internet is absolutely infeasible.
408  */
409         struct iphdr *iph = (struct iphdr*)skb->data;
410         const int type = icmp_hdr(skb)->type;
411         const int code = icmp_hdr(skb)->code;
412         struct ip_tunnel *t;
413         int err;
414
415         switch (type) {
416         default:
417         case ICMP_PARAMETERPROB:
418                 return 0;
419
420         case ICMP_DEST_UNREACH:
421                 switch (code) {
422                 case ICMP_SR_FAILED:
423                 case ICMP_PORT_UNREACH:
424                         /* Impossible event. */
425                         return 0;
426                 case ICMP_FRAG_NEEDED:
427                         /* Soft state for pmtu is maintained by IP core. */
428                         return 0;
429                 default:
430                         /* All others are translated to HOST_UNREACH.
431                            rfc2003 contains "deep thoughts" about NET_UNREACH,
432                            I believe they are just ether pollution. --ANK
433                          */
434                         break;
435                 }
436                 break;
437         case ICMP_TIME_EXCEEDED:
438                 if (code != ICMP_EXC_TTL)
439                         return 0;
440                 break;
441         }
442
443         err = -ENOENT;
444
445         read_lock(&ipip6_lock);
446         t = ipip6_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
447         if (t == NULL || t->parms.iph.daddr == 0)
448                 goto out;
449
450         err = 0;
451         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
452                 goto out;
453
454         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
455                 t->err_count++;
456         else
457                 t->err_count = 1;
458         t->err_time = jiffies;
459 out:
460         read_unlock(&ipip6_lock);
461         return err;
462 }
463
464 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
465 {
466         if (INET_ECN_is_ce(iph->tos))
467                 IP6_ECN_set_ce(ipv6_hdr(skb));
468 }
469
470 static int ipip6_rcv(struct sk_buff *skb)
471 {
472         struct iphdr *iph;
473         struct ip_tunnel *tunnel;
474
475         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
476                 goto out;
477
478         iph = ip_hdr(skb);
479
480         read_lock(&ipip6_lock);
481         if ((tunnel = ipip6_tunnel_lookup(dev_net(skb->dev),
482                                         iph->saddr, iph->daddr)) != NULL) {
483                 secpath_reset(skb);
484                 skb->mac_header = skb->network_header;
485                 skb_reset_network_header(skb);
486                 IPCB(skb)->flags = 0;
487                 skb->protocol = htons(ETH_P_IPV6);
488                 skb->pkt_type = PACKET_HOST;
489
490                 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
491                     !isatap_chksrc(skb, iph, tunnel)) {
492                         tunnel->dev->stats.rx_errors++;
493                         read_unlock(&ipip6_lock);
494                         kfree_skb(skb);
495                         return 0;
496                 }
497                 tunnel->dev->stats.rx_packets++;
498                 tunnel->dev->stats.rx_bytes += skb->len;
499                 skb->dev = tunnel->dev;
500                 dst_release(skb->dst);
501                 skb->dst = NULL;
502                 nf_reset(skb);
503                 ipip6_ecn_decapsulate(iph, skb);
504                 netif_rx(skb);
505                 read_unlock(&ipip6_lock);
506                 return 0;
507         }
508
509         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
510         read_unlock(&ipip6_lock);
511 out:
512         kfree_skb(skb);
513         return 0;
514 }
515
516 /* Returns the embedded IPv4 address if the IPv6 address
517    comes from 6to4 (RFC 3056) addr space */
518
519 static inline __be32 try_6to4(struct in6_addr *v6dst)
520 {
521         __be32 dst = 0;
522
523         if (v6dst->s6_addr16[0] == htons(0x2002)) {
524                 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
525                 memcpy(&dst, &v6dst->s6_addr16[1], 4);
526         }
527         return dst;
528 }
529
/*
 *      Transmit handler of the SIT device: wrap an IPv6 packet in an IPv4
 *      header (protocol IPPROTO_IPV6) and send it out.
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      The outer IPv4 destination is, in order of preference:
 *        - for ISATAP devices, the last 32 bits of the next hop's address;
 *        - the tunnel's configured remote address;
 *        - the address embedded in a 6to4 destination;
 *        - the last 32 bits of an IPv4-compatible next hop / destination.
 *
 *      Always returns 0.  On the error paths below the skb is freed here;
 *      on the success path it is handed to IPTUNNEL_XMIT().
 */

static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->dev->stats;
        struct iphdr  *tiph = &tunnel->parms.iph;
        struct ipv6hdr *iph6 = ipv6_hdr(skb);
        u8     tos = tunnel->parms.iph.tos;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                        /* Device to other host */
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        int    mtu;
        struct in6_addr *addr6;
        int addr_type;

        /* Re-entry guard: a non-zero counter means this tunnel is already transmitting. */
        if (tunnel->recursion++) {
                stats->collisions++;
                goto tx_error;
        }

        if (skb->protocol != htons(ETH_P_IPV6))
                goto tx_error;

        /* ISATAP (RFC4214) - must come before 6to4 */
        if (dev->priv_flags & IFF_ISATAP) {
                struct neighbour *neigh = NULL;

                if (skb->dst)
                        neigh = skb->dst->neighbour;

                if (neigh == NULL) {
                        if (net_ratelimit())
                                printk(KERN_DEBUG "sit: nexthop == NULL\n");
                        goto tx_error;
                }

                addr6 = (struct in6_addr*)&neigh->primary_key;
                addr_type = ipv6_addr_type(addr6);

                /* Overrides any configured remote address. */
                if ((addr_type & IPV6_ADDR_UNICAST) &&
                     ipv6_addr_is_isatap(addr6))
                        dst = addr6->s6_addr32[3];
                else
                        goto tx_error;
        }

        /* No destination yet: try the 6to4 mapping of the IPv6 destination. */
        if (!dst)
                dst = try_6to4(&iph6->daddr);

        /* Still none: fall back to an IPv4-compatible next hop or destination. */
        if (!dst) {
                struct neighbour *neigh = NULL;

                if (skb->dst)
                        neigh = skb->dst->neighbour;

                if (neigh == NULL) {
                        if (net_ratelimit())
                                printk(KERN_DEBUG "sit: nexthop == NULL\n");
                        goto tx_error;
                }

                addr6 = (struct in6_addr*)&neigh->primary_key;
                addr_type = ipv6_addr_type(addr6);

                /* Unspecified next hop: use the packet's own destination instead. */
                if (addr_type == IPV6_ADDR_ANY) {
                        addr6 = &ipv6_hdr(skb)->daddr;
                        addr_type = ipv6_addr_type(addr6);
                }

                if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                        goto tx_error_icmp;

                dst = addr6->s6_addr32[3];
        }

        /* Route the outer IPv4 packet. */
        {
                struct flowi fl = { .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .oif = tunnel->parms.link,
                                    .proto = IPPROTO_IPV6 };
                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        stats->tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        if (rt->rt_type != RTN_UNICAST) {
                ip_rt_put(rt);
                stats->tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->u.dst.dev;

        /* The route leads back into this very tunnel device. */
        if (tdev == dev) {
                ip_rt_put(rt);
                stats->collisions++;
                goto tx_error;
        }

        /* With a frag_off value configured, derive the MTU from the outer route
         * minus the IPv4 header; otherwise use the inner route's (or device's) MTU.
         */
        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        /* NOTE(review): 68 is presumably the minimum IPv4 MTU - confirm. */
        if (mtu < 68) {
                stats->collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;
        if (tunnel->parms.iph.daddr && skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        /* Too big for the tunnel: tell the sender and drop. */
        if (skb->len > mtu) {
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
                ip_rt_put(rt);
                goto tx_error;
        }

        /* Errors recorded by ipip6_err() are reported back to the sender one
         * at a time while they are recent, and discarded once they are stale.
         */
        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The header moved with the copy; refresh the pointer. */
                iph6 = ipv6_hdr(skb);
        }

        /* The IPv6 header becomes the transport header of the outer packet. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags = 0;
        dst_release(skb->dst);
        /* The route reference obtained above is now owned by the skb. */
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        /* Set DF only when the MTU is above the IPv6 minimum. */
        if (mtu > IPV6_MIN_MTU)
                iph->frag_off   =       htons(IP_DF);
        else
                iph->frag_off   =       0;

        iph->protocol           =       IPPROTO_IPV6;
        iph->tos                =       INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        /* A configured TTL of 0 means: copy the inner hop limit. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       iph6->hop_limit;

        nf_reset(skb);

        /* Macro defined outside this file; presumably sends the packet and
         * updates the tx statistics using the locals of this function - confirm.
         */
        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}
729
730 static void ipip6_tunnel_bind_dev(struct net_device *dev)
731 {
732         struct net_device *tdev = NULL;
733         struct ip_tunnel *tunnel;
734         struct iphdr *iph;
735
736         tunnel = netdev_priv(dev);
737         iph = &tunnel->parms.iph;
738
739         if (iph->daddr) {
740                 struct flowi fl = { .nl_u = { .ip4_u =
741                                               { .daddr = iph->daddr,
742                                                 .saddr = iph->saddr,
743                                                 .tos = RT_TOS(iph->tos) } },
744                                     .oif = tunnel->parms.link,
745                                     .proto = IPPROTO_IPV6 };
746                 struct rtable *rt;
747                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
748                         tdev = rt->u.dst.dev;
749                         ip_rt_put(rt);
750                 }
751                 dev->flags |= IFF_POINTOPOINT;
752         }
753
754         if (!tdev && tunnel->parms.link)
755                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
756
757         if (tdev) {
758                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
759                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
760                 if (dev->mtu < IPV6_MIN_MTU)
761                         dev->mtu = IPV6_MIN_MTU;
762         }
763         dev->iflink = tunnel->parms.link;
764 }
765
/*
 * dev->do_ioctl handler for SIT devices.
 *
 * Tunnel commands exchange a struct ip_tunnel_parm through
 * ifr->ifr_ifru.ifru_data; PRL commands exchange a struct ip_tunnel_prl.
 *
 *   SIOCGETTUNNEL  read tunnel parameters
 *   SIOCADDTUNNEL  create a tunnel            (needs CAP_NET_ADMIN)
 *   SIOCCHGTUNNEL  change a tunnel            (needs CAP_NET_ADMIN)
 *   SIOCDELTUNNEL  delete a tunnel            (needs CAP_NET_ADMIN)
 *   SIOCGETPRL     read PRL entries
 *   SIOCADDPRL / SIOCCHGPRL / SIOCDELPRL      (need CAP_NET_ADMIN)
 *
 * When issued on the fallback device, the tunnel commands act on the
 * tunnel selected by the addresses in the supplied parameters; the PRL
 * commands are rejected on the fallback device.
 *
 * Returns 0 on success or a negative errno; unknown commands give -EINVAL.
 */
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel_prl prl;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct sit_net *sitn = net_generic(net, sit_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                /* On the fallback device, look the tunnel up by the given addresses. */
                if (dev == sitn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip6_tunnel_locate(net, &p, 0);
                }
                /* Not found, or not the fallback device: report this device's own tunnel. */
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* The template must be a plain IPv4 header carrying IPv6, with no
                 * frag_off bits other than DF.
                 */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed TTL forces DF on. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* Only SIOCADDTUNNEL may create a tunnel here. */
                t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested addresses already belong to another device. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* A change may not turn a point-to-point tunnel into a
                                 * non-point-to-point one or vice versa.
                                 */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                /* Re-hash this device's tunnel under its new addresses. */
                                t = netdev_priv(dev);
                                ipip6_tunnel_unlink(sitn, t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip6_tunnel_link(sitn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        ipip6_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        /* Return the resulting parameters to the caller. */
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == sitn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        /* The fallback tunnel itself cannot be deleted this way. */
                        err = -EPERM;
                        if (t == netdev_priv(sitn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        case SIOCGETPRL:
        case SIOCADDPRL:
        case SIOCDELPRL:
        case SIOCCHGPRL:
                /* Reading the PRL needs no privilege; modifying it does. */
                err = -EPERM;
                if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN))
                        goto done;
                err = -EINVAL;
                if (dev == sitn->fb_tunnel_dev)
                        goto done;
                err = -EFAULT;
                if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
                        goto done;
                err = -ENOENT;
                if (!(t = netdev_priv(dev)))
                        goto done;

                switch (cmd) {
                case SIOCGETPRL:
                        err = ipip6_tunnel_get_prl(t, &prl);
                        /* Copy the request back so the caller sees the updated datalen. */
                        if (!err && copy_to_user(ifr->ifr_ifru.ifru_data,
                                                 &prl, sizeof(prl)))
                                err = -EFAULT;
                        break;
                case SIOCDELPRL:
                        err = ipip6_tunnel_del_prl(t, &prl);
                        break;
                case SIOCADDPRL:
                case SIOCCHGPRL:
                        err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
                        break;
                }
                /* Issued for every modifying command, whether or not it succeeded. */
                if (cmd != SIOCGETPRL)
                        netdev_state_change(dev);
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
916
917 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
918 {
919         if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
920                 return -EINVAL;
921         dev->mtu = new_mtu;
922         return 0;
923 }
924
925 static void ipip6_tunnel_setup(struct net_device *dev)
926 {
927         dev->uninit             = ipip6_tunnel_uninit;
928         dev->destructor         = free_netdev;
929         dev->hard_start_xmit    = ipip6_tunnel_xmit;
930         dev->do_ioctl           = ipip6_tunnel_ioctl;
931         dev->change_mtu         = ipip6_tunnel_change_mtu;
932
933         dev->type               = ARPHRD_SIT;
934         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
935         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
936         dev->flags              = IFF_NOARP;
937         dev->iflink             = 0;
938         dev->addr_len           = 4;
939         dev->features           |= NETIF_F_NETNS_LOCAL;
940 }
941
942 static int ipip6_tunnel_init(struct net_device *dev)
943 {
944         struct ip_tunnel *tunnel;
945
946         tunnel = netdev_priv(dev);
947
948         tunnel->dev = dev;
949         strcpy(tunnel->parms.name, dev->name);
950
951         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
952         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
953
954         ipip6_tunnel_bind_dev(dev);
955
956         return 0;
957 }
958
959 static int ipip6_fb_tunnel_init(struct net_device *dev)
960 {
961         struct ip_tunnel *tunnel = netdev_priv(dev);
962         struct iphdr *iph = &tunnel->parms.iph;
963         struct net *net = dev_net(dev);
964         struct sit_net *sitn = net_generic(net, sit_net_id);
965
966         tunnel->dev = dev;
967         strcpy(tunnel->parms.name, dev->name);
968
969         iph->version            = 4;
970         iph->protocol           = IPPROTO_IPV6;
971         iph->ihl                = 5;
972         iph->ttl                = 64;
973
974         dev_hold(dev);
975         sitn->tunnels_wc[0]     = tunnel;
976         return 0;
977 }
978
979 static struct xfrm_tunnel sit_handler = {
980         .handler        =       ipip6_rcv,
981         .err_handler    =       ipip6_err,
982         .priority       =       1,
983 };
984
985 static void sit_destroy_tunnels(struct sit_net *sitn)
986 {
987         int prio;
988
989         for (prio = 1; prio < 4; prio++) {
990                 int h;
991                 for (h = 0; h < HASH_SIZE; h++) {
992                         struct ip_tunnel *t;
993                         while ((t = sitn->tunnels[prio][h]) != NULL)
994                                 unregister_netdevice(t->dev);
995                 }
996         }
997 }
998
999 static int sit_init_net(struct net *net)
1000 {
1001         int err;
1002         struct sit_net *sitn;
1003
1004         err = -ENOMEM;
1005         sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
1006         if (sitn == NULL)
1007                 goto err_alloc;
1008
1009         err = net_assign_generic(net, sit_net_id, sitn);
1010         if (err < 0)
1011                 goto err_assign;
1012
1013         sitn->tunnels[0] = sitn->tunnels_wc;
1014         sitn->tunnels[1] = sitn->tunnels_l;
1015         sitn->tunnels[2] = sitn->tunnels_r;
1016         sitn->tunnels[3] = sitn->tunnels_r_l;
1017
1018         sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
1019                                            ipip6_tunnel_setup);
1020         if (!sitn->fb_tunnel_dev) {
1021                 err = -ENOMEM;
1022                 goto err_alloc_dev;
1023         }
1024
1025         sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init;
1026         dev_net_set(sitn->fb_tunnel_dev, net);
1027
1028         if ((err = register_netdev(sitn->fb_tunnel_dev)))
1029                 goto err_reg_dev;
1030
1031         return 0;
1032
1033 err_reg_dev:
1034         free_netdev(sitn->fb_tunnel_dev);
1035 err_alloc_dev:
1036         /* nothing */
1037 err_assign:
1038         kfree(sitn);
1039 err_alloc:
1040         return err;
1041 }
1042
1043 static void sit_exit_net(struct net *net)
1044 {
1045         struct sit_net *sitn;
1046
1047         sitn = net_generic(net, sit_net_id);
1048         rtnl_lock();
1049         sit_destroy_tunnels(sitn);
1050         unregister_netdevice(sitn->fb_tunnel_dev);
1051         rtnl_unlock();
1052         kfree(sitn);
1053 }
1054
1055 static struct pernet_operations sit_net_ops = {
1056         .init = sit_init_net,
1057         .exit = sit_exit_net,
1058 };
1059
1060 static void __exit sit_cleanup(void)
1061 {
1062         xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1063
1064         unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
1065 }
1066
1067 static int __init sit_init(void)
1068 {
1069         int err;
1070
1071         printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
1072
1073         if (xfrm4_tunnel_register(&sit_handler, AF_INET6) < 0) {
1074                 printk(KERN_INFO "sit init: Can't add protocol\n");
1075                 return -EAGAIN;
1076         }
1077
1078         err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
1079         if (err < 0)
1080                 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1081
1082         return err;
1083 }
1084
1085 module_init(sit_init);
1086 module_exit(sit_cleanup);
1087 MODULE_LICENSE("GPL");
1088 MODULE_ALIAS("sit0");