]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/ipv4/ip_gre.c
[GRE]: Introduce empty ipgre_net structure and net init/exit ops.
[linux-2.6-omap-h63xx.git] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44
45 #ifdef CONFIG_IPV6
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #endif
50
51 /*
52    Problems & solutions
53    --------------------
54
55    1. The most important issue is detecting local dead loops.
56    They would cause complete host lockup in transmit, which
57    would be "resolved" by stack overflow or, if queueing is enabled,
58    with infinite looping in net_bh.
59
60    We cannot track such dead loops during route installation,
61    it is infeasible task. The most general solutions would be
62    to keep skb->encapsulation counter (sort of local ttl),
63    and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skb, even if no tunneling is used.
66
67    Current solution: t->recursion lock breaks dead loops. It looks
68    like dev->tbusy flag, but I preferred new variable, because
69    the semantics is different. One day, when hard_start_xmit
70    will be multithreaded we will have to use skb->encapsulation.
71
72
73
74    2. Networking dead loops would not kill routers, but would really
75    kill network. IP hop limit plays role of "t->recursion" in this case,
76    if we copy it from packet being encapsulated to upper header.
77    It is very good solution, but it introduces two problems:
78
79    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80      do not work over tunnels.
81    - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
84      only Linux complies to rfc1812 now (yes, guys, Linux is the only
85      true router now :-)), all routers (at least, in neighbourhood of mine)
86      return only 8 bytes of payload. It is the end.
87
88    Hence, if we want that OSPF worked or traceroute said something reasonable,
89    we should search for another solution.
90
91    One of them is to parse packet trying to detect inner encapsulation
92    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
94
95    Current solution: The solution was UNEXPECTEDLY SIMPLE.
96    We force DF flag on tunnels with preconfigured hop limit,
97    that is ALL. :-) Well, it does not remove the problem completely,
98    but exponential growth of network traffic is changed to linear
99    (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
101    Yes, it is not good if there exists a router in the loop,
102    which does not force DF, even when encapsulating packets have DF set.
103    But it is not our problem! Nobody could accuse us, we made
104    all that we could make. Even if it is your gated who injected
105    fatal route to network, even if it were you who configured
106    fatal static route: you are innocent. :-)
107
108
109
110    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111    practically identical code. It would be good to glue them
112    together, but it is not very evident, how to make them modular.
113    sit is integral part of IPv6, ipip and gre are naturally modular.
114    We could extract common parts (hash table, ioctl etc)
115    to a separate module (ip_tunnel.c).
116
117    Alexey Kuznetsov.
118  */
119
/* Forward declarations for the per-device hooks defined later in this file. */
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Slot id for per-namespace GRE state (net_generic); the structure is
 * currently empty — a placeholder introduced for net init/exit ops. */
static int ipgre_net_id;
struct ipgre_net {
};

/* The always-registered fallback device; ipgre_tunnel_lookup() returns
 * its private data when no configured tunnel matches (and it is UP). */
static struct net_device *ipgre_fb_tunnel_dev;
132
133 /* Tunnel hash table */
134
135 /*
136    4 hash tables:
137
138    3: (remote,local)
139    2: (remote,*)
140    1: (*,local)
141    0: (*,*)
142
143    We require exact key match i.e. if a key is present in packet
144    it will match only tunnel with the same key; if it is not present,
145    it will match only keyless tunnel.
146
   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
149  */
150
/* 16-bucket hash: fold the high nibble onto the low one and mask. */
#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* tunnels[prio][bucket]; prio encodes which of (remote, local) are set
 * (see __ipgre_bucket()). */
static struct ip_tunnel *tunnels[4][HASH_SIZE];

#define tunnels_r_l     (tunnels[3])    /* (remote, local) */
#define tunnels_r       (tunnels[2])    /* (remote, *)     */
#define tunnels_l       (tunnels[1])    /* (*, local)      */
#define tunnels_wc      (tunnels[0])    /* (*, *)          */

/* Protects the chains above: read-held on the rx/err paths, write-held
 * around the single pointer store in link/unlink. */
static DEFINE_RWLOCK(ipgre_lock);
162
163 /* Given src, dst and key, find appropriate for input tunnel. */
164
165 static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
166 {
167         unsigned h0 = HASH(remote);
168         unsigned h1 = HASH(key);
169         struct ip_tunnel *t;
170
171         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
172                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
173                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174                                 return t;
175                 }
176         }
177         for (t = tunnels_r[h0^h1]; t; t = t->next) {
178                 if (remote == t->parms.iph.daddr) {
179                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
180                                 return t;
181                 }
182         }
183         for (t = tunnels_l[h1]; t; t = t->next) {
184                 if (local == t->parms.iph.saddr ||
185                      (local == t->parms.iph.daddr &&
186                       ipv4_is_multicast(local))) {
187                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
188                                 return t;
189                 }
190         }
191         for (t = tunnels_wc[h1]; t; t = t->next) {
192                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193                         return t;
194         }
195
196         if (ipgre_fb_tunnel_dev->flags&IFF_UP)
197                 return netdev_priv(ipgre_fb_tunnel_dev);
198         return NULL;
199 }
200
201 static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
202 {
203         __be32 remote = parms->iph.daddr;
204         __be32 local = parms->iph.saddr;
205         __be32 key = parms->i_key;
206         unsigned h = HASH(key);
207         int prio = 0;
208
209         if (local)
210                 prio |= 1;
211         if (remote && !ipv4_is_multicast(remote)) {
212                 prio |= 2;
213                 h ^= HASH(remote);
214         }
215
216         return &tunnels[prio][h];
217 }
218
219 static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
220 {
221         return __ipgre_bucket(&t->parms);
222 }
223
/* Insert tunnel t at the head of its hash chain.
 *
 * t->next is prepared before taking the write lock; the store to *tp
 * under the lock is what publishes the node to readers of ipgre_lock.
 */
static void ipgre_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}
233
/* Remove tunnel t from its hash chain, if present.
 *
 * NOTE(review): the chain is walked without ipgre_lock; this appears to
 * assume writers are serialized at a higher level (e.g. rtnl) — confirm.
 * Only the unlinking pointer store itself is done under the write lock,
 * so readers never observe a half-updated chain.
 */
static void ipgre_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}
247
248 static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
249 {
250         __be32 remote = parms->iph.daddr;
251         __be32 local = parms->iph.saddr;
252         __be32 key = parms->i_key;
253         struct ip_tunnel *t, **tp, *nt;
254         struct net_device *dev;
255         char name[IFNAMSIZ];
256
257         for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
258                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
259                         if (key == t->parms.i_key)
260                                 return t;
261                 }
262         }
263         if (!create)
264                 return NULL;
265
266         if (parms->name[0])
267                 strlcpy(name, parms->name, IFNAMSIZ);
268         else
269                 sprintf(name, "gre%%d");
270
271         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
272         if (!dev)
273           return NULL;
274
275         if (strchr(name, '%')) {
276                 if (dev_alloc_name(dev, name) < 0)
277                         goto failed_free;
278         }
279
280         dev->init = ipgre_tunnel_init;
281         nt = netdev_priv(dev);
282         nt->parms = *parms;
283
284         if (register_netdevice(dev) < 0)
285                 goto failed_free;
286
287         dev_hold(dev);
288         ipgre_tunnel_link(nt);
289         return nt;
290
291 failed_free:
292         free_netdev(dev);
293         return NULL;
294 }
295
/* netdev uninit hook: detach the tunnel from the hash table and drop
 * the reference taken when the device was created.
 */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	ipgre_tunnel_unlink(tunnel);
	dev_put(dev);
}
301
302
/* ICMP error handler for GRE-in-IP.
 *
 * skb->data points at the IP header of the packet WE originally sent,
 * quoted inside the received ICMP error.  We parse the embedded GRE
 * header, find the originating tunnel and bump its soft error state
 * (err_count/err_time), which the transmit path consults.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
	/* IP header + 4 mandatory GRE bytes (flags + protocol). */
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* Account for optional words preceding the key so that the
		   key ends up as the last word within grehlen. */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	/* iph is OUR transmitted header, so daddr is the tunnel's remote
	   end and saddr the local one.  The key, if present, is the last
	   word within grehlen (see above). */
	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* ttl==0 means "copy inner ttl"; TTL-exceeded is then expected
	   (e.g. traceroute) and not an error of the tunnel itself. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors within IPTUNNEL_ERR_TIMEO as a burst; restart the
	   count otherwise. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
#else
	/* NOTE(review): this #else branch is dead code — it references
	 * `dp` and `len`, which are not defined anywhere in this
	 * function, so it cannot compile if I_WISH_WORLD_WERE_PERFECT is
	 * ever defined.  Kept for historical reference only. */
	struct iphdr *iph = (struct iphdr*)dp;
	struct iphdr *eiph;
	__be16       *p = (__be16*)(dp+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	__be16 flags;
	int grehlen = (iph->ihl<<2) + 4;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (p[1] != htons(ETH_P_IP))
		return;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_CSUM)
			grehlen += 4;
		if (flags&GRE_KEY)
			grehlen += 4;
		if (flags&GRE_SEQ)
			grehlen += 4;
	}
	if (len < grehlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + grehlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < (iph->ihl<<2))
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		n -= grehlen;
		rel_info = htonl(n << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < grehlen+68)
				return;
			n -= grehlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_GRE;
	if (ip_route_output_key(&init_net, &rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
#endif
}
532
533 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
534 {
535         if (INET_ECN_is_ce(iph->tos)) {
536                 if (skb->protocol == htons(ETH_P_IP)) {
537                         IP_ECN_set_ce(ip_hdr(skb));
538                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
539                         IP6_ECN_set_ce(ipv6_hdr(skb));
540                 }
541         }
542 }
543
544 static inline u8
545 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
546 {
547         u8 inner = 0;
548         if (skb->protocol == htons(ETH_P_IP))
549                 inner = old_iph->tos;
550         else if (skb->protocol == htons(ETH_P_IPV6))
551                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
552         return INET_ECN_encapsulate(tos, inner);
553 }
554
/* GRE receive handler: validate the GRE header, verify the optional
 * checksum/key/sequence fields, look up the matching tunnel and feed
 * the decapsulated packet back into the stack via netif_rx().
 * Always returns 0; the skb is consumed on every path.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* past the mandatory flags + protocol words */

	/* 16 bytes covers the maximum fixed header read below:
	   4 mandatory + optional csum, key and seq words (4 each). */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				/* A zero fold means the hardware-verified sum
				   already covers the GRE checksum. */
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	read_lock(&ipgre_lock);
	/* For reception the packet's saddr is the remote tunnel end. */
	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
		secpath_reset(skb);

		skb->protocol = *(__be16*)(h + 2);
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 &&
		    skb->protocol == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* 0x40 = IPv4 version nibble; anything else means a
			   WCCPv2 redirect header precedes the IP packet. */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_reset_network_header(skb);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			tunnel->stat.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* Reject a bad checksum, or a missing one when the tunnel
		   is configured to require it. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->stat.rx_crc_errors++;
			tunnel->stat.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/* Signed difference handles sequence-number wrap. */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->stat.rx_fifo_errors++;
				tunnel->stat.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipgre_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	/* No tunnel matched: tell the sender the protocol is unreachable. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}
669
670 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
671 {
672         struct ip_tunnel *tunnel = netdev_priv(dev);
673         struct net_device_stats *stats = &tunnel->stat;
674         struct iphdr  *old_iph = ip_hdr(skb);
675         struct iphdr  *tiph;
676         u8     tos;
677         __be16 df;
678         struct rtable *rt;                      /* Route to the other host */
679         struct net_device *tdev;                        /* Device to other host */
680         struct iphdr  *iph;                     /* Our new IP header */
681         unsigned int max_headroom;              /* The extra header space needed */
682         int    gre_hlen;
683         __be32 dst;
684         int    mtu;
685
686         if (tunnel->recursion++) {
687                 tunnel->stat.collisions++;
688                 goto tx_error;
689         }
690
691         if (dev->header_ops) {
692                 gre_hlen = 0;
693                 tiph = (struct iphdr*)skb->data;
694         } else {
695                 gre_hlen = tunnel->hlen;
696                 tiph = &tunnel->parms.iph;
697         }
698
699         if ((dst = tiph->daddr) == 0) {
700                 /* NBMA tunnel */
701
702                 if (skb->dst == NULL) {
703                         tunnel->stat.tx_fifo_errors++;
704                         goto tx_error;
705                 }
706
707                 if (skb->protocol == htons(ETH_P_IP)) {
708                         rt = skb->rtable;
709                         if ((dst = rt->rt_gateway) == 0)
710                                 goto tx_error_icmp;
711                 }
712 #ifdef CONFIG_IPV6
713                 else if (skb->protocol == htons(ETH_P_IPV6)) {
714                         struct in6_addr *addr6;
715                         int addr_type;
716                         struct neighbour *neigh = skb->dst->neighbour;
717
718                         if (neigh == NULL)
719                                 goto tx_error;
720
721                         addr6 = (struct in6_addr*)&neigh->primary_key;
722                         addr_type = ipv6_addr_type(addr6);
723
724                         if (addr_type == IPV6_ADDR_ANY) {
725                                 addr6 = &ipv6_hdr(skb)->daddr;
726                                 addr_type = ipv6_addr_type(addr6);
727                         }
728
729                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
730                                 goto tx_error_icmp;
731
732                         dst = addr6->s6_addr32[3];
733                 }
734 #endif
735                 else
736                         goto tx_error;
737         }
738
739         tos = tiph->tos;
740         if (tos&1) {
741                 if (skb->protocol == htons(ETH_P_IP))
742                         tos = old_iph->tos;
743                 tos &= ~1;
744         }
745
746         {
747                 struct flowi fl = { .oif = tunnel->parms.link,
748                                     .nl_u = { .ip4_u =
749                                               { .daddr = dst,
750                                                 .saddr = tiph->saddr,
751                                                 .tos = RT_TOS(tos) } },
752                                     .proto = IPPROTO_GRE };
753                 if (ip_route_output_key(&init_net, &rt, &fl)) {
754                         tunnel->stat.tx_carrier_errors++;
755                         goto tx_error;
756                 }
757         }
758         tdev = rt->u.dst.dev;
759
760         if (tdev == dev) {
761                 ip_rt_put(rt);
762                 tunnel->stat.collisions++;
763                 goto tx_error;
764         }
765
766         df = tiph->frag_off;
767         if (df)
768                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
769         else
770                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
771
772         if (skb->dst)
773                 skb->dst->ops->update_pmtu(skb->dst, mtu);
774
775         if (skb->protocol == htons(ETH_P_IP)) {
776                 df |= (old_iph->frag_off&htons(IP_DF));
777
778                 if ((old_iph->frag_off&htons(IP_DF)) &&
779                     mtu < ntohs(old_iph->tot_len)) {
780                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
781                         ip_rt_put(rt);
782                         goto tx_error;
783                 }
784         }
785 #ifdef CONFIG_IPV6
786         else if (skb->protocol == htons(ETH_P_IPV6)) {
787                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
788
789                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
790                         if ((tunnel->parms.iph.daddr &&
791                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
792                             rt6->rt6i_dst.plen == 128) {
793                                 rt6->rt6i_flags |= RTF_MODIFIED;
794                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
795                         }
796                 }
797
798                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
799                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
800                         ip_rt_put(rt);
801                         goto tx_error;
802                 }
803         }
804 #endif
805
806         if (tunnel->err_count > 0) {
807                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
808                         tunnel->err_count--;
809
810                         dst_link_failure(skb);
811                 } else
812                         tunnel->err_count = 0;
813         }
814
815         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
816
817         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
818             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
819                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
820                 if (!new_skb) {
821                         ip_rt_put(rt);
822                         stats->tx_dropped++;
823                         dev_kfree_skb(skb);
824                         tunnel->recursion--;
825                         return 0;
826                 }
827                 if (skb->sk)
828                         skb_set_owner_w(new_skb, skb->sk);
829                 dev_kfree_skb(skb);
830                 skb = new_skb;
831                 old_iph = ip_hdr(skb);
832         }
833
834         skb->transport_header = skb->network_header;
835         skb_push(skb, gre_hlen);
836         skb_reset_network_header(skb);
837         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
838         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
839                               IPSKB_REROUTED);
840         dst_release(skb->dst);
841         skb->dst = &rt->u.dst;
842
843         /*
844          *      Push down and install the IPIP header.
845          */
846
847         iph                     =       ip_hdr(skb);
848         iph->version            =       4;
849         iph->ihl                =       sizeof(struct iphdr) >> 2;
850         iph->frag_off           =       df;
851         iph->protocol           =       IPPROTO_GRE;
852         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
853         iph->daddr              =       rt->rt_dst;
854         iph->saddr              =       rt->rt_src;
855
856         if ((iph->ttl = tiph->ttl) == 0) {
857                 if (skb->protocol == htons(ETH_P_IP))
858                         iph->ttl = old_iph->ttl;
859 #ifdef CONFIG_IPV6
860                 else if (skb->protocol == htons(ETH_P_IPV6))
861                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
862 #endif
863                 else
864                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
865         }
866
867         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
868         ((__be16*)(iph+1))[1] = skb->protocol;
869
870         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
871                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
872
873                 if (tunnel->parms.o_flags&GRE_SEQ) {
874                         ++tunnel->o_seqno;
875                         *ptr = htonl(tunnel->o_seqno);
876                         ptr--;
877                 }
878                 if (tunnel->parms.o_flags&GRE_KEY) {
879                         *ptr = tunnel->parms.o_key;
880                         ptr--;
881                 }
882                 if (tunnel->parms.o_flags&GRE_CSUM) {
883                         *ptr = 0;
884                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
885                 }
886         }
887
888         nf_reset(skb);
889
890         IPTUNNEL_XMIT();
891         tunnel->recursion--;
892         return 0;
893
894 tx_error_icmp:
895         dst_link_failure(skb);
896
897 tx_error:
898         stats->tx_errors++;
899         dev_kfree_skb(skb);
900         tunnel->recursion--;
901         return 0;
902 }
903
904 static void ipgre_tunnel_bind_dev(struct net_device *dev)
905 {
906         struct net_device *tdev = NULL;
907         struct ip_tunnel *tunnel;
908         struct iphdr *iph;
909         int hlen = LL_MAX_HEADER;
910         int mtu = ETH_DATA_LEN;
911         int addend = sizeof(struct iphdr) + 4;
912
913         tunnel = netdev_priv(dev);
914         iph = &tunnel->parms.iph;
915
916         /* Guess output device to choose reasonable mtu and hard_header_len */
917
918         if (iph->daddr) {
919                 struct flowi fl = { .oif = tunnel->parms.link,
920                                     .nl_u = { .ip4_u =
921                                               { .daddr = iph->daddr,
922                                                 .saddr = iph->saddr,
923                                                 .tos = RT_TOS(iph->tos) } },
924                                     .proto = IPPROTO_GRE };
925                 struct rtable *rt;
926                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
927                         tdev = rt->u.dst.dev;
928                         ip_rt_put(rt);
929                 }
930                 dev->flags |= IFF_POINTOPOINT;
931         }
932
933         if (!tdev && tunnel->parms.link)
934                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
935
936         if (tdev) {
937                 hlen = tdev->hard_header_len;
938                 mtu = tdev->mtu;
939         }
940         dev->iflink = tunnel->parms.link;
941
942         /* Precalculate GRE options length */
943         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
944                 if (tunnel->parms.o_flags&GRE_CSUM)
945                         addend += 4;
946                 if (tunnel->parms.o_flags&GRE_KEY)
947                         addend += 4;
948                 if (tunnel->parms.o_flags&GRE_SEQ)
949                         addend += 4;
950         }
951         dev->hard_header_len = hlen + addend;
952         dev->mtu = mtu - addend;
953         tunnel->hlen = addend;
954
955 }
956
/*
 * ioctl handler for tunnel configuration (SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL, SIOCDELTUNNEL), reached via `ip tunnel add/change/del/show`.
 *
 * ifr->ifr_ifru.ifru_data points at a userspace struct ip_tunnel_parm.
 * Returns 0 on success or a negative errno.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel described by
		 * the userspace parameters; otherwise report this device. */
		if (dev == ipgre_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Validate: outer header must be plain IPv4+GRE without
		 * options, only DF may be set in frag_off, and the GRE
		 * version/routing bits (unsupported here) must be clear. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys are only meaningful when the matching flag is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The new parameters match an existing tunnel;
				 * that is only OK if it is this very device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags=0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* A live device cannot switch between
				 * broadcast and point-to-point mode. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-key/re-address: unlink, update the fields
				 * that form the hash key, then relink. */
				ipgre_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				/* A link change affects mtu/header length. */
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the (possibly updated) parameters back. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipgre_fb_tunnel_dev) {
			/* Deleting "via" the fallback device: locate the real
			 * tunnel from the userspace parameters.  The fallback
			 * device itself may never be deleted. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ipgre_fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1084
1085 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1086 {
1087         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1088 }
1089
1090 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091 {
1092         struct ip_tunnel *tunnel = netdev_priv(dev);
1093         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1094                 return -EINVAL;
1095         dev->mtu = new_mtu;
1096         return 0;
1097 }
1098
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.
1102
1103
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
1108
1109    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1110    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1111
1112    ping -t 255 224.66.66.66
1113
1114    If nobody answers, mbone does not work.
1115
1116    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1117    ip addr add 10.66.66.<somewhat>/24 dev Universe
1118    ifconfig Universe up
1119    ifconfig Universe add fe80::<Your_real_addr>/10
1120    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1121    ftp 10.66.66.66
1122    ...
1123    ftp fec0:6666:6666::193.233.7.65
1124    ...
1125
1126  */
1127
1128 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1129                         unsigned short type,
1130                         const void *daddr, const void *saddr, unsigned len)
1131 {
1132         struct ip_tunnel *t = netdev_priv(dev);
1133         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1134         __be16 *p = (__be16*)(iph+1);
1135
1136         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1137         p[0]            = t->parms.o_flags;
1138         p[1]            = htons(type);
1139
1140         /*
1141          *      Set the source hardware address.
1142          */
1143
1144         if (saddr)
1145                 memcpy(&iph->saddr, saddr, 4);
1146
1147         if (daddr) {
1148                 memcpy(&iph->daddr, daddr, 4);
1149                 return t->hlen;
1150         }
1151         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1152                 return t->hlen;
1153
1154         return -t->hlen;
1155 }
1156
1157 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1158 {
1159         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1160         memcpy(haddr, &iph->saddr, 4);
1161         return 4;
1162 }
1163
/* Pseudo link-layer header ops used by NBMA and broadcast-mode tunnels. */
static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse  = ipgre_header_parse,
};
1168
1169 #ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * dev->open for broadcast-mode tunnels: when the remote endpoint is a
 * multicast address, route towards it and join that group on the
 * underlying device, remembering its ifindex in t->mlink so
 * ipgre_close() can leave the group again.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if no route or in_device exists.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		/* NOTE(review): routes in init_net while ipgre_close() uses
		 * dev_net(dev) — presumably equivalent while tunnels exist
		 * only in the initial namespace; confirm when per-netns
		 * tunnel support is completed. */
		if (ip_route_output_key(&init_net, &rt, &fl))
			return -EADDRNOTAVAIL;
		/* From here on `dev` is the underlying output device. */
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1193
1194 static int ipgre_close(struct net_device *dev)
1195 {
1196         struct ip_tunnel *t = netdev_priv(dev);
1197         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1198                 struct in_device *in_dev;
1199                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1200                 if (in_dev) {
1201                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1202                         in_dev_put(in_dev);
1203                 }
1204         }
1205         return 0;
1206 }
1207
1208 #endif
1209
/* Setup callback for alloc_netdev(): install the device ops and the
 * defaults shared by all GRE tunnel devices.  The "+ 4" / "- 4" terms
 * account for the minimal GRE header (flags + protocol). */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->get_stats		= ipgre_tunnel_get_stats;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;	/* tunnels do not ARP */
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* "hw" address is an IPv4 address */
}
1226
1227 static int ipgre_tunnel_init(struct net_device *dev)
1228 {
1229         struct ip_tunnel *tunnel;
1230         struct iphdr *iph;
1231
1232         tunnel = netdev_priv(dev);
1233         iph = &tunnel->parms.iph;
1234
1235         tunnel->dev = dev;
1236         strcpy(tunnel->parms.name, dev->name);
1237
1238         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1239         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1240
1241         ipgre_tunnel_bind_dev(dev);
1242
1243         if (iph->daddr) {
1244 #ifdef CONFIG_NET_IPGRE_BROADCAST
1245                 if (ipv4_is_multicast(iph->daddr)) {
1246                         if (!iph->saddr)
1247                                 return -EINVAL;
1248                         dev->flags = IFF_BROADCAST;
1249                         dev->header_ops = &ipgre_header_ops;
1250                         dev->open = ipgre_open;
1251                         dev->stop = ipgre_close;
1252                 }
1253 #endif
1254         } else
1255                 dev->header_ops = &ipgre_header_ops;
1256
1257         return 0;
1258 }
1259
1260 static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1261 {
1262         struct ip_tunnel *tunnel = netdev_priv(dev);
1263         struct iphdr *iph = &tunnel->parms.iph;
1264
1265         tunnel->dev = dev;
1266         strcpy(tunnel->parms.name, dev->name);
1267
1268         iph->version            = 4;
1269         iph->protocol           = IPPROTO_GRE;
1270         iph->ihl                = 5;
1271         tunnel->hlen            = sizeof(struct iphdr) + 4;
1272
1273         dev_hold(dev);
1274         tunnels_wc[0]           = tunnel;
1275         return 0;
1276 }
1277
1278
/* IPPROTO_GRE registration: ipgre_rcv() takes incoming GRE packets,
 * ipgre_err() handles ICMP errors for transmitted ones. */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
};
1283
1284 static int ipgre_init_net(struct net *net)
1285 {
1286         int err;
1287         struct ipgre_net *ign;
1288
1289         err = -ENOMEM;
1290         ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1291         if (ign == NULL)
1292                 goto err_alloc;
1293
1294         err = net_assign_generic(net, ipgre_net_id, ign);
1295         if (err < 0)
1296                 goto err_assign;
1297
1298         return 0;
1299
1300 err_assign:
1301         kfree(ign);
1302 err_alloc:
1303         return err;
1304 }
1305
1306 static void ipgre_exit_net(struct net *net)
1307 {
1308         struct ipgre_net *ign;
1309
1310         ign = net_generic(net, ipgre_net_id);
1311         kfree(ign);
1312 }
1313
/* Per-network-namespace lifecycle hooks, registered in module init. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1318
1319 /*
1320  *      And now the modules code and kernel interface.
1321  */
1322
1323 static int __init ipgre_init(void)
1324 {
1325         int err;
1326
1327         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1328
1329         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1330                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1331                 return -EAGAIN;
1332         }
1333
1334         ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1335                                            ipgre_tunnel_setup);
1336         if (!ipgre_fb_tunnel_dev) {
1337                 err = -ENOMEM;
1338                 goto err1;
1339         }
1340
1341         ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1342
1343         if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1344                 goto err2;
1345
1346         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1347         if (err < 0)
1348                 goto err3;
1349 out:
1350         return err;
1351 err2:
1352         free_netdev(ipgre_fb_tunnel_dev);
1353 err1:
1354         inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1355         goto out;
1356 err3:
1357         unregister_netdevice(ipgre_fb_tunnel_dev);
1358         goto err1;
1359 }
1360
/*
 * Unregister every tunnel device still linked into the hash tables.
 *
 * Caller must hold RTNL (unregister_netdevice() requires it — see
 * ipgre_fini()).  The inner while loop re-reads the bucket head each
 * iteration because unregistering a device removes it from the chain.
 * 4 is presumably the number of hash tables in tunnels[] (defined
 * earlier in this file, outside this view) — confirm against that
 * declaration.
 */
static void __exit ipgre_destroy_tunnels(void)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}
1374
/* Module unload: stop accepting GRE input first, then tear down all
 * tunnel devices under RTNL, finally drop the per-namespace state. */
static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	/* unregister_netdevice() in ipgre_destroy_tunnels() needs RTNL. */
	rtnl_lock();
	ipgre_destroy_tunnels();
	rtnl_unlock();

	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
}
1386
/* Module entry/exit points and license. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");