/*
 * net/xfrm/xfrm_policy.c (blob from pilppa.org gitweb, linux-2.6-omap-h63xx.git)
 * Commit subject: [NETFILTER]: Handle NAT in IPsec policy checks
 */
1 /* 
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <asm/bug.h>
17 #include <linux/config.h>
18 #include <linux/slab.h>
19 #include <linux/kmod.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/workqueue.h>
23 #include <linux/notifier.h>
24 #include <linux/netdevice.h>
25 #include <linux/netfilter.h>
26 #include <linux/module.h>
27 #include <net/xfrm.h>
28 #include <net/ip.h>
29
/* Serializes xfrm configuration changes; exported for key-manager modules. */
DECLARE_MUTEX(xfrm_cfg_sem);
EXPORT_SYMBOL(xfrm_cfg_sem);

/* Protects xfrm_policy_list and the per-socket policy pointers. */
static DEFINE_RWLOCK(xfrm_policy_lock);

/* Per-direction policy lists: indexes [0, XFRM_POLICY_MAX) hold the main
 * SPD; [XFRM_POLICY_MAX, 2*XFRM_POLICY_MAX) hold per-socket policies
 * (see xfrm_sk_policy_insert()). */
struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_list);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
/* Per-address-family policy operations, indexed by family. */
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static kmem_cache_t *xfrm_dst_cache __read_mostly;

/* Deferred destruction of dead policies; queued by xfrm_policy_kill()
 * and drained by xfrm_policy_gc_task(). */
static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
	LIST_HEAD_INIT(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
50
51 int xfrm_register_type(struct xfrm_type *type, unsigned short family)
52 {
53         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
54         struct xfrm_type_map *typemap;
55         int err = 0;
56
57         if (unlikely(afinfo == NULL))
58                 return -EAFNOSUPPORT;
59         typemap = afinfo->type_map;
60
61         write_lock(&typemap->lock);
62         if (likely(typemap->map[type->proto] == NULL))
63                 typemap->map[type->proto] = type;
64         else
65                 err = -EEXIST;
66         write_unlock(&typemap->lock);
67         xfrm_policy_put_afinfo(afinfo);
68         return err;
69 }
70 EXPORT_SYMBOL(xfrm_register_type);
71
72 int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
73 {
74         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
75         struct xfrm_type_map *typemap;
76         int err = 0;
77
78         if (unlikely(afinfo == NULL))
79                 return -EAFNOSUPPORT;
80         typemap = afinfo->type_map;
81
82         write_lock(&typemap->lock);
83         if (unlikely(typemap->map[type->proto] != type))
84                 err = -ENOENT;
85         else
86                 typemap->map[type->proto] = NULL;
87         write_unlock(&typemap->lock);
88         xfrm_policy_put_afinfo(afinfo);
89         return err;
90 }
91 EXPORT_SYMBOL(xfrm_unregister_type);
92
/* Look up the transform type registered for @proto in @family, taking a
 * reference on its owning module.  If none is registered yet, try once to
 * load the "xfrm-type-<family>-<proto>" module and retry.  Returns NULL
 * if the family is unsupported or no type could be found; on success the
 * caller must release the module reference with xfrm_put_type().
 */
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_type_map *typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	read_lock(&typemap->lock);
	type = typemap->map[proto];
	/* The owning module may be unloading; treat that as "not found". */
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	read_unlock(&typemap->lock);
	if (!type && !modload_attempted) {
		/* Drop afinfo before the (possibly sleeping) module load. */
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d",
			       (int) family, (int) proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return type;
}
122
123 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
124                     unsigned short family)
125 {
126         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
127         int err = 0;
128
129         if (unlikely(afinfo == NULL))
130                 return -EAFNOSUPPORT;
131
132         if (likely(afinfo->dst_lookup != NULL))
133                 err = afinfo->dst_lookup(dst, fl);
134         else
135                 err = -EINVAL;
136         xfrm_policy_put_afinfo(afinfo);
137         return err;
138 }
139 EXPORT_SYMBOL(xfrm_dst_lookup);
140
141 void xfrm_put_type(struct xfrm_type *type)
142 {
143         module_put(type->owner);
144 }
145
146 static inline unsigned long make_jiffies(long secs)
147 {
148         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
149                 return MAX_SCHEDULE_TIMEOUT-1;
150         else
151                 return secs*HZ;
152 }
153
/* Per-policy lifetime timer.  Compares the policy's soft/hard add/use
 * lifetimes against wallclock seconds: a hard expiry deletes the policy
 * and notifies key managers; a soft expiry only emits a warning event and
 * re-checks after XFRM_KM_TIMEOUT.  Re-arms itself for the nearest
 * pending deadline.  The pending timer owns a policy reference, which is
 * dropped here.
 */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the nearest deadline */
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		/* Never-used policies count from creation time. */
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0);
	/* mod_timer() returns 0 when the timer was inactive, so the
	 * re-armed timer needs its own reference. */
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1);
	xfrm_pol_put(xp);
}
223
224
225 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
226  * SPD calls.
227  */
228
229 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
230 {
231         struct xfrm_policy *policy;
232
233         policy = kmalloc(sizeof(struct xfrm_policy), gfp);
234
235         if (policy) {
236                 memset(policy, 0, sizeof(struct xfrm_policy));
237                 atomic_set(&policy->refcnt, 1);
238                 rwlock_init(&policy->lock);
239                 init_timer(&policy->timer);
240                 policy->timer.data = (unsigned long)policy;
241                 policy->timer.function = xfrm_policy_timer;
242         }
243         return policy;
244 }
245 EXPORT_SYMBOL(xfrm_policy_alloc);
246
247 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
248
249 void __xfrm_policy_destroy(struct xfrm_policy *policy)
250 {
251         if (!policy->dead)
252                 BUG();
253
254         if (policy->bundles)
255                 BUG();
256
257         if (del_timer(&policy->timer))
258                 BUG();
259
260         security_xfrm_policy_free(policy);
261         kfree(policy);
262 }
263 EXPORT_SYMBOL(__xfrm_policy_destroy);
264
/* Final teardown of a policy queued for garbage collection: free its
 * cached dst bundles, disarm its timer, and drop the GC reference.
 * Runs only from xfrm_policy_gc_task() workqueue context. */
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	/* A pending timer holds its own reference; release it if the
	 * timer was still armed. */
	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	/* Remaining references presumably come from flow cache entries;
	 * flush the cache so they can be dropped. */
	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}
282
283 static void xfrm_policy_gc_task(void *data)
284 {
285         struct xfrm_policy *policy;
286         struct list_head *entry, *tmp;
287         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
288
289         spin_lock_bh(&xfrm_policy_gc_lock);
290         list_splice_init(&xfrm_policy_gc_list, &gc_list);
291         spin_unlock_bh(&xfrm_policy_gc_lock);
292
293         list_for_each_safe(entry, tmp, &gc_list) {
294                 policy = list_entry(entry, struct xfrm_policy, list);
295                 xfrm_policy_gc_kill(policy);
296         }
297 }
298
299 /* Rule must be locked. Release descentant resources, announce
300  * entry dead. The rule must be unlinked from lists to the moment.
301  */
302
303 static void xfrm_policy_kill(struct xfrm_policy *policy)
304 {
305         int dead;
306
307         write_lock_bh(&policy->lock);
308         dead = policy->dead;
309         policy->dead = 1;
310         write_unlock_bh(&policy->lock);
311
312         if (unlikely(dead)) {
313                 WARN_ON(1);
314                 return;
315         }
316
317         spin_lock(&xfrm_policy_gc_lock);
318         list_add(&policy->list, &xfrm_policy_gc_list);
319         spin_unlock(&xfrm_policy_gc_lock);
320
321         schedule_work(&xfrm_policy_gc_work);
322 }
323
324 /* Generate new index... KAME seems to generate them ordered by cost
325  * of an absolute inpredictability of ordering of rules. This will not pass. */
326 static u32 xfrm_gen_index(int dir)
327 {
328         u32 idx;
329         struct xfrm_policy *p;
330         static u32 idx_generator;
331
332         for (;;) {
333                 idx = (idx_generator | dir);
334                 idx_generator += 8;
335                 if (idx == 0)
336                         idx = 8;
337                 for (p = xfrm_policy_list[dir]; p; p = p->next) {
338                         if (p->index == idx)
339                                 break;
340                 }
341                 if (!p)
342                         return idx;
343         }
344 }
345
/* Insert @policy into the SPD for direction @dir, keeping the list
 * ordered by priority.  An existing policy with the same selector and
 * security context is replaced (its index is reused), unless @excl is
 * set, in which case -EEXIST is returned.  Cached bundles of policies
 * behind the insertion point are freed since the new entry may shadow
 * them.  Returns 0 on success.
 */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol, **p;
	struct xfrm_policy *delpol = NULL;	/* replaced duplicate, if any */
	struct xfrm_policy **newpos = NULL;	/* insertion point by priority */
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
		    xfrm_sec_ctx_match(pol->security, policy->security)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			/* Unlink the duplicate; it is killed below. */
			*p = pol->next;
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			p = &pol->next;
			continue;
		}
		/* First slot whose priority exceeds the new policy's. */
		if (!newpos)
			newpos = p;
		if (delpol)
			break;
		p = &pol->next;
	}
	if (newpos)
		p = newpos;
	xfrm_pol_hold(policy);
	policy->next = *p;
	*p = policy;
	/* Invalidate cached flow lookups. */
	atomic_inc(&flow_cache_genid);
	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
	policy->curlft.use_time = 0;
	/* Arm the lifetime timer; an idle timer takes its own reference. */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);

	/* Detach the cached bundles of every policy after the new entry
	 * onto a private list, then free them outside the lock. */
	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	for (policy = policy->next; policy; policy = policy->next) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
421
422 struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
423                                           struct xfrm_sec_ctx *ctx, int delete)
424 {
425         struct xfrm_policy *pol, **p;
426
427         write_lock_bh(&xfrm_policy_lock);
428         for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
429                 if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
430                     (xfrm_sec_ctx_match(ctx, pol->security))) {
431                         xfrm_pol_hold(pol);
432                         if (delete)
433                                 *p = pol->next;
434                         break;
435                 }
436         }
437         write_unlock_bh(&xfrm_policy_lock);
438
439         if (pol && delete) {
440                 atomic_inc(&flow_cache_genid);
441                 xfrm_policy_kill(pol);
442         }
443         return pol;
444 }
445 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
446
447 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
448 {
449         struct xfrm_policy *pol, **p;
450
451         write_lock_bh(&xfrm_policy_lock);
452         for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
453                 if (pol->index == id) {
454                         xfrm_pol_hold(pol);
455                         if (delete)
456                                 *p = pol->next;
457                         break;
458                 }
459         }
460         write_unlock_bh(&xfrm_policy_lock);
461
462         if (pol && delete) {
463                 atomic_inc(&flow_cache_genid);
464                 xfrm_policy_kill(pol);
465         }
466         return pol;
467 }
468 EXPORT_SYMBOL(xfrm_policy_byid);
469
/* Remove and kill every policy in the main SPD (per-socket policies,
 * stored above XFRM_POLICY_MAX, are left alone).  The policy lock is
 * released around each xfrm_policy_kill(); since the list head is
 * re-read after reacquiring it, entries inserted concurrently are
 * flushed too. */
void xfrm_policy_flush(void)
{
	struct xfrm_policy *xp;
	int dir;

	write_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		while ((xp = xfrm_policy_list[dir]) != NULL) {
			xfrm_policy_list[dir] = xp->next;
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_policy_kill(xp);

			write_lock_bh(&xfrm_policy_lock);
		}
	}
	/* Invalidate cached flow lookups. */
	atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);
490
491 int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
492                      void *data)
493 {
494         struct xfrm_policy *xp;
495         int dir;
496         int count = 0;
497         int error = 0;
498
499         read_lock_bh(&xfrm_policy_lock);
500         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
501                 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
502                         count++;
503         }
504
505         if (count == 0) {
506                 error = -ENOENT;
507                 goto out;
508         }
509
510         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
511                 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
512                         error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
513                         if (error)
514                                 goto out;
515                 }
516         }
517
518 out:
519         read_unlock_bh(&xfrm_policy_lock);
520         return error;
521 }
522 EXPORT_SYMBOL(xfrm_policy_walk);
523
524 /* Find policy to apply to this flow. */
525
526 static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
527                                void **objp, atomic_t **obj_refp)
528 {
529         struct xfrm_policy *pol;
530
531         read_lock_bh(&xfrm_policy_lock);
532         for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
533                 struct xfrm_selector *sel = &pol->selector;
534                 int match;
535
536                 if (pol->family != family)
537                         continue;
538
539                 match = xfrm_selector_match(sel, fl, family);
540
541                 if (match) {
542                         if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
543                                 xfrm_pol_hold(pol);
544                                 break;
545                         }
546                 }
547         }
548         read_unlock_bh(&xfrm_policy_lock);
549         if ((*objp = (void *) pol) != NULL)
550                 *obj_refp = &pol->refcnt;
551 }
552
553 static inline int policy_to_flow_dir(int dir)
554 {
555         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
556             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
557             XFRM_POLICY_FWD == FLOW_DIR_FWD)
558                 return dir;
559         switch (dir) {
560         default:
561         case XFRM_POLICY_IN:
562                 return FLOW_DIR_IN;
563         case XFRM_POLICY_OUT:
564                 return FLOW_DIR_OUT;
565         case XFRM_POLICY_FWD:
566                 return FLOW_DIR_FWD;
567         };
568 }
569
570 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
571 {
572         struct xfrm_policy *pol;
573
574         read_lock_bh(&xfrm_policy_lock);
575         if ((pol = sk->sk_policy[dir]) != NULL) {
576                 int match = xfrm_selector_match(&pol->selector, fl,
577                                                 sk->sk_family);
578                 int err = 0;
579
580                 if (match)
581                   err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
582
583                 if (match && !err)
584                         xfrm_pol_hold(pol);
585                 else
586                         pol = NULL;
587         }
588         read_unlock_bh(&xfrm_policy_lock);
589         return pol;
590 }
591
592 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
593 {
594         pol->next = xfrm_policy_list[dir];
595         xfrm_policy_list[dir] = pol;
596         xfrm_pol_hold(pol);
597 }
598
599 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
600                                                 int dir)
601 {
602         struct xfrm_policy **polp;
603
604         for (polp = &xfrm_policy_list[dir];
605              *polp != NULL; polp = &(*polp)->next) {
606                 if (*polp == pol) {
607                         *polp = pol->next;
608                         return pol;
609                 }
610         }
611         return NULL;
612 }
613
614 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
615 {
616         write_lock_bh(&xfrm_policy_lock);
617         pol = __xfrm_policy_unlink(pol, dir);
618         write_unlock_bh(&xfrm_policy_lock);
619         if (pol) {
620                 if (dir < XFRM_POLICY_MAX)
621                         atomic_inc(&flow_cache_genid);
622                 xfrm_policy_kill(pol);
623                 return 0;
624         }
625         return -ENOENT;
626 }
627
628 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
629 {
630         struct xfrm_policy *old_pol;
631
632         write_lock_bh(&xfrm_policy_lock);
633         old_pol = sk->sk_policy[dir];
634         sk->sk_policy[dir] = pol;
635         if (pol) {
636                 pol->curlft.add_time = (unsigned long)xtime.tv_sec;
637                 pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
638                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
639         }
640         if (old_pol)
641                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
642         write_unlock_bh(&xfrm_policy_lock);
643
644         if (old_pol) {
645                 xfrm_policy_kill(old_pol);
646         }
647         return 0;
648 }
649
/* Duplicate a per-socket policy for a cloned socket and link the copy
 * into the socket section of the policy lists.  Returns the clone, or
 * NULL on allocation or security-label-clone failure. */
static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old, newp)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		/* Drop the allocation reference; the link above took
		 * the list's own reference. */
		xfrm_pol_put(newp);
	}
	return newp;
}
675
676 int __xfrm_sk_clone_policy(struct sock *sk)
677 {
678         struct xfrm_policy *p0 = sk->sk_policy[0],
679                            *p1 = sk->sk_policy[1];
680
681         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
682         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
683                 return -ENOMEM;
684         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
685                 return -ENOMEM;
686         return 0;
687 }
688
/* Resolve list of templates for the flow, given policy. */

/* Walks the policy's template vector, resolving each template to an
 * xfrm_state placed in @xfrm.  Returns the number of states resolved,
 * or a negative errno (-EAGAIN while a state is still being acquired,
 * -EINVAL for a state in error); on failure all acquired state
 * references are dropped. */
static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		/* Tunnel-mode templates supply their own endpoints. */
		if (tmpl->mode) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* Later templates nest inside this tunnel's
			 * addresses. */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		/* Optional templates may be skipped on failure. */
		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
736
737 /* Check that the bundle accepts the flow and its components are
738  * still valid.
739  */
740
741 static struct dst_entry *
742 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
743 {
744         struct dst_entry *x;
745         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
746         if (unlikely(afinfo == NULL))
747                 return ERR_PTR(-EINVAL);
748         x = afinfo->find_bundle(fl, policy);
749         xfrm_policy_put_afinfo(afinfo);
750         return x;
751 }
752
753 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
754  * all the metrics... Shortly, bundle a bundle.
755  */
756
757 static int
758 xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
759                    struct flowi *fl, struct dst_entry **dst_p,
760                    unsigned short family)
761 {
762         int err;
763         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
764         if (unlikely(afinfo == NULL))
765                 return -EINVAL;
766         err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
767         xfrm_policy_put_afinfo(afinfo);
768         return err;
769 }
770
771
772 static int stale_bundle(struct dst_entry *dst);
773
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 *
 * On success, *dst_p is replaced by an xfrm bundle (or left untouched
 * when no policy applies) and 0 is returned.  On error the original
 * route is released, *dst_p is set to NULL and a negative errno is
 * returned.  With non-zero @flags, an -EAGAIN state resolution blocks
 * once on km_waitq for key-manager negotiation before retrying.
 */
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	u32 sk_sid = security_sk_sid(sk, fl, dir);
restart:
	/* Snapshot the flow cache generation so we can detect policy
	 * changes that happened while we were resolving. */
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	/* Per-socket policy takes precedence over the main SPD. */
	if (sk && sk->sk_policy[1])
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
			return 0;

		policy = flow_cache_lookup(fl, sk_sid, family, dir,
					   xfrm_policy_lookup);
	}

	if (!policy)
		return 0;

	policy->curlft.use_time = (unsigned long)xtime.tv_sec;

	switch (policy->action) {
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && flags) {
				/* Wait for the key manager to finish
				 * negotiating, then retry once. */
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pol_put(policy);
					goto restart;
				}
				err = nx;
			}
			if (err < 0)
				goto error;
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		dst = dst_orig;
		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

		if (unlikely(err)) {
			/* bundle_create failed: drop the resolved
			 * state references ourselves. */
			int i;
			for (i=0; i<nx; i++)
				xfrm_state_put(xfrm[i]);
			goto error;
		}

		write_lock_bh(&policy->lock);
		if (unlikely(policy->dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist new bundle to dead object.
			 * We can't enlist stable bundles either.
			 */
			write_unlock_bh(&policy->lock);

			xfrm_pol_put(policy);
			if (dst)
				dst_free(dst);
			goto restart;
		}
		/* Cache the new bundle on the policy. */
		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	return 0;

error:
	dst_release(dst_orig);
	xfrm_pol_put(policy);
	*dst_p = NULL;
	return err;
}
EXPORT_SYMBOL(xfrm_lookup);
914
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions.  At the moment we do this in a maximally
 * stupid way.  Shame on me. :-)  Of course, connected sockets must
 * have their policy cached at them.
 */
920
921 static inline int
922 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
923               unsigned short family)
924 {
925         if (xfrm_state_kern(x))
926                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
927         return  x->id.proto == tmpl->id.proto &&
928                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
929                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
930                 x->props.mode == tmpl->mode &&
931                 (tmpl->aalgos & (1<<x->props.aalgo)) &&
932                 !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
933 }
934
/* Search the secpath 'sp', starting at index 'start', for a state that
 * matches template 'tmpl'.  On a match, returns the index just past it
 * so the caller can resume scanning there for the next template.  An
 * optional transport-mode template may be skipped entirely (returns
 * 'start' unchanged); a mandatory template that finds no match returns
 * -1.  The scan stops at the first tunnel-mode state, since templates
 * must be satisfied in order across tunnel boundaries.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		/* Optional transport-mode templates need not be present. */
		if (!tmpl->mode)
			return start;
	} else
		start = -1;	/* mandatory: a miss becomes a failure */
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
			return ++idx;
		if (sp->x[idx].xvec->props.mode)
			break;	/* hit a tunnel; stop searching */
	}
	return start;
}
954
955 int
956 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
957 {
958         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
959
960         if (unlikely(afinfo == NULL))
961                 return -EAFNOSUPPORT;
962
963         afinfo->decode_session(skb, fl);
964         xfrm_policy_put_afinfo(afinfo);
965         return 0;
966 }
967 EXPORT_SYMBOL(xfrm_decode_session);
968
969 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
970 {
971         for (; k < sp->len; k++) {
972                 if (sp->x[k].xvec->props.mode)
973                         return 1;
974         }
975
976         return 0;
977 }
978
/* Inbound policy check, run after an skb has been transformed back to
 * its native form.  Validates the SAs recorded in skb->sp against their
 * selectors (and any post_input processors), then looks up the
 * applicable policy — per-socket policy first, then the flow cache —
 * and verifies the secpath satisfies the policy's templates in order.
 * Returns 1 to accept the packet, 0 to drop it.
 */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct flowi fl;
	u8 fl_dir = policy_to_flow_dir(dir);
	u32 sk_sid;

	if (xfrm_decode_session(skb, &fl, family) < 0)
		return 0;
	/* Let NAT adjust the flow key so policy matching sees the
	 * addresses the policies were written against. */
	nf_nat_decode_session(skb, &fl, family);

	sk_sid = security_sk_sid(sk, &fl, fl_dir);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct sec_decap_state *xvec = &(skb->sp->x[i]);
			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
				return 0;

			/* If there is a post_input processor, try running it */
			if (xvec->xvec->type->post_input &&
			    (xvec->xvec->type->post_input)(xvec->xvec,
							   &(xvec->decap),
							   skb) != 0)
				return 0;
		}
	}

	pol = NULL;
	/* A per-socket policy, if present, takes precedence. */
	if (sk && sk->sk_policy[dir])
		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);

	if (!pol)
		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
					xfrm_policy_lookup);

	/* No policy: accept only if the packet did not arrive tunneled. */
	if (!pol)
		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

	pol->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;	/* empty secpath stand-in */
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
			if (k < 0)
				goto reject;
		}

		/* Tunnel states left over beyond the templates mean an
		 * encapsulation the policy did not authorize. */
		if (secpath_has_tunnel(sp, k))
			goto reject;

		xfrm_pol_put(pol);
		return 1;
	}

reject:
	xfrm_pol_put(pol);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
1056
1057 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1058 {
1059         struct flowi fl;
1060
1061         if (xfrm_decode_session(skb, &fl, family) < 0)
1062                 return 0;
1063
1064         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1065 }
1066 EXPORT_SYMBOL(__xfrm_route_forward);
1067
1068 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1069 {
1070         /* If it is marked obsolete, which is how we even get here,
1071          * then we have purged it from the policy bundle list and we
1072          * did that for a good reason.
1073          */
1074         return NULL;
1075 }
1076
1077 static int stale_bundle(struct dst_entry *dst)
1078 {
1079         return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
1080 }
1081
1082 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1083 {
1084         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
1085                 dst->dev = &loopback_dev;
1086                 dev_hold(&loopback_dev);
1087                 dev_put(dev);
1088         }
1089 }
1090 EXPORT_SYMBOL(xfrm_dst_ifdown);
1091
/* dst_ops->link_failure hook for xfrm bundles. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Cannot happen: such a dst must be popped off the skb before it
	 * can reach a point of link failure.
	 */
}
1097
1098 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
1099 {
1100         if (dst) {
1101                 if (dst->obsolete) {
1102                         dst_release(dst);
1103                         dst = NULL;
1104                 }
1105         }
1106         return dst;
1107 }
1108
/* Walk every policy's bundle list and unlink the bundles for which
 * 'func' returns nonzero, then free them.  Unlinking happens under
 * xfrm_policy_lock (read, BH disabled) plus each policy's write lock;
 * the dst_free() calls are deferred to a private list and performed
 * afterwards, outside all locks.
 */
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	int i;
	struct xfrm_policy *pol;
	struct dst_entry *dst, **dstp, *gc_list = NULL;

	read_lock_bh(&xfrm_policy_lock);
	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
			write_lock(&pol->lock);
			dstp = &pol->bundles;
			while ((dst=*dstp) != NULL) {
				if (func(dst)) {
					/* Unlink in place and chain onto the
					 * gc list, reusing dst->next. */
					*dstp = dst->next;
					dst->next = gc_list;
					gc_list = dst;
				} else {
					dstp = &dst->next;
				}
			}
			write_unlock(&pol->lock);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	/* Free the collected bundles without holding any locks. */
	while (gc_list) {
		dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}
1140
1141 static int unused_bundle(struct dst_entry *dst)
1142 {
1143         return !atomic_read(&dst->__refcnt);
1144 }
1145
1146 static void __xfrm_garbage_collect(void)
1147 {
1148         xfrm_prune_bundles(unused_bundle);
1149 }
1150
1151 int xfrm_flush_bundles(void)
1152 {
1153         xfrm_prune_bundles(stale_bundle);
1154         return 0;
1155 }
1156
/* Predicate for xfrm_prune_bundles(): selects every bundle. */
static int always_true(struct dst_entry *dst)
{
	return 1;
}
1161
1162 void xfrm_flush_all_bundles(void)
1163 {
1164         xfrm_prune_bundles(always_true);
1165 }
1166
/* Initialize the cached MTUs along a freshly built bundle, walking the
 * chain via dst->next.  Each level's effective MTU is the child's MTU
 * reduced by the state's transform overhead, clamped to the cached
 * route MTU, and stored in the dst metrics.
 */
void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		/* Account for this state's header/trailer overhead. */
		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst->metrics[RTAX_MTU-1] = pmtu;
	} while ((dst = dst->next));
}

EXPORT_SYMBOL(xfrm_init_pmtu);
1189
/* Check that the bundle accepts the flow and its components are
 * still valid.  Also refreshes cached MTUs: if any child or route MTU
 * changed while validating, the new values are propagated back up from
 * the deepest changed level ('last') to 'first'.  Returns 1 when the
 * bundle is usable, 0 when it must be discarded.
 */

int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	/* The underlying route must still check out and its device,
	 * if any, must be running. */
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;

		/* Remember the deepest level whose cached MTU changed. */
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	/* Nothing changed: no MTU propagation needed. */
	if (likely(!last))
		return 1;

	/* Propagate the updated MTU from 'last' back up to 'first',
	 * re-applying each level's transform overhead and route clamp. */
	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = last->u.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);
1254
/* Register the address-family specific policy operations, installing
 * the generic xfrm implementations into any dst_ops hooks the caller
 * left unset.  Returns 0 on success, -EINVAL/-EAFNOSUPPORT on bad
 * input, or -ENOBUFS if the family is already registered.
 */
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		/* Fill in defaults only where the caller provided none. */
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = __xfrm_garbage_collect;
		xfrm_policy_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
1283
/* Undo xfrm_policy_register_afinfo(): remove the table entry and clear
 * every hook so the defaults installed at registration time do not
 * dangle.  Returns -EINVAL if 'afinfo' is not the registered entry for
 * its family.
 */
int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			struct dst_ops *dst_ops = afinfo->dst_ops;
			xfrm_policy_afinfo[afinfo->family] = NULL;
			dst_ops->kmem_cachep = NULL;
			dst_ops->check = NULL;
			dst_ops->negative_advice = NULL;
			dst_ops->link_failure = NULL;
			afinfo->garbage_collect = NULL;
		}
	}
	write_unlock(&xfrm_policy_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
1309
/* Look up the registered afinfo for 'family'.  On success the entry's
 * own read lock is held — taken while still under
 * xfrm_policy_afinfo_lock so the entry cannot change between lookup and
 * locking — and the caller must release it with
 * xfrm_policy_put_afinfo().  Returns NULL for unknown/unregistered
 * families.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}
1322
1323 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
1324 {
1325         if (unlikely(afinfo == NULL))
1326                 return;
1327         read_unlock(&afinfo->lock);
1328 }
1329
1330 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
1331 {
1332         switch (event) {
1333         case NETDEV_DOWN:
1334                 xfrm_flush_bundles();
1335         }
1336         return NOTIFY_DONE;
1337 }
1338
1339 static struct notifier_block xfrm_dev_notifier = {
1340         xfrm_dev_event,
1341         NULL,
1342         0
1343 };
1344
/* Boot-time setup for the policy layer: create the slab cache backing
 * bundle dst entries (panic on failure — the stack is unusable without
 * it), initialize the policy GC work item, and register the netdevice
 * notifier that flushes stale bundles when a device goes down.
 */
static void __init xfrm_policy_init(void)
{
	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN,
					   NULL, NULL);
	if (!xfrm_dst_cache)
		panic("XFRM: failed to allocate xfrm_dst_cache\n");

	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
	register_netdevice_notifier(&xfrm_dev_notifier);
}
1357
/* Top-level xfrm subsystem initialization, run once at boot: state
 * management first, then the policy layer (this file), then input
 * processing.
 */
void __init xfrm_init(void)
{
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
}
1364