[XFRM]: RFC4303 compliant auditing
net/xfrm/xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
37 int sysctl_xfrm_larval_drop __read_mostly;
38
39 #ifdef CONFIG_XFRM_STATISTICS
40 DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41 EXPORT_SYMBOL(xfrm_statistics);
42 #endif
43
44 DEFINE_MUTEX(xfrm_cfg_mutex);
45 EXPORT_SYMBOL(xfrm_cfg_mutex);
46
47 static DEFINE_RWLOCK(xfrm_policy_lock);
48
49 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
50 EXPORT_SYMBOL(xfrm_policy_count);
51
52 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
53 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
54
55 static struct kmem_cache *xfrm_dst_cache __read_mostly;
56
57 static struct work_struct xfrm_policy_gc_work;
58 static HLIST_HEAD(xfrm_policy_gc_list);
59 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
60
61 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
62 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
63 static void xfrm_init_pmtu(struct dst_entry *dst);
64
65 static inline int
66 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
67 {
68         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
69                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
70                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
71                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
72                 (fl->proto == sel->proto || !sel->proto) &&
73                 (fl->oif == sel->ifindex || !sel->ifindex);
74 }
75
76 static inline int
77 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
78 {
79         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
80                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
81                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
82                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
83                 (fl->proto == sel->proto || !sel->proto) &&
84                 (fl->oif == sel->ifindex || !sel->ifindex);
85 }
86
87 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
88                     unsigned short family)
89 {
90         switch (family) {
91         case AF_INET:
92                 return __xfrm4_selector_match(sel, fl);
93         case AF_INET6:
94                 return __xfrm6_selector_match(sel, fl);
95         }
96         return 0;
97 }
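/*
 * Note on the selector checks above: ports are compared under a mask, so a
 * dport_mask/sport_mask of 0 matches any port while an all-ones mask demands
 * an exact match; proto == 0 and ifindex == 0 in the selector act as
 * wildcards.  For example, a selector with dport = 80 and an all-ones
 * dport_mask matches only flows whose destination port is 80.
 */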
98
99 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
100                                                 int family)
101 {
102         xfrm_address_t *saddr = &x->props.saddr;
103         xfrm_address_t *daddr = &x->id.daddr;
104         struct xfrm_policy_afinfo *afinfo;
105         struct dst_entry *dst;
106
107         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
108                 saddr = x->coaddr;
109         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
110                 daddr = x->coaddr;
111
112         afinfo = xfrm_policy_get_afinfo(family);
113         if (unlikely(afinfo == NULL))
114                 return ERR_PTR(-EAFNOSUPPORT);
115
116         dst = afinfo->dst_lookup(tos, saddr, daddr);
117         xfrm_policy_put_afinfo(afinfo);
118         return dst;
119 }
120
121 static inline unsigned long make_jiffies(long secs)
122 {
123         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
124                 return MAX_SCHEDULE_TIMEOUT-1;
125         else
126                 return secs*HZ;
127 }
128
129 static void xfrm_policy_timer(unsigned long data)
130 {
131         struct xfrm_policy *xp = (struct xfrm_policy*)data;
132         unsigned long now = get_seconds();
133         long next = LONG_MAX;
134         int warn = 0;
135         int dir;
136
137         read_lock(&xp->lock);
138
139         if (xp->dead)
140                 goto out;
141
142         dir = xfrm_policy_id2dir(xp->index);
143
144         if (xp->lft.hard_add_expires_seconds) {
145                 long tmo = xp->lft.hard_add_expires_seconds +
146                         xp->curlft.add_time - now;
147                 if (tmo <= 0)
148                         goto expired;
149                 if (tmo < next)
150                         next = tmo;
151         }
152         if (xp->lft.hard_use_expires_seconds) {
153                 long tmo = xp->lft.hard_use_expires_seconds +
154                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
155                 if (tmo <= 0)
156                         goto expired;
157                 if (tmo < next)
158                         next = tmo;
159         }
160         if (xp->lft.soft_add_expires_seconds) {
161                 long tmo = xp->lft.soft_add_expires_seconds +
162                         xp->curlft.add_time - now;
163                 if (tmo <= 0) {
164                         warn = 1;
165                         tmo = XFRM_KM_TIMEOUT;
166                 }
167                 if (tmo < next)
168                         next = tmo;
169         }
170         if (xp->lft.soft_use_expires_seconds) {
171                 long tmo = xp->lft.soft_use_expires_seconds +
172                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
173                 if (tmo <= 0) {
174                         warn = 1;
175                         tmo = XFRM_KM_TIMEOUT;
176                 }
177                 if (tmo < next)
178                         next = tmo;
179         }
180
181         if (warn)
182                 km_policy_expired(xp, dir, 0, 0);
183         if (next != LONG_MAX &&
184             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
185                 xfrm_pol_hold(xp);
186
187 out:
188         read_unlock(&xp->lock);
189         xfrm_pol_put(xp);
190         return;
191
192 expired:
193         read_unlock(&xp->lock);
194         if (!xfrm_policy_delete(xp, dir))
195                 km_policy_expired(xp, dir, 1, 0);
196         xfrm_pol_put(xp);
197 }
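/*
 * Lifetime handling: hitting a hard add/use limit deletes the policy and
 * reports it via km_policy_expired(..., hard = 1); a soft limit only warns
 * (hard = 0) and the timer is re-armed XFRM_KM_TIMEOUT seconds later.  The
 * extra xfrm_pol_hold() taken when mod_timer() arms a previously idle timer
 * keeps the policy alive until the handler's closing xfrm_pol_put().
 */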
198
199
200 /* Allocate an xfrm_policy. Not used internally; it is meant to be used by
201  * pfkeyv2 SPD calls.
202  */
203
204 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
205 {
206         struct xfrm_policy *policy;
207
208         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
209
210         if (policy) {
211                 INIT_HLIST_NODE(&policy->bydst);
212                 INIT_HLIST_NODE(&policy->byidx);
213                 rwlock_init(&policy->lock);
214                 atomic_set(&policy->refcnt, 1);
215                 setup_timer(&policy->timer, xfrm_policy_timer,
216                                 (unsigned long)policy);
217         }
218         return policy;
219 }
220 EXPORT_SYMBOL(xfrm_policy_alloc);
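/*
 * Illustrative sketch (hypothetical, not code from this file): a key manager
 * such as af_key or xfrm_user is the intended caller and would fill in and
 * link a freshly allocated policy roughly as follows, error handling and
 * locking omitted:
 *
 *	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
 *
 *	xp->selector = sel;		/* selector built from the userspace request */
 *	xp->family   = sel.family;
 *	xp->action   = XFRM_POLICY_ALLOW;
 *	xp->lft.soft_byte_limit = XFRM_INF;
 *	xp->lft.hard_byte_limit = XFRM_INF;
 *	err = xfrm_policy_insert(XFRM_POLICY_OUT, xp, 1);
 */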
221
222 /* Destroy an xfrm_policy: descendant resources must have been released by this point. */
223
224 void __xfrm_policy_destroy(struct xfrm_policy *policy)
225 {
226         BUG_ON(!policy->dead);
227
228         BUG_ON(policy->bundles);
229
230         if (del_timer(&policy->timer))
231                 BUG();
232
233         security_xfrm_policy_free(policy);
234         kfree(policy);
235 }
236 EXPORT_SYMBOL(__xfrm_policy_destroy);
237
238 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
239 {
240         struct dst_entry *dst;
241
242         while ((dst = policy->bundles) != NULL) {
243                 policy->bundles = dst->next;
244                 dst_free(dst);
245         }
246
247         if (del_timer(&policy->timer))
248                 atomic_dec(&policy->refcnt);
249
250         if (atomic_read(&policy->refcnt) > 1)
251                 flow_cache_flush();
252
253         xfrm_pol_put(policy);
254 }
255
256 static void xfrm_policy_gc_task(struct work_struct *work)
257 {
258         struct xfrm_policy *policy;
259         struct hlist_node *entry, *tmp;
260         struct hlist_head gc_list;
261
262         spin_lock_bh(&xfrm_policy_gc_lock);
263         gc_list.first = xfrm_policy_gc_list.first;
264         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
265         spin_unlock_bh(&xfrm_policy_gc_lock);
266
267         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
268                 xfrm_policy_gc_kill(policy);
269 }
270
271 /* Rule must be locked. Release descendant resources, announce
272  * the entry dead. The rule must already be unlinked from the lists by this point.
273  */
274
275 static void xfrm_policy_kill(struct xfrm_policy *policy)
276 {
277         int dead;
278
279         write_lock_bh(&policy->lock);
280         dead = policy->dead;
281         policy->dead = 1;
282         write_unlock_bh(&policy->lock);
283
284         if (unlikely(dead)) {
285                 WARN_ON(1);
286                 return;
287         }
288
289         spin_lock(&xfrm_policy_gc_lock);
290         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
291         spin_unlock(&xfrm_policy_gc_lock);
292
293         schedule_work(&xfrm_policy_gc_work);
294 }
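/*
 * Teardown is two-stage: xfrm_policy_kill() marks the already unlinked policy
 * dead under its lock and queues it on xfrm_policy_gc_list, reusing the bydst
 * node; the work item then runs xfrm_policy_gc_task(), where
 * xfrm_policy_gc_kill() frees any cached bundles, stops the lifetime timer,
 * flushes the flow cache if other references are still outstanding and
 * finally puts the policy.
 */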
295
296 struct xfrm_policy_hash {
297         struct hlist_head       *table;
298         unsigned int            hmask;
299 };
300
301 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
302 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
303 static struct hlist_head *xfrm_policy_byidx __read_mostly;
304 static unsigned int xfrm_idx_hmask __read_mostly;
305 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
306
307 static inline unsigned int idx_hash(u32 index)
308 {
309         return __idx_hash(index, xfrm_idx_hmask);
310 }
311
312 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
313 {
314         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
315         unsigned int hash = __sel_hash(sel, family, hmask);
316
317         return (hash == hmask + 1 ?
318                 &xfrm_policy_inexact[dir] :
319                 xfrm_policy_bydst[dir].table + hash);
320 }
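/*
 * A hash value of hmask + 1 is the sentinel __sel_hash() (xfrm_hash.h) returns
 * for selectors it cannot hash exactly, essentially those without full-length
 * address prefixes.  Such policies live on the per-direction
 * xfrm_policy_inexact[] list and are always scanned at lookup time, while
 * exactly hashable ones go into the bydst hash table.
 */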
321
322 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
323 {
324         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
325         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
326
327         return xfrm_policy_bydst[dir].table + hash;
328 }
329
330 static void xfrm_dst_hash_transfer(struct hlist_head *list,
331                                    struct hlist_head *ndsttable,
332                                    unsigned int nhashmask)
333 {
334         struct hlist_node *entry, *tmp;
335         struct xfrm_policy *pol;
336
337         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
338                 unsigned int h;
339
340                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
341                                 pol->family, nhashmask);
342                 hlist_add_head(&pol->bydst, ndsttable+h);
343         }
344 }
345
346 static void xfrm_idx_hash_transfer(struct hlist_head *list,
347                                    struct hlist_head *nidxtable,
348                                    unsigned int nhashmask)
349 {
350         struct hlist_node *entry, *tmp;
351         struct xfrm_policy *pol;
352
353         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
354                 unsigned int h;
355
356                 h = __idx_hash(pol->index, nhashmask);
357                 hlist_add_head(&pol->byidx, nidxtable+h);
358         }
359 }
360
361 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
362 {
363         return ((old_hmask + 1) << 1) - 1;
364 }
365
366 static void xfrm_bydst_resize(int dir)
367 {
368         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
369         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
370         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
371         struct hlist_head *odst = xfrm_policy_bydst[dir].table;
372         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
373         int i;
374
375         if (!ndst)
376                 return;
377
378         write_lock_bh(&xfrm_policy_lock);
379
380         for (i = hmask; i >= 0; i--)
381                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
382
383         xfrm_policy_bydst[dir].table = ndst;
384         xfrm_policy_bydst[dir].hmask = nhashmask;
385
386         write_unlock_bh(&xfrm_policy_lock);
387
388         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
389 }
390
391 static void xfrm_byidx_resize(int total)
392 {
393         unsigned int hmask = xfrm_idx_hmask;
394         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
395         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
396         struct hlist_head *oidx = xfrm_policy_byidx;
397         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
398         int i;
399
400         if (!nidx)
401                 return;
402
403         write_lock_bh(&xfrm_policy_lock);
404
405         for (i = hmask; i >= 0; i--)
406                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
407
408         xfrm_policy_byidx = nidx;
409         xfrm_idx_hmask = nhashmask;
410
411         write_unlock_bh(&xfrm_policy_lock);
412
413         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
414 }
415
416 static inline int xfrm_bydst_should_resize(int dir, int *total)
417 {
418         unsigned int cnt = xfrm_policy_count[dir];
419         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
420
421         if (total)
422                 *total += cnt;
423
424         if ((hmask + 1) < xfrm_policy_hashmax &&
425             cnt > hmask)
426                 return 1;
427
428         return 0;
429 }
430
431 static inline int xfrm_byidx_should_resize(int total)
432 {
433         unsigned int hmask = xfrm_idx_hmask;
434
435         if ((hmask + 1) < xfrm_policy_hashmax &&
436             total > hmask)
437                 return 1;
438
439         return 0;
440 }
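/*
 * Resize heuristic: a table grows once its entry count exceeds the current
 * hash mask, as long as the table stays below xfrm_policy_hashmax.  Each
 * resize doubles the table via xfrm_new_hash_mask(), e.g. hmask 7 (8 buckets)
 * becomes 15 (16 buckets).  The actual rehashing is deferred to the
 * xfrm_hash_work item below.
 */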
441
442 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
443 {
444         read_lock_bh(&xfrm_policy_lock);
445         si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
446         si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
447         si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
448         si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
449         si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
450         si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
451         si->spdhcnt = xfrm_idx_hmask;
452         si->spdhmcnt = xfrm_policy_hashmax;
453         read_unlock_bh(&xfrm_policy_lock);
454 }
455 EXPORT_SYMBOL(xfrm_spd_getinfo);
456
457 static DEFINE_MUTEX(hash_resize_mutex);
458 static void xfrm_hash_resize(struct work_struct *__unused)
459 {
460         int dir, total;
461
462         mutex_lock(&hash_resize_mutex);
463
464         total = 0;
465         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
466                 if (xfrm_bydst_should_resize(dir, &total))
467                         xfrm_bydst_resize(dir);
468         }
469         if (xfrm_byidx_should_resize(total))
470                 xfrm_byidx_resize(total);
471
472         mutex_unlock(&hash_resize_mutex);
473 }
474
475 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
476
477 /* Generate a new index... KAME seems to generate them ordered by cost
478  * at the price of complete unpredictability of rule ordering. That will not do here. */
479 static u32 xfrm_gen_index(u8 type, int dir)
480 {
481         static u32 idx_generator;
482
483         for (;;) {
484                 struct hlist_node *entry;
485                 struct hlist_head *list;
486                 struct xfrm_policy *p;
487                 u32 idx;
488                 int found;
489
490                 idx = (idx_generator | dir);
491                 idx_generator += 8;
492                 if (idx == 0)
493                         idx = 8;
494                 list = xfrm_policy_byidx + idx_hash(idx);
495                 found = 0;
496                 hlist_for_each_entry(p, entry, list, byidx) {
497                         if (p->index == idx) {
498                                 found = 1;
499                                 break;
500                         }
501                 }
502                 if (!found)
503                         return idx;
504         }
505 }
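/*
 * Policy indices encode the direction in their low bits: idx_generator
 * advances in steps of 8 and the direction is OR-ed in, so
 * xfrm_policy_id2dir() can recover the direction from an index (as done in
 * xfrm_policy_timer() and xfrm_policy_byid()).  The walk over the byidx chain
 * merely guarantees the generated index is not already in use.
 */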
506
507 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
508 {
509         u32 *p1 = (u32 *) s1;
510         u32 *p2 = (u32 *) s2;
511         int len = sizeof(struct xfrm_selector) / sizeof(u32);
512         int i;
513
514         for (i = 0; i < len; i++) {
515                 if (p1[i] != p2[i])
516                         return 1;
517         }
518
519         return 0;
520 }
521
522 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
523 {
524         struct xfrm_policy *pol;
525         struct xfrm_policy *delpol;
526         struct hlist_head *chain;
527         struct hlist_node *entry, *newpos;
528         struct dst_entry *gc_list;
529
530         write_lock_bh(&xfrm_policy_lock);
531         chain = policy_hash_bysel(&policy->selector, policy->family, dir);
532         delpol = NULL;
533         newpos = NULL;
534         hlist_for_each_entry(pol, entry, chain, bydst) {
535                 if (pol->type == policy->type &&
536                     !selector_cmp(&pol->selector, &policy->selector) &&
537                     xfrm_sec_ctx_match(pol->security, policy->security) &&
538                     !WARN_ON(delpol)) {
539                         if (excl) {
540                                 write_unlock_bh(&xfrm_policy_lock);
541                                 return -EEXIST;
542                         }
543                         delpol = pol;
544                         if (policy->priority > pol->priority)
545                                 continue;
546                 } else if (policy->priority >= pol->priority) {
547                         newpos = &pol->bydst;
548                         continue;
549                 }
550                 if (delpol)
551                         break;
552         }
553         if (newpos)
554                 hlist_add_after(newpos, &policy->bydst);
555         else
556                 hlist_add_head(&policy->bydst, chain);
557         xfrm_pol_hold(policy);
558         xfrm_policy_count[dir]++;
559         atomic_inc(&flow_cache_genid);
560         if (delpol) {
561                 hlist_del(&delpol->bydst);
562                 hlist_del(&delpol->byidx);
563                 xfrm_policy_count[dir]--;
564         }
565         policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
566         hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
567         policy->curlft.add_time = get_seconds();
568         policy->curlft.use_time = 0;
569         if (!mod_timer(&policy->timer, jiffies + HZ))
570                 xfrm_pol_hold(policy);
571         write_unlock_bh(&xfrm_policy_lock);
572
573         if (delpol)
574                 xfrm_policy_kill(delpol);
575         else if (xfrm_bydst_should_resize(dir, NULL))
576                 schedule_work(&xfrm_hash_work);
577
578         read_lock_bh(&xfrm_policy_lock);
579         gc_list = NULL;
580         entry = &policy->bydst;
581         hlist_for_each_entry_continue(policy, entry, bydst) {
582                 struct dst_entry *dst;
583
584                 write_lock(&policy->lock);
585                 dst = policy->bundles;
586                 if (dst) {
587                         struct dst_entry *tail = dst;
588                         while (tail->next)
589                                 tail = tail->next;
590                         tail->next = gc_list;
591                         gc_list = dst;
592
593                         policy->bundles = NULL;
594                 }
595                 write_unlock(&policy->lock);
596         }
597         read_unlock_bh(&xfrm_policy_lock);
598
599         while (gc_list) {
600                 struct dst_entry *dst = gc_list;
601
602                 gc_list = dst->next;
603                 dst_free(dst);
604         }
605
606         return 0;
607 }
608 EXPORT_SYMBOL(xfrm_policy_insert);
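/*
 * Insertion semantics: the bydst chain stays ordered by ascending priority
 * value (lower value wins at lookup time).  A policy of the same type with an
 * identical selector and security context is either rejected with -EEXIST
 * (excl) or replaced, the new policy inheriting its index.  The flow cache
 * generation is bumped so cached decisions are revalidated, a one second
 * timer kick starts lifetime accounting, and bundles cached on the chain
 * entries following the new policy are detached under the lock and freed
 * afterwards.
 */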
609
610 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
611                                           struct xfrm_selector *sel,
612                                           struct xfrm_sec_ctx *ctx, int delete,
613                                           int *err)
614 {
615         struct xfrm_policy *pol, *ret;
616         struct hlist_head *chain;
617         struct hlist_node *entry;
618
619         *err = 0;
620         write_lock_bh(&xfrm_policy_lock);
621         chain = policy_hash_bysel(sel, sel->family, dir);
622         ret = NULL;
623         hlist_for_each_entry(pol, entry, chain, bydst) {
624                 if (pol->type == type &&
625                     !selector_cmp(sel, &pol->selector) &&
626                     xfrm_sec_ctx_match(ctx, pol->security)) {
627                         xfrm_pol_hold(pol);
628                         if (delete) {
629                                 *err = security_xfrm_policy_delete(pol);
630                                 if (*err) {
631                                         write_unlock_bh(&xfrm_policy_lock);
632                                         return pol;
633                                 }
634                                 hlist_del(&pol->bydst);
635                                 hlist_del(&pol->byidx);
636                                 xfrm_policy_count[dir]--;
637                         }
638                         ret = pol;
639                         break;
640                 }
641         }
642         write_unlock_bh(&xfrm_policy_lock);
643
644         if (ret && delete) {
645                 atomic_inc(&flow_cache_genid);
646                 xfrm_policy_kill(ret);
647         }
648         return ret;
649 }
650 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
651
652 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
653                                      int *err)
654 {
655         struct xfrm_policy *pol, *ret;
656         struct hlist_head *chain;
657         struct hlist_node *entry;
658
659         *err = -ENOENT;
660         if (xfrm_policy_id2dir(id) != dir)
661                 return NULL;
662
663         *err = 0;
664         write_lock_bh(&xfrm_policy_lock);
665         chain = xfrm_policy_byidx + idx_hash(id);
666         ret = NULL;
667         hlist_for_each_entry(pol, entry, chain, byidx) {
668                 if (pol->type == type && pol->index == id) {
669                         xfrm_pol_hold(pol);
670                         if (delete) {
671                                 *err = security_xfrm_policy_delete(pol);
672                                 if (*err) {
673                                         write_unlock_bh(&xfrm_policy_lock);
674                                         return pol;
675                                 }
676                                 hlist_del(&pol->bydst);
677                                 hlist_del(&pol->byidx);
678                                 xfrm_policy_count[dir]--;
679                         }
680                         ret = pol;
681                         break;
682                 }
683         }
684         write_unlock_bh(&xfrm_policy_lock);
685
686         if (ret && delete) {
687                 atomic_inc(&flow_cache_genid);
688                 xfrm_policy_kill(ret);
689         }
690         return ret;
691 }
692 EXPORT_SYMBOL(xfrm_policy_byid);
693
694 #ifdef CONFIG_SECURITY_NETWORK_XFRM
695 static inline int
696 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
697 {
698         int dir, err = 0;
699
700         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
701                 struct xfrm_policy *pol;
702                 struct hlist_node *entry;
703                 int i;
704
705                 hlist_for_each_entry(pol, entry,
706                                      &xfrm_policy_inexact[dir], bydst) {
707                         if (pol->type != type)
708                                 continue;
709                         err = security_xfrm_policy_delete(pol);
710                         if (err) {
711                                 xfrm_audit_policy_delete(pol, 0,
712                                                          audit_info->loginuid,
713                                                          audit_info->secid);
714                                 return err;
715                         }
716                 }
717                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
718                         hlist_for_each_entry(pol, entry,
719                                              xfrm_policy_bydst[dir].table + i,
720                                              bydst) {
721                                 if (pol->type != type)
722                                         continue;
723                                 err = security_xfrm_policy_delete(pol);
724                                 if (err) {
725                                         xfrm_audit_policy_delete(pol, 0,
726                                                         audit_info->loginuid,
727                                                         audit_info->secid);
728                                         return err;
729                                 }
730                         }
731                 }
732         }
733         return err;
734 }
735 #else
736 static inline int
737 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
738 {
739         return 0;
740 }
741 #endif
742
743 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
744 {
745         int dir, err = 0;
746
747         write_lock_bh(&xfrm_policy_lock);
748
749         err = xfrm_policy_flush_secctx_check(type, audit_info);
750         if (err)
751                 goto out;
752
753         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
754                 struct xfrm_policy *pol;
755                 struct hlist_node *entry;
756                 int i, killed;
757
758                 killed = 0;
759         again1:
760                 hlist_for_each_entry(pol, entry,
761                                      &xfrm_policy_inexact[dir], bydst) {
762                         if (pol->type != type)
763                                 continue;
764                         hlist_del(&pol->bydst);
765                         hlist_del(&pol->byidx);
766                         write_unlock_bh(&xfrm_policy_lock);
767
768                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
769                                                  audit_info->secid);
770
771                         xfrm_policy_kill(pol);
772                         killed++;
773
774                         write_lock_bh(&xfrm_policy_lock);
775                         goto again1;
776                 }
777
778                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
779         again2:
780                         hlist_for_each_entry(pol, entry,
781                                              xfrm_policy_bydst[dir].table + i,
782                                              bydst) {
783                                 if (pol->type != type)
784                                         continue;
785                                 hlist_del(&pol->bydst);
786                                 hlist_del(&pol->byidx);
787                                 write_unlock_bh(&xfrm_policy_lock);
788
789                                 xfrm_audit_policy_delete(pol, 1,
790                                                          audit_info->loginuid,
791                                                          audit_info->secid);
792                                 xfrm_policy_kill(pol);
793                                 killed++;
794
795                                 write_lock_bh(&xfrm_policy_lock);
796                                 goto again2;
797                         }
798                 }
799
800                 xfrm_policy_count[dir] -= killed;
801         }
802         atomic_inc(&flow_cache_genid);
803 out:
804         write_unlock_bh(&xfrm_policy_lock);
805         return err;
806 }
807 EXPORT_SYMBOL(xfrm_policy_flush);
808
809 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
810                      void *data)
811 {
812         struct xfrm_policy *pol, *last = NULL;
813         struct hlist_node *entry;
814         int dir, last_dir = 0, count, error;
815
816         read_lock_bh(&xfrm_policy_lock);
817         count = 0;
818
819         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
820                 struct hlist_head *table = xfrm_policy_bydst[dir].table;
821                 int i;
822
823                 hlist_for_each_entry(pol, entry,
824                                      &xfrm_policy_inexact[dir], bydst) {
825                         if (pol->type != type)
826                                 continue;
827                         if (last) {
828                                 error = func(last, last_dir % XFRM_POLICY_MAX,
829                                              count, data);
830                                 if (error)
831                                         goto out;
832                         }
833                         last = pol;
834                         last_dir = dir;
835                         count++;
836                 }
837                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
838                         hlist_for_each_entry(pol, entry, table + i, bydst) {
839                                 if (pol->type != type)
840                                         continue;
841                                 if (last) {
842                                         error = func(last, last_dir % XFRM_POLICY_MAX,
843                                                      count, data);
844                                         if (error)
845                                                 goto out;
846                                 }
847                                 last = pol;
848                                 last_dir = dir;
849                                 count++;
850                         }
851                 }
852         }
853         if (count == 0) {
854                 error = -ENOENT;
855                 goto out;
856         }
857         error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
858 out:
859         read_unlock_bh(&xfrm_policy_lock);
860         return error;
861 }
862 EXPORT_SYMBOL(xfrm_policy_walk);
863
864 /*
865  * Find policy to apply to this flow.
866  *
867  * Returns 0 if the policy matches the flow, -ESRCH if it does not, or another negative errno from the security lookup.
868  */
869 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
870                              u8 type, u16 family, int dir)
871 {
872         struct xfrm_selector *sel = &pol->selector;
873         int match, ret = -ESRCH;
874
875         if (pol->family != family ||
876             pol->type != type)
877                 return ret;
878
879         match = xfrm_selector_match(sel, fl, family);
880         if (match)
881                 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
882
883         return ret;
884 }
885
886 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
887                                                      u16 family, u8 dir)
888 {
889         int err;
890         struct xfrm_policy *pol, *ret;
891         xfrm_address_t *daddr, *saddr;
892         struct hlist_node *entry;
893         struct hlist_head *chain;
894         u32 priority = ~0U;
895
896         daddr = xfrm_flowi_daddr(fl, family);
897         saddr = xfrm_flowi_saddr(fl, family);
898         if (unlikely(!daddr || !saddr))
899                 return NULL;
900
901         read_lock_bh(&xfrm_policy_lock);
902         chain = policy_hash_direct(daddr, saddr, family, dir);
903         ret = NULL;
904         hlist_for_each_entry(pol, entry, chain, bydst) {
905                 err = xfrm_policy_match(pol, fl, type, family, dir);
906                 if (err) {
907                         if (err == -ESRCH)
908                                 continue;
909                         else {
910                                 ret = ERR_PTR(err);
911                                 goto fail;
912                         }
913                 } else {
914                         ret = pol;
915                         priority = ret->priority;
916                         break;
917                 }
918         }
919         chain = &xfrm_policy_inexact[dir];
920         hlist_for_each_entry(pol, entry, chain, bydst) {
921                 err = xfrm_policy_match(pol, fl, type, family, dir);
922                 if (err) {
923                         if (err == -ESRCH)
924                                 continue;
925                         else {
926                                 ret = ERR_PTR(err);
927                                 goto fail;
928                         }
929                 } else if (pol->priority < priority) {
930                         ret = pol;
931                         break;
932                 }
933         }
934         if (ret)
935                 xfrm_pol_hold(ret);
936 fail:
937         read_unlock_bh(&xfrm_policy_lock);
938
939         return ret;
940 }
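/*
 * Lookup order: the exact dst/src hash chain is searched first and its first
 * match wins (the chain is priority ordered by xfrm_policy_insert()).  The
 * inexact list is then scanned, and one of its entries only takes over when
 * its priority value is strictly lower, i.e. of higher precedence, than the
 * exact match already found.  -ESRCH from xfrm_policy_match() means "no
 * match"; any other error is propagated as an ERR_PTR().
 */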
941
942 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
943                                void **objp, atomic_t **obj_refp)
944 {
945         struct xfrm_policy *pol;
946         int err = 0;
947
948 #ifdef CONFIG_XFRM_SUB_POLICY
949         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
950         if (IS_ERR(pol)) {
951                 err = PTR_ERR(pol);
952                 pol = NULL;
953         }
954         if (pol || err)
955                 goto end;
956 #endif
957         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
958         if (IS_ERR(pol)) {
959                 err = PTR_ERR(pol);
960                 pol = NULL;
961         }
962 #ifdef CONFIG_XFRM_SUB_POLICY
963 end:
964 #endif
965         if ((*objp = (void *) pol) != NULL)
966                 *obj_refp = &pol->refcnt;
967         return err;
968 }
969
970 static inline int policy_to_flow_dir(int dir)
971 {
972         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
973             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
974             XFRM_POLICY_FWD == FLOW_DIR_FWD)
975                 return dir;
976         switch (dir) {
977         default:
978         case XFRM_POLICY_IN:
979                 return FLOW_DIR_IN;
980         case XFRM_POLICY_OUT:
981                 return FLOW_DIR_OUT;
982         case XFRM_POLICY_FWD:
983                 return FLOW_DIR_FWD;
984         }
985 }
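/*
 * If the XFRM_POLICY_* and FLOW_DIR_* constants happen to share values, the
 * compile-time comparison above lets the compiler reduce this helper to a
 * plain return; otherwise the switch performs the mapping explicitly.
 */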
986
987 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
988 {
989         struct xfrm_policy *pol;
990
991         read_lock_bh(&xfrm_policy_lock);
992         if ((pol = sk->sk_policy[dir]) != NULL) {
993                 int match = xfrm_selector_match(&pol->selector, fl,
994                                                 sk->sk_family);
995                 int err = 0;
996
997                 if (match) {
998                         err = security_xfrm_policy_lookup(pol, fl->secid,
999                                         policy_to_flow_dir(dir));
1000                         if (!err)
1001                                 xfrm_pol_hold(pol);
1002                         else if (err == -ESRCH)
1003                                 pol = NULL;
1004                         else
1005                                 pol = ERR_PTR(err);
1006                 } else
1007                         pol = NULL;
1008         }
1009         read_unlock_bh(&xfrm_policy_lock);
1010         return pol;
1011 }
1012
1013 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1014 {
1015         struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1016                                                      pol->family, dir);
1017
1018         hlist_add_head(&pol->bydst, chain);
1019         hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1020         xfrm_policy_count[dir]++;
1021         xfrm_pol_hold(pol);
1022
1023         if (xfrm_bydst_should_resize(dir, NULL))
1024                 schedule_work(&xfrm_hash_work);
1025 }
1026
1027 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1028                                                 int dir)
1029 {
1030         if (hlist_unhashed(&pol->bydst))
1031                 return NULL;
1032
1033         hlist_del(&pol->bydst);
1034         hlist_del(&pol->byidx);
1035         xfrm_policy_count[dir]--;
1036
1037         return pol;
1038 }
1039
1040 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1041 {
1042         write_lock_bh(&xfrm_policy_lock);
1043         pol = __xfrm_policy_unlink(pol, dir);
1044         write_unlock_bh(&xfrm_policy_lock);
1045         if (pol) {
1046                 if (dir < XFRM_POLICY_MAX)
1047                         atomic_inc(&flow_cache_genid);
1048                 xfrm_policy_kill(pol);
1049                 return 0;
1050         }
1051         return -ENOENT;
1052 }
1053 EXPORT_SYMBOL(xfrm_policy_delete);
1054
1055 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1056 {
1057         struct xfrm_policy *old_pol;
1058
1059 #ifdef CONFIG_XFRM_SUB_POLICY
1060         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1061                 return -EINVAL;
1062 #endif
1063
1064         write_lock_bh(&xfrm_policy_lock);
1065         old_pol = sk->sk_policy[dir];
1066         sk->sk_policy[dir] = pol;
1067         if (pol) {
1068                 pol->curlft.add_time = get_seconds();
1069                 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1070                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1071         }
1072         if (old_pol)
1073                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1074         write_unlock_bh(&xfrm_policy_lock);
1075
1076         if (old_pol) {
1077                 xfrm_policy_kill(old_pol);
1078         }
1079         return 0;
1080 }
1081
1082 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1083 {
1084         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1085
1086         if (newp) {
1087                 newp->selector = old->selector;
1088                 if (security_xfrm_policy_clone(old, newp)) {
1089                         kfree(newp);
1090                         return NULL;  /* ENOMEM */
1091                 }
1092                 newp->lft = old->lft;
1093                 newp->curlft = old->curlft;
1094                 newp->action = old->action;
1095                 newp->flags = old->flags;
1096                 newp->xfrm_nr = old->xfrm_nr;
1097                 newp->index = old->index;
1098                 newp->type = old->type;
1099                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1100                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1101                 write_lock_bh(&xfrm_policy_lock);
1102                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1103                 write_unlock_bh(&xfrm_policy_lock);
1104                 xfrm_pol_put(newp);
1105         }
1106         return newp;
1107 }
1108
1109 int __xfrm_sk_clone_policy(struct sock *sk)
1110 {
1111         struct xfrm_policy *p0 = sk->sk_policy[0],
1112                            *p1 = sk->sk_policy[1];
1113
1114         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1115         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1116                 return -ENOMEM;
1117         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1118                 return -ENOMEM;
1119         return 0;
1120 }
1121
1122 static int
1123 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1124                unsigned short family)
1125 {
1126         int err;
1127         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1128
1129         if (unlikely(afinfo == NULL))
1130                 return -EINVAL;
1131         err = afinfo->get_saddr(local, remote);
1132         xfrm_policy_put_afinfo(afinfo);
1133         return err;
1134 }
1135
1136 /* Resolve list of templates for the flow, given policy. */
1137
1138 static int
1139 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1140                       struct xfrm_state **xfrm,
1141                       unsigned short family)
1142 {
1143         int nx;
1144         int i, error;
1145         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1146         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1147         xfrm_address_t tmp;
1148
1149         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1150                 struct xfrm_state *x;
1151                 xfrm_address_t *remote = daddr;
1152                 xfrm_address_t *local  = saddr;
1153                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1154
1155                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1156                     tmpl->mode == XFRM_MODE_BEET) {
1157                         remote = &tmpl->id.daddr;
1158                         local = &tmpl->saddr;
1159                         family = tmpl->encap_family;
1160                         if (xfrm_addr_any(local, family)) {
1161                                 error = xfrm_get_saddr(&tmp, remote, family);
1162                                 if (error)
1163                                         goto fail;
1164                                 local = &tmp;
1165                         }
1166                 }
1167
1168                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1169
1170                 if (x && x->km.state == XFRM_STATE_VALID) {
1171                         xfrm[nx++] = x;
1172                         daddr = remote;
1173                         saddr = local;
1174                         continue;
1175                 }
1176                 if (x) {
1177                         error = (x->km.state == XFRM_STATE_ERROR ?
1178                                  -EINVAL : -EAGAIN);
1179                         xfrm_state_put(x);
1180                 }
1181
1182                 if (!tmpl->optional)
1183                         goto fail;
1184         }
1185         return nx;
1186
1187 fail:
1188         for (nx--; nx>=0; nx--)
1189                 xfrm_state_put(xfrm[nx]);
1190         return error;
1191 }
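/*
 * Template resolution: for tunnel and BEET templates the outer addresses come
 * from the template itself, with xfrm_get_saddr() doing a routing lookup to
 * fill in a wildcard source; the resolved endpoints then become the defaults
 * for the next template, so nested tunnels chain naturally.  A state stuck in
 * XFRM_STATE_ERROR maps to -EINVAL, an otherwise unusable one to -EAGAIN
 * (handled in __xfrm_lookup() below by waiting on km_waitq or, with
 * sysctl_xfrm_larval_drop set, returning -EREMOTE), and optional templates
 * that cannot be resolved are simply skipped.  On failure every state
 * acquired so far is put again.
 */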
1192
1193 static int
1194 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1195                   struct xfrm_state **xfrm,
1196                   unsigned short family)
1197 {
1198         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1199         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1200         int cnx = 0;
1201         int error;
1202         int ret;
1203         int i;
1204
1205         for (i = 0; i < npols; i++) {
1206                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1207                         error = -ENOBUFS;
1208                         goto fail;
1209                 }
1210
1211                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1212                 if (ret < 0) {
1213                         error = ret;
1214                         goto fail;
1215                 } else
1216                         cnx += ret;
1217         }
1218
1219         /* found states are sorted for outbound processing */
1220         if (npols > 1)
1221                 xfrm_state_sort(xfrm, tpp, cnx, family);
1222
1223         return cnx;
1224
1225  fail:
1226         for (cnx--; cnx>=0; cnx--)
1227                 xfrm_state_put(tpp[cnx]);
1228         return error;
1229
1230 }
1231
1232 /* Check that the bundle accepts the flow and its components are
1233  * still valid.
1234  */
1235
1236 static struct dst_entry *
1237 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1238 {
1239         struct dst_entry *x;
1240         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1241         if (unlikely(afinfo == NULL))
1242                 return ERR_PTR(-EINVAL);
1243         x = afinfo->find_bundle(fl, policy);
1244         xfrm_policy_put_afinfo(afinfo);
1245         return x;
1246 }
1247
1248 static inline int xfrm_get_tos(struct flowi *fl, int family)
1249 {
1250         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1251         int tos;
1252
1253         if (!afinfo)
1254                 return -EINVAL;
1255
1256         tos = afinfo->get_tos(fl);
1257
1258         xfrm_policy_put_afinfo(afinfo);
1259
1260         return tos;
1261 }
1262
1263 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1264 {
1265         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1266         struct xfrm_dst *xdst;
1267
1268         if (!afinfo)
1269                 return ERR_PTR(-EINVAL);
1270
1271         xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1272
1273         xfrm_policy_put_afinfo(afinfo);
1274
1275         return xdst;
1276 }
1277
1278 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1279                                  int nfheader_len)
1280 {
1281         struct xfrm_policy_afinfo *afinfo =
1282                 xfrm_policy_get_afinfo(dst->ops->family);
1283         int err;
1284
1285         if (!afinfo)
1286                 return -EINVAL;
1287
1288         err = afinfo->init_path(path, dst, nfheader_len);
1289
1290         xfrm_policy_put_afinfo(afinfo);
1291
1292         return err;
1293 }
1294
1295 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1296 {
1297         struct xfrm_policy_afinfo *afinfo =
1298                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1299         int err;
1300
1301         if (!afinfo)
1302                 return -EINVAL;
1303
1304         err = afinfo->fill_dst(xdst, dev);
1305
1306         xfrm_policy_put_afinfo(afinfo);
1307
1308         return err;
1309 }
1310
1311 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1312  * all the metrics... In short, build a bundle.
1313  */
1314
1315 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1316                                             struct xfrm_state **xfrm, int nx,
1317                                             struct flowi *fl,
1318                                             struct dst_entry *dst)
1319 {
1320         unsigned long now = jiffies;
1321         struct net_device *dev;
1322         struct dst_entry *dst_prev = NULL;
1323         struct dst_entry *dst0 = NULL;
1324         int i = 0;
1325         int err;
1326         int header_len = 0;
1327         int nfheader_len = 0;
1328         int trailer_len = 0;
1329         int tos;
1330         int family = policy->selector.family;
1331
1332         tos = xfrm_get_tos(fl, family);
1333         err = tos;
1334         if (tos < 0)
1335                 goto put_states;
1336
1337         dst_hold(dst);
1338
1339         for (; i < nx; i++) {
1340                 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1341                 struct dst_entry *dst1 = &xdst->u.dst;
1342
1343                 err = PTR_ERR(xdst);
1344                 if (IS_ERR(xdst)) {
1345                         dst_release(dst);
1346                         goto put_states;
1347                 }
1348
1349                 if (!dst_prev)
1350                         dst0 = dst1;
1351                 else {
1352                         dst_prev->child = dst_clone(dst1);
1353                         dst1->flags |= DST_NOHASH;
1354                 }
1355
1356                 xdst->route = dst;
1357                 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1358
1359                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1360                         family = xfrm[i]->props.family;
1361                         dst = xfrm_dst_lookup(xfrm[i], tos, family);
1362                         err = PTR_ERR(dst);
1363                         if (IS_ERR(dst))
1364                                 goto put_states;
1365                 } else
1366                         dst_hold(dst);
1367
1368                 dst1->xfrm = xfrm[i];
1369                 xdst->genid = xfrm[i]->genid;
1370
1371                 dst1->obsolete = -1;
1372                 dst1->flags |= DST_HOST;
1373                 dst1->lastuse = now;
1374
1375                 dst1->input = dst_discard;
1376                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1377
1378                 dst1->next = dst_prev;
1379                 dst_prev = dst1;
1380
1381                 header_len += xfrm[i]->props.header_len;
1382                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1383                         nfheader_len += xfrm[i]->props.header_len;
1384                 trailer_len += xfrm[i]->props.trailer_len;
1385         }
1386
1387         dst_prev->child = dst;
1388         dst0->path = dst;
1389
1390         err = -ENODEV;
1391         dev = dst->dev;
1392         if (!dev)
1393                 goto free_dst;
1394
1395         /* Copy neighbour entry for reachability confirmation */
1396         dst0->neighbour = neigh_clone(dst->neighbour);
1397
1398         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1399         xfrm_init_pmtu(dst_prev);
1400
1401         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1402                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1403
1404                 err = xfrm_fill_dst(xdst, dev);
1405                 if (err)
1406                         goto free_dst;
1407
1408                 dst_prev->header_len = header_len;
1409                 dst_prev->trailer_len = trailer_len;
1410                 header_len -= xdst->u.dst.xfrm->props.header_len;
1411                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1412         }
1413
1414 out:
1415         return dst0;
1416
1417 put_states:
1418         for (; i < nx; i++)
1419                 xfrm_state_put(xfrm[i]);
1420 free_dst:
1421         if (dst0)
1422                 dst_free(dst0);
1423         dst0 = ERR_PTR(err);
1424         goto out;
1425 }
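/*
 * Resulting bundle layout: dst0 is the outermost xfrm_dst and what the caller
 * installs; each ->child points at the next, inner transform's dst and the
 * last child is the original route, which dst0->path records as well.  Every
 * xfrm_dst keeps the route used for that hop in ->route, the per-entry
 * header_len/trailer_len shrink going inwards, and ->output comes from the
 * state's outer mode, so transmission walks the chain applying one transform
 * per level.
 */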
1426
1427 static int inline
1428 xfrm_dst_alloc_copy(void **target, void *src, int size)
1429 {
1430         if (!*target) {
1431                 *target = kmalloc(size, GFP_ATOMIC);
1432                 if (!*target)
1433                         return -ENOMEM;
1434         }
1435         memcpy(*target, src, size);
1436         return 0;
1437 }
1438
1439 static int inline
1440 xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
1441 {
1442 #ifdef CONFIG_XFRM_SUB_POLICY
1443         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1444         return xfrm_dst_alloc_copy((void **)&(xdst->partner),
1445                                    sel, sizeof(*sel));
1446 #else
1447         return 0;
1448 #endif
1449 }
1450
1451 static int inline
1452 xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1453 {
1454 #ifdef CONFIG_XFRM_SUB_POLICY
1455         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1456         return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
1457 #else
1458         return 0;
1459 #endif
1460 }
1461
1462 static int stale_bundle(struct dst_entry *dst);
1463
1464 /* Main function: finds/creates a bundle for a given flow.
1465  *
1466  * At the moment we consume a raw IP route, mostly to speed up lookups
1467  * on interfaces with IPsec disabled.
1468  */
1469 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1470                   struct sock *sk, int flags)
1471 {
1472         struct xfrm_policy *policy;
1473         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1474         int npols;
1475         int pol_dead;
1476         int xfrm_nr;
1477         int pi;
1478         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1479         struct dst_entry *dst, *dst_orig = *dst_p;
1480         int nx = 0;
1481         int err;
1482         u32 genid;
1483         u16 family;
1484         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1485
1486 restart:
1487         genid = atomic_read(&flow_cache_genid);
1488         policy = NULL;
1489         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1490                 pols[pi] = NULL;
1491         npols = 0;
1492         pol_dead = 0;
1493         xfrm_nr = 0;
1494
1495         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1496                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1497                 err = PTR_ERR(policy);
1498                 if (IS_ERR(policy)) {
1499                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1500                         goto dropdst;
1501                 }
1502         }
1503
1504         if (!policy) {
1505                 /* To accelerate a bit...  */
1506                 if ((dst_orig->flags & DST_NOXFRM) ||
1507                     !xfrm_policy_count[XFRM_POLICY_OUT])
1508                         goto nopol;
1509
1510                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1511                                            dir, xfrm_policy_lookup);
1512                 err = PTR_ERR(policy);
1513                 if (IS_ERR(policy))
1514                         goto dropdst;
1515         }
1516
1517         if (!policy)
1518                 goto nopol;
1519
1520         family = dst_orig->ops->family;
1521         pols[0] = policy;
1522         npols ++;
1523         xfrm_nr += pols[0]->xfrm_nr;
1524
1525         err = -ENOENT;
1526         if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1527                 goto error;
1528
1529         policy->curlft.use_time = get_seconds();
1530
1531         switch (policy->action) {
1532         default:
1533         case XFRM_POLICY_BLOCK:
1534                 /* Prohibit the flow */
1535                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1536                 err = -EPERM;
1537                 goto error;
1538
1539         case XFRM_POLICY_ALLOW:
1540 #ifndef CONFIG_XFRM_SUB_POLICY
1541                 if (policy->xfrm_nr == 0) {
1542                         /* Flow passes not transformed. */
1543                         xfrm_pol_put(policy);
1544                         return 0;
1545                 }
1546 #endif
1547
1548                 /* Try to find matching bundle.
1549                  *
1550                  * LATER: help from flow cache. It is optional, this
1551                  * is required only for output policy.
1552                  */
1553                 dst = xfrm_find_bundle(fl, policy, family);
1554                 if (IS_ERR(dst)) {
1555                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1556                         err = PTR_ERR(dst);
1557                         goto error;
1558                 }
1559
1560                 if (dst)
1561                         break;
1562
1563 #ifdef CONFIG_XFRM_SUB_POLICY
1564                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1565                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1566                                                             fl, family,
1567                                                             XFRM_POLICY_OUT);
1568                         if (pols[1]) {
1569                                 if (IS_ERR(pols[1])) {
1570                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1571                                         err = PTR_ERR(pols[1]);
1572                                         goto error;
1573                                 }
1574                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1575                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1576                                         err = -EPERM;
1577                                         goto error;
1578                                 }
1579                                 npols++;
1580                                 xfrm_nr += pols[1]->xfrm_nr;
1581                         }
1582                 }
1583
1584                 /*
1585                  * Neither the flowi nor the bundle information carries the
1586                  * transformation template size, so with more than one policy
1587                  * in use we can only tell whether all of them are bypass
1588                  * after they have all been looked up.  The untransformed
1589                  * bypass above is likewise compiled only without sub-policy.
1590                  */
1591                 if (xfrm_nr == 0) {
1592                         /* Flow passes untransformed. */
1593                         xfrm_pols_put(pols, npols);
1594                         return 0;
1595                 }
1596
1597 #endif
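                /*
                 * Illustrative example (not from the original source): with
                 * sub-policies enabled a flow may match both a sub policy and
                 * the main policy.  If each of them carries zero templates,
                 * xfrm_nr stays 0 and the flow is passed untransformed here,
                 * mirroring the single-policy shortcut above that exists only
                 * without CONFIG_XFRM_SUB_POLICY.
                 */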
1598                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1599
1600                 if (unlikely(nx < 0)) {
1601                         err = nx;
1602                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1603                                 /* EREMOTE tells the caller to generate
1604                                  * a one-shot blackhole route.
1605                                  */
1606                                 xfrm_pol_put(policy);
1607                                 return -EREMOTE;
1608                         }
1609                         if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1610                                 DECLARE_WAITQUEUE(wait, current);
1611
1612                                 add_wait_queue(&km_waitq, &wait);
1613                                 set_current_state(TASK_INTERRUPTIBLE);
1614                                 schedule();
1615                                 set_current_state(TASK_RUNNING);
1616                                 remove_wait_queue(&km_waitq, &wait);
1617
1618                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1619
1620                                 if (nx == -EAGAIN && signal_pending(current)) {
1621                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1622                                         err = -ERESTART;
1623                                         goto error;
1624                                 }
1625                                 if (nx == -EAGAIN ||
1626                                     genid != atomic_read(&flow_cache_genid)) {
1627                                         xfrm_pols_put(pols, npols);
1628                                         goto restart;
1629                                 }
1630                                 err = nx;
1631                         }
1632                         if (err < 0) {
1633                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1634                                 goto error;
1635                         }
1636                 }
1637                 if (nx == 0) {
1638                         /* Flow passes untransformed. */
1639                         xfrm_pols_put(pols, npols);
1640                         return 0;
1641                 }
1642
1643                 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1644                 err = PTR_ERR(dst);
1645                 if (IS_ERR(dst)) {
1646                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1647                         goto error;
1648                 }
1649
1650                 for (pi = 0; pi < npols; pi++) {
1651                         read_lock_bh(&pols[pi]->lock);
1652                         pol_dead |= pols[pi]->dead;
1653                         read_unlock_bh(&pols[pi]->lock);
1654                 }
1655
1656                 write_lock_bh(&policy->lock);
1657                 if (unlikely(pol_dead || stale_bundle(dst))) {
1658                         /* Wow! While we worked on resolving, this
1659                          * policy has gone. Retry. It is not paranoia;
1660                          * we just cannot enlist a new bundle to a dead
1661                          * object, and we can't enlist stale bundles either.
1662                          */
1663                         write_unlock_bh(&policy->lock);
1664                         if (dst)
1665                                 dst_free(dst);
1666
1667                         if (pol_dead)
1668                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1669                         else
1670                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1671                         err = -EHOSTUNREACH;
1672                         goto error;
1673                 }
1674
1675                 if (npols > 1)
1676                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1677                 else
1678                         err = xfrm_dst_update_origin(dst, fl);
1679                 if (unlikely(err)) {
1680                         write_unlock_bh(&policy->lock);
1681                         if (dst)
1682                                 dst_free(dst);
1683                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1684                         goto error;
1685                 }
1686
1687                 dst->next = policy->bundles;
1688                 policy->bundles = dst;
1689                 dst_hold(dst);
1690                 write_unlock_bh(&policy->lock);
1691         }
1692         *dst_p = dst;
1693         dst_release(dst_orig);
1694         xfrm_pols_put(pols, npols);
1695         return 0;
1696
1697 error:
1698         xfrm_pols_put(pols, npols);
1699 dropdst:
1700         dst_release(dst_orig);
1701         *dst_p = NULL;
1702         return err;
1703
1704 nopol:
1705         err = -ENOENT;
1706         if (flags & XFRM_LOOKUP_ICMP)
1707                 goto dropdst;
1708         return 0;
1709 }
1710 EXPORT_SYMBOL(__xfrm_lookup);
1711
1712 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1713                 struct sock *sk, int flags)
1714 {
1715         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1716
1717         if (err == -EREMOTE) {
1718                 dst_release(*dst_p);
1719                 *dst_p = NULL;
1720                 err = -EAGAIN;
1721         }
1722
1723         return err;
1724 }
1725 EXPORT_SYMBOL(xfrm_lookup);
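/*
 * Illustrative caller sketch (make_blackhole_route() is a hypothetical
 * helper, not a real function): a route-output path that can install a
 * one-shot blackhole route calls __xfrm_lookup() directly and handles
 * -EREMOTE itself, while other callers use xfrm_lookup(), which maps
 * -EREMOTE back to -EAGAIN:
 *
 *	err = __xfrm_lookup(&dst, &fl, sk, wait ? XFRM_LOOKUP_WAIT : 0);
 *	if (err == -EREMOTE)
 *		err = make_blackhole_route(&dst, &fl);
 */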
1726
1727 static inline int
1728 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1729 {
1730         struct xfrm_state *x;
1731
1732         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1733                 return 0;
1734         x = skb->sp->xvec[idx];
1735         if (!x->type->reject)
1736                 return 0;
1737         return x->type->reject(x, skb, fl);
1738 }
1739
1740 /* When the skb is transformed back to its "native" form, we have to
1741  * check policy restrictions. At the moment we do this in a maximally
1742  * stupid way. Shame on me. :-) Of course, connected sockets must
1743  * have the policy cached at them.
1744  */
1745
1746 static inline int
1747 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1748               unsigned short family)
1749 {
1750         if (xfrm_state_kern(x))
1751                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1752         return  x->id.proto == tmpl->id.proto &&
1753                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1754                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1755                 x->props.mode == tmpl->mode &&
1756                 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1757                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1758                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1759                   xfrm_state_addr_cmp(tmpl, x, family));
1760 }
1761
1762 /*
1763  * 0 or a positive value is returned when validation succeeds (either a
1764  * bypass because of an optional transport-mode template, or the index
1765  * following the secpath state that matched the template).
1766  * -1 is returned when no matching template is found.
1767  * Otherwise "-2 - errored_index" is returned.
1768  */
1769 static inline int
1770 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1771                unsigned short family)
1772 {
1773         int idx = start;
1774
1775         if (tmpl->optional) {
1776                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1777                         return start;
1778         } else
1779                 start = -1;
1780         for (; idx < sp->len; idx++) {
1781                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1782                         return ++idx;
1783                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1784                         if (start == -1)
1785                                 start = -2-idx;
1786                         break;
1787                 }
1788         }
1789         return start;
1790 }
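/*
 * Worked example (illustration only): with start == 0, a required template
 * that matches sp->xvec[0] makes xfrm_policy_ok() return 1, the next index
 * to try for the following template.  An optional transport-mode template
 * is bypassed and returns start unchanged.  A required template that runs
 * into a non-matching non-transport state at index k returns -2 - k;
 * __xfrm_policy_check() recovers the index with xerr_idx = -(2 + k) and
 * rejects through that state.
 */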
1791
1792 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1793                           unsigned int family, int reverse)
1794 {
1795         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1796         int err;
1797
1798         if (unlikely(afinfo == NULL))
1799                 return -EAFNOSUPPORT;
1800
1801         afinfo->decode_session(skb, fl, reverse);
1802         err = security_xfrm_decode_session(skb, &fl->secid);
1803         xfrm_policy_put_afinfo(afinfo);
1804         return err;
1805 }
1806 EXPORT_SYMBOL(__xfrm_decode_session);
1807
1808 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1809 {
1810         for (; k < sp->len; k++) {
1811                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1812                         *idxp = k;
1813                         return 1;
1814                 }
1815         }
1816
1817         return 0;
1818 }
1819
1820 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1821                         unsigned short family)
1822 {
1823         struct xfrm_policy *pol;
1824         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1825         int npols = 0;
1826         int xfrm_nr;
1827         int pi;
1828         int reverse;
1829         struct flowi fl;
1830         u8 fl_dir;
1831         int xerr_idx = -1;
1832
1833         reverse = dir & ~XFRM_POLICY_MASK;
1834         dir &= XFRM_POLICY_MASK;
1835         fl_dir = policy_to_flow_dir(dir);
1836
1837         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1838                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1839                 return 0;
1840         }
1841
1842         nf_nat_decode_session(skb, &fl, family);
1843
1844         /* First, check used SA against their selectors. */
1845         if (skb->sp) {
1846                 int i;
1847
1848                 for (i = skb->sp->len - 1; i >= 0; i--) {
1849                         struct xfrm_state *x = skb->sp->xvec[i];
1850                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
1851                                 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1852                                 return 0;
1853                         }
1854                 }
1855         }
1856
1857         pol = NULL;
1858         if (sk && sk->sk_policy[dir]) {
1859                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1860                 if (IS_ERR(pol)) {
1861                         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1862                         return 0;
1863                 }
1864         }
1865
1866         if (!pol)
1867                 pol = flow_cache_lookup(&fl, family, fl_dir,
1868                                         xfrm_policy_lookup);
1869
1870         if (IS_ERR(pol)) {
1871                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1872                 return 0;
1873         }
1874
1875         if (!pol) {
1876                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1877                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1878                         XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1879                         return 0;
1880                 }
1881                 return 1;
1882         }
1883
1884         pol->curlft.use_time = get_seconds();
1885
1886         pols[0] = pol;
1887         npols++;
1888 #ifdef CONFIG_XFRM_SUB_POLICY
1889         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1890                 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1891                                                     &fl, family,
1892                                                     XFRM_POLICY_IN);
1893                 if (pols[1]) {
1894                         if (IS_ERR(pols[1])) {
1895                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1896                                 return 0;
1897                         }
1898                         pols[1]->curlft.use_time = get_seconds();
1899                         npols++;
1900                 }
1901         }
1902 #endif
1903
1904         if (pol->action == XFRM_POLICY_ALLOW) {
1905                 struct sec_path *sp;
1906                 static struct sec_path dummy;
1907                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1908                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1909                 struct xfrm_tmpl **tpp = tp;
1910                 int ti = 0;
1911                 int i, k;
1912
1913                 if ((sp = skb->sp) == NULL)
1914                         sp = &dummy;
1915
1916                 for (pi = 0; pi < npols; pi++) {
1917                         if (pols[pi] != pol &&
1918                             pols[pi]->action != XFRM_POLICY_ALLOW) {
1919                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1920                                 goto reject;
1921                         }
1922                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1923                                 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1924                                 goto reject_error;
1925                         }
1926                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1927                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1928                 }
1929                 xfrm_nr = ti;
1930                 if (npols > 1) {
1931                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1932                         tpp = stp;
1933                 }
1934
1935                 /* For each tunnel xfrm, find the first matching tmpl.
1936                  * For each tmpl before that, find the corresponding xfrm.
1937                  * Order is _important_; see the worked example below.
1938                  * Later we will implement some barriers, but at the moment
1939                  * barriers are implied between every two transformations.
1940                  */
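                /*
                 * Worked example (illustration only): a policy whose last
                 * template is a required ESP tunnel and whose first is an
                 * optional transport-mode template, checked against a secpath
                 * holding a single matching ESP tunnel SA.  The tunnel template
                 * (i == xfrm_nr - 1) matches xvec[0] and advances k to 1; the
                 * optional template is then bypassed and the packet is
                 * accepted.
                 */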
1941                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1942                         k = xfrm_policy_ok(tpp[i], sp, k, family);
1943                         if (k < 0) {
1944                                 if (k < -1)
1945                                         /* "-2 - errored_index" returned */
1946                                         xerr_idx = -(2+k);
1947                                 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1948                                 goto reject;
1949                         }
1950                 }
1951
1952                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1953                         XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1954                         goto reject;
1955                 }
1956
1957                 xfrm_pols_put(pols, npols);
1958                 return 1;
1959         }
1960         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1961
1962 reject:
1963         xfrm_secpath_reject(xerr_idx, skb, &fl);
1964 reject_error:
1965         xfrm_pols_put(pols, npols);
1966         return 0;
1967 }
1968 EXPORT_SYMBOL(__xfrm_policy_check);
1969
1970 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1971 {
1972         struct flowi fl;
1973
1974         if (xfrm_decode_session(skb, &fl, family) < 0) {
1975                 /* XXX: we should have something like FWDHDRERROR here. */
1976                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1977                 return 0;
1978         }
1979
1980         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1981 }
1982 EXPORT_SYMBOL(__xfrm_route_forward);
1983
1984 /* Optimize later using cookies and generation ids. */
1985
1986 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1987 {
1988         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
1989          * to "-1" to force all XFRM destinations to get validated by
1990          * dst_ops->check on every use.  We do this because when a
1991          * normal route referenced by an XFRM dst is obsoleted we do
1992          * not go looking around for all parent referencing XFRM dsts
1993          * so that we can invalidate them.  It is just too much work.
1994          * Instead we make the checks here on every use.  For example:
1995          *
1996          *      XFRM dst A --> IPv4 dst X
1997          *
1998          * X is the "xdst->route" of A (X is also the "dst->path" of A
1999          * in this example).  If X is marked obsolete, "A" will not
2000          * notice.  That's what we are validating here via the
2001          * stale_bundle() check.
2002          *
2003          * When a policy's bundle is pruned, we dst_free() the XFRM
2004          * dst which causes its ->obsolete field to be set to a
2005          * positive non-zero integer.  If an XFRM dst has been pruned
2006          * like this, we want to force a new route lookup.
2007          */
2008         if (dst->obsolete < 0 && !stale_bundle(dst))
2009                 return dst;
2010
2011         return NULL;
2012 }
2013
2014 static int stale_bundle(struct dst_entry *dst)
2015 {
2016         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2017 }
2018
2019 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2020 {
2021         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2022                 dst->dev = dev->nd_net->loopback_dev;
2023                 dev_hold(dst->dev);
2024                 dev_put(dev);
2025         }
2026 }
2027 EXPORT_SYMBOL(xfrm_dst_ifdown);
2028
2029 static void xfrm_link_failure(struct sk_buff *skb)
2030 {
2031         /* Impossible. Such a dst must be popped before it reaches the point of failure. */
2032         return;
2033 }
2034
2035 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2036 {
2037         if (dst) {
2038                 if (dst->obsolete) {
2039                         dst_release(dst);
2040                         dst = NULL;
2041                 }
2042         }
2043         return dst;
2044 }
2045
2046 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2047 {
2048         struct dst_entry *dst, **dstp;
2049
2050         write_lock(&pol->lock);
2051         dstp = &pol->bundles;
2052         while ((dst = *dstp) != NULL) {
2053                 if (func(dst)) {
2054                         *dstp = dst->next;
2055                         dst->next = *gc_list_p;
2056                         *gc_list_p = dst;
2057                 } else {
2058                         dstp = &dst->next;
2059                 }
2060         }
2061         write_unlock(&pol->lock);
2062 }
2063
2064 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2065 {
2066         struct dst_entry *gc_list = NULL;
2067         int dir;
2068
2069         read_lock_bh(&xfrm_policy_lock);
2070         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2071                 struct xfrm_policy *pol;
2072                 struct hlist_node *entry;
2073                 struct hlist_head *table;
2074                 int i;
2075
2076                 hlist_for_each_entry(pol, entry,
2077                                      &xfrm_policy_inexact[dir], bydst)
2078                         prune_one_bundle(pol, func, &gc_list);
2079
2080                 table = xfrm_policy_bydst[dir].table;
2081                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2082                         hlist_for_each_entry(pol, entry, table + i, bydst)
2083                                 prune_one_bundle(pol, func, &gc_list);
2084                 }
2085         }
2086         read_unlock_bh(&xfrm_policy_lock);
2087
2088         while (gc_list) {
2089                 struct dst_entry *dst = gc_list;
2090                 gc_list = dst->next;
2091                 dst_free(dst);
2092         }
2093 }
2094
2095 static int unused_bundle(struct dst_entry *dst)
2096 {
2097         return !atomic_read(&dst->__refcnt);
2098 }
2099
2100 static void __xfrm_garbage_collect(void)
2101 {
2102         xfrm_prune_bundles(unused_bundle);
2103 }
2104
2105 static int xfrm_flush_bundles(void)
2106 {
2107         xfrm_prune_bundles(stale_bundle);
2108         return 0;
2109 }
2110
2111 static void xfrm_init_pmtu(struct dst_entry *dst)
2112 {
2113         do {
2114                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2115                 u32 pmtu, route_mtu_cached;
2116
2117                 pmtu = dst_mtu(dst->child);
2118                 xdst->child_mtu_cached = pmtu;
2119
2120                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2121
2122                 route_mtu_cached = dst_mtu(xdst->route);
2123                 xdst->route_mtu_cached = route_mtu_cached;
2124
2125                 if (pmtu > route_mtu_cached)
2126                         pmtu = route_mtu_cached;
2127
2128                 dst->metrics[RTAX_MTU-1] = pmtu;
2129         } while ((dst = dst->next));
2130 }
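/*
 * Illustrative example (numbers hypothetical): for a single ESP tunnel dst
 * whose child route advertises a 1500-byte MTU, if xfrm_state_mtu()
 * accounts for the ESP overhead and returns, say, 1438 while the outer
 * route's MTU is also 1500, the bundle MTU becomes 1438.  Had the outer
 * route advertised only 1400, the result would be clamped to 1400 instead.
 */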
2131
2132 /* Check that the bundle accepts the flow and its components are
2133  * still valid.
2134  */
2135
2136 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2137                 struct flowi *fl, int family, int strict)
2138 {
2139         struct dst_entry *dst = &first->u.dst;
2140         struct xfrm_dst *last;
2141         u32 mtu;
2142
2143         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2144             (dst->dev && !netif_running(dst->dev)))
2145                 return 0;
2146 #ifdef CONFIG_XFRM_SUB_POLICY
2147         if (fl) {
2148                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2149                         return 0;
2150                 if (first->partner &&
2151                     !xfrm_selector_match(first->partner, fl, family))
2152                         return 0;
2153         }
2154 #endif
2155
2156         last = NULL;
2157
2158         do {
2159                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2160
2161                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2162                         return 0;
2163                 if (fl && pol &&
2164                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2165                         return 0;
2166                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2167                         return 0;
2168                 if (xdst->genid != dst->xfrm->genid)
2169                         return 0;
2170
2171                 if (strict && fl &&
2172                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2173                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2174                         return 0;
2175
2176                 mtu = dst_mtu(dst->child);
2177                 if (xdst->child_mtu_cached != mtu) {
2178                         last = xdst;
2179                         xdst->child_mtu_cached = mtu;
2180                 }
2181
2182                 if (!dst_check(xdst->route, xdst->route_cookie))
2183                         return 0;
2184                 mtu = dst_mtu(xdst->route);
2185                 if (xdst->route_mtu_cached != mtu) {
2186                         last = xdst;
2187                         xdst->route_mtu_cached = mtu;
2188                 }
2189
2190                 dst = dst->child;
2191         } while (dst->xfrm);
2192
2193         if (likely(!last))
2194                 return 1;
2195
2196         mtu = last->child_mtu_cached;
2197         for (;;) {
2198                 dst = &last->u.dst;
2199
2200                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2201                 if (mtu > last->route_mtu_cached)
2202                         mtu = last->route_mtu_cached;
2203                 dst->metrics[RTAX_MTU-1] = mtu;
2204
2205                 if (last == first)
2206                         break;
2207
2208                 last = (struct xfrm_dst *)last->u.dst.next;
2209                 last->child_mtu_cached = mtu;
2210         }
2211
2212         return 1;
2213 }
2214
2215 EXPORT_SYMBOL(xfrm_bundle_ok);
2216
2217 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2218 {
2219         int err = 0;
2220         if (unlikely(afinfo == NULL))
2221                 return -EINVAL;
2222         if (unlikely(afinfo->family >= NPROTO))
2223                 return -EAFNOSUPPORT;
2224         write_lock_bh(&xfrm_policy_afinfo_lock);
2225         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2226                 err = -ENOBUFS;
2227         else {
2228                 struct dst_ops *dst_ops = afinfo->dst_ops;
2229                 if (likely(dst_ops->kmem_cachep == NULL))
2230                         dst_ops->kmem_cachep = xfrm_dst_cache;
2231                 if (likely(dst_ops->check == NULL))
2232                         dst_ops->check = xfrm_dst_check;
2233                 if (likely(dst_ops->negative_advice == NULL))
2234                         dst_ops->negative_advice = xfrm_negative_advice;
2235                 if (likely(dst_ops->link_failure == NULL))
2236                         dst_ops->link_failure = xfrm_link_failure;
2237                 if (likely(afinfo->garbage_collect == NULL))
2238                         afinfo->garbage_collect = __xfrm_garbage_collect;
2239                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2240         }
2241         write_unlock_bh(&xfrm_policy_afinfo_lock);
2242         return err;
2243 }
2244 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
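/*
 * Usage sketch (simplified illustration): each address family registers a
 * filled-in struct xfrm_policy_afinfo once at init time and unregisters it
 * on exit, e.g. on the IPv4 side:
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		...
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Hooks left NULL (check, negative_advice, link_failure, garbage_collect)
 * are filled in with the xfrm defaults above.
 */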
2245
2246 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2247 {
2248         int err = 0;
2249         if (unlikely(afinfo == NULL))
2250                 return -EINVAL;
2251         if (unlikely(afinfo->family >= NPROTO))
2252                 return -EAFNOSUPPORT;
2253         write_lock_bh(&xfrm_policy_afinfo_lock);
2254         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2255                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2256                         err = -EINVAL;
2257                 else {
2258                         struct dst_ops *dst_ops = afinfo->dst_ops;
2259                         xfrm_policy_afinfo[afinfo->family] = NULL;
2260                         dst_ops->kmem_cachep = NULL;
2261                         dst_ops->check = NULL;
2262                         dst_ops->negative_advice = NULL;
2263                         dst_ops->link_failure = NULL;
2264                         afinfo->garbage_collect = NULL;
2265                 }
2266         }
2267         write_unlock_bh(&xfrm_policy_afinfo_lock);
2268         return err;
2269 }
2270 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2271
2272 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2273 {
2274         struct xfrm_policy_afinfo *afinfo;
2275         if (unlikely(family >= NPROTO))
2276                 return NULL;
2277         read_lock(&xfrm_policy_afinfo_lock);
2278         afinfo = xfrm_policy_afinfo[family];
2279         if (unlikely(!afinfo))
2280                 read_unlock(&xfrm_policy_afinfo_lock);
2281         return afinfo;
2282 }
2283
2284 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2285 {
2286         read_unlock(&xfrm_policy_afinfo_lock);
2287 }
2288
2289 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2290 {
2291         struct net_device *dev = ptr;
2292
2293         if (dev->nd_net != &init_net)
2294                 return NOTIFY_DONE;
2295
2296         switch (event) {
2297         case NETDEV_DOWN:
2298                 xfrm_flush_bundles();
2299         }
2300         return NOTIFY_DONE;
2301 }
2302
2303 static struct notifier_block xfrm_dev_notifier = {
2304         .notifier_call  = xfrm_dev_event,
2305         .next           = NULL,
2306         .priority       = 0
2307 };
2308
2309 #ifdef CONFIG_XFRM_STATISTICS
2310 static int __init xfrm_statistics_init(void)
2311 {
2312         if (snmp_mib_init((void **)xfrm_statistics,
2313                           sizeof(struct linux_xfrm_mib)) < 0)
2314                 return -ENOMEM;
2315         return 0;
2316 }
2317 #endif
2318
2319 static void __init xfrm_policy_init(void)
2320 {
2321         unsigned int hmask, sz;
2322         int dir;
2323
2324         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2325                                            sizeof(struct xfrm_dst),
2326                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2327                                            NULL);
2328
2329         hmask = 8 - 1;
2330         sz = (hmask+1) * sizeof(struct hlist_head);
2331
2332         xfrm_policy_byidx = xfrm_hash_alloc(sz);
2333         xfrm_idx_hmask = hmask;
2334         if (!xfrm_policy_byidx)
2335                 panic("XFRM: failed to allocate byidx hash\n");
2336
2337         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2338                 struct xfrm_policy_hash *htab;
2339
2340                 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2341
2342                 htab = &xfrm_policy_bydst[dir];
2343                 htab->table = xfrm_hash_alloc(sz);
2344                 htab->hmask = hmask;
2345                 if (!htab->table)
2346                         panic("XFRM: failed to allocate bydst hash\n");
2347         }
2348
2349         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2350         register_netdevice_notifier(&xfrm_dev_notifier);
2351 }
2352
2353 void __init xfrm_init(void)
2354 {
2355 #ifdef CONFIG_XFRM_STATISTICS
2356         xfrm_statistics_init();
2357 #endif
2358         xfrm_state_init();
2359         xfrm_policy_init();
2360         xfrm_input_init();
2361 #ifdef CONFIG_XFRM_STATISTICS
2362         xfrm_proc_init();
2363 #endif
2364 }
2365
2366 #ifdef CONFIG_AUDITSYSCALL
2367 static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2368                                                 struct audit_buffer *audit_buf)
2369 {
2370         struct xfrm_sec_ctx *ctx = xp->security;
2371         struct xfrm_selector *sel = &xp->selector;
2372
2373         if (ctx)
2374                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2375                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2376
2377         switch (sel->family) {
2378         case AF_INET:
2379                 audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2380                                  NIPQUAD(sel->saddr.a4));
2381                 if (sel->prefixlen_s != 32)
2382                         audit_log_format(audit_buf, " src_prefixlen=%d",
2383                                          sel->prefixlen_s);
2384                 audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2385                                  NIPQUAD(sel->daddr.a4));
2386                 if (sel->prefixlen_d != 32)
2387                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2388                                          sel->prefixlen_d);
2389                 break;
2390         case AF_INET6:
2391                 audit_log_format(audit_buf, " src=" NIP6_FMT,
2392                                  NIP6(*(struct in6_addr *)sel->saddr.a6));
2393                 if (sel->prefixlen_s != 128)
2394                         audit_log_format(audit_buf, " src_prefixlen=%d",
2395                                          sel->prefixlen_s);
2396                 audit_log_format(audit_buf, " dst=" NIP6_FMT,
2397                                  NIP6(*(struct in6_addr *)sel->daddr.a6));
2398                 if (sel->prefixlen_d != 128)
2399                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2400                                          sel->prefixlen_d);
2401                 break;
2402         }
2403 }
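/*
 * Example record fragment (illustration only, values hypothetical): for an
 * IPv4 selector 10.0.0.0/8 -> 192.168.1.1/32 with a security context
 * attached, the helper above appends something like
 *
 *	" sec_alg=1 sec_doi=0 sec_obj=system_u:object_r:ipsec_spd_t:s0"
 *	" src=10.0.0.0 src_prefixlen=8 dst=192.168.1.1"
 *
 * to the audit buffer; dst_prefixlen is omitted because the destination
 * prefix length is the full 32 bits.
 */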
2404
2405 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2406                            u32 auid, u32 secid)
2407 {
2408         struct audit_buffer *audit_buf;
2409
2410         audit_buf = xfrm_audit_start("SPD-add");
2411         if (audit_buf == NULL)
2412                 return;
2413         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2414         audit_log_format(audit_buf, " res=%u", result);
2415         xfrm_audit_common_policyinfo(xp, audit_buf);
2416         audit_log_end(audit_buf);
2417 }
2418 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2419
2420 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2421                               u32 auid, u32 secid)
2422 {
2423         struct audit_buffer *audit_buf;
2424
2425         audit_buf = xfrm_audit_start("SPD-delete");
2426         if (audit_buf == NULL)
2427                 return;
2428         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2429         audit_log_format(audit_buf, " res=%u", result);
2430         xfrm_audit_common_policyinfo(xp, audit_buf);
2431         audit_log_end(audit_buf);
2432 }
2433 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2434 #endif
2435
2436 #ifdef CONFIG_XFRM_MIGRATE
2437 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2438                                        struct xfrm_selector *sel_tgt)
2439 {
2440         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2441                 if (sel_tgt->family == sel_cmp->family &&
2442                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2443                                   sel_cmp->family) == 0 &&
2444                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2445                                   sel_cmp->family) == 0 &&
2446                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2447                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2448                         return 1;
2449                 }
2450         } else {
2451                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2452                         return 1;
2453                 }
2454         }
2455         return 0;
2456 }
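/*
 * Illustrative example: a migrate selector using proto IPSEC_ULPROTO_ANY
 * only needs the family, the source and destination addresses and their
 * prefix lengths to match the policy selector, so ports and protocol in
 * the policy are ignored; with a specific proto the two selectors must
 * instead be identical byte for byte.
 */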
2457
2458 static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
2459                                                      u8 dir, u8 type)
2460 {
2461         struct xfrm_policy *pol, *ret = NULL;
2462         struct hlist_node *entry;
2463         struct hlist_head *chain;
2464         u32 priority = ~0U;
2465
2466         read_lock_bh(&xfrm_policy_lock);
2467         chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2468         hlist_for_each_entry(pol, entry, chain, bydst) {
2469                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2470                     pol->type == type) {
2471                         ret = pol;
2472                         priority = ret->priority;
2473                         break;
2474                 }
2475         }
2476         chain = &xfrm_policy_inexact[dir];
2477         hlist_for_each_entry(pol, entry, chain, bydst) {
2478                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2479                     pol->type == type &&
2480                     pol->priority < priority) {
2481                         ret = pol;
2482                         break;
2483                 }
2484         }
2485
2486         if (ret)
2487                 xfrm_pol_hold(ret);
2488
2489         read_unlock_bh(&xfrm_policy_lock);
2490
2491         return ret;
2492 }
2493
2494 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2495 {
2496         int match = 0;
2497
2498         if (t->mode == m->mode && t->id.proto == m->proto &&
2499             (m->reqid == 0 || t->reqid == m->reqid)) {
2500                 switch (t->mode) {
2501                 case XFRM_MODE_TUNNEL:
2502                 case XFRM_MODE_BEET:
2503                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2504                                           m->old_family) == 0 &&
2505                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2506                                           m->old_family) == 0) {
2507                                 match = 1;
2508                         }
2509                         break;
2510                 case XFRM_MODE_TRANSPORT:
2511                         /* In transport mode the template does not store
2512                          * any IP addresses, so we just compare mode and
2513                          * protocol. */
2514                         match = 1;
2515                         break;
2516                 default:
2517                         break;
2518                 }
2519         }
2520         return match;
2521 }
2522
2523 /* update endpoint address(es) of template(s) */
2524 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2525                                struct xfrm_migrate *m, int num_migrate)
2526 {
2527         struct xfrm_migrate *mp;
2528         struct dst_entry *dst;
2529         int i, j, n = 0;
2530
2531         write_lock_bh(&pol->lock);
2532         if (unlikely(pol->dead)) {
2533                 /* target policy has been deleted */
2534                 write_unlock_bh(&pol->lock);
2535                 return -ENOENT;
2536         }
2537
2538         for (i = 0; i < pol->xfrm_nr; i++) {
2539                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2540                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2541                                 continue;
2542                         n++;
2543                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2544                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2545                                 continue;
2546                         /* update endpoints */
2547                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2548                                sizeof(pol->xfrm_vec[i].id.daddr));
2549                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2550                                sizeof(pol->xfrm_vec[i].saddr));
2551                         pol->xfrm_vec[i].encap_family = mp->new_family;
2552                         /* flush bundles */
2553                         while ((dst = pol->bundles) != NULL) {
2554                                 pol->bundles = dst->next;
2555                                 dst_free(dst);
2556                         }
2557                 }
2558         }
2559
2560         write_unlock_bh(&pol->lock);
2561
2562         if (!n)
2563                 return -ENODATA;
2564
2565         return 0;
2566 }
2567
2568 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2569 {
2570         int i, j;
2571
2572         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2573                 return -EINVAL;
2574
2575         for (i = 0; i < num_migrate; i++) {
2576                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2577                                    m[i].old_family) == 0) &&
2578                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2579                                    m[i].old_family) == 0))
2580                         return -EINVAL;
2581                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2582                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2583                         return -EINVAL;
2584
2585                 /* check for duplicated entries */
2586                 for (j = i + 1; j < num_migrate; j++) {
2587                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2588                                     sizeof(m[i].old_daddr)) &&
2589                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2590                                     sizeof(m[i].old_saddr)) &&
2591                             m[i].proto == m[j].proto &&
2592                             m[i].mode == m[j].mode &&
2593                             m[i].reqid == m[j].reqid &&
2594                             m[i].old_family == m[j].old_family)
2595                                 return -EINVAL;
2596                 }
2597         }
2598
2599         return 0;
2600 }
2601
2602 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2603                  struct xfrm_migrate *m, int num_migrate)
2604 {
2605         int i, err, nx_cur = 0, nx_new = 0;
2606         struct xfrm_policy *pol = NULL;
2607         struct xfrm_state *x, *xc;
2608         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2609         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2610         struct xfrm_migrate *mp;
2611
2612         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2613                 goto out;
2614
2615         /* Stage 1 - find policy */
2616         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2617                 err = -ENOENT;
2618                 goto out;
2619         }
2620
2621         /* Stage 2 - find and update state(s) */
2622         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2623                 if ((x = xfrm_migrate_state_find(mp))) {
2624                         x_cur[nx_cur] = x;
2625                         nx_cur++;
2626                         if ((xc = xfrm_state_migrate(x, mp))) {
2627                                 x_new[nx_new] = xc;
2628                                 nx_new++;
2629                         } else {
2630                                 err = -ENODATA;
2631                                 goto restore_state;
2632                         }
2633                 }
2634         }
2635
2636         /* Stage 3 - update policy */
2637         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2638                 goto restore_state;
2639
2640         /* Stage 4 - delete old state(s) */
2641         if (nx_cur) {
2642                 xfrm_states_put(x_cur, nx_cur);
2643                 xfrm_states_delete(x_cur, nx_cur);
2644         }
2645
2646         /* Stage 5 - announce */
2647         km_migrate(sel, dir, type, m, num_migrate);
2648
2649         xfrm_pol_put(pol);
2650
2651         return 0;
2652 out:
2653         return err;
2654
2655 restore_state:
2656         if (pol)
2657                 xfrm_pol_put(pol);
2658         if (nx_cur)
2659                 xfrm_states_put(x_cur, nx_cur);
2660         if (nx_new)
2661                 xfrm_states_delete(x_new, nx_new);
2662
2663         return err;
2664 }
2665 EXPORT_SYMBOL(xfrm_migrate);
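/*
 * Example flow (illustration only): a key manager issues a MIGRATE request
 * (e.g. for MOBIKE / Mobile IPv6 style address updates) carrying a single
 * xfrm_migrate entry that moves a tunnel from old_saddr/old_daddr to
 * new_saddr/new_daddr.  Stage 2 clones the matching state with the new
 * endpoints, stage 3 rewrites the policy templates and drops the cached
 * bundles, stage 4 deletes the old state, and stage 5 announces the change
 * to the key managers via km_migrate().
 */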
2666 #endif