/*
 * xfrm_state.c
 *
 * Changes:
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */

#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <asm/uaccess.h>
#include <linux/audit.h>

#include "xfrm_hash.h"

struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);

u32 sysctl_xfrm_acq_expires __read_mostly = 30;

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */
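
/*
 * Illustrative sketch (not part of this file): which table serves which
 * path.  Input-side code resolves an SA from its SPI triplet via table 1
 * (see xfrm_state_lookup() below); output-side code resolves by
 * destination and reqid via table 2 (see xfrm_state_find() below).  The
 * IPPROTO_ESP/AF_INET values here are assumptions for the example.
 */
#if 0
static struct xfrm_state *example_inbound_lookup(xfrm_address_t *daddr,
                                                 __be32 spi)
{
        /* Table 1: (spi, daddr, proto) -> SA, as used on input. */
        return xfrm_state_lookup(daddr, spi, IPPROTO_ESP, AF_INET);
}
#endif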

static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;
static unsigned int xfrm_state_genid;

static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
                                         xfrm_address_t *saddr,
                                         u32 reqid,
                                         unsigned short family)
{
        return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
}

static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
                                         xfrm_address_t *saddr,
                                         unsigned short family)
{
        return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
}

static inline unsigned int
xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
        return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
}

static void xfrm_hash_transfer(struct hlist_head *list,
                               struct hlist_head *ndsttable,
                               struct hlist_head *nsrctable,
                               struct hlist_head *nspitable,
                               unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_state *x;

        hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
                unsigned int h;

                h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.reqid, x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);

                h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.family, nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);

                if (x->id.spi) {
                        h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
                                            x->id.proto, x->props.family,
                                            nhashmask);
                        hlist_add_head(&x->byspi, nspitable+h);
                }
        }
}

static unsigned long xfrm_hash_new_size(void)
{
        return ((xfrm_state_hmask + 1) << 1) *
                sizeof(struct hlist_head);
}

static DEFINE_MUTEX(hash_resize_mutex);

static void xfrm_hash_resize(struct work_struct *__unused)
{
        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
        unsigned long nsize, osize;
        unsigned int nhashmask, ohashmask;
        int i;

        mutex_lock(&hash_resize_mutex);

        nsize = xfrm_hash_new_size();
        ndst = xfrm_hash_alloc(nsize);
        if (!ndst)
                goto out_unlock;
        nsrc = xfrm_hash_alloc(nsize);
        if (!nsrc) {
                xfrm_hash_free(ndst, nsize);
                goto out_unlock;
        }
        nspi = xfrm_hash_alloc(nsize);
        if (!nspi) {
                xfrm_hash_free(ndst, nsize);
                xfrm_hash_free(nsrc, nsize);
                goto out_unlock;
        }

        spin_lock_bh(&xfrm_state_lock);

        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
        for (i = xfrm_state_hmask; i >= 0; i--)
                xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
                                   nhashmask);

        odst = xfrm_state_bydst;
        osrc = xfrm_state_bysrc;
        ospi = xfrm_state_byspi;
        ohashmask = xfrm_state_hmask;

        xfrm_state_bydst = ndst;
        xfrm_state_bysrc = nsrc;
        xfrm_state_byspi = nspi;
        xfrm_state_hmask = nhashmask;

        spin_unlock_bh(&xfrm_state_lock);

        osize = (ohashmask + 1) * sizeof(struct hlist_head);
        xfrm_hash_free(odst, osize);
        xfrm_hash_free(osrc, osize);
        xfrm_hash_free(ospi, osize);

out_unlock:
        mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);

static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
        del_timer_sync(&x->timer);
        del_timer_sync(&x->rtimer);
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
        kfree(x->encap);
        kfree(x->coaddr);
        if (x->mode)
                xfrm_put_mode(x->mode);
        if (x->type) {
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
        security_xfrm_state_free(x);
        kfree(x);
}

static void xfrm_state_gc_task(struct work_struct *data)
{
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
                xfrm_state_gc_destroy(x);

        wake_up(&km_waitq);
}

static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}

static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = get_seconds();
        long next = LONG_MAX;
        int warn = 0;
        int err = 0;

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->km.dying)
                goto resched;
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0, 0);
resched:
        if (next != LONG_MAX)
                mod_timer(&x->timer, jiffies + make_jiffies(next));
        goto out;

expired:
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }

        err = __xfrm_state_delete(x);
        if (!err && x->id.spi)
                km_state_expired(x, 1, 0);

        xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
                       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);

out:
        spin_unlock(&x->lock);
}

static void xfrm_replay_timer_handler(unsigned long data);

struct xfrm_state *xfrm_state_alloc(void)
{
        struct xfrm_state *x;

        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

        if (x) {
                atomic_set(&x->refcnt, 1);
                atomic_set(&x->tunnel_users, 0);
                INIT_HLIST_NODE(&x->bydst);
                INIT_HLIST_NODE(&x->bysrc);
                INIT_HLIST_NODE(&x->byspi);
                init_timer(&x->timer);
                x->timer.function = xfrm_timer_handler;
                x->timer.data = (unsigned long)x;
                init_timer(&x->rtimer);
                x->rtimer.function = xfrm_replay_timer_handler;
                x->rtimer.data = (unsigned long)x;
                x->curlft.add_time = get_seconds();
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
                x->lft.hard_packet_limit = XFRM_INF;
                x->replay_maxage = 0;
                x->replay_maxdiff = 0;
                spin_lock_init(&x->lock);
        }
        return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);

void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        hlist_add_head(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);

int __xfrm_state_delete(struct xfrm_state *x)
{
        int err = -ESRCH;

        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                hlist_del(&x->bydst);
                hlist_del(&x->bysrc);
                if (x->id.spi)
                        hlist_del(&x->byspi);
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);

                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
                 * is what we are dropping here.
                 */
                __xfrm_state_put(x);
                err = 0;
        }

        return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
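
/*
 * Illustrative sketch (not part of this file) of the reference lifecycle
 * described above: xfrm_state_alloc() hands out the initial reference,
 * lookups take extra references via xfrm_state_hold(), and
 * __xfrm_state_delete() drops the allocation reference, after which the
 * last xfrm_state_put() queues the state for the GC worker.
 */
#if 0
static void example_state_lifecycle(void)
{
        struct xfrm_state *x = xfrm_state_alloc();      /* refcnt == 1 */

        if (!x)
                return;
        xfrm_state_hold(x);                             /* refcnt == 2 */
        xfrm_state_delete(x);                           /* drops alloc ref */
        xfrm_state_put(x);                              /* last ref: GC */
}
#endif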

int xfrm_state_delete(struct xfrm_state *x)
{
        int err;

        spin_lock_bh(&x->lock);
        err = __xfrm_state_delete(x);
        spin_unlock_bh(&x->lock);

        return err;
}
EXPORT_SYMBOL(xfrm_state_delete);

void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
        int i;
        int err = 0;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;
restart:
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_state_kern(x) &&
                            xfrm_id_proto_match(x->id.proto, proto)) {
                                xfrm_state_hold(x);
                                spin_unlock_bh(&xfrm_state_lock);

                                err = xfrm_state_delete(x);
                                xfrm_audit_log(audit_info->loginuid,
                                               audit_info->secid,
                                               AUDIT_MAC_IPSEC_DELSA,
                                               err ? 0 : 1, NULL, x);
                                xfrm_state_put(x);

                                spin_lock_bh(&xfrm_state_lock);
                                goto restart;
                        }
                }
        }
        spin_unlock_bh(&xfrm_state_lock);
        wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);

void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
{
        spin_lock_bh(&xfrm_state_lock);
        si->sadcnt = xfrm_state_num;
        si->sadhcnt = xfrm_state_hmask;
        si->sadhmcnt = xfrm_state_hashmax;
        spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_sad_getinfo);

static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
                  struct xfrm_tmpl *tmpl,
                  xfrm_address_t *daddr, xfrm_address_t *saddr,
                  unsigned short family)
{
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -1;
        afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
        xfrm_state_put_afinfo(afinfo);
        return 0;
}

static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}

static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_src_hash(daddr, saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6) ||
                            !ipv6_addr_equal((struct in6_addr *)saddr,
                                             (struct in6_addr *)
                                             x->props.saddr.a6))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}

static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
        if (use_spi)
                return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
                                           x->id.proto, family);
        else
                return __xfrm_state_lookup_byaddr(&x->id.daddr,
                                                  &x->props.saddr,
                                                  x->id.proto, family);
}

static void xfrm_hash_grow_check(int have_hash_collision)
{
        if (have_hash_collision &&
            (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
            xfrm_state_num > xfrm_state_hmask)
                schedule_work(&xfrm_hash_work);
}
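
/*
 * Worked example of the growth policy above, assuming the initial
 * 8-bucket tables from xfrm_state_init() (xfrm_state_hmask == 7): the
 * resize work is scheduled only once more than 7 states are installed
 * *and* an insertion actually collided in its bucket; xfrm_hash_resize()
 * then doubles each table to 16 buckets, bounded by xfrm_state_hashmax.
 */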

struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;

        spin_lock_bh(&xfrm_state_lock);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !security_xfrm_state_pol_flow_match(x, pol, fl))
                                        continue;
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    security_xfrm_state_pol_flow_match(x, pol, fl))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        if (!x && !error && !acquire_in_progress) {
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
                                              tmpl->id.proto, family)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
                if (error) {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                        h = xfrm_src_hash(daddr, saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                        }
                        x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
                        x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
                        add_timer(&x->timer);
                        xfrm_state_num++;
                        xfrm_hash_grow_check(x->bydst.next != NULL);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x)
                xfrm_state_hold(x);
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
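
/*
 * Illustrative sketch (not part of this file): how an output-side caller
 * resolves a template against the bydst table.  The flow, policy and
 * template here are assumed to come from the policy layer; on -EAGAIN an
 * acquire is already in flight and the packet waits for the key manager.
 */
#if 0
static struct xfrm_state *example_resolve(xfrm_address_t *daddr,
                                          xfrm_address_t *saddr,
                                          struct flowi *fl,
                                          struct xfrm_tmpl *tmpl,
                                          struct xfrm_policy *pol)
{
        int err = 0;
        struct xfrm_state *x;

        x = xfrm_state_find(daddr, saddr, fl, tmpl, pol, &err, AF_INET);
        if (!x && err == -EAGAIN)
                ;       /* acquire in progress; key manager was queried */
        return x;
}
#endif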

static void __xfrm_state_insert(struct xfrm_state *x)
{
        unsigned int h;

        x->genid = ++xfrm_state_genid;

        h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
                          x->props.reqid, x->props.family);
        hlist_add_head(&x->bydst, xfrm_state_bydst+h);

        h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

        if (x->id.spi) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
        }

        mod_timer(&x->timer, jiffies + HZ);
        if (x->replay_maxage)
                mod_timer(&x->rtimer, jiffies + x->replay_maxage);

        wake_up(&km_waitq);

        xfrm_state_num++;

        xfrm_hash_grow_check(x->bydst.next != NULL);
}

/* xfrm_state_lock is held */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
        unsigned short family = xnew->props.family;
        u32 reqid = xnew->props.reqid;
        struct xfrm_state *x;
        struct hlist_node *entry;
        unsigned int h;

        h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == reqid &&
                    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
                    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
                        x->genid = xfrm_state_genid;
        }
}

void xfrm_state_insert(struct xfrm_state *x)
{
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_bump_genids(x);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);

/* xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
        unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x;

        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
                    x->km.state     != XFRM_STATE_ACQ ||
                    x->id.spi       != 0 ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
                                             (struct in6_addr *)daddr) ||
                            !ipv6_addr_equal((struct in6_addr *)
                                             x->props.saddr.a6,
                                             (struct in6_addr *)saddr))
                                continue;
                        break;
                }

                xfrm_state_hold(x);
                return x;
        }

        if (!create)
                return NULL;

        x = xfrm_state_alloc();
        if (likely(x)) {
                switch (family) {
                case AF_INET:
                        x->sel.daddr.a4 = daddr->a4;
                        x->sel.saddr.a4 = saddr->a4;
                        x->sel.prefixlen_d = 32;
                        x->sel.prefixlen_s = 32;
                        x->props.saddr.a4 = saddr->a4;
                        x->id.daddr.a4 = daddr->a4;
                        break;

                case AF_INET6:
                        ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
                                       (struct in6_addr *)daddr);
                        ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
                                       (struct in6_addr *)saddr);
                        x->sel.prefixlen_d = 128;
                        x->sel.prefixlen_s = 128;
                        ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
                                       (struct in6_addr *)saddr);
                        ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
                                       (struct in6_addr *)daddr);
                        break;
                }

                x->km.state = XFRM_STATE_ACQ;
                x->id.proto = proto;
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
                x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
                xfrm_state_hold(x);
                x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
                add_timer(&x->timer);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                h = xfrm_src_hash(daddr, saddr, family);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);

                xfrm_state_num++;

                xfrm_hash_grow_check(x->bydst.next != NULL);
        }

        return x;
}

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);

int xfrm_state_add(struct xfrm_state *x)
{
        struct xfrm_state *x1;
        int family;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

        family = x->props.family;

        spin_lock_bh(&xfrm_state_lock);

        x1 = __xfrm_state_locate(x, use_spi, family);
        if (x1) {
                xfrm_state_put(x1);
                x1 = NULL;
                err = -EEXIST;
                goto out;
        }

        if (use_spi && x->km.seq) {
                x1 = __xfrm_find_acq_byseq(x->km.seq);
                if (x1 && ((x1->id.proto != x->id.proto) ||
                    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
                        xfrm_state_put(x1);
                        x1 = NULL;
                }
        }

        if (use_spi && !x1)
                x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
                                     x->id.proto,
                                     &x->id.daddr, &x->props.saddr, 0);

        __xfrm_state_bump_genids(x);
        __xfrm_state_insert(x);
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);

        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
        }

        return err;
}
EXPORT_SYMBOL(xfrm_state_add);

#ifdef CONFIG_XFRM_MIGRATE
struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
{
        int err = -ENOMEM;
        struct xfrm_state *x = xfrm_state_alloc();
        if (!x)
                goto error;

        memcpy(&x->id, &orig->id, sizeof(x->id));
        memcpy(&x->sel, &orig->sel, sizeof(x->sel));
        memcpy(&x->lft, &orig->lft, sizeof(x->lft));
        x->props.mode = orig->props.mode;
        x->props.replay_window = orig->props.replay_window;
        x->props.reqid = orig->props.reqid;
        x->props.family = orig->props.family;
        x->props.saddr = orig->props.saddr;

        if (orig->aalg) {
                x->aalg = xfrm_algo_clone(orig->aalg);
                if (!x->aalg)
                        goto error;
        }
        x->props.aalgo = orig->props.aalgo;

        if (orig->ealg) {
                x->ealg = xfrm_algo_clone(orig->ealg);
                if (!x->ealg)
                        goto error;
        }
        x->props.ealgo = orig->props.ealgo;

        if (orig->calg) {
                x->calg = xfrm_algo_clone(orig->calg);
                if (!x->calg)
                        goto error;
        }
        x->props.calgo = orig->props.calgo;

        if (orig->encap) {
                x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
                if (!x->encap)
                        goto error;
        }

        if (orig->coaddr) {
                x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
                                    GFP_KERNEL);
                if (!x->coaddr)
                        goto error;
        }

        err = xfrm_init_state(x);
        if (err)
                goto error;

        x->props.flags = orig->props.flags;

        x->curlft.add_time = orig->curlft.add_time;
        x->km.state = orig->km.state;
        x->km.seq = orig->km.seq;

        return x;

error:
        if (errp)
                *errp = err;
        if (x) {
                kfree(x->aalg);
                kfree(x->ealg);
                kfree(x->calg);
                kfree(x->encap);
                kfree(x->coaddr);
        }
        kfree(x);
        return NULL;
}
EXPORT_SYMBOL(xfrm_state_clone);

/* xfrm_state_lock is held */
struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
{
        unsigned int h;
        struct xfrm_state *x;
        struct hlist_node *entry;

        if (m->reqid) {
                h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
                                  m->reqid, m->old_family);
                hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                        if (x->props.mode != m->mode ||
                            x->id.proto != m->proto)
                                continue;
                        if (m->reqid && x->props.reqid != m->reqid)
                                continue;
                        if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
                                          m->old_family) ||
                            xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
                                          m->old_family))
                                continue;
                        xfrm_state_hold(x);
                        return x;
                }
        } else {
                h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
                                  m->old_family);
                hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
                        if (x->props.mode != m->mode ||
                            x->id.proto != m->proto)
                                continue;
                        if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
                                          m->old_family) ||
                            xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
                                          m->old_family))
                                continue;
                        xfrm_state_hold(x);
                        return x;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(xfrm_migrate_state_find);

struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
                                       struct xfrm_migrate *m)
{
        struct xfrm_state *xc;
        int err;

        xc = xfrm_state_clone(x, &err);
        if (!xc)
                return NULL;

        memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
        memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));

        /* add state */
        if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
                /* care is needed when the destination address of the
                   state is to be updated, as it is part of the lookup
                   triplet */
                xfrm_state_insert(xc);
        } else {
                if ((err = xfrm_state_add(xc)) < 0)
                        goto error;
        }

        return xc;
error:
        kfree(xc);
        return NULL;
}
EXPORT_SYMBOL(xfrm_state_migrate);
#endif

int xfrm_state_update(struct xfrm_state *x)
{
        struct xfrm_state *x1;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

        spin_lock_bh(&xfrm_state_lock);
        x1 = __xfrm_state_locate(x, use_spi, x->props.family);

        err = -ESRCH;
        if (!x1)
                goto out;

        if (xfrm_state_kern(x1)) {
                xfrm_state_put(x1);
                err = -EEXIST;
                goto out;
        }

        if (x1->km.state == XFRM_STATE_ACQ) {
                __xfrm_state_insert(x);
                x = NULL;
        }
        err = 0;

out:
        spin_unlock_bh(&xfrm_state_lock);

        if (err)
                return err;

        if (!x) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
                return 0;
        }

        err = -EINVAL;
        spin_lock_bh(&x1->lock);
        if (likely(x1->km.state == XFRM_STATE_VALID)) {
                if (x->encap && x1->encap)
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                if (x->coaddr && x1->coaddr) {
                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
                }
                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;

                mod_timer(&x1->timer, jiffies + HZ);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);

                err = 0;
        }
        spin_unlock_bh(&x1->lock);

        xfrm_state_put(x1);

        return err;
}
EXPORT_SYMBOL(xfrm_state_update);

int xfrm_state_check_expire(struct xfrm_state *x)
{
        if (!x->curlft.use_time)
                x->curlft.use_time = get_seconds();

        if (x->km.state != XFRM_STATE_VALID)
                return -EINVAL;

        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
                x->km.state = XFRM_STATE_EXPIRED;
                mod_timer(&x->timer, jiffies);
                return -EINVAL;
        }

        if (!x->km.dying &&
            (x->curlft.bytes >= x->lft.soft_byte_limit ||
             x->curlft.packets >= x->lft.soft_packet_limit)) {
                x->km.dying = 1;
                km_state_expired(x, 0, 0);
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);

static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
        int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
                - skb_headroom(skb);

        if (nhead > 0)
                return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

        /* Check tail too... */
        return 0;
}

int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int err = xfrm_state_check_expire(x);
        if (err)
                goto err;
        err = xfrm_state_check_space(x, skb);
err:
        return err;
}
EXPORT_SYMBOL(xfrm_state_check);

struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
                  unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup(daddr, spi, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);

struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
                         u8 proto, unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);

struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
              xfrm_address_t *daddr, xfrm_address_t *saddr,
              int create, unsigned short family)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
        spin_unlock_bh(&xfrm_state_lock);

        return x;
}
EXPORT_SYMBOL(xfrm_find_acq);

#ifdef CONFIG_XFRM_SUB_POLICY
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
               unsigned short family)
{
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);
        if (afinfo->tmpl_sort)
                err = afinfo->tmpl_sort(dst, src, n);
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);

int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
                unsigned short family)
{
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -EAFNOSUPPORT;

        spin_lock_bh(&xfrm_state_lock);
        if (afinfo->state_sort)
                err = afinfo->state_sort(dst, src, n);
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
#endif

/* Crude linear scan over the whole bydst table; no dedicated
 * resolution list is maintained. */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
        int i;

        for (i = 0; i <= xfrm_state_hmask; i++) {
                struct hlist_node *entry;
                struct xfrm_state *x;

                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (x->km.seq == seq &&
                            x->km.state == XFRM_STATE_ACQ) {
                                xfrm_state_hold(x);
                                return x;
                        }
                }
        }
        return NULL;
}

struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
        struct xfrm_state *x;

        spin_lock_bh(&xfrm_state_lock);
        x = __xfrm_find_acq_byseq(seq);
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);

u32 xfrm_get_acqseq(void)
{
        u32 res;
        static u32 acqseq;
        static DEFINE_SPINLOCK(acqseq_lock);

        spin_lock_bh(&acqseq_lock);
        res = (++acqseq ? : ++acqseq);  /* skip 0 on wraparound */
        spin_unlock_bh(&acqseq_lock);
        return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);

void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
        unsigned int h;
        struct xfrm_state *x0;

        if (x->id.spi)
                return;

        if (minspi == maxspi) {
                x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
                if (x0) {
                        xfrm_state_put(x0);
                        return;
                }
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
                u32 low = ntohl(minspi);
                u32 high = ntohl(maxspi);
                for (h=0; h<high-low+1; h++) {
                        spi = low + net_random()%(high-low+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
                                break;
                        }
                        xfrm_state_put(x0);
                }
        }
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                spin_unlock_bh(&xfrm_state_lock);
                wake_up(&km_waitq);
        }
}
EXPORT_SYMBOL(xfrm_alloc_spi);
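
/*
 * Illustrative sketch (not part of this file): requesting an SPI from a
 * host-order range, as a key manager might on behalf of userspace.  The
 * range bounds are arbitrary example numbers; note the htonl()
 * conversions, since xfrm_alloc_spi() takes network-order bounds.
 */
#if 0
static void example_pick_spi(struct xfrm_state *x)
{
        xfrm_alloc_spi(x, htonl(0x100), htonl(0xfff));
        if (!x->id.spi)
                ;       /* every SPI in the range was already in use */
}
#endif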

int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
                    void *data)
{
        int i;
        struct xfrm_state *x, *last = NULL;
        struct hlist_node *entry;
        int count = 0;
        int err = 0;

        spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i <= xfrm_state_hmask; i++) {
                hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
                        if (!xfrm_id_proto_match(x->id.proto, proto))
                                continue;
                        if (last) {
                                err = func(last, count, data);
                                if (err)
                                        goto out;
                        }
                        last = x;
                        count++;
                }
        }
        if (count == 0) {
                err = -ENOENT;
                goto out;
        }
        err = func(last, 0, data);
out:
        spin_unlock_bh(&xfrm_state_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_walk);

void xfrm_replay_notify(struct xfrm_state *x, int event)
{
        struct km_event c;
        /* we send notify messages in case
         *  1. we updated one of the sequence numbers, and the seqno difference
         *     is at least x->replay_maxdiff, in this case we also update the
         *     timeout of our timer function
         *  2. if x->replay_maxage has elapsed since last update,
         *     and there were changes
         *
         *  The state structure must be locked!
         */

        switch (event) {
        case XFRM_REPLAY_UPDATE:
                if (x->replay_maxdiff &&
                    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
                    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
                        if (x->xflags & XFRM_TIME_DEFER)
                                event = XFRM_REPLAY_TIMEOUT;
                        else
                                return;
                }

                break;

        case XFRM_REPLAY_TIMEOUT:
                if ((x->replay.seq == x->preplay.seq) &&
                    (x->replay.bitmap == x->preplay.bitmap) &&
                    (x->replay.oseq == x->preplay.oseq)) {
                        x->xflags |= XFRM_TIME_DEFER;
                        return;
                }

                break;
        }

        memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
        c.event = XFRM_MSG_NEWAE;
        c.data.aevent = event;
        km_state_notify(x, &c);

        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);

static void xfrm_replay_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;

        spin_lock(&x->lock);

        if (x->km.state == XFRM_STATE_VALID) {
                if (xfrm_aevent_is_on())
                        xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
                else
                        x->xflags |= XFRM_TIME_DEFER;
        }

        spin_unlock(&x->lock);
}

int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
        u32 diff;
        u32 seq = ntohl(net_seq);

        if (unlikely(seq == 0))
                return -EINVAL;

        if (likely(seq > x->replay.seq))
                return 0;

        diff = x->replay.seq - seq;
        if (diff >= min_t(unsigned int, x->props.replay_window,
                          sizeof(x->replay.bitmap) * 8)) {
                x->stats.replay_window++;
                return -EINVAL;
        }

        if (x->replay.bitmap & (1U << diff)) {
                x->stats.replay++;
                return -EINVAL;
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);

void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
        u32 diff;
        u32 seq = ntohl(net_seq);

        if (seq > x->replay.seq) {
                diff = seq - x->replay.seq;
                if (diff < x->props.replay_window)
                        x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
                else
                        x->replay.bitmap = 1;
                x->replay.seq = seq;
        } else {
                diff = x->replay.seq - seq;
                x->replay.bitmap |= (1U << diff);
        }

        if (xfrm_aevent_is_on())
                xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
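
/*
 * Worked example of the sliding window above, assuming a 32-bit window:
 * after seq 100 is received, replay.seq == 100 and bit 0 is set.  An
 * in-window late arrival of seq 97 sets bit 3 (diff == 3); receiving 97
 * again is caught by the bitmap test in xfrm_replay_check().  A jump to
 * seq 140 (diff >= window) resets the bitmap to 1 and slides replay.seq
 * forward to 140.
 */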

static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);

void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list)
                if (km->notify_policy)
                        km->notify_policy(xp, dir, c);
        read_unlock(&xfrm_km_lock);
}

void km_state_notify(struct xfrm_state *x, struct km_event *c)
{
        struct xfrm_mgr *km;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list)
                if (km->notify)
                        km->notify(x, c);
        read_unlock(&xfrm_km_lock);
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);

void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
{
        struct km_event c;

        c.data.hard = hard;
        c.pid = pid;
        c.event = XFRM_MSG_EXPIRE;
        km_state_notify(x, &c);

        if (hard)
                wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_state_expired);

/*
 * We send acquire messages to all registered key managers regardless of
 * individual failures; one success is enough.
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
        int err = -EINVAL, acqret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
                if (!acqret)
                        err = acqret;
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_query);

int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
        int err = -EINVAL;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->new_mapping)
                        err = km->new_mapping(x, ipaddr, sport);
                if (!err)
                        break;
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_new_mapping);

void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
        struct km_event c;

        c.data.hard = hard;
        c.pid = pid;
        c.event = XFRM_MSG_POLEXPIRE;
        km_policy_notify(pol, dir, &c);

        if (hard)
                wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);

int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
               struct xfrm_migrate *m, int num_migrate)
{
        int err = -EINVAL;
        int ret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->migrate) {
                        ret = km->migrate(sel, dir, type, m, num_migrate);
                        if (!ret)
                                err = ret;
                }
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_migrate);

int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
        int err = -EINVAL;
        int ret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->report) {
                        ret = km->report(proto, sel, addr);
                        if (!ret)
                                err = ret;
                }
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_report);

int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
        int err;
        u8 *data;
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;

        if (optlen <= 0 || optlen > PAGE_SIZE)
                return -EMSGSIZE;

        data = kmalloc(optlen, GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        err = -EFAULT;
        if (copy_from_user(data, optval, optlen))
                goto out;

        err = -EINVAL;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                pol = km->compile_policy(sk, optname, data,
                                         optlen, &err);
                if (err >= 0)
                        break;
        }
        read_unlock(&xfrm_km_lock);

        if (err >= 0) {
                xfrm_sk_policy_insert(sk, err, pol);
                xfrm_pol_put(pol);
                err = 0;
        }

out:
        kfree(data);
        return err;
}
EXPORT_SYMBOL(xfrm_user_policy);

int xfrm_register_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_add_tail(&km->list, &xfrm_km_list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_del(&km->list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
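
/*
 * Illustrative sketch (not part of this file): the minimal shape of a
 * key manager registration.  The example_acquire() callback and the
 * "example_mgr" name are hypothetical; real managers (af_key, xfrm_user)
 * fill in the remaining struct xfrm_mgr hooks as well.
 */
#if 0
static int example_acquire(struct xfrm_state *x, struct xfrm_tmpl *t,
                           struct xfrm_policy *xp, int dir)
{
        return 0;       /* pretend the acquire message was delivered */
}

static struct xfrm_mgr example_mgr = {
        .id             = "example_mgr",
        .acquire        = example_acquire,
};

static int __init example_km_init(void)
{
        return xfrm_register_km(&example_mgr);
}
#endif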

int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else
                xfrm_state_afinfo[afinfo->family] = afinfo;
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else
                        xfrm_state_afinfo[afinfo->family] = NULL;
        }
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);

struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
        struct xfrm_state_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_state_afinfo_lock);
        afinfo = xfrm_state_afinfo[family];
        if (unlikely(!afinfo))
                read_unlock(&xfrm_state_afinfo_lock);
        return afinfo;
}

/* The read lock taken in xfrm_state_get_afinfo() is released here. */
void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
        read_unlock(&xfrm_state_afinfo_lock);
}

EXPORT_SYMBOL(xfrm_state_get_afinfo);
EXPORT_SYMBOL(xfrm_state_put_afinfo);

/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
        if (x->tunnel) {
                struct xfrm_state *t = x->tunnel;

                if (atomic_read(&t->tunnel_users) == 2)
                        xfrm_state_delete(t);
                atomic_dec(&t->tunnel_users);
                xfrm_state_put(t);
                x->tunnel = NULL;
        }
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);

int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
        int res;

        spin_lock_bh(&x->lock);
        if (x->km.state == XFRM_STATE_VALID &&
            x->type && x->type->get_mtu)
                res = x->type->get_mtu(x, mtu);
        else
                res = mtu - x->props.header_len;
        spin_unlock_bh(&x->lock);
        return res;
}

int xfrm_init_state(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        int family = x->props.family;
        int err;

        err = -EAFNOSUPPORT;
        afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                goto error;

        err = 0;
        if (afinfo->init_flags)
                err = afinfo->init_flags(x);

        xfrm_state_put_afinfo(afinfo);

        if (err)
                goto error;

        err = -EPROTONOSUPPORT;
        x->type = xfrm_get_type(x->id.proto, family);
        if (x->type == NULL)
                goto error;

        err = x->type->init_state(x);
        if (err)
                goto error;

        x->mode = xfrm_get_mode(x->props.mode, family);
        if (x->mode == NULL)
                goto error;

        x->km.state = XFRM_STATE_VALID;

error:
        return err;
}
EXPORT_SYMBOL(xfrm_init_state);

void __init xfrm_state_init(void)
{
        unsigned int sz;

        sz = sizeof(struct hlist_head) * 8;

        xfrm_state_bydst = xfrm_hash_alloc(sz);
        xfrm_state_bysrc = xfrm_hash_alloc(sz);
        xfrm_state_byspi = xfrm_hash_alloc(sz);
        if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
                panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

        INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}