Merge branch 'report-erspan-version-field-just-for-erspan-tunnels'
[muen/linux.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <linux/audit.h>
23 #include <linux/uaccess.h>
24 #include <linux/ktime.h>
25 #include <linux/slab.h>
26 #include <linux/interrupt.h>
27 #include <linux/kernel.h>
28
29 #include "xfrm_hash.h"
30
/*
 * Update-side accessor for the per-netns state hash table pointers: wraps
 * rcu_dereference_protected() with a lockdep check that
 * (net)->xfrm.xfrm_state_lock is held.
 */
31 #define xfrm_state_deref_prot(table, net) \
32         rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
33
/* Declared early because DECLARE_WORK(xfrm_state_gc_work, ...) below names it. */
34 static void xfrm_state_gc_task(struct work_struct *work);
35
36 /* Each xfrm_state may be linked into the following hash tables:
 *
 *    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
 *       Only states with a non-zero SPI are linked here.
 *    2. Hash table by (daddr,saddr,reqid,family) to find what SAs exist
 *       for a given destination/tunnel endpoint. (output)
 *    3. Hash table by (daddr,saddr,family), used to look a state up by
 *       its address pair when the SPI is not used as the key.
 */
42
/* Bucket-count limit: xfrm_hash_grow_check() stops requesting growth here. */
43 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Write side is entered around the table swap in xfrm_hash_resize(). */
44 static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
/* Slab cache used by xfrm_state_alloc() and xfrm_state_free(). */
45 static struct kmem_cache *xfrm_state_cache __ro_after_init;
46
/*
 * Deferred destruction: __xfrm_state_destroy() queues states on
 * xfrm_state_gc_list (under xfrm_state_gc_lock) and schedules
 * xfrm_state_gc_work, which runs xfrm_state_gc_task().
 */
47 static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
48 static HLIST_HEAD(xfrm_state_gc_list);
49
50 static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
51 {
52         return refcount_inc_not_zero(&x->refcnt);
53 }
54
55 static inline unsigned int xfrm_dst_hash(struct net *net,
56                                          const xfrm_address_t *daddr,
57                                          const xfrm_address_t *saddr,
58                                          u32 reqid,
59                                          unsigned short family)
60 {
61         return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
62 }
63
64 static inline unsigned int xfrm_src_hash(struct net *net,
65                                          const xfrm_address_t *daddr,
66                                          const xfrm_address_t *saddr,
67                                          unsigned short family)
68 {
69         return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
70 }
71
72 static inline unsigned int
73 xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
74               __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head_rcu(&x->bydst, ndsttable + h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head_rcu(&x->bysrc, nsrctable + h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head_rcu(&x->byspi, nspitable + h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
111 {
112         return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
113 }
114
115 static void xfrm_hash_resize(struct work_struct *work)
116 {
117         struct net *net = container_of(work, struct net, xfrm.state_hash_work);
118         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
119         unsigned long nsize, osize;
120         unsigned int nhashmask, ohashmask;
121         int i;
122
123         nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
124         ndst = xfrm_hash_alloc(nsize);
125         if (!ndst)
126                 return;
127         nsrc = xfrm_hash_alloc(nsize);
128         if (!nsrc) {
129                 xfrm_hash_free(ndst, nsize);
130                 return;
131         }
132         nspi = xfrm_hash_alloc(nsize);
133         if (!nspi) {
134                 xfrm_hash_free(ndst, nsize);
135                 xfrm_hash_free(nsrc, nsize);
136                 return;
137         }
138
139         spin_lock_bh(&net->xfrm.xfrm_state_lock);
140         write_seqcount_begin(&xfrm_state_hash_generation);
141
142         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
143         odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
144         for (i = net->xfrm.state_hmask; i >= 0; i--)
145                 xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
146
147         osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
148         ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
149         ohashmask = net->xfrm.state_hmask;
150
151         rcu_assign_pointer(net->xfrm.state_bydst, ndst);
152         rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
153         rcu_assign_pointer(net->xfrm.state_byspi, nspi);
154         net->xfrm.state_hmask = nhashmask;
155
156         write_seqcount_end(&xfrm_state_hash_generation);
157         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
158
159         osize = (ohashmask + 1) * sizeof(struct hlist_head);
160
161         synchronize_rcu();
162
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166 }
167
/*
 * NOTE(review): presumably serialises updates to xfrm_state_afinfo[]; the
 * code that takes this lock is outside the visible part of the file.
 */
168 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
/*
 * RCU-annotated table of per-address-family operations (NPROTO entries).
 * Functions below reach it through xfrm_state_get_afinfo() and
 * xfrm_state_afinfo_get_rcu().
 */
169 static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
170
/* Protects xfrm_state_gc_list. */
171 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
172
/* Forward declaration: xfrm_timer_handler() calls this ahead of its definition. */
173 int __xfrm_state_delete(struct xfrm_state *x);
174
/* Key-manager entry points used by this file; not defined in the visible part. */
175 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
176 bool km_is_alive(const struct km_event *c);
177 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
178
179 static DEFINE_SPINLOCK(xfrm_type_lock);
180 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
181 {
182         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
183         const struct xfrm_type **typemap;
184         int err = 0;
185
186         if (unlikely(afinfo == NULL))
187                 return -EAFNOSUPPORT;
188         typemap = afinfo->type_map;
189         spin_lock_bh(&xfrm_type_lock);
190
191         if (likely(typemap[type->proto] == NULL))
192                 typemap[type->proto] = type;
193         else
194                 err = -EEXIST;
195         spin_unlock_bh(&xfrm_type_lock);
196         rcu_read_unlock();
197         return err;
198 }
199 EXPORT_SYMBOL(xfrm_register_type);
200
201 int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
202 {
203         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
204         const struct xfrm_type **typemap;
205         int err = 0;
206
207         if (unlikely(afinfo == NULL))
208                 return -EAFNOSUPPORT;
209         typemap = afinfo->type_map;
210         spin_lock_bh(&xfrm_type_lock);
211
212         if (unlikely(typemap[type->proto] != type))
213                 err = -ENOENT;
214         else
215                 typemap[type->proto] = NULL;
216         spin_unlock_bh(&xfrm_type_lock);
217         rcu_read_unlock();
218         return err;
219 }
220 EXPORT_SYMBOL(xfrm_unregister_type);
221
222 static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
223 {
224         struct xfrm_state_afinfo *afinfo;
225         const struct xfrm_type **typemap;
226         const struct xfrm_type *type;
227         int modload_attempted = 0;
228
229 retry:
230         afinfo = xfrm_state_get_afinfo(family);
231         if (unlikely(afinfo == NULL))
232                 return NULL;
233         typemap = afinfo->type_map;
234
235         type = READ_ONCE(typemap[proto]);
236         if (unlikely(type && !try_module_get(type->owner)))
237                 type = NULL;
238
239         rcu_read_unlock();
240
241         if (!type && !modload_attempted) {
242                 request_module("xfrm-type-%d-%d", family, proto);
243                 modload_attempted = 1;
244                 goto retry;
245         }
246
247         return type;
248 }
249
250 static void xfrm_put_type(const struct xfrm_type *type)
251 {
252         module_put(type->owner);
253 }
254
255 static DEFINE_SPINLOCK(xfrm_type_offload_lock);
256 int xfrm_register_type_offload(const struct xfrm_type_offload *type,
257                                unsigned short family)
258 {
259         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
260         const struct xfrm_type_offload **typemap;
261         int err = 0;
262
263         if (unlikely(afinfo == NULL))
264                 return -EAFNOSUPPORT;
265         typemap = afinfo->type_offload_map;
266         spin_lock_bh(&xfrm_type_offload_lock);
267
268         if (likely(typemap[type->proto] == NULL))
269                 typemap[type->proto] = type;
270         else
271                 err = -EEXIST;
272         spin_unlock_bh(&xfrm_type_offload_lock);
273         rcu_read_unlock();
274         return err;
275 }
276 EXPORT_SYMBOL(xfrm_register_type_offload);
277
278 int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
279                                  unsigned short family)
280 {
281         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
282         const struct xfrm_type_offload **typemap;
283         int err = 0;
284
285         if (unlikely(afinfo == NULL))
286                 return -EAFNOSUPPORT;
287         typemap = afinfo->type_offload_map;
288         spin_lock_bh(&xfrm_type_offload_lock);
289
290         if (unlikely(typemap[type->proto] != type))
291                 err = -ENOENT;
292         else
293                 typemap[type->proto] = NULL;
294         spin_unlock_bh(&xfrm_type_offload_lock);
295         rcu_read_unlock();
296         return err;
297 }
298 EXPORT_SYMBOL(xfrm_unregister_type_offload);
299
300 static const struct xfrm_type_offload *
301 xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
302 {
303         struct xfrm_state_afinfo *afinfo;
304         const struct xfrm_type_offload **typemap;
305         const struct xfrm_type_offload *type;
306
307 retry:
308         afinfo = xfrm_state_get_afinfo(family);
309         if (unlikely(afinfo == NULL))
310                 return NULL;
311         typemap = afinfo->type_offload_map;
312
313         type = typemap[proto];
314         if ((type && !try_module_get(type->owner)))
315                 type = NULL;
316
317         rcu_read_unlock();
318
319         if (!type && try_load) {
320                 request_module("xfrm-offload-%d-%d", family, proto);
321                 try_load = false;
322                 goto retry;
323         }
324
325         return type;
326 }
327
328 static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
329 {
330         module_put(type->owner);
331 }
332
333 static DEFINE_SPINLOCK(xfrm_mode_lock);
334 int xfrm_register_mode(struct xfrm_mode *mode, int family)
335 {
336         struct xfrm_state_afinfo *afinfo;
337         struct xfrm_mode **modemap;
338         int err;
339
340         if (unlikely(mode->encap >= XFRM_MODE_MAX))
341                 return -EINVAL;
342
343         afinfo = xfrm_state_get_afinfo(family);
344         if (unlikely(afinfo == NULL))
345                 return -EAFNOSUPPORT;
346
347         err = -EEXIST;
348         modemap = afinfo->mode_map;
349         spin_lock_bh(&xfrm_mode_lock);
350         if (modemap[mode->encap])
351                 goto out;
352
353         err = -ENOENT;
354         if (!try_module_get(afinfo->owner))
355                 goto out;
356
357         mode->afinfo = afinfo;
358         modemap[mode->encap] = mode;
359         err = 0;
360
361 out:
362         spin_unlock_bh(&xfrm_mode_lock);
363         rcu_read_unlock();
364         return err;
365 }
366 EXPORT_SYMBOL(xfrm_register_mode);
367
368 int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
369 {
370         struct xfrm_state_afinfo *afinfo;
371         struct xfrm_mode **modemap;
372         int err;
373
374         if (unlikely(mode->encap >= XFRM_MODE_MAX))
375                 return -EINVAL;
376
377         afinfo = xfrm_state_get_afinfo(family);
378         if (unlikely(afinfo == NULL))
379                 return -EAFNOSUPPORT;
380
381         err = -ENOENT;
382         modemap = afinfo->mode_map;
383         spin_lock_bh(&xfrm_mode_lock);
384         if (likely(modemap[mode->encap] == mode)) {
385                 modemap[mode->encap] = NULL;
386                 module_put(mode->afinfo->owner);
387                 err = 0;
388         }
389
390         spin_unlock_bh(&xfrm_mode_lock);
391         rcu_read_unlock();
392         return err;
393 }
394 EXPORT_SYMBOL(xfrm_unregister_mode);
395
396 static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
397 {
398         struct xfrm_state_afinfo *afinfo;
399         struct xfrm_mode *mode;
400         int modload_attempted = 0;
401
402         if (unlikely(encap >= XFRM_MODE_MAX))
403                 return NULL;
404
405 retry:
406         afinfo = xfrm_state_get_afinfo(family);
407         if (unlikely(afinfo == NULL))
408                 return NULL;
409
410         mode = READ_ONCE(afinfo->mode_map[encap]);
411         if (unlikely(mode && !try_module_get(mode->owner)))
412                 mode = NULL;
413
414         rcu_read_unlock();
415         if (!mode && !modload_attempted) {
416                 request_module("xfrm-mode-%d-%d", family, encap);
417                 modload_attempted = 1;
418                 goto retry;
419         }
420
421         return mode;
422 }
423
424 static void xfrm_put_mode(struct xfrm_mode *mode)
425 {
426         module_put(mode->owner);
427 }
428
429 void xfrm_state_free(struct xfrm_state *x)
430 {
431         kmem_cache_free(xfrm_state_cache, x);
432 }
433 EXPORT_SYMBOL(xfrm_state_free);
434
435 static void xfrm_state_gc_destroy(struct xfrm_state *x)
436 {
437         tasklet_hrtimer_cancel(&x->mtimer);
438         del_timer_sync(&x->rtimer);
439         kfree(x->aead);
440         kfree(x->aalg);
441         kfree(x->ealg);
442         kfree(x->calg);
443         kfree(x->encap);
444         kfree(x->coaddr);
445         kfree(x->replay_esn);
446         kfree(x->preplay_esn);
447         if (x->inner_mode)
448                 xfrm_put_mode(x->inner_mode);
449         if (x->inner_mode_iaf)
450                 xfrm_put_mode(x->inner_mode_iaf);
451         if (x->outer_mode)
452                 xfrm_put_mode(x->outer_mode);
453         if (x->type_offload)
454                 xfrm_put_type_offload(x->type_offload);
455         if (x->type) {
456                 x->type->destructor(x);
457                 xfrm_put_type(x->type);
458         }
459         xfrm_dev_state_free(x);
460         security_xfrm_state_free(x);
461         xfrm_state_free(x);
462 }
463
464 static void xfrm_state_gc_task(struct work_struct *work)
465 {
466         struct xfrm_state *x;
467         struct hlist_node *tmp;
468         struct hlist_head gc_list;
469
470         spin_lock_bh(&xfrm_state_gc_lock);
471         hlist_move_list(&xfrm_state_gc_list, &gc_list);
472         spin_unlock_bh(&xfrm_state_gc_lock);
473
474         synchronize_rcu();
475
476         hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
477                 xfrm_state_gc_destroy(x);
478 }
479
480 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
481 {
482         struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
483         struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
484         time64_t now = ktime_get_real_seconds();
485         time64_t next = TIME64_MAX;
486         int warn = 0;
487         int err = 0;
488
489         spin_lock(&x->lock);
490         if (x->km.state == XFRM_STATE_DEAD)
491                 goto out;
492         if (x->km.state == XFRM_STATE_EXPIRED)
493                 goto expired;
494         if (x->lft.hard_add_expires_seconds) {
495                 long tmo = x->lft.hard_add_expires_seconds +
496                         x->curlft.add_time - now;
497                 if (tmo <= 0) {
498                         if (x->xflags & XFRM_SOFT_EXPIRE) {
499                                 /* enter hard expire without soft expire first?!
500                                  * setting a new date could trigger this.
501                                  * workaround: fix x->curflt.add_time by below:
502                                  */
503                                 x->curlft.add_time = now - x->saved_tmo - 1;
504                                 tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
505                         } else
506                                 goto expired;
507                 }
508                 if (tmo < next)
509                         next = tmo;
510         }
511         if (x->lft.hard_use_expires_seconds) {
512                 long tmo = x->lft.hard_use_expires_seconds +
513                         (x->curlft.use_time ? : now) - now;
514                 if (tmo <= 0)
515                         goto expired;
516                 if (tmo < next)
517                         next = tmo;
518         }
519         if (x->km.dying)
520                 goto resched;
521         if (x->lft.soft_add_expires_seconds) {
522                 long tmo = x->lft.soft_add_expires_seconds +
523                         x->curlft.add_time - now;
524                 if (tmo <= 0) {
525                         warn = 1;
526                         x->xflags &= ~XFRM_SOFT_EXPIRE;
527                 } else if (tmo < next) {
528                         next = tmo;
529                         x->xflags |= XFRM_SOFT_EXPIRE;
530                         x->saved_tmo = tmo;
531                 }
532         }
533         if (x->lft.soft_use_expires_seconds) {
534                 long tmo = x->lft.soft_use_expires_seconds +
535                         (x->curlft.use_time ? : now) - now;
536                 if (tmo <= 0)
537                         warn = 1;
538                 else if (tmo < next)
539                         next = tmo;
540         }
541
542         x->km.dying = warn;
543         if (warn)
544                 km_state_expired(x, 0, 0);
545 resched:
546         if (next != TIME64_MAX) {
547                 tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
548         }
549
550         goto out;
551
552 expired:
553         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
554                 x->km.state = XFRM_STATE_EXPIRED;
555
556         err = __xfrm_state_delete(x);
557         if (!err)
558                 km_state_expired(x, 1, 0);
559
560         xfrm_audit_state_delete(x, err ? 0 : 1, true);
561
562 out:
563         spin_unlock(&x->lock);
564         return HRTIMER_NORESTART;
565 }
566
567 static void xfrm_replay_timer_handler(struct timer_list *t);
568
569 struct xfrm_state *xfrm_state_alloc(struct net *net)
570 {
571         struct xfrm_state *x;
572
573         x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
574
575         if (x) {
576                 write_pnet(&x->xs_net, net);
577                 refcount_set(&x->refcnt, 1);
578                 atomic_set(&x->tunnel_users, 0);
579                 INIT_LIST_HEAD(&x->km.all);
580                 INIT_HLIST_NODE(&x->bydst);
581                 INIT_HLIST_NODE(&x->bysrc);
582                 INIT_HLIST_NODE(&x->byspi);
583                 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
584                                         CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
585                 timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
586                 x->curlft.add_time = ktime_get_real_seconds();
587                 x->lft.soft_byte_limit = XFRM_INF;
588                 x->lft.soft_packet_limit = XFRM_INF;
589                 x->lft.hard_byte_limit = XFRM_INF;
590                 x->lft.hard_packet_limit = XFRM_INF;
591                 x->replay_maxage = 0;
592                 x->replay_maxdiff = 0;
593                 x->inner_mode = NULL;
594                 x->inner_mode_iaf = NULL;
595                 spin_lock_init(&x->lock);
596         }
597         return x;
598 }
599 EXPORT_SYMBOL(xfrm_state_alloc);
600
601 void __xfrm_state_destroy(struct xfrm_state *x)
602 {
603         WARN_ON(x->km.state != XFRM_STATE_DEAD);
604
605         spin_lock_bh(&xfrm_state_gc_lock);
606         hlist_add_head(&x->gclist, &xfrm_state_gc_list);
607         spin_unlock_bh(&xfrm_state_gc_lock);
608         schedule_work(&xfrm_state_gc_work);
609 }
610 EXPORT_SYMBOL(__xfrm_state_destroy);
611
612 int __xfrm_state_delete(struct xfrm_state *x)
613 {
614         struct net *net = xs_net(x);
615         int err = -ESRCH;
616
617         if (x->km.state != XFRM_STATE_DEAD) {
618                 x->km.state = XFRM_STATE_DEAD;
619                 spin_lock(&net->xfrm.xfrm_state_lock);
620                 list_del(&x->km.all);
621                 hlist_del_rcu(&x->bydst);
622                 hlist_del_rcu(&x->bysrc);
623                 if (x->id.spi)
624                         hlist_del_rcu(&x->byspi);
625                 net->xfrm.state_num--;
626                 spin_unlock(&net->xfrm.xfrm_state_lock);
627
628                 xfrm_dev_state_delete(x);
629
630                 /* All xfrm_state objects are created by xfrm_state_alloc.
631                  * The xfrm_state_alloc call gives a reference, and that
632                  * is what we are dropping here.
633                  */
634                 xfrm_state_put(x);
635                 err = 0;
636         }
637
638         return err;
639 }
640 EXPORT_SYMBOL(__xfrm_state_delete);
641
642 int xfrm_state_delete(struct xfrm_state *x)
643 {
644         int err;
645
646         spin_lock_bh(&x->lock);
647         err = __xfrm_state_delete(x);
648         spin_unlock_bh(&x->lock);
649
650         return err;
651 }
652 EXPORT_SYMBOL(xfrm_state_delete);
653
654 #ifdef CONFIG_SECURITY_NETWORK_XFRM
655 static inline int
656 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
657 {
658         int i, err = 0;
659
660         for (i = 0; i <= net->xfrm.state_hmask; i++) {
661                 struct xfrm_state *x;
662
663                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
664                         if (xfrm_id_proto_match(x->id.proto, proto) &&
665                            (err = security_xfrm_state_delete(x)) != 0) {
666                                 xfrm_audit_state_delete(x, 0, task_valid);
667                                 return err;
668                         }
669                 }
670         }
671
672         return err;
673 }
674
675 static inline int
676 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
677 {
678         int i, err = 0;
679
680         for (i = 0; i <= net->xfrm.state_hmask; i++) {
681                 struct xfrm_state *x;
682                 struct xfrm_state_offload *xso;
683
684                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
685                         xso = &x->xso;
686
687                         if (xso->dev == dev &&
688                            (err = security_xfrm_state_delete(x)) != 0) {
689                                 xfrm_audit_state_delete(x, 0, task_valid);
690                                 return err;
691                         }
692                 }
693         }
694
695         return err;
696 }
697 #else
698 static inline int
699 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
700 {
701         return 0;
702 }
703
704 static inline int
705 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
706 {
707         return 0;
708 }
709 #endif
710
711 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
712 {
713         int i, err = 0, cnt = 0;
714
715         spin_lock_bh(&net->xfrm.xfrm_state_lock);
716         err = xfrm_state_flush_secctx_check(net, proto, task_valid);
717         if (err)
718                 goto out;
719
720         err = -ESRCH;
721         for (i = 0; i <= net->xfrm.state_hmask; i++) {
722                 struct xfrm_state *x;
723 restart:
724                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
725                         if (!xfrm_state_kern(x) &&
726                             xfrm_id_proto_match(x->id.proto, proto)) {
727                                 xfrm_state_hold(x);
728                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
729
730                                 err = xfrm_state_delete(x);
731                                 xfrm_audit_state_delete(x, err ? 0 : 1,
732                                                         task_valid);
733                                 xfrm_state_put(x);
734                                 if (!err)
735                                         cnt++;
736
737                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
738                                 goto restart;
739                         }
740                 }
741         }
742 out:
743         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
744         if (cnt)
745                 err = 0;
746
747         return err;
748 }
749 EXPORT_SYMBOL(xfrm_state_flush);
750
751 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
752 {
753         int i, err = 0, cnt = 0;
754
755         spin_lock_bh(&net->xfrm.xfrm_state_lock);
756         err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
757         if (err)
758                 goto out;
759
760         err = -ESRCH;
761         for (i = 0; i <= net->xfrm.state_hmask; i++) {
762                 struct xfrm_state *x;
763                 struct xfrm_state_offload *xso;
764 restart:
765                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
766                         xso = &x->xso;
767
768                         if (!xfrm_state_kern(x) && xso->dev == dev) {
769                                 xfrm_state_hold(x);
770                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
771
772                                 err = xfrm_state_delete(x);
773                                 xfrm_audit_state_delete(x, err ? 0 : 1,
774                                                         task_valid);
775                                 xfrm_state_put(x);
776                                 if (!err)
777                                         cnt++;
778
779                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
780                                 goto restart;
781                         }
782                 }
783         }
784         if (cnt)
785                 err = 0;
786
787 out:
788         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
789         return err;
790 }
791 EXPORT_SYMBOL(xfrm_dev_state_flush);
792
793 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
794 {
795         spin_lock_bh(&net->xfrm.xfrm_state_lock);
796         si->sadcnt = net->xfrm.state_num;
797         si->sadhcnt = net->xfrm.state_hmask + 1;
798         si->sadhmcnt = xfrm_state_hashmax;
799         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
800 }
801 EXPORT_SYMBOL(xfrm_sad_getinfo);
802
803 static void
804 xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
805                     const struct xfrm_tmpl *tmpl,
806                     const xfrm_address_t *daddr, const xfrm_address_t *saddr,
807                     unsigned short family)
808 {
809         struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
810
811         if (!afinfo)
812                 return;
813
814         afinfo->init_tempsel(&x->sel, fl);
815
816         if (family != tmpl->encap_family) {
817                 afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
818                 if (!afinfo)
819                         return;
820         }
821         afinfo->init_temprop(x, tmpl, daddr, saddr);
822 }
823
824 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
825                                               const xfrm_address_t *daddr,
826                                               __be32 spi, u8 proto,
827                                               unsigned short family)
828 {
829         unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
830         struct xfrm_state *x;
831
832         hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
833                 if (x->props.family != family ||
834                     x->id.spi       != spi ||
835                     x->id.proto     != proto ||
836                     !xfrm_addr_equal(&x->id.daddr, daddr, family))
837                         continue;
838
839                 if ((mark & x->mark.m) != x->mark.v)
840                         continue;
841                 if (!xfrm_state_hold_rcu(x))
842                         continue;
843                 return x;
844         }
845
846         return NULL;
847 }
848
849 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
850                                                      const xfrm_address_t *daddr,
851                                                      const xfrm_address_t *saddr,
852                                                      u8 proto, unsigned short family)
853 {
854         unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
855         struct xfrm_state *x;
856
857         hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
858                 if (x->props.family != family ||
859                     x->id.proto     != proto ||
860                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
861                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
862                         continue;
863
864                 if ((mark & x->mark.m) != x->mark.v)
865                         continue;
866                 if (!xfrm_state_hold_rcu(x))
867                         continue;
868                 return x;
869         }
870
871         return NULL;
872 }
873
874 static inline struct xfrm_state *
875 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
876 {
877         struct net *net = xs_net(x);
878         u32 mark = x->mark.v & x->mark.m;
879
880         if (use_spi)
881                 return __xfrm_state_lookup(net, mark, &x->id.daddr,
882                                            x->id.spi, x->id.proto, family);
883         else
884                 return __xfrm_state_lookup_byaddr(net, mark,
885                                                   &x->id.daddr,
886                                                   &x->props.saddr,
887                                                   x->id.proto, family);
888 }
889
890 static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
891 {
892         if (have_hash_collision &&
893             (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
894             net->xfrm.state_num > net->xfrm.state_hmask)
895                 schedule_work(&net->xfrm.state_hash_work);
896 }
897
898 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
899                                const struct flowi *fl, unsigned short family,
900                                struct xfrm_state **best, int *acq_in_progress,
901                                int *error)
902 {
903         /* Resolution logic:
904          * 1. There is a valid state with matching selector. Done.
905          * 2. Valid state with inappropriate selector. Skip.
906          *
907          * Entering area of "sysdeps".
908          *
909          * 3. If state is not valid, selector is temporary, it selects
910          *    only session which triggered previous resolution. Key
911          *    manager will do something to install a state with proper
912          *    selector.
913          */
914         if (x->km.state == XFRM_STATE_VALID) {
915                 if ((x->sel.family &&
916                      !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
917                     !security_xfrm_state_pol_flow_match(x, pol, fl))
918                         return;
919
920                 if (!*best ||
921                     (*best)->km.dying > x->km.dying ||
922                     ((*best)->km.dying == x->km.dying &&
923                      (*best)->curlft.add_time < x->curlft.add_time))
924                         *best = x;
925         } else if (x->km.state == XFRM_STATE_ACQ) {
926                 *acq_in_progress = 1;
927         } else if (x->km.state == XFRM_STATE_ERROR ||
928                    x->km.state == XFRM_STATE_EXPIRED) {
929                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
930                     security_xfrm_state_pol_flow_match(x, pol, fl))
931                         *error = -ESRCH;
932         }
933 }
934
/*
 * Find the state to use for template @tmpl of policy @pol on flow @fl
 * between @saddr and @daddr, or start an acquire for one.
 *
 * The by-destination hash is searched twice: first the chain keyed by the
 * exact address pair, then the chain keyed by a zeroed source address.
 * If neither pass yields a usable state, an error or an acquire already
 * in progress, a temporary ACQ state is allocated, handed to the key
 * manager via km_query() and linked into the hash tables.
 *
 * Returns a state with a reference held, or NULL with *@err set.
 * *@err is -EAGAIN when an acquire is already pending, when the
 * reference on the chosen state could not be taken, or when the hash
 * generation changed while the lookup was running.
 */
struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                const struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family, u32 if_id)
{
        /* All-zero address used as the source key for the second pass. */
        static xfrm_address_t saddr_wildcard = { };
        struct net *net = xp_net(pol);
        unsigned int h, h_wildcard;
        struct xfrm_state *x, *x0, *to_put;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;
        u32 mark = pol->mark.v & pol->mark.m;
        unsigned short encap_family = tmpl->encap_family;
        unsigned int sequence;
        struct km_event c;

        /* State whose reference is dropped only after rcu_read_unlock(). */
        to_put = NULL;

        /* Snapshot the hash generation; it is re-checked before returning. */
        sequence = read_seqcount_begin(&xfrm_state_hash_generation);

        rcu_read_lock();
        /* Pass 1: chain keyed by the exact (daddr, saddr) pair. */
        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
                    x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
                        xfrm_state_look_at(pol, x, fl, encap_family,
                                           &best, &acquire_in_progress, &error);
        }
        if (best || acquire_in_progress)
                goto found;

        /* Pass 2: chain keyed by (daddr, zero source); only daddr is compared. */
        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
                    x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
                        xfrm_state_look_at(pol, x, fl, encap_family,
                                           &best, &acquire_in_progress, &error);
        }

found:
        x = best;
        /* Nothing usable, no error and no pending acquire: create one. */
        if (!x && !error && !acquire_in_progress) {
                /*
                 * The template names a specific SPI and a state with that
                 * SPI already exists, yet it was not selected above:
                 * report -EEXIST rather than acquiring a duplicate.
                 */
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
                                              tmpl->id.proto, encap_family)) != NULL) {
                        to_put = x0;
                        error = -EEXIST;
                        goto out;
                }

                c.net = net;
                /* If the KMs have no listeners (yet...), avoid allocating an SA
                 * for each and every packet - garbage collection might not
                 * handle the flood.
                 */
                if (!km_is_alive(&c)) {
                        error = -ESRCH;
                        goto out;
                }

                x = xfrm_state_alloc(net);
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary state matching only
                 * to current session. */
                xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
                memcpy(&x->mark, &pol->mark, sizeof(x->mark));
                x->if_id = if_id;

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
                if (error) {
                        /* Never linked anywhere: mark dead and release below. */
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        /* Link the ACQ state into the all-list and hash tables. */
                        spin_lock_bh(&net->xfrm.xfrm_state_lock);
                        x->km.state = XFRM_STATE_ACQ;
                        list_add(&x->km.all, &net->xfrm.state_all);
                        /* h still holds the exact-pair bydst bucket from pass 1. */
                        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
                        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
                                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                        }
                        /* Arm the timer with the acquire-expiry sysctl value. */
                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                        net->xfrm.state_num++;
                        /* A non-NULL next pointer means the bucket was not empty. */
                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x) {
                /* Take the caller's reference; fails if the refcount hit zero. */
                if (!xfrm_state_hold_rcu(x)) {
                        *err = -EAGAIN;
                        x = NULL;
                }
        } else {
                *err = acquire_in_progress ? -EAGAIN : error;
        }
        rcu_read_unlock();
        if (to_put)
                xfrm_state_put(to_put);

        /* The hash generation moved during the lookup: discard the result. */
        if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
                *err = -EAGAIN;
                if (x) {
                        xfrm_state_put(x);
                        x = NULL;
                }
        }

        return x;
}
1076
1077 struct xfrm_state *
1078 xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
1079                     xfrm_address_t *daddr, xfrm_address_t *saddr,
1080                     unsigned short family, u8 mode, u8 proto, u32 reqid)
1081 {
1082         unsigned int h;
1083         struct xfrm_state *rx = NULL, *x = NULL;
1084
1085         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1086         h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1087         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1088                 if (x->props.family == family &&
1089                     x->props.reqid == reqid &&
1090                     (mark & x->mark.m) == x->mark.v &&
1091                     x->if_id == if_id &&
1092                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
1093                     xfrm_state_addr_check(x, daddr, saddr, family) &&
1094                     mode == x->props.mode &&
1095                     proto == x->id.proto &&
1096                     x->km.state == XFRM_STATE_VALID) {
1097                         rx = x;
1098                         break;
1099                 }
1100         }
1101
1102         if (rx)
1103                 xfrm_state_hold(rx);
1104         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1105
1106
1107         return rx;
1108 }
1109 EXPORT_SYMBOL(xfrm_stateonly_find);
1110
1111 struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1112                                               unsigned short family)
1113 {
1114         struct xfrm_state *x;
1115         struct xfrm_state_walk *w;
1116
1117         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1118         list_for_each_entry(w, &net->xfrm.state_all, all) {
1119                 x = container_of(w, struct xfrm_state, km);
1120                 if (x->props.family != family ||
1121                         x->id.spi != spi)
1122                         continue;
1123
1124                 xfrm_state_hold(x);
1125                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1126                 return x;
1127         }
1128         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1129         return NULL;
1130 }
1131 EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1132
1133 static void __xfrm_state_insert(struct xfrm_state *x)
1134 {
1135         struct net *net = xs_net(x);
1136         unsigned int h;
1137
1138         list_add(&x->km.all, &net->xfrm.state_all);
1139
1140         h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1141                           x->props.reqid, x->props.family);
1142         hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1143
1144         h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1145         hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1146
1147         if (x->id.spi) {
1148                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1149                                   x->props.family);
1150
1151                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1152         }
1153
1154         tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1155         if (x->replay_maxage)
1156                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1157
1158         net->xfrm.state_num++;
1159
1160         xfrm_hash_grow_check(net, x->bydst.next != NULL);
1161 }
1162
1163 /* net->xfrm.xfrm_state_lock is held */
1164 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1165 {
1166         struct net *net = xs_net(xnew);
1167         unsigned short family = xnew->props.family;
1168         u32 reqid = xnew->props.reqid;
1169         struct xfrm_state *x;
1170         unsigned int h;
1171         u32 mark = xnew->mark.v & xnew->mark.m;
1172         u32 if_id = xnew->if_id;
1173
1174         h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1175         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1176                 if (x->props.family     == family &&
1177                     x->props.reqid      == reqid &&
1178                     x->if_id            == if_id &&
1179                     (mark & x->mark.m) == x->mark.v &&
1180                     xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1181                     xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1182                         x->genid++;
1183         }
1184 }
1185
1186 void xfrm_state_insert(struct xfrm_state *x)
1187 {
1188         struct net *net = xs_net(x);
1189
1190         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1191         __xfrm_state_bump_genids(x);
1192         __xfrm_state_insert(x);
1193         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1194 }
1195 EXPORT_SYMBOL(xfrm_state_insert);
1196
1197 /* net->xfrm.xfrm_state_lock is held */
1198 static struct xfrm_state *__find_acq_core(struct net *net,
1199                                           const struct xfrm_mark *m,
1200                                           unsigned short family, u8 mode,
1201                                           u32 reqid, u32 if_id, u8 proto,
1202                                           const xfrm_address_t *daddr,
1203                                           const xfrm_address_t *saddr,
1204                                           int create)
1205 {
1206         unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1207         struct xfrm_state *x;
1208         u32 mark = m->v & m->m;
1209
1210         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1211                 if (x->props.reqid  != reqid ||
1212                     x->props.mode   != mode ||
1213                     x->props.family != family ||
1214                     x->km.state     != XFRM_STATE_ACQ ||
1215                     x->id.spi       != 0 ||
1216                     x->id.proto     != proto ||
1217                     (mark & x->mark.m) != x->mark.v ||
1218                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1219                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
1220                         continue;
1221
1222                 xfrm_state_hold(x);
1223                 return x;
1224         }
1225
1226         if (!create)
1227                 return NULL;
1228
1229         x = xfrm_state_alloc(net);
1230         if (likely(x)) {
1231                 switch (family) {
1232                 case AF_INET:
1233                         x->sel.daddr.a4 = daddr->a4;
1234                         x->sel.saddr.a4 = saddr->a4;
1235                         x->sel.prefixlen_d = 32;
1236                         x->sel.prefixlen_s = 32;
1237                         x->props.saddr.a4 = saddr->a4;
1238                         x->id.daddr.a4 = daddr->a4;
1239                         break;
1240
1241                 case AF_INET6:
1242                         x->sel.daddr.in6 = daddr->in6;
1243                         x->sel.saddr.in6 = saddr->in6;
1244                         x->sel.prefixlen_d = 128;
1245                         x->sel.prefixlen_s = 128;
1246                         x->props.saddr.in6 = saddr->in6;
1247                         x->id.daddr.in6 = daddr->in6;
1248                         break;
1249                 }
1250
1251                 x->km.state = XFRM_STATE_ACQ;
1252                 x->id.proto = proto;
1253                 x->props.family = family;
1254                 x->props.mode = mode;
1255                 x->props.reqid = reqid;
1256                 x->if_id = if_id;
1257                 x->mark.v = m->v;
1258                 x->mark.m = m->m;
1259                 x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1260                 xfrm_state_hold(x);
1261                 tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1262                 list_add(&x->km.all, &net->xfrm.state_all);
1263                 hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1264                 h = xfrm_src_hash(net, daddr, saddr, family);
1265                 hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1266
1267                 net->xfrm.state_num++;
1268
1269                 xfrm_hash_grow_check(net, x->bydst.next != NULL);
1270         }
1271
1272         return x;
1273 }
1274
1275 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1276
/*
 * Add a new state @x to its namespace's tables.
 *
 * Fails with -EEXIST when a state with the same identity is already
 * hashed. Otherwise @x is inserted, and any matching ACQ placeholder
 * (found by km sequence number or by the acquire key) is deleted after
 * the state lock has been released.
 *
 * Returns 0 on success or -EEXIST.
 */
int xfrm_state_add(struct xfrm_state *x)
{
        struct net *net = xs_net(x);
        struct xfrm_state *x1, *to_put;
        int family;
        int err;
        u32 mark = x->mark.v & x->mark.m;
        /* Non-zero when @x's protocol is matched by IPSEC_PROTO_ANY; selects SPI-keyed lookup. */
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

        family = x->props.family;

        /* Reference to drop once the state lock has been released. */
        to_put = NULL;

        spin_lock_bh(&net->xfrm.xfrm_state_lock);

        /* Refuse to add a duplicate of an existing state. */
        x1 = __xfrm_state_locate(x, use_spi, family);
        if (x1) {
                to_put = x1;
                x1 = NULL;
                err = -EEXIST;
                goto out;
        }

        /*
         * Look for the ACQ placeholder with the same km sequence number;
         * ignore it if its protocol or destination differs from @x.
         */
        if (use_spi && x->km.seq) {
                x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
                if (x1 && ((x1->id.proto != x->id.proto) ||
                    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
                        to_put = x1;
                        x1 = NULL;
                }
        }

        /* Fall back to the acquire key; create == 0, so lookup only. */
        if (use_spi && !x1)
                x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
                                     x->props.reqid, x->if_id, x->id.proto,
                                     &x->id.daddr, &x->props.saddr, 0);

        __xfrm_state_bump_genids(x);
        __xfrm_state_insert(x);
        err = 0;

out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);

        /* x1, if set, is the ACQ placeholder that @x supersedes. */
        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
        }

        if (to_put)
                xfrm_state_put(to_put);

        return err;
}
1331 EXPORT_SYMBOL(xfrm_state_add);
1332
1333 #ifdef CONFIG_XFRM_MIGRATE
1334 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1335                                            struct xfrm_encap_tmpl *encap)
1336 {
1337         struct net *net = xs_net(orig);
1338         struct xfrm_state *x = xfrm_state_alloc(net);
1339         if (!x)
1340                 goto out;
1341
1342         memcpy(&x->id, &orig->id, sizeof(x->id));
1343         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1344         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1345         x->props.mode = orig->props.mode;
1346         x->props.replay_window = orig->props.replay_window;
1347         x->props.reqid = orig->props.reqid;
1348         x->props.family = orig->props.family;
1349         x->props.saddr = orig->props.saddr;
1350
1351         if (orig->aalg) {
1352                 x->aalg = xfrm_algo_auth_clone(orig->aalg);
1353                 if (!x->aalg)
1354                         goto error;
1355         }
1356         x->props.aalgo = orig->props.aalgo;
1357
1358         if (orig->aead) {
1359                 x->aead = xfrm_algo_aead_clone(orig->aead);
1360                 x->geniv = orig->geniv;
1361                 if (!x->aead)
1362                         goto error;
1363         }
1364         if (orig->ealg) {
1365                 x->ealg = xfrm_algo_clone(orig->ealg);
1366                 if (!x->ealg)
1367                         goto error;
1368         }
1369         x->props.ealgo = orig->props.ealgo;
1370
1371         if (orig->calg) {
1372                 x->calg = xfrm_algo_clone(orig->calg);
1373                 if (!x->calg)
1374                         goto error;
1375         }
1376         x->props.calgo = orig->props.calgo;
1377
1378         if (encap || orig->encap) {
1379                 if (encap)
1380                         x->encap = kmemdup(encap, sizeof(*x->encap),
1381                                         GFP_KERNEL);
1382                 else
1383                         x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1384                                         GFP_KERNEL);
1385
1386                 if (!x->encap)
1387                         goto error;
1388         }
1389
1390         if (orig->coaddr) {
1391                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1392                                     GFP_KERNEL);
1393                 if (!x->coaddr)
1394                         goto error;
1395         }
1396
1397         if (orig->replay_esn) {
1398                 if (xfrm_replay_clone(x, orig))
1399                         goto error;
1400         }
1401
1402         memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1403
1404         if (xfrm_init_state(x) < 0)
1405                 goto error;
1406
1407         x->props.flags = orig->props.flags;
1408         x->props.extra_flags = orig->props.extra_flags;
1409
1410         x->if_id = orig->if_id;
1411         x->tfcpad = orig->tfcpad;
1412         x->replay_maxdiff = orig->replay_maxdiff;
1413         x->replay_maxage = orig->replay_maxage;
1414         x->curlft.add_time = orig->curlft.add_time;
1415         x->km.state = orig->km.state;
1416         x->km.seq = orig->km.seq;
1417         x->replay = orig->replay;
1418         x->preplay = orig->preplay;
1419
1420         return x;
1421
1422  error:
1423         xfrm_state_put(x);
1424 out:
1425         return NULL;
1426 }
1427
1428 struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1429 {
1430         unsigned int h;
1431         struct xfrm_state *x = NULL;
1432
1433         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1434
1435         if (m->reqid) {
1436                 h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1437                                   m->reqid, m->old_family);
1438                 hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1439                         if (x->props.mode != m->mode ||
1440                             x->id.proto != m->proto)
1441                                 continue;
1442                         if (m->reqid && x->props.reqid != m->reqid)
1443                                 continue;
1444                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1445                                              m->old_family) ||
1446                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1447                                              m->old_family))
1448                                 continue;
1449                         xfrm_state_hold(x);
1450                         break;
1451                 }
1452         } else {
1453                 h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1454                                   m->old_family);
1455                 hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1456                         if (x->props.mode != m->mode ||
1457                             x->id.proto != m->proto)
1458                                 continue;
1459                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1460                                              m->old_family) ||
1461                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1462                                              m->old_family))
1463                                 continue;
1464                         xfrm_state_hold(x);
1465                         break;
1466                 }
1467         }
1468
1469         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1470
1471         return x;
1472 }
1473 EXPORT_SYMBOL(xfrm_migrate_state_find);
1474
1475 struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1476                                       struct xfrm_migrate *m,
1477                                       struct xfrm_encap_tmpl *encap)
1478 {
1479         struct xfrm_state *xc;
1480
1481         xc = xfrm_state_clone(x, encap);
1482         if (!xc)
1483                 return NULL;
1484
1485         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1486         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1487
1488         /* add state */
1489         if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1490                 /* a care is needed when the destination address of the
1491                    state is to be updated as it is a part of triplet */
1492                 xfrm_state_insert(xc);
1493         } else {
1494                 if (xfrm_state_add(xc) < 0)
1495                         goto error;
1496         }
1497
1498         return xc;
1499 error:
1500         xfrm_state_put(xc);
1501         return NULL;
1502 }
1503 EXPORT_SYMBOL(xfrm_state_migrate);
1504 #endif
1505
/*
 * Update the hashed state that has the same identity as @x.
 *
 * If the existing state is an ACQ placeholder, @x is inserted in its
 * place and the placeholder is deleted. Otherwise the updatable parts
 * of @x (encap, care-of address, selector for non-SPI protocols,
 * lifetimes, output mark, if_id) are merged into the existing state and
 * @x itself is marked dead and released.
 *
 * Returns 0 on success, -ESRCH when no matching state exists, -EEXIST
 * when xfrm_state_kern() is true for the existing state, or -EINVAL
 * when the existing state is not VALID or the encapsulation cannot be
 * reconciled.
 */
int xfrm_state_update(struct xfrm_state *x)
{
        struct xfrm_state *x1, *to_put;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
        struct net *net = xs_net(x);

        /* Reference to drop once the state lock has been released. */
        to_put = NULL;

        spin_lock_bh(&net->xfrm.xfrm_state_lock);
        /* x1: existing state with @x's identity, returned with a reference. */
        x1 = __xfrm_state_locate(x, use_spi, x->props.family);

        err = -ESRCH;
        if (!x1)
                goto out;

        if (xfrm_state_kern(x1)) {
                to_put = x1;
                err = -EEXIST;
                goto out;
        }

        /*
         * Placeholder case: install @x itself. Clearing x signals the
         * code after the unlock to delete x1 instead of merging into it.
         */
        if (x1->km.state == XFRM_STATE_ACQ) {
                __xfrm_state_insert(x);
                x = NULL;
        }
        err = 0;

out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);

        if (to_put)
                xfrm_state_put(to_put);

        if (err)
                return err;

        /* @x was inserted above; retire the ACQ placeholder. */
        if (!x) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
                return 0;
        }

        /* Merge path: copy fields from @x into x1 under x1's own lock. */
        err = -EINVAL;
        spin_lock_bh(&x1->lock);
        if (likely(x1->km.state == XFRM_STATE_VALID)) {
                /*
                 * Encapsulation is refreshed only when both states carry
                 * one of the same type; present on one side only, or of
                 * differing type, is rejected.
                 */
                if (x->encap && x1->encap &&
                    x->encap->encap_type == x1->encap->encap_type)
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                else if (x->encap || x1->encap)
                        goto fail;

                if (x->coaddr && x1->coaddr) {
                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
                }
                /* The selector is only replaced for protocols looked up without an SPI. */
                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;

                /* Restart the lifetime timer to fire one second from now. */
                tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);

                /*
                 * Output mark / if_id changes are made under the state
                 * lock and followed by a genid bump of related states.
                 */
                if (x->props.smark.m || x->props.smark.v || x->if_id) {
                        spin_lock_bh(&net->xfrm.xfrm_state_lock);

                        if (x->props.smark.m || x->props.smark.v)
                                x1->props.smark = x->props.smark;

                        if (x->if_id)
                                x1->if_id = x->if_id;

                        __xfrm_state_bump_genids(x1);
                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                }

                err = 0;
                /* @x only carried the new values; mark it dead and release it. */
                x->km.state = XFRM_STATE_DEAD;
                __xfrm_state_put(x);
        }

fail:
        spin_unlock_bh(&x1->lock);

        /* Drop the reference taken by the lookup above. */
        xfrm_state_put(x1);

        return err;
}
1595 EXPORT_SYMBOL(xfrm_state_update);
1596
1597 int xfrm_state_check_expire(struct xfrm_state *x)
1598 {
1599         if (!x->curlft.use_time)
1600                 x->curlft.use_time = ktime_get_real_seconds();
1601
1602         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1603             x->curlft.packets >= x->lft.hard_packet_limit) {
1604                 x->km.state = XFRM_STATE_EXPIRED;
1605                 tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1606                 return -EINVAL;
1607         }
1608
1609         if (!x->km.dying &&
1610             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1611              x->curlft.packets >= x->lft.soft_packet_limit)) {
1612                 x->km.dying = 1;
1613                 km_state_expired(x, 0, 0);
1614         }
1615         return 0;
1616 }
1617 EXPORT_SYMBOL(xfrm_state_check_expire);
1618
1619 struct xfrm_state *
1620 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1621                   u8 proto, unsigned short family)
1622 {
1623         struct xfrm_state *x;
1624
1625         rcu_read_lock();
1626         x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1627         rcu_read_unlock();
1628         return x;
1629 }
1630 EXPORT_SYMBOL(xfrm_state_lookup);
1631
1632 struct xfrm_state *
1633 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1634                          const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1635                          u8 proto, unsigned short family)
1636 {
1637         struct xfrm_state *x;
1638
1639         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1640         x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1641         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1642         return x;
1643 }
1644 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1645
1646 struct xfrm_state *
1647 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1648               u32 if_id, u8 proto, const xfrm_address_t *daddr,
1649               const xfrm_address_t *saddr, int create, unsigned short family)
1650 {
1651         struct xfrm_state *x;
1652
1653         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1654         x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
1655         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1656
1657         return x;
1658 }
1659 EXPORT_SYMBOL(xfrm_find_acq);
1660
1661 #ifdef CONFIG_XFRM_SUB_POLICY
1662 int
1663 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1664                unsigned short family, struct net *net)
1665 {
1666         int i;
1667         int err = 0;
1668         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1669         if (!afinfo)
1670                 return -EAFNOSUPPORT;
1671
1672         spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1673         if (afinfo->tmpl_sort)
1674                 err = afinfo->tmpl_sort(dst, src, n);
1675         else
1676                 for (i = 0; i < n; i++)
1677                         dst[i] = src[i];
1678         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1679         rcu_read_unlock();
1680         return err;
1681 }
1682 EXPORT_SYMBOL(xfrm_tmpl_sort);
1683
1684 int
1685 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1686                 unsigned short family)
1687 {
1688         int i;
1689         int err = 0;
1690         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1691         struct net *net = xs_net(*src);
1692
1693         if (!afinfo)
1694                 return -EAFNOSUPPORT;
1695
1696         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1697         if (afinfo->state_sort)
1698                 err = afinfo->state_sort(dst, src, n);
1699         else
1700                 for (i = 0; i < n; i++)
1701                         dst[i] = src[i];
1702         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1703         rcu_read_unlock();
1704         return err;
1705 }
1706 EXPORT_SYMBOL(xfrm_state_sort);
1707 #endif
1708
1709 /* Silly enough, but I'm lazy to build resolution list */
1710
1711 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1712 {
1713         int i;
1714
1715         for (i = 0; i <= net->xfrm.state_hmask; i++) {
1716                 struct xfrm_state *x;
1717
1718                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1719                         if (x->km.seq == seq &&
1720                             (mark & x->mark.m) == x->mark.v &&
1721                             x->km.state == XFRM_STATE_ACQ) {
1722                                 xfrm_state_hold(x);
1723                                 return x;
1724                         }
1725                 }
1726         }
1727         return NULL;
1728 }
1729
1730 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1731 {
1732         struct xfrm_state *x;
1733
1734         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1735         x = __xfrm_find_acq_byseq(net, mark, seq);
1736         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1737         return x;
1738 }
1739 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1740
1741 u32 xfrm_get_acqseq(void)
1742 {
1743         u32 res;
1744         static atomic_t acqseq;
1745
1746         do {
1747                 res = atomic_inc_return(&acqseq);
1748         } while (!res);
1749
1750         return res;
1751 }
1752 EXPORT_SYMBOL(xfrm_get_acqseq);
1753
1754 int verify_spi_info(u8 proto, u32 min, u32 max)
1755 {
1756         switch (proto) {
1757         case IPPROTO_AH:
1758         case IPPROTO_ESP:
1759                 break;
1760
1761         case IPPROTO_COMP:
1762                 /* IPCOMP spi is 16-bits. */
1763                 if (max >= 0x10000)
1764                         return -EINVAL;
1765                 break;
1766
1767         default:
1768                 return -EINVAL;
1769         }
1770
1771         if (min > max)
1772                 return -EINVAL;
1773
1774         return 0;
1775 }
1776 EXPORT_SYMBOL(verify_spi_info);
1777
1778 int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1779 {
1780         struct net *net = xs_net(x);
1781         unsigned int h;
1782         struct xfrm_state *x0;
1783         int err = -ENOENT;
1784         __be32 minspi = htonl(low);
1785         __be32 maxspi = htonl(high);
1786         u32 mark = x->mark.v & x->mark.m;
1787
1788         spin_lock_bh(&x->lock);
1789         if (x->km.state == XFRM_STATE_DEAD)
1790                 goto unlock;
1791
1792         err = 0;
1793         if (x->id.spi)
1794                 goto unlock;
1795
1796         err = -ENOENT;
1797
1798         if (minspi == maxspi) {
1799                 x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
1800                 if (x0) {
1801                         xfrm_state_put(x0);
1802                         goto unlock;
1803                 }
1804                 x->id.spi = minspi;
1805         } else {
1806                 u32 spi = 0;
1807                 for (h = 0; h < high-low+1; h++) {
1808                         spi = low + prandom_u32()%(high-low+1);
1809                         x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1810                         if (x0 == NULL) {
1811                                 x->id.spi = htonl(spi);
1812                                 break;
1813                         }
1814                         xfrm_state_put(x0);
1815                 }
1816         }
1817         if (x->id.spi) {
1818                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
1819                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1820                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1821                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1822
1823                 err = 0;
1824         }
1825
1826 unlock:
1827         spin_unlock_bh(&x->lock);
1828
1829         return err;
1830 }
1831 EXPORT_SYMBOL(xfrm_alloc_spi);
1832
1833 static bool __xfrm_state_filter_match(struct xfrm_state *x,
1834                                       struct xfrm_address_filter *filter)
1835 {
1836         if (filter) {
1837                 if ((filter->family == AF_INET ||
1838                      filter->family == AF_INET6) &&
1839                     x->props.family != filter->family)
1840                         return false;
1841
1842                 return addr_match(&x->props.saddr, &filter->saddr,
1843                                   filter->splen) &&
1844                        addr_match(&x->id.daddr, &filter->daddr,
1845                                   filter->dplen);
1846         }
1847         return true;
1848 }
1849
1850 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
1851                     int (*func)(struct xfrm_state *, int, void*),
1852                     void *data)
1853 {
1854         struct xfrm_state *state;
1855         struct xfrm_state_walk *x;
1856         int err = 0;
1857
1858         if (walk->seq != 0 && list_empty(&walk->all))
1859                 return 0;
1860
1861         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1862         if (list_empty(&walk->all))
1863                 x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
1864         else
1865                 x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
1866         list_for_each_entry_from(x, &net->xfrm.state_all, all) {
1867                 if (x->state == XFRM_STATE_DEAD)
1868                         continue;
1869                 state = container_of(x, struct xfrm_state, km);
1870                 if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1871                         continue;
1872                 if (!__xfrm_state_filter_match(state, walk->filter))
1873                         continue;
1874                 err = func(state, walk->seq, data);
1875                 if (err) {
1876                         list_move_tail(&walk->all, &x->all);
1877                         goto out;
1878                 }
1879                 walk->seq++;
1880         }
1881         if (walk->seq == 0) {
1882                 err = -ENOENT;
1883                 goto out;
1884         }
1885         list_del_init(&walk->all);
1886 out:
1887         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1888         return err;
1889 }
1890 EXPORT_SYMBOL(xfrm_state_walk);
1891
1892 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1893                           struct xfrm_address_filter *filter)
1894 {
1895         INIT_LIST_HEAD(&walk->all);
1896         walk->proto = proto;
1897         walk->state = XFRM_STATE_DEAD;
1898         walk->seq = 0;
1899         walk->filter = filter;
1900 }
1901 EXPORT_SYMBOL(xfrm_state_walk_init);
1902
1903 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1904 {
1905         kfree(walk->filter);
1906
1907         if (list_empty(&walk->all))
1908                 return;
1909
1910         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1911         list_del(&walk->all);
1912         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1913 }
1914 EXPORT_SYMBOL(xfrm_state_walk_done);
1915
1916 static void xfrm_replay_timer_handler(struct timer_list *t)
1917 {
1918         struct xfrm_state *x = from_timer(x, t, rtimer);
1919
1920         spin_lock(&x->lock);
1921
1922         if (x->km.state == XFRM_STATE_VALID) {
1923                 if (xfrm_aevent_is_on(xs_net(x)))
1924                         x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1925                 else
1926                         x->xflags |= XFRM_TIME_DEFER;
1927         }
1928
1929         spin_unlock(&x->lock);
1930 }
1931
1932 static LIST_HEAD(xfrm_km_list);
1933
1934 void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1935 {
1936         struct xfrm_mgr *km;
1937
1938         rcu_read_lock();
1939         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1940                 if (km->notify_policy)
1941                         km->notify_policy(xp, dir, c);
1942         rcu_read_unlock();
1943 }
1944
1945 void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1946 {
1947         struct xfrm_mgr *km;
1948         rcu_read_lock();
1949         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1950                 if (km->notify)
1951                         km->notify(x, c);
1952         rcu_read_unlock();
1953 }
1954
1955 EXPORT_SYMBOL(km_policy_notify);
1956 EXPORT_SYMBOL(km_state_notify);
1957
1958 void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1959 {
1960         struct km_event c;
1961
1962         c.data.hard = hard;
1963         c.portid = portid;
1964         c.event = XFRM_MSG_EXPIRE;
1965         km_state_notify(x, &c);
1966 }
1967
1968 EXPORT_SYMBOL(km_state_expired);
1969 /*
1970  * We send to all registered managers regardless of failure
1971  * We are happy with one success
1972 */
1973 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1974 {
1975         int err = -EINVAL, acqret;
1976         struct xfrm_mgr *km;
1977
1978         rcu_read_lock();
1979         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1980                 acqret = km->acquire(x, t, pol);
1981                 if (!acqret)
1982                         err = acqret;
1983         }
1984         rcu_read_unlock();
1985         return err;
1986 }
1987 EXPORT_SYMBOL(km_query);
1988
1989 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1990 {
1991         int err = -EINVAL;
1992         struct xfrm_mgr *km;
1993
1994         rcu_read_lock();
1995         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1996                 if (km->new_mapping)
1997                         err = km->new_mapping(x, ipaddr, sport);
1998                 if (!err)
1999                         break;
2000         }
2001         rcu_read_unlock();
2002         return err;
2003 }
2004 EXPORT_SYMBOL(km_new_mapping);
2005
2006 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
2007 {
2008         struct km_event c;
2009
2010         c.data.hard = hard;
2011         c.portid = portid;
2012         c.event = XFRM_MSG_POLEXPIRE;
2013         km_policy_notify(pol, dir, &c);
2014 }
2015 EXPORT_SYMBOL(km_policy_expired);
2016
2017 #ifdef CONFIG_XFRM_MIGRATE
2018 int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2019                const struct xfrm_migrate *m, int num_migrate,
2020                const struct xfrm_kmaddress *k,
2021                const struct xfrm_encap_tmpl *encap)
2022 {
2023         int err = -EINVAL;
2024         int ret;
2025         struct xfrm_mgr *km;
2026
2027         rcu_read_lock();
2028         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2029                 if (km->migrate) {
2030                         ret = km->migrate(sel, dir, type, m, num_migrate, k,
2031                                           encap);
2032                         if (!ret)
2033                                 err = ret;
2034                 }
2035         }
2036         rcu_read_unlock();
2037         return err;
2038 }
2039 EXPORT_SYMBOL(km_migrate);
2040 #endif
2041
2042 int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2043 {
2044         int err = -EINVAL;
2045         int ret;
2046         struct xfrm_mgr *km;
2047
2048         rcu_read_lock();
2049         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2050                 if (km->report) {
2051                         ret = km->report(net, proto, sel, addr);
2052                         if (!ret)
2053                                 err = ret;
2054                 }
2055         }
2056         rcu_read_unlock();
2057         return err;
2058 }
2059 EXPORT_SYMBOL(km_report);
2060
2061 bool km_is_alive(const struct km_event *c)
2062 {
2063         struct xfrm_mgr *km;
2064         bool is_alive = false;
2065
2066         rcu_read_lock();
2067         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2068                 if (km->is_alive && km->is_alive(c)) {
2069                         is_alive = true;
2070                         break;
2071                 }
2072         }
2073         rcu_read_unlock();
2074
2075         return is_alive;
2076 }
2077 EXPORT_SYMBOL(km_is_alive);
2078
2079 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
2080 {
2081         int err;
2082         u8 *data;
2083         struct xfrm_mgr *km;
2084         struct xfrm_policy *pol = NULL;
2085
2086         if (in_compat_syscall())
2087                 return -EOPNOTSUPP;
2088
2089         if (!optval && !optlen) {
2090                 xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
2091                 xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
2092                 __sk_dst_reset(sk);
2093                 return 0;
2094         }
2095
2096         if (optlen <= 0 || optlen > PAGE_SIZE)
2097                 return -EMSGSIZE;
2098
2099         data = memdup_user(optval, optlen);
2100         if (IS_ERR(data))
2101                 return PTR_ERR(data);
2102
2103         err = -EINVAL;
2104         rcu_read_lock();
2105         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2106                 pol = km->compile_policy(sk, optname, data,
2107                                          optlen, &err);
2108                 if (err >= 0)
2109                         break;
2110         }
2111         rcu_read_unlock();
2112
2113         if (err >= 0) {
2114                 xfrm_sk_policy_insert(sk, err, pol);
2115                 xfrm_pol_put(pol);
2116                 __sk_dst_reset(sk);
2117                 err = 0;
2118         }
2119
2120         kfree(data);
2121         return err;
2122 }
2123 EXPORT_SYMBOL(xfrm_user_policy);
2124
2125 static DEFINE_SPINLOCK(xfrm_km_lock);
2126
2127 int xfrm_register_km(struct xfrm_mgr *km)
2128 {
2129         spin_lock_bh(&xfrm_km_lock);
2130         list_add_tail_rcu(&km->list, &xfrm_km_list);
2131         spin_unlock_bh(&xfrm_km_lock);
2132         return 0;
2133 }
2134 EXPORT_SYMBOL(xfrm_register_km);
2135
2136 int xfrm_unregister_km(struct xfrm_mgr *km)
2137 {
2138         spin_lock_bh(&xfrm_km_lock);
2139         list_del_rcu(&km->list);
2140         spin_unlock_bh(&xfrm_km_lock);
2141         synchronize_rcu();
2142         return 0;
2143 }
2144 EXPORT_SYMBOL(xfrm_unregister_km);
2145
2146 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2147 {
2148         int err = 0;
2149
2150         if (WARN_ON(afinfo->family >= NPROTO))
2151                 return -EAFNOSUPPORT;
2152
2153         spin_lock_bh(&xfrm_state_afinfo_lock);
2154         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2155                 err = -EEXIST;
2156         else
2157                 rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2158         spin_unlock_bh(&xfrm_state_afinfo_lock);
2159         return err;
2160 }
2161 EXPORT_SYMBOL(xfrm_state_register_afinfo);
2162
2163 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2164 {
2165         int err = 0, family = afinfo->family;
2166
2167         if (WARN_ON(family >= NPROTO))
2168                 return -EAFNOSUPPORT;
2169
2170         spin_lock_bh(&xfrm_state_afinfo_lock);
2171         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2172                 if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2173                         err = -EINVAL;
2174                 else
2175                         RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2176         }
2177         spin_unlock_bh(&xfrm_state_afinfo_lock);
2178         synchronize_rcu();
2179         return err;
2180 }
2181 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2182
2183 struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2184 {
2185         if (unlikely(family >= NPROTO))
2186                 return NULL;
2187
2188         return rcu_dereference(xfrm_state_afinfo[family]);
2189 }
2190
2191 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2192 {
2193         struct xfrm_state_afinfo *afinfo;
2194         if (unlikely(family >= NPROTO))
2195                 return NULL;
2196         rcu_read_lock();
2197         afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2198         if (unlikely(!afinfo))
2199                 rcu_read_unlock();
2200         return afinfo;
2201 }
2202
2203 void xfrm_flush_gc(void)
2204 {
2205         flush_work(&xfrm_state_gc_work);
2206 }
2207 EXPORT_SYMBOL(xfrm_flush_gc);
2208
2209 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2210 void xfrm_state_delete_tunnel(struct xfrm_state *x)
2211 {
2212         if (x->tunnel) {
2213                 struct xfrm_state *t = x->tunnel;
2214
2215                 if (atomic_read(&t->tunnel_users) == 2)
2216                         xfrm_state_delete(t);
2217                 atomic_dec(&t->tunnel_users);
2218                 xfrm_state_put(t);
2219                 x->tunnel = NULL;
2220         }
2221 }
2222 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2223
2224 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
2225 {
2226         const struct xfrm_type *type = READ_ONCE(x->type);
2227
2228         if (x->km.state == XFRM_STATE_VALID &&
2229             type && type->get_mtu)
2230                 return type->get_mtu(x, mtu);
2231
2232         return mtu - x->props.header_len;
2233 }
2234
2235 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
2236 {
2237         struct xfrm_state_afinfo *afinfo;
2238         struct xfrm_mode *inner_mode;
2239         int family = x->props.family;
2240         int err;
2241
2242         err = -EAFNOSUPPORT;
2243         afinfo = xfrm_state_get_afinfo(family);
2244         if (!afinfo)
2245                 goto error;
2246
2247         err = 0;
2248         if (afinfo->init_flags)
2249                 err = afinfo->init_flags(x);
2250
2251         rcu_read_unlock();
2252
2253         if (err)
2254                 goto error;
2255
2256         err = -EPROTONOSUPPORT;
2257
2258         if (x->sel.family != AF_UNSPEC) {
2259                 inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2260                 if (inner_mode == NULL)
2261                         goto error;
2262
2263                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2264                     family != x->sel.family) {
2265                         xfrm_put_mode(inner_mode);
2266                         goto error;
2267                 }
2268
2269                 x->inner_mode = inner_mode;
2270         } else {
2271                 struct xfrm_mode *inner_mode_iaf;
2272                 int iafamily = AF_INET;
2273
2274                 inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2275                 if (inner_mode == NULL)
2276                         goto error;
2277
2278                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2279                         xfrm_put_mode(inner_mode);
2280                         goto error;
2281                 }
2282                 x->inner_mode = inner_mode;
2283
2284                 if (x->props.family == AF_INET)
2285                         iafamily = AF_INET6;
2286
2287                 inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2288                 if (inner_mode_iaf) {
2289                         if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2290                                 x->inner_mode_iaf = inner_mode_iaf;
2291                         else
2292                                 xfrm_put_mode(inner_mode_iaf);
2293                 }
2294         }
2295
2296         x->type = xfrm_get_type(x->id.proto, family);
2297         if (x->type == NULL)
2298                 goto error;
2299
2300         x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
2301
2302         err = x->type->init_state(x);
2303         if (err)
2304                 goto error;
2305
2306         x->outer_mode = xfrm_get_mode(x->props.mode, family);
2307         if (x->outer_mode == NULL) {
2308                 err = -EPROTONOSUPPORT;
2309                 goto error;
2310         }
2311
2312         if (init_replay) {
2313                 err = xfrm_init_replay(x);
2314                 if (err)
2315                         goto error;
2316         }
2317
2318 error:
2319         return err;
2320 }
2321
2322 EXPORT_SYMBOL(__xfrm_init_state);
2323
2324 int xfrm_init_state(struct xfrm_state *x)
2325 {
2326         int err;
2327
2328         err = __xfrm_init_state(x, true, false);
2329         if (!err)
2330                 x->km.state = XFRM_STATE_VALID;
2331
2332         return err;
2333 }
2334
2335 EXPORT_SYMBOL(xfrm_init_state);
2336
2337 int __net_init xfrm_state_init(struct net *net)
2338 {
2339         unsigned int sz;
2340
2341         if (net_eq(net, &init_net))
2342                 xfrm_state_cache = KMEM_CACHE(xfrm_state,
2343                                               SLAB_HWCACHE_ALIGN | SLAB_PANIC);
2344
2345         INIT_LIST_HEAD(&net->xfrm.state_all);
2346
2347         sz = sizeof(struct hlist_head) * 8;
2348
2349         net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2350         if (!net->xfrm.state_bydst)
2351                 goto out_bydst;
2352         net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2353         if (!net->xfrm.state_bysrc)
2354                 goto out_bysrc;
2355         net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2356         if (!net->xfrm.state_byspi)
2357                 goto out_byspi;
2358         net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2359
2360         net->xfrm.state_num = 0;
2361         INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2362         spin_lock_init(&net->xfrm.xfrm_state_lock);
2363         return 0;
2364
2365 out_byspi:
2366         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2367 out_bysrc:
2368         xfrm_hash_free(net->xfrm.state_bydst, sz);
2369 out_bydst:
2370         return -ENOMEM;
2371 }
2372
2373 void xfrm_state_fini(struct net *net)
2374 {
2375         unsigned int sz;
2376
2377         flush_work(&net->xfrm.state_hash_work);
2378         xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
2379         flush_work(&xfrm_state_gc_work);
2380
2381         WARN_ON(!list_empty(&net->xfrm.state_all));
2382
2383         sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2384         WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2385         xfrm_hash_free(net->xfrm.state_byspi, sz);
2386         WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2387         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2388         WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2389         xfrm_hash_free(net->xfrm.state_bydst, sz);
2390 }
2391
2392 #ifdef CONFIG_AUDITSYSCALL
2393 static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2394                                      struct audit_buffer *audit_buf)
2395 {
2396         struct xfrm_sec_ctx *ctx = x->security;
2397         u32 spi = ntohl(x->id.spi);
2398
2399         if (ctx)
2400                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2401                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2402
2403         switch (x->props.family) {
2404         case AF_INET:
2405                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2406                                  &x->props.saddr.a4, &x->id.daddr.a4);
2407                 break;
2408         case AF_INET6:
2409                 audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2410                                  x->props.saddr.a6, x->id.daddr.a6);
2411                 break;
2412         }
2413
2414         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2415 }
2416
2417 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2418                                       struct audit_buffer *audit_buf)
2419 {
2420         const struct iphdr *iph4;
2421         const struct ipv6hdr *iph6;
2422
2423         switch (family) {
2424         case AF_INET:
2425                 iph4 = ip_hdr(skb);
2426                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2427                                  &iph4->saddr, &iph4->daddr);
2428                 break;
2429         case AF_INET6:
2430                 iph6 = ipv6_hdr(skb);
2431                 audit_log_format(audit_buf,
2432                                  " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2433                                  &iph6->saddr, &iph6->daddr,
2434                                  iph6->flow_lbl[0] & 0x0f,
2435                                  iph6->flow_lbl[1],
2436                                  iph6->flow_lbl[2]);
2437                 break;
2438         }
2439 }
2440
2441 void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2442 {
2443         struct audit_buffer *audit_buf;
2444
2445         audit_buf = xfrm_audit_start("SAD-add");
2446         if (audit_buf == NULL)
2447                 return;
2448         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2449         xfrm_audit_helper_sainfo(x, audit_buf);
2450         audit_log_format(audit_buf, " res=%u", result);
2451         audit_log_end(audit_buf);
2452 }
2453 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2454
2455 void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
2456 {
2457         struct audit_buffer *audit_buf;
2458
2459         audit_buf = xfrm_audit_start("SAD-delete");
2460         if (audit_buf == NULL)
2461                 return;
2462         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2463         xfrm_audit_helper_sainfo(x, audit_buf);
2464         audit_log_format(audit_buf, " res=%u", result);
2465         audit_log_end(audit_buf);
2466 }
2467 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2468
2469 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2470                                       struct sk_buff *skb)
2471 {
2472         struct audit_buffer *audit_buf;
2473         u32 spi;
2474
2475         audit_buf = xfrm_audit_start("SA-replay-overflow");
2476         if (audit_buf == NULL)
2477                 return;
2478         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2479         /* don't record the sequence number because it's inherent in this kind
2480          * of audit message */
2481         spi = ntohl(x->id.spi);
2482         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2483         audit_log_end(audit_buf);
2484 }
2485 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2486
2487 void xfrm_audit_state_replay(struct xfrm_state *x,
2488                              struct sk_buff *skb, __be32 net_seq)
2489 {
2490         struct audit_buffer *audit_buf;
2491         u32 spi;
2492
2493         audit_buf = xfrm_audit_start("SA-replayed-pkt");
2494         if (audit_buf == NULL)
2495                 return;
2496         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2497         spi = ntohl(x->id.spi);
2498         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2499                          spi, spi, ntohl(net_seq));
2500         audit_log_end(audit_buf);
2501 }
2502 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2503
2504 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2505 {
2506         struct audit_buffer *audit_buf;
2507
2508         audit_buf = xfrm_audit_start("SA-notfound");
2509         if (audit_buf == NULL)
2510                 return;
2511         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2512         audit_log_end(audit_buf);
2513 }
2514 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2515
2516 void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2517                                __be32 net_spi, __be32 net_seq)
2518 {
2519         struct audit_buffer *audit_buf;
2520         u32 spi;
2521
2522         audit_buf = xfrm_audit_start("SA-notfound");
2523         if (audit_buf == NULL)
2524                 return;
2525         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2526         spi = ntohl(net_spi);
2527         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2528                          spi, spi, ntohl(net_seq));
2529         audit_log_end(audit_buf);
2530 }
2531 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2532
2533 void xfrm_audit_state_icvfail(struct xfrm_state *x,
2534                               struct sk_buff *skb, u8 proto)
2535 {
2536         struct audit_buffer *audit_buf;
2537         __be32 net_spi;
2538         __be32 net_seq;
2539
2540         audit_buf = xfrm_audit_start("SA-icv-failure");
2541         if (audit_buf == NULL)
2542                 return;
2543         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2544         if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2545                 u32 spi = ntohl(net_spi);
2546                 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2547                                  spi, spi, ntohl(net_seq));
2548         }
2549         audit_log_end(audit_buf);
2550 }
2551 EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2552 #endif /* CONFIG_AUDITSYSCALL */