/*
 * net/sched/cls_flow.c         Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
        struct list_head        filters;
};

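/*
 * Per-filter state: which keys are extracted (keymask/nkeys), how they
 * are reduced to a class id (hash or map mode), and the
 * mask/xor/rshift/addend arithmetic applied in map mode.
 */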
struct flow_filter {
        struct list_head        list;
        struct tcf_exts         exts;
        struct tcf_ematch_tree  ematches;
        u32                     handle;

        u32                     nkeys;
        u32                     keymask;
        u32                     mode;
        u32                     mask;
        u32                     xor;
        u32                     rshift;
        u32                     addend;
        u32                     divisor;
        u32                     baseclass;
};

static u32 flow_hashrnd __read_mostly;
static int flow_hashrnd_initted __read_mostly;

static const struct tcf_ext_map flow_ext_map = {
        .action = TCA_FLOW_ACT,
        .police = TCA_FLOW_POLICE,
};

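/*
 * Fold a kernel pointer into a 32-bit hash input; on 64-bit the upper
 * and lower halves are XORed together.
 */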
static inline u32 addr_fold(void *addr)
{
        unsigned long a = (unsigned long)addr;

        return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}

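/*
 * Address keys: IPv6 contributes only the last 32 bits of the address;
 * unknown protocols fall back to folding the socket or dst pointer.
 */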
static u32 flow_get_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->saddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
        default:
                return addr_fold(skb->sk);
        }
}

static u32 flow_get_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->daddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
        default:
                return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }
}

static u32 flow_get_proto(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ip_hdr(skb)->protocol;
        case __constant_htons(ETH_P_IPV6):
                return ipv6_hdr(skb)->nexthdr;
        default:
                return 0;
        }
}

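/*
 * Transport protocols with 16-bit fields at the start of their header
 * that can serve as port keys (for ESP, the two halves of the SPI).
 */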
static int has_ports(u8 protocol)
{
        switch (protocol) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_UDPLITE:
        case IPPROTO_SCTP:
        case IPPROTO_DCCP:
        case IPPROTO_ESP:
                return 1;
        default:
                return 0;
        }
}

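/*
 * Port keys: IPv4 fragments and portless protocols yield 0, since only
 * the first fragment carries the transport header.  For IPv6 the
 * transport header is assumed to follow the fixed header directly;
 * extension headers are not walked.
 */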
static u32 flow_get_proto_src(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)&iph[1]);
                break;
        }
        default:
                res = addr_fold(skb->sk);
        }

        return res;
}

static u32 flow_get_proto_dst(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
                break;
        }
        default:
                res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }

        return res;
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
        return skb->iif;
}

static u32 flow_get_priority(const struct sk_buff *skb)
{
        return skb->priority;
}

static u32 flow_get_mark(const struct sk_buff *skb)
{
        return skb->mark;
}

static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        return addr_fold(skb->nfct);
#else
        return 0;
#endif
}

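/*
 * CTTUPLE() evaluates to the given conntrack tuple member for the
 * skb's connection, or jumps to a "fallback" label that the caller
 * must provide when the packet is untracked.
 */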
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        enum ip_conntrack_info ctinfo;                                  \
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);                   \
        if (ct == NULL)                                                 \
                goto fallback;                                          \
        ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;                 \
})
#else
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        goto fallback;                                                  \
        0;                                                              \
})
#endif

static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, src.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
        }
fallback:
        return flow_get_src(skb);
}

static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, dst.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
        }
fallback:
        return flow_get_dst(skb);
}

static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, src.u.all));
fallback:
        return flow_get_proto_src(skb);
}

static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
        return flow_get_proto_dst(skb);
}

static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ROUTE
        if (skb->dst)
                return skb->dst->tclassid;
#endif
        return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_uid;
        return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_gid;
        return 0;
}

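/* Dispatch a FLOW_KEY_* identifier to its extraction function. */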
static u32 flow_key_get(const struct sk_buff *skb, int key)
{
        switch (key) {
        case FLOW_KEY_SRC:
                return flow_get_src(skb);
        case FLOW_KEY_DST:
                return flow_get_dst(skb);
        case FLOW_KEY_PROTO:
                return flow_get_proto(skb);
        case FLOW_KEY_PROTO_SRC:
                return flow_get_proto_src(skb);
        case FLOW_KEY_PROTO_DST:
                return flow_get_proto_dst(skb);
        case FLOW_KEY_IIF:
                return flow_get_iif(skb);
        case FLOW_KEY_PRIORITY:
                return flow_get_priority(skb);
        case FLOW_KEY_MARK:
                return flow_get_mark(skb);
        case FLOW_KEY_NFCT:
                return flow_get_nfct(skb);
        case FLOW_KEY_NFCT_SRC:
                return flow_get_nfct_src(skb);
        case FLOW_KEY_NFCT_DST:
                return flow_get_nfct_dst(skb);
        case FLOW_KEY_NFCT_PROTO_SRC:
                return flow_get_nfct_proto_src(skb);
        case FLOW_KEY_NFCT_PROTO_DST:
                return flow_get_nfct_proto_dst(skb);
        case FLOW_KEY_RTCLASSID:
                return flow_get_rtclassid(skb);
        case FLOW_KEY_SKUID:
                return flow_get_skuid(skb);
        case FLOW_KEY_SKGID:
                return flow_get_skgid(skb);
        default:
                WARN_ON(1);
                return 0;
        }
}

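/*
 * Classify a packet: for each filter whose ematches accept the skb,
 * gather the configured keys, reduce them to a class id by jhash
 * (hash mode) or by mask/XOR/rshift/addend arithmetic (map mode),
 * optionally fold the result modulo the divisor, offset it by the
 * baseclass and run the attached extensions.
 */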
static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
                         struct tcf_result *res)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        u32 keymask;
        u32 classid;
        unsigned int n, key;
        int r;

        list_for_each_entry(f, &head->filters, list) {
                u32 keys[f->nkeys];

                if (!tcf_em_tree_match(skb, &f->ematches, NULL))
                        continue;

                keymask = f->keymask;

                for (n = 0; n < f->nkeys; n++) {
                        key = ffs(keymask) - 1;
                        keymask &= ~(1 << key);
                        keys[n] = flow_key_get(skb, key);
                }

                if (f->mode == FLOW_MODE_HASH)
                        classid = jhash2(keys, f->nkeys, flow_hashrnd);
                else {
                        classid = keys[0];
                        classid = (classid & f->mask) ^ f->xor;
                        classid = (classid >> f->rshift) + f->addend;
                }

                if (f->divisor)
                        classid %= f->divisor;

                res->class   = 0;
                res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

                r = tcf_exts_exec(skb, &f->exts, res);
                if (r < 0)
                        continue;
                return r;
        }
        return -1;
}

static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
        [TCA_FLOW_KEYS]         = { .type = NLA_U32 },
        [TCA_FLOW_MODE]         = { .type = NLA_U32 },
        [TCA_FLOW_BASECLASS]    = { .type = NLA_U32 },
        [TCA_FLOW_RSHIFT]       = { .type = NLA_U32 },
        [TCA_FLOW_ADDEND]       = { .type = NLA_U32 },
        [TCA_FLOW_MASK]         = { .type = NLA_U32 },
        [TCA_FLOW_XOR]          = { .type = NLA_U32 },
        [TCA_FLOW_DIVISOR]      = { .type = NLA_U32 },
        [TCA_FLOW_ACT]          = { .type = NLA_NESTED },
        [TCA_FLOW_POLICE]       = { .type = NLA_NESTED },
        [TCA_FLOW_EMATCHES]     = { .type = NLA_NESTED },
};

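/*
 * Create a new filter or update an existing one from netlink
 * attributes.  New filters require a handle and a key set; map mode
 * is only valid with a single key.
 */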
static int flow_change(struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
                       unsigned long *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_FLOW_MAX + 1];
        struct tcf_exts e;
        struct tcf_ematch_tree t;
        unsigned int nkeys = 0;
        u32 baseclass = 0;
        u32 keymask = 0;
        u32 mode;
        int err;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
        if (err < 0)
                return err;

        if (tb[TCA_FLOW_BASECLASS]) {
                baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
                if (TC_H_MIN(baseclass) == 0)
                        return -EINVAL;
        }

        if (tb[TCA_FLOW_KEYS]) {
                keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
                if (fls(keymask) - 1 > FLOW_KEY_MAX)
                        return -EOPNOTSUPP;

                nkeys = hweight32(keymask);
                if (nkeys == 0)
                        return -EINVAL;
        }

        err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
        if (err < 0)
                return err;

        err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
        if (err < 0)
                goto err1;

        f = (struct flow_filter *)*arg;
        if (f != NULL) {
                err = -EINVAL;
                if (f->handle != handle && handle)
                        goto err2;

                mode = f->mode;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;
        } else {
                err = -EINVAL;
                if (!handle)
                        goto err2;
                if (!tb[TCA_FLOW_KEYS])
                        goto err2;

                mode = FLOW_MODE_MAP;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;

                if (TC_H_MAJ(baseclass) == 0)
                        baseclass = TC_H_MAKE(tp->q->handle, baseclass);
                if (TC_H_MIN(baseclass) == 0)
                        baseclass = TC_H_MAKE(baseclass, 1);

                err = -ENOBUFS;
                f = kzalloc(sizeof(*f), GFP_KERNEL);
                if (f == NULL)
                        goto err2;

                f->handle = handle;
                f->mask   = ~0U;
        }

        tcf_exts_change(tp, &f->exts, &e);
        tcf_em_tree_change(tp, &f->ematches, &t);

        tcf_tree_lock(tp);

        if (tb[TCA_FLOW_KEYS]) {
                f->keymask = keymask;
                f->nkeys   = nkeys;
        }

        f->mode = mode;

        if (tb[TCA_FLOW_MASK])
                f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
        if (tb[TCA_FLOW_XOR])
                f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
        if (tb[TCA_FLOW_RSHIFT])
                f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
        if (tb[TCA_FLOW_ADDEND])
                f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);

        if (tb[TCA_FLOW_DIVISOR])
                f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
        if (baseclass)
                f->baseclass = baseclass;

        if (*arg == 0)
                list_add_tail(&f->list, &head->filters);

        tcf_tree_unlock(tp);

        *arg = (unsigned long)f;
        return 0;

err2:
        tcf_em_tree_destroy(tp, &t);
err1:
        tcf_exts_destroy(tp, &e);
        return err;
}

static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
        tcf_exts_destroy(tp, &f->exts);
        tcf_em_tree_destroy(tp, &f->ematches);
        kfree(f);
}

static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct flow_filter *f = (struct flow_filter *)arg;

        tcf_tree_lock(tp);
        list_del(&f->list);
        tcf_tree_unlock(tp);
        flow_destroy_filter(tp, f);
        return 0;
}

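/*
 * Set up the per-tp filter list and, on first use, the global hash
 * seed shared by all flow filters.
 */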
static int flow_init(struct tcf_proto *tp)
{
        struct flow_head *head;

        if (!flow_hashrnd_initted) {
                get_random_bytes(&flow_hashrnd, 4);
                flow_hashrnd_initted = 1;
        }

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;
        INIT_LIST_HEAD(&head->filters);
        tp->root = head;
        return 0;
}

static void flow_destroy(struct tcf_proto *tp)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f, *next;

        list_for_each_entry_safe(f, next, &head->filters, list) {
                list_del(&f->list);
                flow_destroy_filter(tp, f);
        }
        kfree(head);
}

static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list)
                if (f->handle == handle)
                        return (unsigned long)f;
        return 0;
}

static void flow_put(struct tcf_proto *tp, unsigned long f)
{
        return;
}

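/*
 * Dump a filter's configuration to userspace; mask and xor are only
 * emitted when they differ from their identity values.
 */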
static int flow_dump(struct tcf_proto *tp, unsigned long fh,
                     struct sk_buff *skb, struct tcmsg *t)
{
        struct flow_filter *f = (struct flow_filter *)fh;
        struct nlattr *nest;

        if (f == NULL)
                return skb->len;

        t->tcm_handle = f->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
        NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);

        if (f->mask != ~0 || f->xor != 0) {
                NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
                NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
        }
        if (f->rshift)
                NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
        if (f->addend)
                NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);

        if (f->divisor)
                NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
        if (f->baseclass)
                NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);

        if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;

        if (f->ematches.hdr.nmatches &&
            tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, nest);
        return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long)f, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

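/*
 * Illustrative iproute2 usage (assumed syntax, see tc-flow(8)):
 * spread flows over 1024 classes by hashing source and destination
 * address:
 *
 *   tc filter add dev eth0 parent 1: protocol ip handle 1 flow \
 *           hash keys src,dst divisor 1024 baseclass 1:1
 */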
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
        .kind           = "flow",
        .classify       = flow_classify,
        .init           = flow_init,
        .destroy        = flow_destroy,
        .change         = flow_change,
        .delete         = flow_delete,
        .get            = flow_get,
        .put            = flow_put,
        .dump           = flow_dump,
        .walk           = flow_walk,
        .owner          = THIS_MODULE,
};

static int __init cls_flow_init(void)
{
        return register_tcf_proto_ops(&cls_flow_ops);
}

static void __exit cls_flow_exit(void)
{
        unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");