X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=net%2Fnetfilter%2Fnf_conntrack_expect.c;h=2191fe008f60d9800f753df273f0d510a3419dd0;hb=56b3d975bbce65f655c5612b4822da671f9fd9b2;hp=c31af29a4439f9266592abe0c162cf97fef291b7;hpb=d6454706c382ab74e2ecad7803c434cc6bd30343;p=linux-2.6-omap-h63xx.git diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c31af29a443..2191fe008f6 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -26,11 +27,20 @@ #include #include -LIST_HEAD(nf_conntrack_expect_list); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); +struct hlist_head *nf_ct_expect_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hash); -struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; -static unsigned int nf_conntrack_expect_next_id; +unsigned int nf_ct_expect_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); + +static unsigned int nf_ct_expect_hash_rnd __read_mostly; +static unsigned int nf_ct_expect_count; +unsigned int nf_ct_expect_max __read_mostly; +static int nf_ct_expect_hash_rnd_initted __read_mostly; +static int nf_ct_expect_vmalloc; + +static struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) @@ -40,60 +50,83 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); - list_del(&exp->list); - NF_CT_STAT_INC(expect_delete); + hlist_del(&exp->hnode); + nf_ct_expect_count--; + + hlist_del(&exp->lnode); master_help->expecting--; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); + + NF_CT_STAT_INC(expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); -static void expectation_timed_out(unsigned long ul_expect) +static void nf_ct_expectation_timed_out(unsigned long ul_expect) { struct nf_conntrack_expect *exp = (void *)ul_expect; write_lock_bh(&nf_conntrack_lock); nf_ct_unlink_expect(exp); write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); +} + +static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) +{ + if (unlikely(!nf_ct_expect_hash_rnd_initted)) { + get_random_bytes(&nf_ct_expect_hash_rnd, 4); + nf_ct_expect_hash_rnd_initted = 1; + } + + return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), + (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | + tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) % + nf_ct_expect_hsize; } struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; + struct hlist_node *n; + unsigned int h; + + if (!nf_ct_expect_count) + return NULL; - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + h = nf_ct_expect_dst_hash(tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; read_lock_bh(&nf_conntrack_lock); - i = __nf_conntrack_expect_find(tuple); + i = __nf_ct_expect_find(tuple); if (i) atomic_inc(&i->use); read_unlock_bh(&nf_conntrack_lock); return i; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *exp; - exp = __nf_conntrack_expect_find(tuple); + exp = __nf_ct_expect_find(tuple); if (!exp) return NULL; @@ -119,17 +152,18 @@ find_expectation(const struct nf_conntrack_tuple *tuple) /* delete all expectations for this conntrack */ void nf_ct_remove_expectations(struct nf_conn *ct) { - struct nf_conntrack_expect *i, *tmp; struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; /* Optimization: most connection never expect any others. */ if (!help || help->expecting == 0) return; - list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { - if (i->master == ct && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); + hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } } @@ -141,25 +175,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, { /* Part covered by intersection of masks must be unequal, otherwise they clash */ - struct nf_conntrack_tuple intersect_mask; + struct nf_conntrack_tuple_mask intersect_mask; int count; - intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; - intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; - intersect_mask.dst.protonum = a->mask.dst.protonum - & b->mask.dst.protonum; for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ intersect_mask.src.u3.all[count] = a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - intersect_mask.dst.u3.all[count] = - a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; - } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); } @@ -168,36 +193,29 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_equal(&a->mask, &b->mask); + && nf_ct_tuple_mask_equal(&a->mask, &b->mask); } /* Generally a bad idea to call this: could have matched already. */ -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { - struct nf_conntrack_expect *i; - write_lock_bh(&nf_conntrack_lock); - /* choose the the oldest expectation to evict */ - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (expect_matches(i, exp) && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(i); - return; - } + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } write_unlock_bh(&nf_conntrack_lock); } -EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); /* We don't increase the master conntrack refcount for non-fulfilled * conntracks. During the conntrack destruction, the expectations are * always killed before the conntrack itself */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) { struct nf_conntrack_expect *new; - new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC); if (!new) return NULL; @@ -205,12 +223,12 @@ struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) atomic_set(&new->use, 1); return new; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc); +EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); -void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, - union nf_conntrack_address *saddr, - union nf_conntrack_address *daddr, - u_int8_t proto, __be16 *src, __be16 *dst) +void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, + union nf_conntrack_address *saddr, + union nf_conntrack_address *daddr, + u_int8_t proto, __be16 *src, __be16 *dst) { int len; @@ -224,8 +242,6 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, exp->helper = NULL; exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; - exp->mask.src.l3num = 0xFFFF; - exp->mask.dst.protonum = 0xFF; if (saddr) { memcpy(&exp->tuple.src.u3, saddr, len); @@ -242,21 +258,6 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3)); } - if (daddr) { - memcpy(&exp->tuple.dst.u3, daddr, len); - if (sizeof(exp->tuple.dst.u3) > len) - /* address needs to be cleared for nf_ct_tuple_equal */ - memset((void *)&exp->tuple.dst.u3 + len, 0x00, - sizeof(exp->tuple.dst.u3) - len); - memset(&exp->mask.dst.u3, 0xFF, len); - if (sizeof(exp->mask.dst.u3) > len) - memset((void *)&exp->mask.dst.u3 + len, 0x00, - sizeof(exp->mask.dst.u3) - len); - } else { - memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3)); - memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3)); - } - if (src) { exp->tuple.src.u.all = (__force u16)*src; exp->mask.src.u.all = 0xFFFF; @@ -265,36 +266,42 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, exp->mask.src.u.all = 0; } - if (dst) { - exp->tuple.dst.u.all = (__force u16)*dst; - exp->mask.dst.u.all = 0xFFFF; - } else { - exp->tuple.dst.u.all = 0; - exp->mask.dst.u.all = 0; - } + memcpy(&exp->tuple.dst.u3, daddr, len); + if (sizeof(exp->tuple.dst.u3) > len) + /* address needs to be cleared for nf_ct_tuple_equal */ + memset((void *)&exp->tuple.dst.u3 + len, 0x00, + sizeof(exp->tuple.dst.u3) - len); + + exp->tuple.dst.u.all = (__force u16)*dst; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_init); +EXPORT_SYMBOL_GPL(nf_ct_expect_init); -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +void nf_ct_expect_put(struct nf_conntrack_expect *exp) { if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(nf_conntrack_expect_cachep, exp); + kmem_cache_free(nf_ct_expect_cachep, exp); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_put); +EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); atomic_inc(&exp->use); + + hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting++; - list_add(&exp->list, &nf_conntrack_expect_list); - setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp); + hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); + nf_ct_expect_count++; + + setup_timer(&exp->timeout, nf_ct_expectation_timed_out, + (unsigned long)exp); exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; add_timer(&exp->timeout); - exp->id = ++nf_conntrack_expect_next_id; + exp->id = ++nf_ct_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -302,16 +309,16 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) /* Race with expectations being used means we could have none to find; OK. */ static void evict_oldest_expect(struct nf_conn *master) { - struct nf_conntrack_expect *i; + struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_expect *exp = NULL; + struct hlist_node *n; - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (i->master == master) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); - } - break; - } + hlist_for_each_entry(exp, n, &master_help->expectations, lnode) + ; /* nothing */ + + if (exp && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } @@ -327,17 +334,24 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) return 1; } -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related(struct nf_conntrack_expect *expect) { struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct hlist_node *n; + unsigned int h; int ret; NF_CT_ASSERT(master_help); write_lock_bh(&nf_conntrack_lock); - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (!master_help->helper) { + ret = -ESHUTDOWN; + goto out; + } + h = nf_ct_expect_dst_hash(&expect->tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -354,57 +368,86 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) master_help->expecting >= master_help->helper->max_expected) evict_oldest_expect(master); - nf_conntrack_expect_insert(expect); - nf_conntrack_expect_event(IPEXP_NEW, expect); + if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net_ratelimit()) + printk(KERN_WARNING + "nf_conntrack: expectation table full"); + ret = -EMFILE; + goto out; + } + + nf_ct_expect_insert(expect); + nf_ct_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&nf_conntrack_lock); return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_related); +EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS -static void *exp_seq_start(struct seq_file *s, loff_t *pos) +struct ct_expect_iter_state { + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct list_head *e = &nf_conntrack_expect_list; - loff_t i; + struct ct_expect_iter_state *st = seq->private; - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) + return nf_ct_expect_hash[st->bucket].first; + } + return NULL; +} - if (list_empty(e)) - return NULL; +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct ct_expect_iter_state *st = seq->private; - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_conntrack_expect_list) + head = head->next; + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) return NULL; + head = nf_ct_expect_hash[st->bucket].first; } - return e; + return head; } -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *e = v; + struct hlist_node *head = ct_expect_get_first(seq); - ++*pos; - e = e->next; + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} - if (e == &nf_conntrack_expect_list) - return NULL; +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_expect_get_idx(seq, *pos); +} - return e; +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); } -static void exp_seq_stop(struct seq_file *s, void *v) +static void exp_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&nf_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) { - struct nf_conntrack_expect *expect = v; + struct nf_conntrack_expect *expect; + struct hlist_node *n = v; + + expect = hlist_entry(n, struct nf_conntrack_expect, hnode); if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) @@ -421,7 +464,7 @@ static int exp_seq_show(struct seq_file *s, void *v) return seq_putc(s, '\n'); } -static struct seq_operations exp_seq_ops = { +static const struct seq_operations exp_seq_ops = { .start = exp_seq_start, .next = exp_seq_next, .stop = exp_seq_stop, @@ -430,14 +473,96 @@ static struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open(file, &exp_seq_ops); + struct seq_file *seq; + struct ct_expect_iter_state *st; + int ret; + + st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &exp_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + memset(st, 0, sizeof(struct ct_expect_iter_state)); + return ret; +out_free: + kfree(st); + return ret; } -const struct file_operations exp_file_ops = { +static const struct file_operations exp_file_ops = { .owner = THIS_MODULE, .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; #endif /* CONFIG_PROC_FS */ + +static int __init exp_proc_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + + proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops); + if (!proc) + return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; +} + +static void exp_proc_remove(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove("nf_conntrack_expect"); +#endif /* CONFIG_PROC_FS */ +} + +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); + +int __init nf_conntrack_expect_init(void) +{ + int err = -ENOMEM; + + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + + nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &nf_ct_expect_vmalloc); + if (nf_ct_expect_hash == NULL) + goto err1; + + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL, NULL); + if (!nf_ct_expect_cachep) + goto err2; + + err = exp_proc_init(); + if (err < 0) + goto err3; + + return 0; + +err3: + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); +err2: + kmem_cache_destroy(nf_ct_expect_cachep); +err1: + return err; +} + +void nf_conntrack_expect_fini(void) +{ + exp_proc_remove(); + kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); +}