diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 000c2fb462d..b77eb56a87e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -40,7 +40,7 @@
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
-DEFINE_RWLOCK(nf_conntrack_lock);
+DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
 /* nf_conntrack_standalone needs this */
@@ -73,15 +73,19 @@ static unsigned int nf_conntrack_hash_rnd;
 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 				  unsigned int size, unsigned int rnd)
 {
-	unsigned int a, b;
-
-	a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
-		   (tuple->src.l3num << 16) | tuple->dst.protonum);
-	b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
-		   ((__force __u16)tuple->src.u.all << 16) |
-		    (__force __u16)tuple->dst.u.all);
-
-	return jhash_2words(a, b, rnd) % size;
+	unsigned int n;
+	u_int32_t h;
+
+	/* The direction must be ignored, so we hash everything up to the
+	 * destination ports (which is a multiple of 4) and treat the last
+	 * three bytes manually.
+	 */
+	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
+	h = jhash2((u32 *)tuple, n,
+		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
+			  tuple->dst.protonum));
+
+	return ((u64)h * size) >> 32;
 }
 
 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
@@ -166,8 +170,8 @@ static void
 clean_from_lists(struct nf_conn *ct)
 {
 	pr_debug("clean_from_lists(%p)\n", ct);
-	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
-	hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
+	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -199,7 +203,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	rcu_read_unlock();
 
-	write_lock_bh(&nf_conntrack_lock);
+	spin_lock_bh(&nf_conntrack_lock);
 	/* Expectations will have been removed in clean_from_lists,
 	 * except TFTP can create an expectation on the first packet,
 	 * before connection is in the list, so we need to clean here,
@@ -213,7 +217,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	}
 	NF_CT_STAT_INC(delete);
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 
 	if (ct->master)
 		nf_ct_put(ct->master);
@@ -236,31 +240,35 @@ static void death_by_timeout(unsigned long ul_conntrack)
 		rcu_read_unlock();
 	}
 
-	write_lock_bh(&nf_conntrack_lock);
+	spin_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings.
*/ NF_CT_STAT_INC(delete_list); clean_from_lists(ct); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + /* Disable BHs the entire time since we normally need to disable them + * at least once for the stats anyway. + */ + local_bh_disable(); + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + if (nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); + local_bh_enable(); return h; } NF_CT_STAT_INC(searched); } + local_bh_enable(); return NULL; } @@ -271,12 +279,16 @@ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; - read_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_find(tuple, NULL); - if (h) - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); - read_unlock_bh(&nf_conntrack_lock); + rcu_read_lock(); + h = __nf_conntrack_find(tuple); + if (h) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + h = NULL; + } + rcu_read_unlock(); return h; } @@ -286,10 +298,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); - hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &nf_conntrack_hash[hash]); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -299,9 +311,9 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); @@ -338,7 +350,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're @@ -364,7 +376,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, skb); @@ -379,7 +391,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) out: NF_CT_STAT_INC(insert_failed); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); @@ -391,12 +403,25 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct 
nf_conn *ignored_conntrack) { struct nf_conntrack_tuple_hash *h; + struct hlist_node *n; + unsigned int hash = hash_conntrack(tuple); - read_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_find(tuple, ignored_conntrack); - read_unlock_bh(&nf_conntrack_lock); + /* Disable BHs the entire time since we need to disable them at + * least once for the stats anyway. + */ + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple)) { + NF_CT_STAT_INC(found); + rcu_read_unlock_bh(); + return 1; + } + NF_CT_STAT_INC(searched); + } + rcu_read_unlock_bh(); - return h != NULL; + return 0; } EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); @@ -404,7 +429,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static int early_drop(unsigned int hash) +static noinline int early_drop(unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; @@ -413,21 +438,23 @@ static int early_drop(unsigned int hash) unsigned int i, cnt = 0; int dropped = 0; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) ct = tmp; cnt++; } + + if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + ct = NULL; if (ct || cnt >= NF_CT_EVICTION_RANGE) break; hash = (hash + 1) % nf_conntrack_htable_size; } - if (ct) - atomic_inc(&ct->ct_general.use); - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); if (!ct) return dropped; @@ -444,7 +471,7 @@ static int early_drop(unsigned int hash) struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl) { - struct nf_conn *conntrack = NULL; + struct nf_conn *ct = NULL; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -454,8 +481,8 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* We don't want any race condition at early drop stage */ atomic_inc(&nf_conntrack_count); - if (nf_conntrack_max - && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { + if (nf_conntrack_max && + unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); if (!early_drop(hash)) { atomic_dec(&nf_conntrack_count); @@ -467,30 +494,37 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); - if (conntrack == NULL) { + ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); + if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&nf_conntrack_count); return ERR_PTR(-ENOMEM); } - atomic_set(&conntrack->ct_general.use, 1); - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; + atomic_set(&ct->ct_general.use, 1); + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ - setup_timer(&conntrack->timeout, death_by_timeout, - (unsigned long)conntrack); + setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); + 
INIT_RCU_HEAD(&ct->rcu); - return conntrack; + return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); -void nf_conntrack_free(struct nf_conn *conntrack) +static void nf_conntrack_free_rcu(struct rcu_head *head) { - nf_ct_ext_free(conntrack); - kmem_cache_free(nf_conntrack_cachep, conntrack); + struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + + nf_ct_ext_free(ct); + kmem_cache_free(nf_conntrack_cachep, ct); atomic_dec(&nf_conntrack_count); } + +void nf_conntrack_free(struct nf_conn *ct) +{ + call_rcu(&ct->rcu, nf_conntrack_free_rcu); +} EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification @@ -502,7 +536,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, struct sk_buff *skb, unsigned int dataoff) { - struct nf_conn *conntrack; + struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; @@ -512,46 +546,46 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - conntrack = nf_conntrack_alloc(tuple, &repl_tuple); - if (conntrack == NULL || IS_ERR(conntrack)) { + ct = nf_conntrack_alloc(tuple, &repl_tuple); + if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); - return (struct nf_conntrack_tuple_hash *)conntrack; + return (struct nf_conntrack_tuple_hash *)ct; } - if (!l4proto->new(conntrack, skb, dataoff)) { - nf_conntrack_free(conntrack); + if (!l4proto->new(ct, skb, dataoff)) { + nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; } - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, exp); + ct, exp); /* Welcome, Mr. Bond. We've been expecting you... */ - __set_bit(IPS_EXPECTED_BIT, &conntrack->status); - conntrack->master = exp->master; + __set_bit(IPS_EXPECTED_BIT, &ct->status); + ct->master = exp->master; if (exp->helper) { - help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK - conntrack->mark = exp->master->mark; + ct->mark = exp->master->mark; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK - conntrack->secmark = exp->master->secmark; + ct->secmark = exp->master->secmark; #endif - nf_conntrack_get(&conntrack->master->ct_general); + nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(expect_new); } else { struct nf_conntrack_helper *helper; helper = __nf_ct_helper_find(&repl_tuple); if (helper) { - help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, helper); } @@ -559,18 +593,17 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, } /* Overload tuple linked list to put us in unconfirmed list. 
*/ - hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); if (exp) { if (exp->expectfn) - exp->expectfn(conntrack, exp); + exp->expectfn(ct, exp); nf_ct_expect_put(exp); } - return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; + return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ @@ -729,7 +762,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; - write_lock_bh(&nf_conntrack_lock); /* Should be unconfirmed, so not in hash table yet */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); @@ -738,8 +770,9 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (ct->master || (help && help->expecting != 0)) - goto out; + return; + rcu_read_lock(); helper = __nf_ct_helper_find(newreply); if (helper == NULL) { if (help) @@ -757,7 +790,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, rcu_assign_pointer(help->helper, helper); out: - write_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -773,13 +806,11 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* Only update if this is not a fixed timeout */ - if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { - write_unlock_bh(&nf_conntrack_lock); - return; - } + if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) + goto acct; /* If not in hash table, timer will not be active yet */ if (!nf_ct_is_confirmed(ct)) { @@ -799,6 +830,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } } +acct: #ifdef CONFIG_NF_CT_ACCT if (do_acct) { ct->counters[CTINFO2DIR(ctinfo)].packets++; @@ -811,7 +843,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } #endif - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); /* must be unlocked when calling event cache */ if (event) @@ -831,10 +863,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), - &tuple->src.u.tcp.port); - NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), - &tuple->dst.u.tcp.port); + NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port); + NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port); return 0; nla_put_failure: @@ -854,8 +884,8 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) return -EINVAL; - t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]); - t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]); + t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); + t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); return 0; } @@ -863,7 +893,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); #endif /* Used by ipt_REJECT and ip6t_REJECT. 
*/
-void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -880,15 +910,6 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 	nskb->nfctinfo = ctinfo;
 	nf_conntrack_get(nskb->nfct);
 }
-EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
-
-static inline int
-do_iter(const struct nf_conntrack_tuple_hash *i,
-	int (*iter)(struct nf_conn *i, void *data),
-	void *data)
-{
-	return iter(nf_ct_tuplehash_to_ctrack(i), data);
-}
 
 /* Bring out ya dead! */
 static struct nf_conn *
@@ -899,7 +920,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 	struct nf_conn *ct;
 	struct hlist_node *n;
 
-	write_lock_bh(&nf_conntrack_lock);
+	spin_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
 		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
@@ -912,11 +933,11 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 		if (iter(ct, data))
 			set_bit(IPS_DYING_BIT, &ct->status);
 	}
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 	return NULL;
 found:
 	atomic_inc(&ct->ct_general.use);
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 	return ct;
 }
 
@@ -942,7 +963,7 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size)
+void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
 {
 	if (vmalloced)
 		vfree(hash);
@@ -991,7 +1012,7 @@ void nf_conntrack_cleanup(void)
 	nf_conntrack_expect_fini();
 }
 
-struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
+struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
 {
 	struct hlist_head *hash;
 	unsigned int size, i;
@@ -1016,10 +1037,10 @@ struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced)
 }
 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
-int set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
-	int i, bucket, hashsize, vmalloced;
-	int old_vmalloced, old_size;
+	int i, bucket, vmalloced, old_vmalloced;
+	unsigned int hashsize, old_size;
 	int rnd;
 	struct hlist_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
@@ -1028,7 +1049,7 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	if (!nf_conntrack_htable_size)
 		return param_set_uint(val, kp);
 
-	hashsize = simple_strtol(val, NULL, 0);
+	hashsize = simple_strtoul(val, NULL, 0);
 	if (!hashsize)
 		return -EINVAL;
 
@@ -1040,12 +1061,17 @@ int set_hashsize(const char *val, struct kernel_param *kp)
 	 * use a new random seed */
 	get_random_bytes(&rnd, 4);
 
-	write_lock_bh(&nf_conntrack_lock);
+	/* Lookups in the old hash might happen in parallel, which means we
+	 * might get false negatives during connection lookup. New connections
+	 * created because of a false negative won't make it into the hash
+	 * though since that required taking the lock.
+ */ + spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { while (!hlist_empty(&nf_conntrack_hash[i])) { h = hlist_entry(nf_conntrack_hash[i].first, struct nf_conntrack_tuple_hash, hnode); - hlist_del(&h->hnode); + hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); hlist_add_head(&h->hnode, &hash[bucket]); } @@ -1058,13 +1084,14 @@ int set_hashsize(const char *val, struct kernel_param *kp) nf_conntrack_vmalloc = vmalloced; nf_conntrack_hash = hash; nf_conntrack_hash_rnd = rnd; - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); return 0; } +EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); -module_param_call(hashsize, set_hashsize, param_get_uint, +module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); int __init nf_conntrack_init(void) @@ -1123,7 +1150,7 @@ int __init nf_conntrack_init(void) goto out_fini_expect; /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); + rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); /* Set up fake conntrack:
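
The reworked __hash_conntrack() maps the 32-bit jhash2() result onto a bucket
with ((u64)h * size) >> 32 instead of "h % size": one multiply and one shift in
place of a division on the packet fast path, assuming only that the hash is
roughly uniform over the full 32-bit range (which jhash2() provides). A minimal
userspace sketch of that bucket mapping; hash_bucket() is an illustrative name,
not a kernel symbol:

#include <stdint.h>
#include <stdio.h>

/* Scale a 32-bit hash into [0, size): the high bits of the 64-bit
 * product select the bucket, so no division is needed. */
static uint32_t hash_bucket(uint32_t h, uint32_t size)
{
	return (uint32_t)(((uint64_t)h * size) >> 32);
}

int main(void)
{
	uint32_t size = 16384;	/* example table size */

	printf("%u\n", hash_bucket(0x00000000u, size));	/* 0     */
	printf("%u\n", hash_bucket(0x80000000u, size));	/* 8192  */
	printf("%u\n", hash_bucket(0xffffffffu, size));	/* 16383 */
	return 0;
}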
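The RCU conversion follows one pattern throughout the patch: lookups walk the
chains under rcu_read_lock() with hlist_for_each_entry_rcu(), frees are
deferred through call_rcu(), and nf_conntrack_find_get() and early_drop() take
references with atomic_inc_not_zero(). The conditional increment is what makes
this safe: an RCU reader can still see an entry whose refcount has already
reached zero, and must not resurrect it. A C11 userspace sketch of that
primitive; inc_not_zero() is an illustrative stand-in, not the kernel
implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only if at least one is still held, i.e. never
 * bump a refcount back up from zero. */
static bool inc_not_zero(atomic_int *refcnt)
{
	int old = atomic_load(refcnt);

	while (old != 0) {
		/* On failure "old" is reloaded, so the zero check
		 * above runs again before the next attempt. */
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;
	}
	return false;
}

int main(void)
{
	atomic_int live = 1;	/* entry still referenced */
	atomic_int dead = 0;	/* entry already being freed */

	printf("live: %d\n", inc_not_zero(&live));	/* 1: reference taken */
	printf("dead: %d\n", inc_not_zero(&dead));	/* 0: treat as a miss */
	return 0;
}

When the increment fails, the caller simply reports a miss, which is what
nf_conntrack_find_get() does by returning NULL.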