X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=include%2Fnet%2Finet_hashtables.h;h=f44bb5c77a70cdb4d5cfdea2688a1a8ec3492b20;hb=b3225190c1991449086395f85a3cc07ac936e8f0;hp=bb619d80f2e2c21d8027a54cb0d634258105a710;hpb=ce6754235b423610e91f5300e1555c2e4ee1c03a;p=linux-2.6-omap-h63xx.git diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bb619d80f2e..f44bb5c77a7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include @@ -39,8 +41,8 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - struct hlist_head chain; - struct hlist_head twchain; + struct hlist_nulls_head chain; + struct hlist_nulls_head twchain; }; /* There are a few simple rules, which allow for local port reuse by @@ -75,13 +77,20 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { +#ifdef CONFIG_NET_NS struct net *ib_net; +#endif unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; +static inline struct net *ib_net(struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, node, head) \ hlist_for_each_entry(tb, node, head, node) @@ -90,6 +99,18 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* + * Sockets can be hashed in established or listening table + * We must use different 'nulls' end-of-chain value for listening + * hash table, or we might find a socket that was closed and + * reallocated/inserted into established hash table + */ +#define LISTENING_NULLS_BASE (1U << 29) +struct inet_listen_hashbucket { + spinlock_t lock; + struct hlist_nulls_head head; +}; + /* This is for listening sockets, thus all sockets which possess wildcards. */ #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ @@ -102,7 +123,7 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; - rwlock_t *ehash_locks; + spinlock_t *ehash_locks; unsigned int ehash_size; unsigned int ehash_locks_mask; @@ -114,22 +135,21 @@ struct inet_hashinfo { unsigned int bhash_size; /* Note : 4 bytes padding on 64 bit arches */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + struct kmem_cache *bind_bucket_cachep; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members - * are often dirty. + * might be often dirty. + */ + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - struct kmem_cache *bind_bucket_cachep; + struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] + ____cacheline_aligned_in_smp; + }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -139,7 +159,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } -static inline rwlock_t *inet_ehash_lockp( +static inline spinlock_t *inet_ehash_lockp( struct inet_hashinfo *hashinfo, unsigned int hash) { @@ -164,16 +184,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 4096; if (sizeof(rwlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(rwlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + if (size * sizeof(spinlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); else #endif - hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!hashinfo->ehash_locks) return ENOMEM; for (i = 0; i < size; i++) - rwlock_init(&hashinfo->ehash_locks[i]); + spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = size - 1; return 0; @@ -184,7 +204,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) if (hashinfo->ehash_locks) { #ifdef CONFIG_NUMA unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(rwlock_t); + sizeof(spinlock_t); if (size > PAGE_SIZE) vfree(hashinfo->ehash_locks); else @@ -227,26 +247,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); - -/* - * - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). - */ -static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&hashinfo->lhash_lock); - atomic_inc(&hashinfo->lhash_users); - read_unlock(&hashinfo->lhash_lock); -} - -static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) -{ - if (atomic_dec_and_test(&hashinfo->lhash_users)) - wake_up(&hashinfo->lhash_wait); -} +void inet_hashinfo_init(struct inet_hashinfo *h); extern void __inet_hash_nolisten(struct sock *sk); extern void inet_hash(struct sock *sk); @@ -297,25 +298,25 @@ typedef __u64 __bitwise __addrpair; ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ @@ -371,6 +372,22 @@ static inline struct sock *inet_lookup(struct net *net, return sk; } +static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, + const __be16 sport, + const __be16 dport) +{ + struct sock *sk; + const struct iphdr *iph = ip_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + iph->saddr, sport, + iph->daddr, dport, inet_iif(skb)); +} + extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *,