#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
+#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
-#include <net/route.h>
#include <net/sock.h>
#include <net/tcp_states.h>
* I'll experiment with dynamic table growth later.
*/
struct inet_ehash_bucket {
- rwlock_t lock;
struct hlist_head chain;
struct hlist_head twchain;
};
* TIME_WAIT sockets use a separate chain (twchain).
*/
struct inet_ehash_bucket *ehash;
+ rwlock_t *ehash_locks;
+ unsigned int ehash_size;
+ unsigned int ehash_locks_mask;
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
struct inet_bind_hashbucket *bhash;
unsigned int bhash_size;
- unsigned int ehash_size;
+ /* Note : 4 bytes padding on 64 bit arches */
/* All sockets in TCP_LISTEN state will be in here. This is the only
* table where wildcard'd TCP sockets can exist. Hash function here
return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
}
+static inline rwlock_t *inet_ehash_lockp(
+ struct inet_hashinfo *hashinfo,
+ unsigned int hash)
+{
+ return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
+}
+
+static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
+{
+ unsigned int i, size = 256;
+#if defined(CONFIG_PROVE_LOCKING)
+ unsigned int nr_pcpus = 2;
+#else
+ unsigned int nr_pcpus = num_possible_cpus();
+#endif
+ if (nr_pcpus >= 4)
+ size = 512;
+ if (nr_pcpus >= 8)
+ size = 1024;
+ if (nr_pcpus >= 16)
+ size = 2048;
+ if (nr_pcpus >= 32)
+ size = 4096;
+ if (sizeof(rwlock_t) != 0) {
+#ifdef CONFIG_NUMA
+ if (size * sizeof(rwlock_t) > PAGE_SIZE)
+ hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+ else
+#endif
+ hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+ GFP_KERNEL);
+ if (!hashinfo->ehash_locks)
+ return ENOMEM;
+ for (i = 0; i < size; i++)
+ rwlock_init(&hashinfo->ehash_locks[i]);
+ }
+ hashinfo->ehash_locks_mask = size - 1;
+ return 0;
+}
+
+static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+{
+ if (hashinfo->ehash_locks) {
+#ifdef CONFIG_NUMA
+ unsigned int size = (hashinfo->ehash_locks_mask + 1) *
+ sizeof(rwlock_t);
+ if (size > PAGE_SIZE)
+ vfree(hashinfo->ehash_locks);
+ else
+#endif
+ kfree(hashinfo->ehash_locks);
+ hashinfo->ehash_locks = NULL;
+ }
+}
+
extern struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep,
struct inet_bind_hashbucket *head,
wake_up(&hashinfo->lhash_wait);
}
-static inline void __inet_hash(struct inet_hashinfo *hashinfo,
- struct sock *sk, const int listen_possible)
-{
- struct hlist_head *list;
- rwlock_t *lock;
-
- BUG_TRAP(sk_unhashed(sk));
- if (listen_possible && sk->sk_state == TCP_LISTEN) {
- list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &hashinfo->lhash_lock;
- inet_listen_wlock(hashinfo);
- } else {
- struct inet_ehash_bucket *head;
- sk->sk_hash = inet_sk_ehashfn(sk);
- head = inet_ehash_bucket(hashinfo, sk->sk_hash);
- list = &head->chain;
- lock = &head->lock;
- write_lock(lock);
- }
- __sk_add_node(sk, list);
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(lock);
- if (listen_possible && sk->sk_state == TCP_LISTEN)
- wake_up(&hashinfo->lhash_wait);
-}
+extern void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk);
+extern void __inet_hash_nolisten(struct inet_hashinfo *hinfo, struct sock *sk);
static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(hashinfo, sk, 1);
+ __inet_hash(hashinfo, sk);
local_bh_enable();
}
}
inet_listen_wlock(hashinfo);
lock = &hashinfo->lhash_lock;
} else {
- lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
+ lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
write_lock_bh(lock);
}
if (__sk_del_node_init(sk))
- sock_prot_dec_use(sk->sk_prot);
+ sock_prot_inuse_add(sk->sk_prot, -1);
write_unlock_bh(lock);
out:
if (sk->sk_state == TCP_LISTEN)
wake_up(&hashinfo->lhash_wait);
}
-static inline int inet_iif(const struct sk_buff *skb)
-{
- return ((struct rtable *)skb->dst)->rt_iif;
-}
-
extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
const __be32 daddr,
const unsigned short hnum,
*
* Local BH must be disabled here.
*/
-static inline struct sock *
- __inet_lookup_established(struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const u16 hnum,
- const int dif)
-{
- INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
- struct sock *sk;
- const struct hlist_node *node;
- /* Optimize here for direct hit, only listening connections can
- * have wildcards anyways.
- */
- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
- struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-
- prefetch(head->chain.first);
- read_lock(&head->lock);
- sk_for_each(sk, node, &head->chain) {
- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
- goto hit; /* You sunk my battleship! */
- }
-
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_for_each(sk, node, &head->twchain) {
- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
- goto hit;
- }
- sk = NULL;
-out:
- read_unlock(&head->lock);
- return sk;
-hit:
- sock_hold(sk);
- goto out;
-}
+extern struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const u16 hnum, const int dif);
static inline struct sock *
inet_lookup_established(struct inet_hashinfo *hashinfo,