/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8
-/* Socket used for sending RSTs */
-static struct socket *tcp_socket __read_mostly;
-
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
__be32 saddr, __be32 daddr,
struct tcphdr *th, int protocol,
- int tcplen);
+ unsigned int tcplen);
#endif
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
-static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
-{
- return inet_csk_get_port(&tcp_hashinfo, sk, snum,
- inet_csk_bind_conflict);
-}
-
-static void tcp_v4_hash(struct sock *sk)
-{
- inet_hash(&tcp_hashinfo, sk);
-}
-
-void tcp_unhash(struct sock *sk)
-{
- inet_unhash(&tcp_hashinfo, sk);
-}
-
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
return;
}
- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
- th->source, inet_iif(skb));
+ sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
+ iph->saddr, th->source, inet_iif(skb));
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
if (th->rst)
return;
- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
+ if (skb->rtable->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+ ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
if (twsk)
arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+ ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
}
/*
- * Send a SYN-ACK after having received an ACK.
+ * Send a SYN-ACK after having received a SYN.
* This still operates on a request_sock only, not on a big
* socket.
*/
-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
+static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
+ struct dst_entry *dst)
{
const struct inet_request_sock *ireq = inet_rsk(req);
int err = -1;
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto out;
+ return -1;
skb = tcp_make_synack(sk, dst, req);
err = net_xmit_eval(err);
}
-out:
dst_release(dst);
return err;
}
/*
 * Two-argument form of the SYN-ACK sender.  It forwards to
 * __tcp_v4_send_synack() with a NULL dst; with no dst supplied that
 * helper obtains the route itself through inet_csk_route_req().
 * The return value of the helper is passed back unchanged.
 */
+static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
+{
+ return __tcp_v4_send_synack(sk, req, NULL);
+}
+
/*
* IPv4 request_sock destructor.
*/
sizeof(*keys) * md5sig->entries4);
/* Free old key list, and reference new one */
- if (md5sig->keys4)
- kfree(md5sig->keys4);
+ kfree(md5sig->keys4);
md5sig->keys4 = keys;
md5sig->alloced4++;
}
tp->md5sig_info->alloced4 = 0;
} else if (tp->md5sig_info->entries4 != i) {
/* Need to do some manipulation */
- memcpy(&tp->md5sig_info->keys4[i],
- &tp->md5sig_info->keys4[i+1],
- (tp->md5sig_info->entries4 - i) *
- sizeof(struct tcp4_md5sig_key));
+ memmove(&tp->md5sig_info->keys4[i],
+ &tp->md5sig_info->keys4[i+1],
+ (tp->md5sig_info->entries4 - i) *
+ sizeof(struct tcp4_md5sig_key));
}
tcp_free_md5sig_pool();
return 0;
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
__be32 saddr, __be32 daddr,
struct tcphdr *th, int protocol,
- int tcplen)
+ unsigned int tcplen)
{
struct scatterlist sg[4];
__u16 data_len;
struct dst_entry *dst,
struct request_sock *req,
struct tcphdr *th, int protocol,
- int tcplen)
+ unsigned int tcplen)
{
__be32 saddr, daddr;
#endif
/* Never answer to SYNs sent to broadcast or multicast */
- if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST))
+ if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
/* TW buckets are converted to open requests without
tcp_parse_options(skb, &tmp_opt, 0);
- if (want_cookie) {
+ if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
- tmp_opt.saw_tstamp = 0;
- }
if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
/* Some OSes (unknown ones, but I see them on web server, which
if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
syn_flood_warning(skb);
+ req->cookie_ts = tmp_opt.tstamp_ok;
#endif
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
} else if (!isn) {
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
- dst_release(dst);
- goto drop_and_free;
+ goto drop_and_release;
}
}
/* Kill the following clause, if you dislike this way. */
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
- "request from %u.%u.%u.%u/%u\n",
+ "request from " NIPQUAD_FMT "/%u\n",
NIPQUAD(saddr),
ntohs(tcp_hdr(skb)->source));
- dst_release(dst);
- goto drop_and_free;
+ goto drop_and_release;
}
isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
- if (tcp_v4_send_synack(sk, req, dst))
+ if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
goto drop_and_free;
- if (want_cookie) {
- reqsk_free(req);
- } else {
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- }
+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
+drop_and_release:
+ dst_release(dst);
drop_and_free:
reqsk_free(req);
drop:
}
#endif
- __inet_hash(&tcp_hashinfo, newsk, 0);
- __inet_inherit_port(&tcp_hashinfo, sk, newsk);
+ __inet_hash_nolisten(newsk);
+ __inet_inherit_port(sk, newsk);
return newsk;
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
- iph->daddr, th->dest, inet_iif(skb));
+ nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
+ th->source, iph->daddr, th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
- iph->daddr, th->dest, inet_iif(skb));
+ sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
+ th->source, iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+ struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
+ &tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
+ .bind_conflict = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
- inet_put_port(&tcp_hashinfo, sk);
+ inet_put_port(sk);
/*
* If sendmsg cached page exists, toss it.
sk->sk_sndmsg_page = NULL;
}
+ if (tp->defer_tcp_accept.request) {
+ reqsk_free(tp->defer_tcp_accept.request);
+ sock_put(tp->defer_tcp_accept.listen_sk);
+ sock_put(sk);
+ tp->defer_tcp_accept.listen_sk = NULL;
+ tp->defer_tcp_accept.request = NULL;
+ }
+
atomic_dec(&tcp_sockets_allocated);
return 0;
struct hlist_node *node;
struct sock *sk = cur;
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
if (!sk) {
st->bucket = 0;
req = req->dl_next;
while (1) {
while (req) {
- if (req->rsk_ops->family == st->family) {
+ if (req->rsk_ops->family == st->family &&
+ net_eq(sock_net(req->sk), net)) {
cur = req;
goto out;
}
}
get_sk:
sk_for_each_from(sk, node) {
- if (sk->sk_family == st->family) {
+ if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
cur = sk;
goto out;
}
static void *established_get_first(struct seq_file *seq)
{
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
void *rc = NULL;
for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
read_lock_bh(lock);
sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if (sk->sk_family != st->family) {
+ if (sk->sk_family != st->family ||
+ !net_eq(sock_net(sk), net)) {
continue;
}
rc = sk;
st->state = TCP_SEQ_STATE_TIME_WAIT;
inet_twsk_for_each(tw, node,
&tcp_hashinfo.ehash[st->bucket].twchain) {
- if (tw->tw_family != st->family) {
+ if (tw->tw_family != st->family ||
+ !net_eq(twsk_net(tw), net)) {
continue;
}
rc = tw;
struct inet_timewait_sock *tw;
struct hlist_node *node;
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
++st->num;
tw = cur;
tw = tw_next(tw);
get_tw:
- while (tw && tw->tw_family != st->family) {
+ while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
tw = tw_next(tw);
}
if (tw) {
sk = sk_next(sk);
sk_for_each_from(sk, node) {
- if (sk->sk_family == st->family)
+ if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
goto found;
}
/*
 * open handler installed on the TCP proc file by tcp_proc_register().
 *
 * The per-family tcp_seq_afinfo is read from the ->data of the proc entry;
 * -EINVAL is returned when it is NULL.
 *
 * Old code (removed lines): kzalloc a tcp_iter_state, fill its private
 * seq_ops copy by hand, call seq_open() and attach the state to
 * seq->private, freeing it again on failure.
 *
 * New code (added lines): seq_open_net() is handed afinfo->seq_ops and
 * sizeof(struct tcp_iter_state); a negative result is returned as is.
 * On success the iterator state is picked up from the ->private pointer
 * of the seq_file, only ->family is filled in, and 0 is returned.
 */
static int tcp_seq_open(struct inode *inode, struct file *file)
{
struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
- struct seq_file *seq;
struct tcp_iter_state *s;
- int rc;
+ int err;
if (unlikely(afinfo == NULL))
return -EINVAL;
- s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ err = seq_open_net(inode, file, &afinfo->seq_ops,
+ sizeof(struct tcp_iter_state));
+ if (err < 0)
+ return err;
+
+ s = ((struct seq_file *)file->private_data)->private;
s->family = afinfo->family;
- s->seq_ops.start = tcp_seq_start;
- s->seq_ops.next = tcp_seq_next;
- s->seq_ops.show = afinfo->seq_show;
- s->seq_ops.stop = tcp_seq_stop;
-
- rc = seq_open(file, &s->seq_ops);
- if (rc)
- goto out_kfree;
- seq = file->private_data;
- seq->private = s;
-out:
- return rc;
-out_kfree:
- kfree(s);
- goto out;
+ return 0;
}
-int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
+int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
int rc = 0;
struct proc_dir_entry *p;
- if (!afinfo)
- return -EINVAL;
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = tcp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
+ afinfo->seq_fops.open = tcp_seq_open;
+ afinfo->seq_fops.read = seq_read;
+ afinfo->seq_fops.llseek = seq_lseek;
+ afinfo->seq_fops.release = seq_release_net;
+
+ afinfo->seq_ops.start = tcp_seq_start;
+ afinfo->seq_ops.next = tcp_seq_next;
+ afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
if (p)
p->data = afinfo;
else
return rc;
}
/*
 * Remove the proc entry named afinfo->name.
 *
 * With this change the entry is removed from the caller-supplied @net
 * instead of always from init_net.  The NULL check on @afinfo and the
 * memset() of the seq_fops are dropped, so @afinfo is dereferenced
 * unconditionally.
 */
-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
+void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
- if (!afinfo)
- return;
- proc_net_remove(&init_net, afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ proc_net_remove(net, afinfo->name);
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
return 0;
}
-static struct file_operations tcp4_seq_fops;
/*
 * IPv4 descriptor passed to tcp_proc_register() and tcp_proc_unregister():
 * proc entry name tcp, family AF_INET.
 *
 * After this change the file_operations and seq_operations are embedded
 * in the structure rather than referenced through a separate static
 * object.  Only .owner and .show are set here; tcp_proc_register()
 * assigns open/read/llseek/release and start/next/stop.
 */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "tcp",
.family = AF_INET,
- .seq_show = tcp4_seq_show,
- .seq_fops = &tcp4_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = tcp4_seq_show,
+ },
+};
+
/*
 * .init hook of tcp4_net_ops: registers the IPv4 TCP proc entry
 * (tcp4_seq_afinfo) in @net and returns the result of
 * tcp_proc_register().
 */
+static int tcp4_proc_init_net(struct net *net)
+{
+ return tcp_proc_register(net, &tcp4_seq_afinfo);
+}
+
/*
 * .exit hook of tcp4_net_ops: removes the IPv4 TCP proc entry
 * (tcp4_seq_afinfo) from @net.
 */
+static void tcp4_proc_exit_net(struct net *net)
+{
+ tcp_proc_unregister(net, &tcp4_seq_afinfo);
+}
+
/*
 * Pairs the init/exit hooks that add and remove the IPv4 TCP proc entry
 * for a struct net.  Registered by tcp4_proc_init() through
 * register_pernet_subsys() and unregistered by tcp4_proc_exit().
 */
+static struct pernet_operations tcp4_net_ops = {
+ .init = tcp4_proc_init_net,
+ .exit = tcp4_proc_exit_net,
};
/*
 * Set up the IPv4 TCP proc entry.
 *
 * Previously this called tcp_proc_register() once directly.  It now
 * registers tcp4_net_ops with register_pernet_subsys() and returns that
 * result; the tcp_proc_register() call itself lives in the .init hook
 * of tcp4_net_ops.
 */
int __init tcp4_proc_init(void)
{
- return tcp_proc_register(&tcp4_seq_afinfo);
+ return register_pernet_subsys(&tcp4_net_ops);
}
/*
 * Tear down the IPv4 TCP proc entry.
 *
 * Previously this called tcp_proc_unregister() directly.  It now
 * unregisters tcp4_net_ops with unregister_pernet_subsys(); the
 * tcp_proc_unregister() call itself lives in the .exit hook of
 * tcp4_net_ops.
 */
void tcp4_proc_exit(void)
{
- tcp_proc_unregister(&tcp4_seq_afinfo);
+ unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
-DEFINE_PROTO_INUSE(tcp)
-
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.getsockopt = tcp_getsockopt,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv,
- .hash = tcp_v4_hash,
- .unhash = tcp_unhash,
- .get_port = tcp_v4_get_port,
+ .hash = inet_hash,
+ .unhash = inet_unhash,
+ .get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.obj_size = sizeof(struct tcp_sock),
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
+ .h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
- REF_PROTO_INUSE(tcp)
};
-void __init tcp_v4_init(struct net_proto_family *ops)
+
/*
 * .init hook of tcp_sk_ops: asks inet_ctl_sock_create() for a
 * PF_INET / SOCK_RAW / IPPROTO_TCP socket for @net, stored in
 * net->ipv4.tcp_sock, and returns its result.  That per-net socket is
 * what the ip_send_reply() calls in this patch pass in place of the old
 * file-global tcp_socket.
 */
+static int __net_init tcp_sk_init(struct net *net)
+{
+ return inet_ctl_sock_create(&net->ipv4.tcp_sock,
+ PF_INET, SOCK_RAW, IPPROTO_TCP, net);
+}
+
/*
 * .exit hook of tcp_sk_ops: hands net->ipv4.tcp_sock, the socket set up
 * by tcp_sk_init(), to inet_ctl_sock_destroy().
 */
+static void __net_exit tcp_sk_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv4.tcp_sock);
+}
+
/*
 * Pairs the hooks that create and destroy net->ipv4.tcp_sock.
 * Registered from tcp_v4_init() through register_pernet_device().
 */
+static struct pernet_operations __net_initdata tcp_sk_ops = {
+ .init = tcp_sk_init,
+ .exit = tcp_sk_exit,
+};
+
/*
 * __init setup for the TCP control socket.
 *
 * The old body created the single global tcp_socket with
 * inet_csk_ctl_sock_create().  The new body registers tcp_sk_ops with
 * register_pernet_device(); the .init hook of tcp_sk_ops creates the
 * socket per struct net.  A non-zero registration result panics.
 * The former struct net_proto_family * parameter is gone.
 */
+void __init tcp_v4_init(void)
{
- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
- IPPROTO_TCP) < 0)
+ if (register_pernet_device(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
-EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);