4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
/*
 * Module-wide DCCP counters, both exported GPL-only:
 *  - dccp_statistics: SNMP/MIB statistics of type struct dccp_mib; slots
 *    [0] and [1] are allocated per-CPU in dccp_mib_init().
 *  - dccp_orphan_count: atomic counter, statically initialised to 0.
 *    NOTE(review): presumably the counter reached through
 *    sk->sk_prot->orphan_count in dccp_close() - confirm.
 */
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42 EXPORT_SYMBOL_GPL(dccp_statistics);
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
/*
 * Hash tables used for DCCP sockets (exported GPL-only).
 *
 * Only the listening-hash lock, user count and wait queue are initialised
 * statically here.  The bind-bucket slab cache, the established hash (ehash)
 * and the bind hash (bhash) are allocated at module load in dccp_init() and
 * released in dccp_fini().
 */
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
/*
 * Default is 5 packets.  dccp_sendmsg() compares sk_write_queue.qlen against
 * this value and skips the check entirely when it is 0.
 */
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state - move a DCCP socket to a new connection state
 * @sk:    socket being updated
 * @state: new DCCP_* state
 *
 * Logs the transition (role, socket pointer, old and new state names) via
 * dccp_pr_debug() and triggers WARN_ON() if @state equals the current state.
 *
 * Bookkeeping driven by the old state:
 *  - DCCP_MIB_CURRESTAB is incremented when the old state was not DCCP_OPEN,
 *    and decremented when a socket leaves DCCP_OPEN.
 *  - DCCP_MIB_ESTABRESETS is incremented when the old state was DCCP_OPEN,
 *    DCCP_ACTIVE_CLOSEREQ or DCCP_CLOSING.
 *  - The socket is unhashed through sk->sk_prot->unhash(), and its port is
 *    released with inet_put_port() if a bind bucket is held and the user did
 *    not lock the port (SOCK_BINDPORT_LOCK).
 * NOTE(review): which target states select each of these actions should be
 * confirmed against the switch on @state.
 */
59 void dccp_set_state(struct sock *sk, const int state)
61 const int oldstate = sk->sk_state;
63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
78 sk->sk_prot->unhash(sk);
79 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81 inet_put_port(&dccp_hashinfo, sk);
84 if (oldstate == DCCP_OPEN)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
94 EXPORT_SYMBOL_GPL(dccp_set_state);
/*
 * dccp_finish_passive_close - complete a close that the peer initiated
 * @sk: socket in DCCP_PASSIVE_CLOSE or DCCP_PASSIVE_CLOSEREQ
 *
 * Dispatches on sk->sk_state:
 *  - DCCP_PASSIVE_CLOSE:    send a Reset with code DCCP_RESET_CODE_CLOSED and
 *                           move to DCCP_CLOSED.
 *  - DCCP_PASSIVE_CLOSEREQ: send a Close with the active flag set (second
 *                           argument 1) and move to DCCP_CLOSING.
 * Called from dccp_recvmsg() and dccp_terminate_connection().
 */
96 static void dccp_finish_passive_close(struct sock *sk)
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
104 case DCCP_PASSIVE_CLOSEREQ:
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
/*
 * dccp_done - finish off a DCCP connection
 * @sk: socket to finish
 *
 * Moves the socket to DCCP_CLOSED, clears its transmit timers and marks both
 * directions as shut down (SHUTDOWN_MASK).  If the socket is not flagged
 * SOCK_DEAD, waiters are notified through sk->sk_state_change().
 * NOTE(review): inet_csk_destroy_sock() is the remaining outcome - confirm
 * that it applies to dead sockets only.
 */
114 void dccp_done(struct sock *sk)
116 dccp_set_state(sk, DCCP_CLOSED);
117 dccp_clear_xmit_timers(sk);
119 sk->sk_shutdown = SHUTDOWN_MASK;
121 if (!sock_flag(sk, SOCK_DEAD))
122 sk->sk_state_change(sk);
124 inet_csk_destroy_sock(sk);
127 EXPORT_SYMBOL_GPL(dccp_done);
/*
 * dccp_packet_name - printable name for a DCCP packet type
 * @type: DCCP_PKT_* value
 *
 * Looks @type up in a static table indexed by packet type.  Values at or
 * above DCCP_NR_PKT_TYPES are filtered out before the table is indexed.
 * NOTE(review): @type is a signed int and only the upper bound is checked;
 * a negative value would index before the table - confirm callers never
 * pass one.
 */
129 const char *dccp_packet_name(const int type)
131 static const char *dccp_packet_names[] = {
132 [DCCP_PKT_REQUEST] = "REQUEST",
133 [DCCP_PKT_RESPONSE] = "RESPONSE",
134 [DCCP_PKT_DATA] = "DATA",
135 [DCCP_PKT_ACK] = "ACK",
136 [DCCP_PKT_DATAACK] = "DATAACK",
137 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 [DCCP_PKT_CLOSE] = "CLOSE",
139 [DCCP_PKT_RESET] = "RESET",
140 [DCCP_PKT_SYNC] = "SYNC",
141 [DCCP_PKT_SYNCACK] = "SYNCACK",
144 if (type >= DCCP_NR_PKT_TYPES)
147 return dccp_packet_names[type];
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
/*
 * dccp_state_name - printable name for a DCCP socket state
 * @state: DCCP_* state value
 *
 * Returns "INVALID STATE!" when @state is at or above DCCP_MAX_STATES,
 * otherwise the entry of a static table indexed by state.  Note that
 * DCCP_ACTIVE_CLOSEREQ is printed as "CLOSEREQ".
 * NOTE(review): only the upper bound of the signed @state is checked, and
 * the table is declared `static char *' although the function returns
 * `const char *' - consider const-qualifying it.
 */
152 const char *dccp_state_name(const int state)
154 static char *dccp_state_names[] = {
155 [DCCP_OPEN] = "OPEN",
156 [DCCP_REQUESTING] = "REQUESTING",
157 [DCCP_PARTOPEN] = "PARTOPEN",
158 [DCCP_LISTEN] = "LISTEN",
159 [DCCP_RESPOND] = "RESPOND",
160 [DCCP_CLOSING] = "CLOSING",
161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
168 if (state >= DCCP_MAX_STATES)
169 return "INVALID STATE!";
171 return dccp_state_names[state];
174 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_hash - insert a socket into the DCCP hash tables
 * @sk: socket to hash
 *
 * Thin wrapper that calls inet_hash() with dccp_hashinfo.
 */
176 void dccp_hash(struct sock *sk)
178 inet_hash(&dccp_hashinfo, sk);
181 EXPORT_SYMBOL_GPL(dccp_hash);
/*
 * dccp_unhash - remove a socket from the DCCP hash tables
 * @sk: socket to unhash
 *
 * Thin wrapper that calls inet_unhash() with dccp_hashinfo.
 */
183 void dccp_unhash(struct sock *sk)
185 inet_unhash(&dccp_hashinfo, sk);
188 EXPORT_SYMBOL_GPL(dccp_unhash);
/*
 * dccp_init_sock - initialise the DCCP-specific part of a new socket
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: non-zero (the likely case) for ordinary sockets
 *
 * Sets protocol defaults: minisock defaults, initial RTO DCCP_TIMEOUT_INIT,
 * request retries from sysctl_dccp_request_retries, state DCCP_CLOSED, the
 * write-space and sync-MSS callbacks, an MSS cache of 536, undefined role,
 * absent service code and ack ratios of 1; then arms the transmit timers.
 *
 * When @ctl_sock_initialized is set, feature negotiation is initialised with
 * dccp_feat_init(), an ack vector is allocated (GFP_KERNEL) if the minisock
 * asks for one, and RX/TX CCID blocks are created.  If either CCID
 * allocation fails, both CCIDs are deleted, the ack vector (if any) is freed
 * and the pointers are reset to NULL.
 *
 * Otherwise - the control socket, per the comment below - only the pending
 * and conf feature lists are initialised.
 *
 * NOTE(review): returns int; the exact error codes should be confirmed
 * against the return statements.
 */
190 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
192 struct dccp_sock *dp = dccp_sk(sk);
193 struct dccp_minisock *dmsk = dccp_msk(sk);
194 struct inet_connection_sock *icsk = inet_csk(sk);
196 dccp_minisock_init(&dp->dccps_minisock);
198 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
199 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
200 sk->sk_state = DCCP_CLOSED;
201 sk->sk_write_space = dccp_write_space;
202 icsk->icsk_sync_mss = dccp_sync_mss;
203 dp->dccps_mss_cache = 536;
204 dp->dccps_rate_last = jiffies;
205 dp->dccps_role = DCCP_ROLE_UNDEFINED;
206 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
207 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
209 dccp_init_xmit_timers(sk);
212 * FIXME: We're hardcoding the CCID, and doing this at this point makes
213 * the listening (master) sock get CCID control blocks, which is not
214 * necessary, but for now, to not mess with the test userspace apps,
215 * lets leave it here, later the real solution is to do this in a
216 * setsockopt(CCIDs-I-want/accept). -acme
218 if (likely(ctl_sock_initialized)) {
219 int rc = dccp_feat_init(dmsk);
224 if (dmsk->dccpms_send_ack_vector) {
225 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
226 if (dp->dccps_hc_rx_ackvec == NULL)
229 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
231 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
233 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
234 dp->dccps_hc_tx_ccid == NULL)) {
235 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
236 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
237 if (dmsk->dccpms_send_ack_vector) {
238 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239 dp->dccps_hc_rx_ackvec = NULL;
241 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
245 /* control socket doesn't need feat nego */
246 INIT_LIST_HEAD(&dmsk->dccpms_pending);
247 INIT_LIST_HEAD(&dmsk->dccpms_conf);
253 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release DCCP-specific resources held by a socket
 * @sk: socket being destroyed
 *
 * Frees, in order: the packet kept in sk_send_head for retransmission, the
 * bind bucket reference (inet_put_port()), the service list, the RX ack
 * vector (when the minisock has ack vectors enabled), both CCID blocks and
 * finally the feature-negotiation state (dccp_feat_clean()).  Freed pointers
 * are reset to NULL.
 */
255 int dccp_destroy_sock(struct sock *sk)
257 struct dccp_sock *dp = dccp_sk(sk);
258 struct dccp_minisock *dmsk = dccp_msk(sk);
261 * DCCP doesn't use sk_write_queue, just sk_send_head
262 * for retransmissions
264 if (sk->sk_send_head != NULL) {
265 kfree_skb(sk->sk_send_head);
266 sk->sk_send_head = NULL;
269 /* Clean up a referenced DCCP bind bucket. */
270 if (inet_csk(sk)->icsk_bind_hash != NULL)
271 inet_put_port(&dccp_hashinfo, sk);
273 kfree(dp->dccps_service_list);
274 dp->dccps_service_list = NULL;
276 if (dmsk->dccpms_send_ack_vector) {
277 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
278 dp->dccps_hc_rx_ackvec = NULL;
280 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
281 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
282 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
284 /* clean up feature negotiation state */
285 dccp_feat_clean(dmsk);
290 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start - put a socket into listening mode
 * @sk:      socket to listen on
 * @backlog: backlog passed through to inet_csk_listen_start()
 *
 * Marks the socket's role as DCCP_ROLE_LISTEN and returns the result of
 * inet_csk_listen_start().
 */
292 static inline int dccp_listen_start(struct sock *sk, int backlog)
294 struct dccp_sock *dp = dccp_sk(sk);
296 dp->dccps_role = DCCP_ROLE_LISTEN;
297 return inet_csk_listen_start(sk, backlog);
/*
 * dccp_need_reset - does disconnecting from @state require a Reset packet?
 * @state: DCCP_* state the socket was in
 *
 * Returns non-zero for every state except DCCP_CLOSED, DCCP_LISTEN and
 * DCCP_REQUESTING.  Used by dccp_disconnect().
 */
300 static inline int dccp_need_reset(int state)
302 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 state != DCCP_REQUESTING;
/*
 * dccp_disconnect - abort the connection and return the socket to CLOSED
 * @sk:    socket to disconnect
 * @flags: not referenced by the statements below
 *
 * Moves the socket to DCCP_CLOSED (unless it already is), then acts on the
 * previous state:
 *  - DCCP_LISTEN:      stop listening (inet_csk_listen_stop()).
 *  - needs a reset:    send Reset with code DCCP_RESET_CODE_ABORTED and set
 *                      sk_err to ECONNRESET (see dccp_need_reset()).
 *  - DCCP_REQUESTING:  set sk_err to ECONNRESET only.
 *
 * Afterwards the transmit timers are cleared, the receive queue and any
 * pending sk_send_head packet are dropped, the source address is reset
 * unless the user locked it (SOCK_BINDADDR_LOCK), SOCK_DONE is cleared,
 * backoff and delayed-ack state are reinitialised, and the error is reported
 * through sk->sk_error_report().
 */
306 int dccp_disconnect(struct sock *sk, int flags)
308 struct inet_connection_sock *icsk = inet_csk(sk);
309 struct inet_sock *inet = inet_sk(sk);
311 const int old_state = sk->sk_state;
313 if (old_state != DCCP_CLOSED)
314 dccp_set_state(sk, DCCP_CLOSED);
317 * This corresponds to the ABORT function of RFC793, sec. 3.8
318 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
320 if (old_state == DCCP_LISTEN) {
321 inet_csk_listen_stop(sk);
322 } else if (dccp_need_reset(old_state)) {
323 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324 sk->sk_err = ECONNRESET;
325 } else if (old_state == DCCP_REQUESTING)
326 sk->sk_err = ECONNRESET;
328 dccp_clear_xmit_timers(sk);
329 __skb_queue_purge(&sk->sk_receive_queue);
330 if (sk->sk_send_head != NULL) {
331 __kfree_skb(sk->sk_send_head);
332 sk->sk_send_head = NULL;
337 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
338 inet_reset_saddr(sk);
341 sock_reset_flag(sk, SOCK_DONE);
343 icsk->icsk_backoff = 0;
344 inet_csk_delack_init(sk);
347 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
349 sk->sk_error_report(sk);
353 EXPORT_SYMBOL_GPL(dccp_disconnect);
356 * Wait for a DCCP event.
358 * Note that we don't need to lock the socket, as the upper poll layers
359 * take care of normal races (between the test and the event) and we don't
360 * go look at any of the socket buffers directly.
/*
 * dccp_poll - poll handler for DCCP sockets
 * @file: file being polled
 * @sock: socket being polled
 *
 * Registers on sk->sk_sleep via poll_wait().  Listening sockets are handed
 * to inet_csk_listen_poll().  For all others an event mask is assembled:
 *  - full shutdown or DCCP_CLOSED state is checked first;
 *  - RCV_SHUTDOWN adds POLLIN | POLLRDNORM | POLLRDHUP;
 *  - in any state other than REQUESTING/RESPOND, queued receive memory
 *    (sk_rmem_alloc > 0) adds POLLIN | POLLRDNORM, and - unless
 *    SEND_SHUTDOWN is set - sufficient write space adds
 *    POLLOUT | POLLWRNORM.  If write space is short, SOCK_ASYNC_NOSPACE and
 *    SOCK_NOSPACE are set and the space test is repeated to close the race
 *    described below.
 */
362 unsigned int dccp_poll(struct file *file, struct socket *sock,
366 struct sock *sk = sock->sk;
368 poll_wait(file, sk->sk_sleep, wait);
369 if (sk->sk_state == DCCP_LISTEN)
370 return inet_csk_listen_poll(sk);
372 /* Socket is not locked. We are protected from async events
373 by poll logic and correct handling of state changes
374 made by another threads is impossible in any case.
381 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
383 if (sk->sk_shutdown & RCV_SHUTDOWN)
384 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
387 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
388 if (atomic_read(&sk->sk_rmem_alloc) > 0)
389 mask |= POLLIN | POLLRDNORM;
391 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
392 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
393 mask |= POLLOUT | POLLWRNORM;
394 } else { /* send SIGIO later */
395 set_bit(SOCK_ASYNC_NOSPACE,
396 &sk->sk_socket->flags);
397 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
399 /* Race breaker. If space is freed after
400 * wspace test but before the flags are set,
401 * IO signal will be lost.
403 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
404 mask |= POLLOUT | POLLWRNORM;
411 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - DCCP socket ioctl handler
 * @sk:  socket the ioctl is issued on
 * @cmd: ioctl command
 * @arg: user-space argument; written through as an int pointer
 *
 * Listening sockets are checked for up front.  The handler peeks at the head
 * of the receive queue and reports an amount for that single packet to user
 * space with put_user().
 * NOTE(review): `amount' is an unsigned long but is stored through an
 * `int __user *' cast - confirm the intended width.
 */
413 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
419 if (sk->sk_state == DCCP_LISTEN)
425 unsigned long amount = 0;
427 skb = skb_peek(&sk->sk_receive_queue);
430 * We will only return the amount of this packet since
431 * that is all that will be read.
435 rc = put_user(amount, (int __user *)arg);
447 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - set the service code(s) of a socket
 * @sk:      socket to configure
 * @service: first (primary) service code, already read from @optval
 * @optval:  user buffer holding @service followed by optional extra codes
 * @optlen:  length of @optval in bytes
 *
 * Rejects DCCP_SERVICE_INVALID_VALUE and buffers longer than
 * DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32).  When @optlen exceeds one service
 * code, a list of @optlen bytes is allocated, its entry count is set to
 * optlen / sizeof(u32) - 1, and the codes following the first one are copied
 * from user space; the list is rejected if the copy fails or it contains
 * DCCP_SERVICE_INVALID_VALUE.  On success the primary code is stored and the
 * new list (possibly NULL) replaces the previous one, which is freed.
 */
449 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
450 char __user *optval, int optlen)
452 struct dccp_sock *dp = dccp_sk(sk);
453 struct dccp_service_list *sl = NULL;
455 if (service == DCCP_SERVICE_INVALID_VALUE ||
456 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
459 if (optlen > sizeof(service)) {
460 sl = kmalloc(optlen, GFP_KERNEL);
464 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
465 if (copy_from_user(sl->dccpsl_list,
466 optval + sizeof(service),
467 optlen - sizeof(service)) ||
468 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
475 dp->dccps_service = service;
477 kfree(dp->dccps_service_list);
479 dp->dccps_service_list = sl;
/*
 * dccp_setsockopt_change - request a feature change (Change L / Change R)
 * @sk:     socket to configure
 * @type:   option type, DCCPO_CHANGE_L or DCCPO_CHANGE_R at the call sites
 * @optval: user pointer to a struct dccp_so_feat
 *
 * Copies the descriptor from user space, allocates dccpsf_len bytes, copies
 * the value list from the user pointer dccpsf_val and passes feature, value
 * and length to dccp_feat_change() (GFP_KERNEL).
 * NOTE(review): dccpsf_len is user-controlled and reaches kmalloc() and
 * copy_from_user() with no validation among these statements - confirm that
 * a bound (and a non-zero check) is applied.
 */
484 /* byte 1 is feature. the rest is the preference list */
485 static int dccp_setsockopt_change(struct sock *sk, int type,
486 struct dccp_so_feat __user *optval)
488 struct dccp_so_feat opt;
492 if (copy_from_user(&opt, optval, sizeof(opt)))
495 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
499 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
504 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505 val, opt.dccpsf_len, GFP_KERNEL);
/*
 * do_dccp_setsockopt - handle SOL_DCCP socket options
 * @sk:      socket to configure
 * @level:   option level (callers pass SOL_DCCP options here)
 * @optname: DCCP_SOCKOPT_* option
 * @optval:  user buffer with the option value
 * @optlen:  length of @optval; must be at least sizeof(int)
 *
 * Reads the first int of @optval and dispatches:
 *  - DCCP_SOCKOPT_SERVICE:     dccp_setsockopt_service() with the whole buffer.
 *  - DCCP_SOCKOPT_PACKET_SIZE: deprecated; only emits a warning.
 *  - DCCP_SOCKOPT_CHANGE_L/_R: @optlen must equal sizeof(struct dccp_so_feat);
 *                              forwarded to dccp_setsockopt_change().
 *  - DCCP_SOCKOPT_SEND_CSCOV:  value must be 0..15; stored in dccps_pcslen.
 *  - DCCP_SOCKOPT_RECV_CSCOV:  value must be 0..15; stored in dccps_pcrlen.
 */
517 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518 char __user *optval, int optlen)
520 struct dccp_sock *dp = dccp_sk(sk);
523 if (optlen < sizeof(int))
526 if (get_user(val, (int __user *)optval))
529 if (optname == DCCP_SOCKOPT_SERVICE)
530 return dccp_setsockopt_service(sk, val, optval, optlen);
534 case DCCP_SOCKOPT_PACKET_SIZE:
535 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
538 case DCCP_SOCKOPT_CHANGE_L:
539 if (optlen != sizeof(struct dccp_so_feat))
542 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
543 (struct dccp_so_feat __user *)
546 case DCCP_SOCKOPT_CHANGE_R:
547 if (optlen != sizeof(struct dccp_so_feat))
550 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
551 (struct dccp_so_feat __user *)
554 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
555 if (val < 0 || val > 15)
558 dp->dccps_pcslen = val;
560 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
561 if (val < 0 || val > 15)
564 dp->dccps_pcrlen = val;
565 /* FIXME: add feature negotiation,
566 * ChangeL(MinimumChecksumCoverage, val) */
/*
 * dccp_setsockopt - setsockopt entry point for DCCP sockets
 *
 * Options at a level other than SOL_DCCP are forwarded to the address
 * family's setsockopt handler (icsk_af_ops); SOL_DCCP options are handled by
 * do_dccp_setsockopt().  Returns whatever the chosen handler returns.
 */
578 int dccp_setsockopt(struct sock *sk, int level, int optname,
579 char __user *optval, int optlen)
581 if (level != SOL_DCCP)
582 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
585 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
588 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * compat_dccp_setsockopt - compat variant of dccp_setsockopt()
 *
 * Same dispatch as dccp_setsockopt(), except that non-SOL_DCCP levels go to
 * inet_csk_compat_setsockopt().
 */
591 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
592 char __user *optval, int optlen)
594 if (level != SOL_DCCP)
595 return inet_csk_compat_setsockopt(sk, level, optname,
597 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
600 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service - report the service code(s) of a socket
 * @sk:     socket to query
 * @len:    size of the user buffer
 * @optval: user buffer receiving the codes as __be32 values
 *
 * The primary service code is written first; if a service list is attached
 * to the socket, its dccpsl_nr entries follow at optval + 1.  The resulting
 * total length is stored through the user's optlen pointer.  `err' starts
 * out as -ENOENT.
 */
603 static int dccp_getsockopt_service(struct sock *sk, int len,
604 __be32 __user *optval,
607 const struct dccp_sock *dp = dccp_sk(sk);
608 const struct dccp_service_list *sl;
609 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
612 if ((sl = dp->dccps_service_list) != NULL) {
613 slen = sl->dccpsl_nr * sizeof(u32);
622 if (put_user(total_len, optlen) ||
623 put_user(dp->dccps_service, optval) ||
624 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt - handle SOL_DCCP getsockopt requests
 * @sk:      socket to query
 * @level:   option level
 * @optname: DCCP_SOCKOPT_* option
 * @optval:  user buffer receiving the value
 * @optlen:  user pointer to the buffer length (read, then updated)
 *
 * The user-supplied length is read and must be at least sizeof(int).
 * Handled options:
 *  - DCCP_SOCKOPT_PACKET_SIZE: deprecated; emits a warning.
 *  - DCCP_SOCKOPT_SERVICE:     dccp_getsockopt_service().
 *  - DCCP_SOCKOPT_GET_CUR_MPS: current dccps_mss_cache.
 *  - DCCP_SOCKOPT_SEND_CSCOV / _RECV_CSCOV: dccps_pcslen / dccps_pcrlen.
 *  - CCID-specific options are delegated to ccid_hc_rx_getsockopt() or
 *    ccid_hc_tx_getsockopt().
 * Integer results are returned by writing `len' to @optlen and copying `len'
 * bytes of the value to @optval.
 */
631 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
632 char __user *optval, int __user *optlen)
634 struct dccp_sock *dp;
637 if (get_user(len, optlen))
640 if (len < (int)sizeof(int))
646 case DCCP_SOCKOPT_PACKET_SIZE:
647 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
649 case DCCP_SOCKOPT_SERVICE:
650 return dccp_getsockopt_service(sk, len,
651 (__be32 __user *)optval, optlen);
652 case DCCP_SOCKOPT_GET_CUR_MPS:
653 val = dp->dccps_mss_cache;
656 case DCCP_SOCKOPT_SEND_CSCOV:
657 val = dp->dccps_pcslen;
660 case DCCP_SOCKOPT_RECV_CSCOV:
661 val = dp->dccps_pcrlen;
665 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
666 len, (u32 __user *)optval, optlen);
668 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
669 len, (u32 __user *)optval, optlen);
674 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
/*
 * dccp_getsockopt - getsockopt entry point for DCCP sockets
 *
 * Options at a level other than SOL_DCCP are forwarded to the address
 * family's getsockopt handler (icsk_af_ops); SOL_DCCP options are handled by
 * do_dccp_getsockopt().  Returns whatever the chosen handler returns.
 */
680 int dccp_getsockopt(struct sock *sk, int level, int optname,
681 char __user *optval, int __user *optlen)
683 if (level != SOL_DCCP)
684 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
687 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
690 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * compat_dccp_getsockopt - compat variant of dccp_getsockopt()
 *
 * Same dispatch as dccp_getsockopt(), except that non-SOL_DCCP levels go to
 * inet_csk_compat_getsockopt().
 */
693 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
694 char __user *optval, int __user *optlen)
696 if (level != SOL_DCCP)
697 return inet_csk_compat_getsockopt(sk, level, optname,
699 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
702 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_sendmsg - queue one datagram for transmission
 * @iocb: kernel I/O control block
 * @sk:   sending socket
 * @msg:  message header; msg_flags and msg_iov are consumed here
 *
 * Steps:
 *  1. The payload length is checked against the cached MSS
 *     (dccps_mss_cache).
 *  2. If sysctl_dccp_tx_qlen is non-zero, the current sk_write_queue length
 *     is checked against it.
 *  3. Unless the socket is in OPEN or PARTOPEN, wait for the connection to
 *     complete with sk_stream_wait_connect(), honouring the send timeout
 *     (MSG_DONTWAIT selects non-blocking behaviour).
 *  4. Allocate an skb of max_header + len bytes, reserve the header room and
 *     copy the payload from the iovec.
 *  5. Append the skb to sk_write_queue and kick transmission with
 *     dccp_write_xmit(sk, 0).
 */
705 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
708 const struct dccp_sock *dp = dccp_sk(sk);
709 const int flags = msg->msg_flags;
710 const int noblock = flags & MSG_DONTWAIT;
715 if (len > dp->dccps_mss_cache)
720 if (sysctl_dccp_tx_qlen &&
721 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
726 timeo = sock_sndtimeo(sk, noblock);
729 * We have to use sk_stream_wait_connect here to set sk_write_pending,
730 * so that the trick in dccp_rcv_request_sent_state_process.
732 /* Wait for a connection to finish. */
733 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
734 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
737 size = sk->sk_prot->max_header + len;
739 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
744 skb_reserve(skb, sk->sk_prot->max_header);
745 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
749 skb_queue_tail(&sk->sk_write_queue, skb);
750 dccp_write_xmit(sk,0);
759 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one datagram from a DCCP socket
 * @iocb:     kernel I/O control block
 * @sk:       receiving socket
 * @msg:      message header; data is copied into msg_iov
 * @len:      size of the user buffer
 * @nonblock: non-zero for a non-blocking receive
 * @flags:    MSG_* flags; MSG_PEEK leaves the packet queued
 * @addr_len: not referenced by the statements below
 *
 * Listening sockets are checked for up front.  The function then examines
 * the packet at the head of sk_receive_queue:
 *  - data-carrying packets (e.g. DCCP_PKT_DATAACK) are delivered;
 *  - close-type packets (e.g. DCCP_PKT_CLOSEREQ) trigger
 *    dccp_finish_passive_close() unless MSG_PEEK is set;
 *  - other packet types are logged and discarded with sk_eat_skb().
 * With an empty queue it checks SOCK_DONE, a pending socket error,
 * RCV_SHUTDOWN, the DCCP_CLOSED state and pending signals before sleeping in
 * sk_wait_data() with the receive timeout.
 *
 * On delivery MSG_TRUNC is set in msg->msg_flags if the buffer is smaller
 * than the packet, the payload is copied with skb_copy_datagram_iovec(), and
 * the packet is consumed unless MSG_PEEK was requested.
 */
761 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
762 size_t len, int nonblock, int flags, int *addr_len)
764 const struct dccp_hdr *dh;
769 if (sk->sk_state == DCCP_LISTEN) {
774 timeo = sock_rcvtimeo(sk, nonblock);
777 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
780 goto verify_sock_status;
784 switch (dh->dccph_type) {
786 case DCCP_PKT_DATAACK:
790 case DCCP_PKT_CLOSEREQ:
791 if (!(flags & MSG_PEEK))
792 dccp_finish_passive_close(sk);
795 dccp_pr_debug("found fin (%s) ok!\n",
796 dccp_packet_name(dh->dccph_type));
800 dccp_pr_debug("packet_type=%s\n",
801 dccp_packet_name(dh->dccph_type));
802 sk_eat_skb(sk, skb, 0);
805 if (sock_flag(sk, SOCK_DONE)) {
811 len = sock_error(sk);
815 if (sk->sk_shutdown & RCV_SHUTDOWN) {
820 if (sk->sk_state == DCCP_CLOSED) {
821 if (!sock_flag(sk, SOCK_DONE)) {
822 /* This occurs when user tries to read
823 * from never connected socket.
837 if (signal_pending(current)) {
838 len = sock_intr_errno(timeo);
842 sk_wait_data(sk, &timeo);
847 else if (len < skb->len)
848 msg->msg_flags |= MSG_TRUNC;
850 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
851 /* Exception. Bailout! */
856 if (!(flags & MSG_PEEK))
857 sk_eat_skb(sk, skb, 0);
865 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen() handler for DCCP sockets
 * @sock:    socket to put into listening mode
 * @backlog: maximum accept backlog
 *
 * Requires an unconnected socket (SS_UNCONNECTED) of type SOCK_DCCP whose
 * protocol state is DCCP_CLOSED or DCCP_LISTEN.  A socket that is not yet
 * listening is started with dccp_listen_start(); for one that already is,
 * only the backlog is adjusted.  sk_max_ack_backlog is set to @backlog.
 */
867 int inet_dccp_listen(struct socket *sock, int backlog)
869 struct sock *sk = sock->sk;
870 unsigned char old_state;
876 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
879 old_state = sk->sk_state;
880 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
883 /* Really, if the socket is already in listen state
884 * we can only allow the backlog to be adjusted.
886 if (old_state != DCCP_LISTEN) {
888 * FIXME: here it probably should be sk->sk_prot->listen_start
889 * see tcp_listen_start
891 err = dccp_listen_start(sk, backlog);
895 sk->sk_max_ack_backlog = backlog;
903 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection - start closing a connection from dccp_close()
 * @sk: socket being closed
 *
 * Dispatches on sk->sk_state:
 *  - DCCP_PASSIVE_CLOSE / DCCP_PASSIVE_CLOSEREQ: finish the peer-initiated
 *    close via dccp_finish_passive_close().
 *  - otherwise an active close is started: the delayed-ack timer (used as
 *    the PARTOPEN timer, per the debug message) may be stopped, a Close is
 *    sent with the active flag set, and the next state is
 *    DCCP_ACTIVE_CLOSEREQ for a server or DCCP_CLOSING otherwise.
 * The default next state is DCCP_CLOSED.
 */
905 static void dccp_terminate_connection(struct sock *sk)
907 u8 next_state = DCCP_CLOSED;
909 switch (sk->sk_state) {
910 case DCCP_PASSIVE_CLOSE:
911 case DCCP_PASSIVE_CLOSEREQ:
912 dccp_finish_passive_close(sk);
915 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
916 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
919 dccp_send_close(sk, 1);
921 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
922 next_state = DCCP_ACTIVE_CLOSEREQ;
924 next_state = DCCP_CLOSING;
927 dccp_set_state(sk, next_state);
/*
 * dccp_close - close() handler for DCCP sockets
 * @sk:      socket being closed
 * @timeout: linger time handed to sk_stream_wait_close()
 *
 * Marks both directions shut down, then:
 *  - a listening socket is moved to DCCP_CLOSED, its listen state torn down
 *    (inet_csk_listen_stop()) and control jumps to adjudge_to_death;
 *  - otherwise the transmit timer is stopped and the receive queue is
 *    drained while counting the unread bytes:
 *      * unread data: warn, send Reset with code Aborted, go to DCCP_CLOSED;
 *      * zero linger (SOCK_LINGER with sk_lingertime == 0): call the
 *        protocol's disconnect();
 *      * any other non-closed state: dccp_terminate_connection().
 *    sk_stream_wait_close() then waits up to @timeout.
 *
 * Finally the state is sampled, the protocol's orphan count is incremented
 * and, if the socket has reached DCCP_CLOSED, it is destroyed with
 * inet_csk_destroy_sock(); otherwise it lingers until the protocol finishes
 * the close.
 */
931 void dccp_close(struct sock *sk, long timeout)
933 struct dccp_sock *dp = dccp_sk(sk);
935 u32 data_was_unread = 0;
940 sk->sk_shutdown = SHUTDOWN_MASK;
942 if (sk->sk_state == DCCP_LISTEN) {
943 dccp_set_state(sk, DCCP_CLOSED);
946 inet_csk_listen_stop(sk);
948 goto adjudge_to_death;
951 sk_stop_timer(sk, &dp->dccps_xmit_timer);
954 * We need to flush the recv. buffs. We do this only on the
955 * descriptor close, not protocol-sourced closes, because the
956 *reader process may not have drained the data yet!
958 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
959 data_was_unread += skb->len;
963 if (data_was_unread) {
964 /* Unread data was tossed, send an appropriate Reset Code */
965 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
966 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
967 dccp_set_state(sk, DCCP_CLOSED);
968 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
969 /* Check zero linger _after_ checking for unread data. */
970 sk->sk_prot->disconnect(sk, 0);
971 } else if (sk->sk_state != DCCP_CLOSED) {
972 dccp_terminate_connection(sk);
975 sk_stream_wait_close(sk, timeout);
978 state = sk->sk_state;
981 atomic_inc(sk->sk_prot->orphan_count);
984 * It is the last release_sock in its life. It will remove backlog.
988 * Now socket is owned by kernel and we acquire BH lock
989 * to finish close. No need to check for user refs.
993 BUG_TRAP(!sock_owned_by_user(sk));
995 /* Have we already been destroyed by a softirq or backlog? */
996 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
999 if (sk->sk_state == DCCP_CLOSED)
1000 inet_csk_destroy_sock(sk);
1002 /* Otherwise, socket is reprieved until protocol close. */
1010 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - shutdown() handler for DCCP sockets
 * @sk:  socket
 * @how: shutdown mode requested by the caller
 *
 * Currently only logs the request via dccp_pr_debug(); no protocol action
 * is taken.
 */
1012 void dccp_shutdown(struct sock *sk, int how)
1014 dccp_pr_debug("called shutdown(%x)\n", how);
1017 EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * dccp_mib_init - allocate the per-CPU DCCP MIB statistics
 *
 * Allocates dccp_statistics[0] and dccp_statistics[1] with alloc_percpu().
 * If the second allocation fails, the first is freed again and its pointer
 * reset to NULL, so no partial allocation is left behind.
 */
1019 static int __init dccp_mib_init(void)
1023 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1024 if (dccp_statistics[0] == NULL)
1027 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1028 if (dccp_statistics[1] == NULL)
1035 free_percpu(dccp_statistics[0]);
1036 dccp_statistics[0] = NULL;
/*
 * dccp_mib_exit - free the per-CPU DCCP MIB statistics
 *
 * Releases both slots allocated by dccp_mib_init() and resets the pointers
 * to NULL.
 */
1041 static void dccp_mib_exit(void)
1043 free_percpu(dccp_statistics[0]);
1044 free_percpu(dccp_statistics[1]);
1045 dccp_statistics[0] = dccp_statistics[1] = NULL;
/*
 * Module parameters (both read-only in sysfs, mode 0444):
 *  - thash_entries: number of established-hash buckets; used by dccp_init()
 *    to derive the allocation goal for the ehash table.
 *  - dccp_debug (only with CONFIG_IP_DCCP_DEBUG): enables debug messages;
 *    also exported GPL-only.
 */
1048 static int thash_entries;
1049 module_param(thash_entries, int, 0444);
1050 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1052 #ifdef CONFIG_IP_DCCP_DEBUG
1054 module_param(dccp_debug, bool, 0444);
1055 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1057 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module initialisation for the DCCP core
 *
 * Sets up, in order:
 *  1. the "dccp_bind_bucket" slab cache;
 *  2. the established hash table (ehash): the page goal is derived from
 *     num_physpages (shift 21 - PAGE_SHIFT at or above 128 * 1024 pages,
 *     23 - PAGE_SHIFT otherwise) or from the thash_entries parameter; pages
 *     are requested with GFP_ATOMIC, retrying at smaller orders, and
 *     ehash_size is decremented until it is a power of two; every chain and
 *     twchain head is initialised;
 *  3. the ehash locks;
 *  4. the bind hash table (bhash), starting from the ehash order and
 *     retrying at smaller orders; each bucket's lock and chain are
 *     initialised;
 *  5. MIB statistics, ack vectors, sysctls and timestamping.
 *
 * On failure the out_* labels unwind what was set up, in reverse order
 * (bhash pages, ehash locks, ehash pages, slab cache), resetting the
 * pointers to NULL.
 *
 * NOTE(review): the ehash retry loop stops at `--ehash_order > 0' while the
 * bhash loop uses `--bhash_order >= 0', so an order-0 ehash allocation is
 * never retried - confirm this asymmetry is intended.
 */
1060 static int __init dccp_init(void)
1063 int ehash_order, bhash_order, i;
1066 dccp_hashinfo.bind_bucket_cachep =
1067 kmem_cache_create("dccp_bind_bucket",
1068 sizeof(struct inet_bind_bucket), 0,
1069 SLAB_HWCACHE_ALIGN, NULL);
1070 if (!dccp_hashinfo.bind_bucket_cachep)
1074 * Size and allocate the main established and bind bucket
1077 * The methodology is similar to that of the buffer cache.
1079 if (num_physpages >= (128 * 1024))
1080 goal = num_physpages >> (21 - PAGE_SHIFT);
1082 goal = num_physpages >> (23 - PAGE_SHIFT);
1085 goal = (thash_entries *
1086 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1087 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1090 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1091 sizeof(struct inet_ehash_bucket);
1092 while (dccp_hashinfo.ehash_size &
1093 (dccp_hashinfo.ehash_size - 1))
1094 dccp_hashinfo.ehash_size--;
1095 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1096 __get_free_pages(GFP_ATOMIC, ehash_order);
1097 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1099 if (!dccp_hashinfo.ehash) {
1100 DCCP_CRIT("Failed to allocate DCCP established hash table");
1101 goto out_free_bind_bucket_cachep;
1104 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1105 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1106 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1109 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1110 goto out_free_dccp_ehash;
1112 bhash_order = ehash_order;
1115 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1116 sizeof(struct inet_bind_hashbucket);
1117 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1120 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1121 __get_free_pages(GFP_ATOMIC, bhash_order);
1122 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1124 if (!dccp_hashinfo.bhash) {
1125 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1126 goto out_free_dccp_locks;
1129 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1130 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1131 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1134 rc = dccp_mib_init();
1136 goto out_free_dccp_bhash;
1138 rc = dccp_ackvec_init();
1140 goto out_free_dccp_mib;
1142 rc = dccp_sysctl_init();
1144 goto out_ackvec_exit;
1146 dccp_timestamping_init();
1153 out_free_dccp_bhash:
1154 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1155 dccp_hashinfo.bhash = NULL;
1156 out_free_dccp_locks:
1157 inet_ehash_locks_free(&dccp_hashinfo);
1158 out_free_dccp_ehash:
1159 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1160 dccp_hashinfo.ehash = NULL;
1161 out_free_bind_bucket_cachep:
1162 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1163 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini - module teardown for the DCCP core
 *
 * Frees the bind and established hash tables (the page order is recomputed
 * with get_order() from table size times bucket size), then the ehash locks,
 * and finally destroys the bind-bucket slab cache.
 */
1167 static void __exit dccp_fini(void)
1170 free_pages((unsigned long)dccp_hashinfo.bhash,
1171 get_order(dccp_hashinfo.bhash_size *
1172 sizeof(struct inet_bind_hashbucket)));
1173 free_pages((unsigned long)dccp_hashinfo.ehash,
1174 get_order(dccp_hashinfo.ehash_size *
1175 sizeof(struct inet_ehash_bucket)));
1176 inet_ehash_locks_free(&dccp_hashinfo);
1177 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/*
 * Module registration: dccp_init() runs at load, dccp_fini() at unload.
 * The module is GPL-licensed.
 */
1182 module_init(dccp_init);
1183 module_exit(dccp_fini);
1185 MODULE_LICENSE("GPL");
1186 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1187 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");