/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
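
/*
 * Example (illustrative; the sysctl path net.dccp.default.tx_qlen is an
 * assumption, not something defined in this file): the limit above can
 * normally be tuned at runtime instead of rebuilding the module, e.g.
 *
 *	sysctl -w net.dccp.default.tx_qlen=20
 *	sysctl -w net.dccp.default.tx_qlen=0	(0 removes the cap)
 */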

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]     = "DATA",
		[DCCP_PKT_ACK]      = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]     = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "UNKNOWN";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]		= "OPEN",
		[DCCP_REQUESTING]	= "REQUESTING",
		[DCCP_PARTOPEN]		= "PARTOPEN",
		[DCCP_LISTEN]		= "LISTEN",
		[DCCP_RESPOND]		= "RESPOND",
		[DCCP_CLOSING]		= "CLOSING",
		[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
		[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
		[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
		[DCCP_TIME_WAIT]	= "TIME_WAIT",
		[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else { /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
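
/*
 * Usage sketch (userspace, illustrative; the service code 42 and the addr
 * variable below are placeholders, not values defined in this file): a
 * client sets the service code, a __be32 in network byte order, before
 * connect():
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 */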

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
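
/*
 * Usage sketch (userspace, illustrative): DCCP_SOCKOPT_SEND_CSCOV restricts
 * the checksum coverage of outgoing packets, while DCCP_SOCKOPT_RECV_CSCOV
 * registers the minimum coverage this endpoint is willing to accept on
 * incoming packets; 0 keeps full coverage, 1..15 selects partial coverage
 * per RFC 4340, 9.2. Assuming fd is a SOCK_DCCP socket:
 *
 *	int cscov = 4;
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV, &cscov, sizeof(cscov));
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV, &cscov, sizeof(cscov));
 */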

static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = kmalloc(optlen, GFP_KERNEL);
	if (val == NULL)
		return -ENOMEM;

	if (copy_from_user(val, optval, optlen)) {
		kfree(val);
		return -EFAULT;
	}

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
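
/*
 * Usage sketch (userspace, illustrative): the CCID options take an array of
 * one or more CCID numbers in order of preference; DCCP_SOCKOPT_CCID
 * registers the list for both halves of the connection (TX and RX), as the
 * code above shows. Preferring CCID-3 over CCID-2, assuming fd is a
 * SOCK_DCCP socket, could look like:
 *
 *	uint8_t ccids[] = { 3, 2 };
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, ccids, sizeof(ccids));
 */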

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
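
/*
 * Usage sketch (userspace, illustrative): simple read-only values such as the
 * current maximum packet size come back as an int, while
 * DCCP_SOCKOPT_AVAILABLE_CCIDS fills a caller-supplied byte array with the
 * CCIDs built into this kernel. Assuming fd is a SOCK_DCCP socket:
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 *
 *	uint8_t ccids[16];
 *	len = sizeof(ccids);
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_AVAILABLE_CCIDS, ccids, &len);
 */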

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process() works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
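
/*
 * Usage sketch (userspace, illustrative; port 5001 and service code 42 are
 * arbitrary placeholders): a minimal passive DCCP endpoint follows the usual
 * socket()/bind()/listen()/accept() sequence, and the listen() call on a
 * SOCK_DCCP socket is expected to end up in inet_dccp_listen() above.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	struct sockaddr_in addr = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(5001),
 *		.sin_addr   = { .s_addr = htonl(INADDR_ANY) },
 *	};
 *	uint32_t service = htonl(42);
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	int conn = accept(fd, NULL, NULL);
 */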

static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	percpu_counter_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();

	return 0;

out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	return rc;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");