linux-2.6-omap-h63xx.git: net/dccp/proto.c
[DCCP]: Add sysctls to control retransmission behaviour
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 void dccp_set_state(struct sock *sk, const int state)
56 {
57         const int oldstate = sk->sk_state;
58
59         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
60                       dccp_role(sk), sk,
61                       dccp_state_name(oldstate), dccp_state_name(state));
62         WARN_ON(state == oldstate);
63
64         switch (state) {
65         case DCCP_OPEN:
66                 if (oldstate != DCCP_OPEN)
67                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
68                 break;
69
70         case DCCP_CLOSED:
71                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(&dccp_hashinfo, sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 void dccp_done(struct sock *sk)
93 {
94         dccp_set_state(sk, DCCP_CLOSED);
95         dccp_clear_xmit_timers(sk);
96
97         sk->sk_shutdown = SHUTDOWN_MASK;
98
99         if (!sock_flag(sk, SOCK_DEAD))
100                 sk->sk_state_change(sk);
101         else
102                 inet_csk_destroy_sock(sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_done);
106
107 const char *dccp_packet_name(const int type)
108 {
109         static const char *dccp_packet_names[] = {
110                 [DCCP_PKT_REQUEST]  = "REQUEST",
111                 [DCCP_PKT_RESPONSE] = "RESPONSE",
112                 [DCCP_PKT_DATA]     = "DATA",
113                 [DCCP_PKT_ACK]      = "ACK",
114                 [DCCP_PKT_DATAACK]  = "DATAACK",
115                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
116                 [DCCP_PKT_CLOSE]    = "CLOSE",
117                 [DCCP_PKT_RESET]    = "RESET",
118                 [DCCP_PKT_SYNC]     = "SYNC",
119                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
120         };
121
122         if (type >= DCCP_NR_PKT_TYPES)
123                 return "INVALID";
124         else
125                 return dccp_packet_names[type];
126 }
127
128 EXPORT_SYMBOL_GPL(dccp_packet_name);
129
130 const char *dccp_state_name(const int state)
131 {
132         static char *dccp_state_names[] = {
133         [DCCP_OPEN]       = "OPEN",
134         [DCCP_REQUESTING] = "REQUESTING",
135         [DCCP_PARTOPEN]   = "PARTOPEN",
136         [DCCP_LISTEN]     = "LISTEN",
137         [DCCP_RESPOND]    = "RESPOND",
138         [DCCP_CLOSING]    = "CLOSING",
139         [DCCP_TIME_WAIT]  = "TIME_WAIT",
140         [DCCP_CLOSED]     = "CLOSED",
141         };
142
143         if (state >= DCCP_MAX_STATES)
144                 return "INVALID STATE!";
145         else
146                 return dccp_state_names[state];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_state_name);
150
151 void dccp_hash(struct sock *sk)
152 {
153         inet_hash(&dccp_hashinfo, sk);
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_hash);
157
158 void dccp_unhash(struct sock *sk)
159 {
160         inet_unhash(&dccp_hashinfo, sk);
161 }
162
163 EXPORT_SYMBOL_GPL(dccp_unhash);
164
165 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
166 {
167         struct dccp_sock *dp = dccp_sk(sk);
168         struct dccp_minisock *dmsk = dccp_msk(sk);
169         struct inet_connection_sock *icsk = inet_csk(sk);
170
171         dccp_minisock_init(&dp->dccps_minisock);
172         do_gettimeofday(&dp->dccps_epoch);
173
174         /*
175          * FIXME: We're hardcoding the CCID, and doing this at this point makes
176          * the listening (master) sock get CCID control blocks, which is not
177          * necessary, but for now, to not mess with the test userspace apps,
178          * lets leave it here, later the real solution is to do this in a
179          * let's leave it here; later the real solution is to do this in a
180          */
181         if (likely(ctl_sock_initialized)) {
182                 int rc = dccp_feat_init(dmsk);
183
184                 if (rc)
185                         return rc;
186
187                 if (dmsk->dccpms_send_ack_vector) {
188                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
189                         if (dp->dccps_hc_rx_ackvec == NULL)
190                                 return -ENOMEM;
191                 }
192                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
193                                                       sk, GFP_KERNEL);
194                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
195                                                       sk, GFP_KERNEL);
196                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
197                              dp->dccps_hc_tx_ccid == NULL)) {
198                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
199                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
200                         if (dmsk->dccpms_send_ack_vector) {
201                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                                 dp->dccps_hc_rx_ackvec = NULL;
203                         }
204                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
205                         return -ENOMEM;
206                 }
207         } else {
208                 /* control socket doesn't need feat nego */
209                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
210                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
211         }
212
213         dccp_init_xmit_timers(sk);
214         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
215         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
216         sk->sk_state            = DCCP_CLOSED;
217         sk->sk_write_space      = dccp_write_space;
218         icsk->icsk_sync_mss     = dccp_sync_mss;
219         dp->dccps_mss_cache     = 536;
220         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
221         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
222         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
223
224         return 0;
225 }
226
227 EXPORT_SYMBOL_GPL(dccp_init_sock);
228
229 int dccp_destroy_sock(struct sock *sk)
230 {
231         struct dccp_sock *dp = dccp_sk(sk);
232         struct dccp_minisock *dmsk = dccp_msk(sk);
233
234         /*
235          * DCCP doesn't use sk_write_queue, just sk_send_head
236          * for retransmissions
237          */
238         if (sk->sk_send_head != NULL) {
239                 kfree_skb(sk->sk_send_head);
240                 sk->sk_send_head = NULL;
241         }
242
243         /* Clean up a referenced DCCP bind bucket. */
244         if (inet_csk(sk)->icsk_bind_hash != NULL)
245                 inet_put_port(&dccp_hashinfo, sk);
246
247         kfree(dp->dccps_service_list);
248         dp->dccps_service_list = NULL;
249
250         if (dmsk->dccpms_send_ack_vector) {
251                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
252                 dp->dccps_hc_rx_ackvec = NULL;
253         }
254         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
255         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
256         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
257
258         /* clean up feature negotiation state */
259         dccp_feat_clean(dmsk);
260
261         return 0;
262 }
263
264 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
265
266 static inline int dccp_listen_start(struct sock *sk, int backlog)
267 {
268         struct dccp_sock *dp = dccp_sk(sk);
269
270         dp->dccps_role = DCCP_ROLE_LISTEN;
271         return inet_csk_listen_start(sk, backlog);
272 }
273
274 int dccp_disconnect(struct sock *sk, int flags)
275 {
276         struct inet_connection_sock *icsk = inet_csk(sk);
277         struct inet_sock *inet = inet_sk(sk);
278         int err = 0;
279         const int old_state = sk->sk_state;
280
281         if (old_state != DCCP_CLOSED)
282                 dccp_set_state(sk, DCCP_CLOSED);
283
284         /* ABORT function of RFC793 */
285         if (old_state == DCCP_LISTEN) {
286                 inet_csk_listen_stop(sk);
287         /* FIXME: do the active reset thing */
288         } else if (old_state == DCCP_REQUESTING)
289                 sk->sk_err = ECONNRESET;
290
291         dccp_clear_xmit_timers(sk);
292         __skb_queue_purge(&sk->sk_receive_queue);
293         if (sk->sk_send_head != NULL) {
294                 __kfree_skb(sk->sk_send_head);
295                 sk->sk_send_head = NULL;
296         }
297
298         inet->dport = 0;
299
300         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
301                 inet_reset_saddr(sk);
302
303         sk->sk_shutdown = 0;
304         sock_reset_flag(sk, SOCK_DONE);
305
306         icsk->icsk_backoff = 0;
307         inet_csk_delack_init(sk);
308         __sk_dst_reset(sk);
309
310         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
311
312         sk->sk_error_report(sk);
313         return err;
314 }
315
316 EXPORT_SYMBOL_GPL(dccp_disconnect);
317
318 /*
319  *      Wait for a DCCP event.
320  *
321  *      Note that we don't need to lock the socket, as the upper poll layers
322  *      take care of normal races (between the test and the event) and we don't
323  *      go look at any of the socket buffers directly.
324  */
325 unsigned int dccp_poll(struct file *file, struct socket *sock,
326                        poll_table *wait)
327 {
328         unsigned int mask;
329         struct sock *sk = sock->sk;
330
331         poll_wait(file, sk->sk_sleep, wait);
332         if (sk->sk_state == DCCP_LISTEN)
333                 return inet_csk_listen_poll(sk);
334
335         /* Socket is not locked. We are protected from async events
336            by poll logic, and correct handling of state changes
337            made by other threads is impossible in any case.
338          */
339
340         mask = 0;
341         if (sk->sk_err)
342                 mask = POLLERR;
343
344         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
345                 mask |= POLLHUP;
346         if (sk->sk_shutdown & RCV_SHUTDOWN)
347                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
348
349         /* Connected? */
350         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
351                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
352                         mask |= POLLIN | POLLRDNORM;
353
354                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
355                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
356                                 mask |= POLLOUT | POLLWRNORM;
357                         } else {  /* send SIGIO later */
358                                 set_bit(SOCK_ASYNC_NOSPACE,
359                                         &sk->sk_socket->flags);
360                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
361
362                                 /* Race breaker. If space is freed after
363                                  * wspace test but before the flags are set,
364                                  * IO signal will be lost.
365                                  */
366                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
367                                         mask |= POLLOUT | POLLWRNORM;
368                         }
369                 }
370         }
371         return mask;
372 }
373
374 EXPORT_SYMBOL_GPL(dccp_poll);
375
376 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
377 {
378         dccp_pr_debug("entry\n");
379         return -ENOIOCTLCMD;
380 }
381
382 EXPORT_SYMBOL_GPL(dccp_ioctl);
383
384 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
385                                    char __user *optval, int optlen)
386 {
387         struct dccp_sock *dp = dccp_sk(sk);
388         struct dccp_service_list *sl = NULL;
389
390         if (service == DCCP_SERVICE_INVALID_VALUE || 
391             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
392                 return -EINVAL;
393
394         if (optlen > sizeof(service)) {
395                 sl = kmalloc(optlen, GFP_KERNEL);
396                 if (sl == NULL)
397                         return -ENOMEM;
398
399                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
400                 if (copy_from_user(sl->dccpsl_list,
401                                    optval + sizeof(service),
402                                    optlen - sizeof(service)) ||
403                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
404                         kfree(sl);
405                         return -EFAULT;
406                 }
407         }
408
409         lock_sock(sk);
410         dp->dccps_service = service;
411
412         kfree(dp->dccps_service_list);
413
414         dp->dccps_service_list = sl;
415         release_sock(sk);
416         return 0;
417 }
418
419 /* Byte 1 is the feature number; the rest is the preference list. */
420 static int dccp_setsockopt_change(struct sock *sk, int type,
421                                   struct dccp_so_feat __user *optval)
422 {
423         struct dccp_so_feat opt;
424         u8 *val;
425         int rc;
426
427         if (copy_from_user(&opt, optval, sizeof(opt)))
428                 return -EFAULT;
429
430         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
431         if (!val)
432                 return -ENOMEM;
433
434         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
435                 rc = -EFAULT;
436                 goto out_free_val;
437         }
438
439         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
440                               val, opt.dccpsf_len, GFP_KERNEL);
441         if (rc)
442                 goto out_free_val;
443
444 out:
445         return rc;
446
447 out_free_val:
448         kfree(val);
449         goto out;
450 }
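/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * how an application might hand a CCID preference list to the CHANGE_L
 * handling above.  The field names follow the struct dccp_so_feat usage
 * in dccp_setsockopt_change(); the CCID values and the error handling
 * are only examples.
 *
 *	uint8_t prefs[] = { 3, 2 };		// prefer CCID 3, then CCID 2
 *	struct dccp_so_feat opt = {
 *		.dccpsf_feat = DCCPF_CCID,	// the "feature" byte
 *		.dccpsf_val  = prefs,		// the preference list
 *		.dccpsf_len  = sizeof(prefs),
 *	};
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L,
 *		       &opt, sizeof(opt)) < 0)
 *		perror("DCCP_SOCKOPT_CHANGE_L");
 */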
451
452 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
453                 char __user *optval, int optlen)
454 {
455         struct dccp_sock *dp;
456         int err;
457         int val;
458
459         if (optlen < sizeof(int))
460                 return -EINVAL;
461
462         if (get_user(val, (int __user *)optval))
463                 return -EFAULT;
464
465         if (optname == DCCP_SOCKOPT_SERVICE)
466                 return dccp_setsockopt_service(sk, val, optval, optlen);
467
468         lock_sock(sk);
469         dp = dccp_sk(sk);
470         err = 0;
471
472         switch (optname) {
473         case DCCP_SOCKOPT_PACKET_SIZE:
474                 dp->dccps_packet_size = val;
475                 break;
476         case DCCP_SOCKOPT_CHANGE_L:
477                 if (optlen != sizeof(struct dccp_so_feat))
478                         err = -EINVAL;
479                 else
480                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
481                                                      (struct dccp_so_feat __user *)
482                                                      optval);
483                 break;
484         case DCCP_SOCKOPT_CHANGE_R:
485                 if (optlen != sizeof(struct dccp_so_feat))
486                         err = -EINVAL;
487                 else
488                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
489                                                      (struct dccp_so_feat __user *)
490                                                      optval);
491                 break;
492         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
493                 if (val < 0 || val > 15)
494                         err = -EINVAL;
495                 else
496                         dp->dccps_pcslen = val;
497                 break;
498         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
499                 if (val < 0 || val > 15)
500                         err = -EINVAL;
501                 else {
502                         dp->dccps_pcrlen = val;
503                         /* FIXME: add feature negotiation,
504                          * ChangeL(MinimumChecksumCoverage, val) */
505                 }
506                 break;
507         default:
508                 err = -ENOPROTOOPT;
509                 break;
510         }
511
512         release_sock(sk);
513         return err;
514 }
515
516 int dccp_setsockopt(struct sock *sk, int level, int optname,
517                     char __user *optval, int optlen)
518 {
519         if (level != SOL_DCCP)
520                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
521                                                              optname, optval,
522                                                              optlen);
523         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
524 }
525
526 EXPORT_SYMBOL_GPL(dccp_setsockopt);
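/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * requesting partial checksum coverage through the options handled in
 * do_dccp_setsockopt() above.  Per RFC 4340, sec. 9.2, a value of 0
 * covers the whole packet, while values 1..15 cover the header plus the
 * first (value - 1) * 4 bytes of payload.
 *
 *	int cscov = 4;				// header + first 12 bytes of payload
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));	// sender side, sec. 9.2
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV,
 *		   &cscov, sizeof(cscov));	// receiver side, sec. 9.2.1
 */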
527
528 #ifdef CONFIG_COMPAT
529 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
530                            char __user *optval, int optlen)
531 {
532         if (level != SOL_DCCP)
533                 return inet_csk_compat_setsockopt(sk, level, optname,
534                                                   optval, optlen);
535         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
536 }
537
538 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
539 #endif
540
541 static int dccp_getsockopt_service(struct sock *sk, int len,
542                                    __be32 __user *optval,
543                                    int __user *optlen)
544 {
545         const struct dccp_sock *dp = dccp_sk(sk);
546         const struct dccp_service_list *sl;
547         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
548
549         lock_sock(sk);
550         if ((sl = dp->dccps_service_list) != NULL) {
551                 slen = sl->dccpsl_nr * sizeof(u32);
552                 total_len += slen;
553         }
554
555         err = -EINVAL;
556         if (total_len > len)
557                 goto out;
558
559         err = 0;
560         if (put_user(total_len, optlen) ||
561             put_user(dp->dccps_service, optval) ||
562             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
563                 err = -EFAULT;
564 out:
565         release_sock(sk);
566         return err;
567 }
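/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * reading back the layout produced by dccp_getsockopt_service() above --
 * the primary service code first, then any additional entries, each a
 * 32-bit value, with *optlen set to the total size in bytes.
 *
 *	uint32_t codes[64];			// primary code + stored list
 *	socklen_t len = sizeof(codes);
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, &len) == 0)
 *		printf("primary service code %u (%d entries total)\n",
 *		       ntohl(codes[0]), (int)(len / sizeof(uint32_t)));
 */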
568
569 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
570                     char __user *optval, int __user *optlen)
571 {
572         struct dccp_sock *dp;
573         int val, len;
574
575         if (get_user(len, optlen))
576                 return -EFAULT;
577
578         if (len < sizeof(int))
579                 return -EINVAL;
580
581         dp = dccp_sk(sk);
582
583         switch (optname) {
584         case DCCP_SOCKOPT_PACKET_SIZE:
585                 val = dp->dccps_packet_size;
586                 len = sizeof(dp->dccps_packet_size);
587                 break;
588         case DCCP_SOCKOPT_SERVICE:
589                 return dccp_getsockopt_service(sk, len,
590                                                (__be32 __user *)optval, optlen);
591         case DCCP_SOCKOPT_SEND_CSCOV:
592                 val = dp->dccps_pcslen;
593                 break;
594         case DCCP_SOCKOPT_RECV_CSCOV:
595                 val = dp->dccps_pcrlen;
596                 break;
597         case 128 ... 191:
598                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
599                                              len, (u32 __user *)optval, optlen);
600         case 192 ... 255:
601                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
602                                              len, (u32 __user *)optval, optlen);
603         default:
604                 return -ENOPROTOOPT;
605         }
606
607         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
608                 return -EFAULT;
609
610         return 0;
611 }
612
613 int dccp_getsockopt(struct sock *sk, int level, int optname,
614                     char __user *optval, int __user *optlen)
615 {
616         if (level != SOL_DCCP)
617                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
618                                                              optname, optval,
619                                                              optlen);
620         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
621 }
622
623 EXPORT_SYMBOL_GPL(dccp_getsockopt);
624
625 #ifdef CONFIG_COMPAT
626 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
627                            char __user *optval, int __user *optlen)
628 {
629         if (level != SOL_DCCP)
630                 return inet_csk_compat_getsockopt(sk, level, optname,
631                                                   optval, optlen);
632         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
633 }
634
635 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
636 #endif
637
638 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
639                  size_t len)
640 {
641         const struct dccp_sock *dp = dccp_sk(sk);
642         const int flags = msg->msg_flags;
643         const int noblock = flags & MSG_DONTWAIT;
644         struct sk_buff *skb;
645         int rc, size;
646         long timeo;
647
648         if (len > dp->dccps_mss_cache)
649                 return -EMSGSIZE;
650
651         lock_sock(sk);
652         timeo = sock_sndtimeo(sk, noblock);
653
654         /*
655          * We have to use sk_stream_wait_connect here to set sk_write_pending,
656          * so that the trick in dccp_rcv_request_sent_state_process() works.
657          */
658         /* Wait for a connection to finish. */
659         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
660                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
661                         goto out_release;
662
663         size = sk->sk_prot->max_header + len;
664         release_sock(sk);
665         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
666         lock_sock(sk);
667         if (skb == NULL)
668                 goto out_release;
669
670         skb_reserve(skb, sk->sk_prot->max_header);
671         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
672         if (rc != 0)
673                 goto out_discard;
674
675         skb_queue_tail(&sk->sk_write_queue, skb);
676         dccp_write_xmit(sk, 0);
677 out_release:
678         release_sock(sk);
679         return rc ? : len;
680 out_discard:
681         kfree_skb(skb);
682         goto out_release;
683 }
684
685 EXPORT_SYMBOL_GPL(dccp_sendmsg);
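/*
 * Usage note (a sketch, not part of this file): each sendmsg()/send()
 * on a DCCP socket produces exactly one datagram, and dccp_sendmsg()
 * above rejects anything larger than the cached MSS with -EMSGSIZE
 * instead of fragmenting it, so a caller would do e.g.
 *
 *	if (send(fd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *		;	// shrink the application record and retry
 */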
686
687 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
688                  size_t len, int nonblock, int flags, int *addr_len)
689 {
690         const struct dccp_hdr *dh;
691         long timeo;
692
693         lock_sock(sk);
694
695         if (sk->sk_state == DCCP_LISTEN) {
696                 len = -ENOTCONN;
697                 goto out;
698         }
699
700         timeo = sock_rcvtimeo(sk, nonblock);
701
702         do {
703                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
704
705                 if (skb == NULL)
706                         goto verify_sock_status;
707
708                 dh = dccp_hdr(skb);
709
710                 if (dh->dccph_type == DCCP_PKT_DATA ||
711                     dh->dccph_type == DCCP_PKT_DATAACK)
712                         goto found_ok_skb;
713
714                 if (dh->dccph_type == DCCP_PKT_RESET ||
715                     dh->dccph_type == DCCP_PKT_CLOSE) {
716                         dccp_pr_debug("found fin ok!\n");
717                         len = 0;
718                         goto found_fin_ok;
719                 }
720                 dccp_pr_debug("packet_type=%s\n",
721                               dccp_packet_name(dh->dccph_type));
722                 sk_eat_skb(sk, skb, 0);
723 verify_sock_status:
724                 if (sock_flag(sk, SOCK_DONE)) {
725                         len = 0;
726                         break;
727                 }
728
729                 if (sk->sk_err) {
730                         len = sock_error(sk);
731                         break;
732                 }
733
734                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
735                         len = 0;
736                         break;
737                 }
738
739                 if (sk->sk_state == DCCP_CLOSED) {
740                         if (!sock_flag(sk, SOCK_DONE)) {
741                                 /* This occurs when the user tries to read
742                                  * from a never-connected socket.
743                                  */
744                                 len = -ENOTCONN;
745                                 break;
746                         }
747                         len = 0;
748                         break;
749                 }
750
751                 if (!timeo) {
752                         len = -EAGAIN;
753                         break;
754                 }
755
756                 if (signal_pending(current)) {
757                         len = sock_intr_errno(timeo);
758                         break;
759                 }
760
761                 sk_wait_data(sk, &timeo);
762                 continue;
763         found_ok_skb:
764                 if (len > skb->len)
765                         len = skb->len;
766                 else if (len < skb->len)
767                         msg->msg_flags |= MSG_TRUNC;
768
769                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
770                         /* Exception. Bailout! */
771                         len = -EFAULT;
772                         break;
773                 }
774         found_fin_ok:
775                 if (!(flags & MSG_PEEK))
776                         sk_eat_skb(sk, skb, 0);
777                 break;
778         } while (1);
779 out:
780         release_sock(sk);
781         return len;
782 }
783
784 EXPORT_SYMBOL_GPL(dccp_recvmsg);
785
786 int inet_dccp_listen(struct socket *sock, int backlog)
787 {
788         struct sock *sk = sock->sk;
789         unsigned char old_state;
790         int err;
791
792         lock_sock(sk);
793
794         err = -EINVAL;
795         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
796                 goto out;
797
798         old_state = sk->sk_state;
799         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
800                 goto out;
801
802         /* Really, if the socket is already in listen state
803          * we can only allow the backlog to be adjusted.
804          */
805         if (old_state != DCCP_LISTEN) {
806                 /*
807                  * FIXME: here it probably should be sk->sk_prot->listen_start
808                  * see tcp_listen_start
809                  */
810                 err = dccp_listen_start(sk, backlog);
811                 if (err)
812                         goto out;
813         }
814         sk->sk_max_ack_backlog = backlog;
815         err = 0;
816
817 out:
818         release_sock(sk);
819         return err;
820 }
821
822 EXPORT_SYMBOL_GPL(inet_dccp_listen);
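/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * a minimal passive DCCP endpoint whose listen() call ends up in
 * inet_dccp_listen() above.  The service code value and backlog are
 * arbitrary; addr is a struct sockaddr_in prepared by the caller.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 */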
823
824 static const unsigned char dccp_new_state[] = {
825         /* current state:   new state:      action:     */
826         [0]               = DCCP_CLOSED,
827         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
828         [DCCP_REQUESTING] = DCCP_CLOSED,
829         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
830         [DCCP_LISTEN]     = DCCP_CLOSED,
831         [DCCP_RESPOND]    = DCCP_CLOSED,
832         [DCCP_CLOSING]    = DCCP_CLOSED,
833         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
834         [DCCP_CLOSED]     = DCCP_CLOSED,
835 };
836
837 static int dccp_close_state(struct sock *sk)
838 {
839         const int next = dccp_new_state[sk->sk_state];
840         const int ns = next & DCCP_STATE_MASK;
841
842         if (ns != sk->sk_state)
843                 dccp_set_state(sk, ns);
844
845         return next & DCCP_ACTION_FIN;
846 }
847
848 void dccp_close(struct sock *sk, long timeout)
849 {
850         struct dccp_sock *dp = dccp_sk(sk);
851         struct sk_buff *skb;
852         int state;
853
854         lock_sock(sk);
855
856         sk->sk_shutdown = SHUTDOWN_MASK;
857
858         if (sk->sk_state == DCCP_LISTEN) {
859                 dccp_set_state(sk, DCCP_CLOSED);
860
861                 /* Special case. */
862                 inet_csk_listen_stop(sk);
863
864                 goto adjudge_to_death;
865         }
866
867         sk_stop_timer(sk, &dp->dccps_xmit_timer);
868
869         /*
870          * We need to flush the recv. buffs.  We do this only on the
871          * descriptor close, not protocol-sourced closes, because the
872          * reader process may not have drained the data yet!
873          */
874         /* FIXME: check for unread data */
875         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
876                 __kfree_skb(skb);
877         }
878
879         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
880                 /* Check zero linger _after_ checking for unread data. */
881                 sk->sk_prot->disconnect(sk, 0);
882         } else if (dccp_close_state(sk)) {
883                 dccp_send_close(sk, 1);
884         }
885
886         sk_stream_wait_close(sk, timeout);
887
888 adjudge_to_death:
889         state = sk->sk_state;
890         sock_hold(sk);
891         sock_orphan(sk);
892         atomic_inc(sk->sk_prot->orphan_count);
893
894         /*
895          * This is the last release_sock in this socket's life. It will remove the backlog.
896          */
897         release_sock(sk);
898         /*
899          * Now socket is owned by kernel and we acquire BH lock
900          * to finish close. No need to check for user refs.
901          */
902         local_bh_disable();
903         bh_lock_sock(sk);
904         BUG_TRAP(!sock_owned_by_user(sk));
905
906         /* Have we already been destroyed by a softirq or backlog? */
907         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
908                 goto out;
909
910         /*
911          * The last release_sock may have processed the CLOSE or RESET
912          * packet moving sock to CLOSED state, if not we have to fire
913          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
914          * in draft-ietf-dccp-spec-11. -acme
915          */
916         if (sk->sk_state == DCCP_CLOSING) {
917                 /* FIXME: should start at 2 * RTT */
918                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
919                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
920                                           inet_csk(sk)->icsk_rto,
921                                           DCCP_RTO_MAX);
922 #if 0
923                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
924                 dccp_set_state(sk, DCCP_CLOSED);
925 #endif
926         }
927
928         if (sk->sk_state == DCCP_CLOSED)
929                 inet_csk_destroy_sock(sk);
930
931         /* Otherwise, socket is reprieved until protocol close. */
932
933 out:
934         bh_unlock_sock(sk);
935         local_bh_enable();
936         sock_put(sk);
937 }
938
939 EXPORT_SYMBOL_GPL(dccp_close);
940
941 void dccp_shutdown(struct sock *sk, int how)
942 {
943         dccp_pr_debug("entry\n");
944 }
945
946 EXPORT_SYMBOL_GPL(dccp_shutdown);
947
948 static int __init dccp_mib_init(void)
949 {
950         int rc = -ENOMEM;
951
952         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
953         if (dccp_statistics[0] == NULL)
954                 goto out;
955
956         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
957         if (dccp_statistics[1] == NULL)
958                 goto out_free_one;
959
960         rc = 0;
961 out:
962         return rc;
963 out_free_one:
964         free_percpu(dccp_statistics[0]);
965         dccp_statistics[0] = NULL;
966         goto out;
967
968 }
969
970 static void dccp_mib_exit(void)
971 {
972         free_percpu(dccp_statistics[0]);
973         free_percpu(dccp_statistics[1]);
974         dccp_statistics[0] = dccp_statistics[1] = NULL;
975 }
976
977 static int thash_entries;
978 module_param(thash_entries, int, 0444);
979 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
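/*
 * Example (assuming DCCP is built as the dccp.ko module): the ehash
 * sizing done in dccp_init() below can be overridden at load time, e.g.
 *
 *	modprobe dccp thash_entries=8192
 */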
980
981 #ifdef CONFIG_IP_DCCP_DEBUG
982 int dccp_debug;
983 module_param(dccp_debug, int, 0444);
984 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
985
986 EXPORT_SYMBOL_GPL(dccp_debug);
987 #endif
988
989 static int __init dccp_init(void)
990 {
991         unsigned long goal;
992         int ehash_order, bhash_order, i;
993         int rc = -ENOBUFS;
994
995         dccp_hashinfo.bind_bucket_cachep =
996                 kmem_cache_create("dccp_bind_bucket",
997                                   sizeof(struct inet_bind_bucket), 0,
998                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
999         if (!dccp_hashinfo.bind_bucket_cachep)
1000                 goto out;
1001
1002         /*
1003          * Size and allocate the main established and bind bucket
1004          * hash tables.
1005          *
1006          * The methodology is similar to that of the buffer cache.
1007          */
1008         if (num_physpages >= (128 * 1024))
1009                 goal = num_physpages >> (21 - PAGE_SHIFT);
1010         else
1011                 goal = num_physpages >> (23 - PAGE_SHIFT);
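        /*
         * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
         * a box with 512 MiB of RAM has num_physpages == 128 * 1024, so
         * goal = num_physpages >> 9 = 256 pages, i.e. roughly 1 MiB is
         * set aside for the established hash table; smaller boxes take
         * the >> (23 - PAGE_SHIFT) branch and get a quarter of that
         * proportion.
         */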
1012
1013         if (thash_entries)
1014                 goal = (thash_entries *
1015                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1016         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1017                 ;
1018         do {
1019                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1020                                         sizeof(struct inet_ehash_bucket);
1021                 dccp_hashinfo.ehash_size >>= 1;
1022                 while (dccp_hashinfo.ehash_size &
1023                        (dccp_hashinfo.ehash_size - 1))
1024                         dccp_hashinfo.ehash_size--;
1025                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1026                         __get_free_pages(GFP_ATOMIC, ehash_order);
1027         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1028
1029         if (!dccp_hashinfo.ehash) {
1030                 printk(KERN_CRIT "Failed to allocate DCCP "
1031                                  "established hash table\n");
1032                 goto out_free_bind_bucket_cachep;
1033         }
1034
1035         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1036                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1037                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1038         }
1039
1040         bhash_order = ehash_order;
1041
1042         do {
1043                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1044                                         sizeof(struct inet_bind_hashbucket);
1045                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1046                     bhash_order > 0)
1047                         continue;
1048                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1049                         __get_free_pages(GFP_ATOMIC, bhash_order);
1050         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1051
1052         if (!dccp_hashinfo.bhash) {
1053                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1054                 goto out_free_dccp_ehash;
1055         }
1056
1057         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1058                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1059                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1060         }
1061
1062         rc = dccp_mib_init();
1063         if (rc)
1064                 goto out_free_dccp_bhash;
1065
1066         rc = dccp_ackvec_init();
1067         if (rc)
1068                 goto out_free_dccp_mib;
1069
1070         rc = dccp_sysctl_init();
1071         if (rc)
1072                 goto out_ackvec_exit;
1073 out:
1074         return rc;
1075 out_ackvec_exit:
1076         dccp_ackvec_exit();
1077 out_free_dccp_mib:
1078         dccp_mib_exit();
1079 out_free_dccp_bhash:
1080         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1081         dccp_hashinfo.bhash = NULL;
1082 out_free_dccp_ehash:
1083         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1084         dccp_hashinfo.ehash = NULL;
1085 out_free_bind_bucket_cachep:
1086         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1087         dccp_hashinfo.bind_bucket_cachep = NULL;
1088         goto out;
1089 }
1090
1091 static void __exit dccp_fini(void)
1092 {
1093         dccp_mib_exit();
1094         free_pages((unsigned long)dccp_hashinfo.bhash,
1095                    get_order(dccp_hashinfo.bhash_size *
1096                              sizeof(struct inet_bind_hashbucket)));
1097         free_pages((unsigned long)dccp_hashinfo.ehash,
1098                    get_order(dccp_hashinfo.ehash_size *
1099                              sizeof(struct inet_ehash_bucket)));
1100         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1101         dccp_ackvec_exit();
1102         dccp_sysctl_exit();
1103 }
1104
1105 module_init(dccp_init);
1106 module_exit(dccp_fini);
1107
1108 MODULE_LICENSE("GPL");
1109 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1110 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");