net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan.cox@linux.org>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      is been reached. This won't break
  37  *                                      old apps and it will avoid huge amount
  38  *                                      of socks hashed (this for unix_gc()
  39  *                                      performances reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #include <linux/module.h>
  84 #include <linux/kernel.h>
  85 #include <linux/signal.h>
  86 #include <linux/sched.h>
  87 #include <linux/errno.h>
  88 #include <linux/string.h>
  89 #include <linux/stat.h>
  90 #include <linux/dcache.h>
  91 #include <linux/namei.h>
  92 #include <linux/socket.h>
  93 #include <linux/un.h>
  94 #include <linux/fcntl.h>
  95 #include <linux/termios.h>
  96 #include <linux/sockios.h>
  97 #include <linux/net.h>
  98 #include <linux/in.h>
  99 #include <linux/fs.h>
 100 #include <linux/slab.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <net/net_namespace.h>
 105 #include <net/sock.h>
 106 #include <net/tcp_states.h>
 107 #include <net/af_unix.h>
 108 #include <linux/proc_fs.h>
 109 #include <linux/seq_file.h>
 110 #include <net/scm.h>
 111 #include <linux/init.h>
 112 #include <linux/poll.h>
 113 #include <linux/rtnetlink.h>
 114 #include <linux/mount.h>
 115 #include <net/checksum.h>
 116 #include <linux/security.h>
 117
 118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 119 static DEFINE_SPINLOCK(unix_table_lock);
 120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 121
 122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
 123
 124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
 126 #ifdef CONFIG_SECURITY_NETWORK
 127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 128 {
 129         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 130 }
 131
 132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 133 {
 134         scm->secid = *UNIXSID(skb);
 135 }
 136 #else
 137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 138 { }
 139
 140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 { }
 142 #endif /* CONFIG_SECURITY_NETWORK */
 143
 144 /*
 145  *  SMP locking strategy:
 146  *    hash table is protected with spinlock unix_table_lock
 147  *    each socket state is protected by separate rwlock.
 148  */
 149
 150 static inline unsigned unix_hash_fold(__wsum n)
 151 {
 152         unsigned hash = (__force unsigned)n;
 153         hash ^= hash>>16;
 154         hash ^= hash>>8;
 155         return hash&(UNIX_HASH_SIZE-1);
 156 }
 157
 158 #define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161 {
 162         return unix_peer(osk) == sk;
 163 }
 164
 165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166 {
 167         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
 168 }
 169
 170 static struct sock *unix_peer_get(struct sock *s)
 171 {
 172         struct sock *peer;
 173
 174         unix_state_lock(s);
 175         peer = unix_peer(s);
 176         if (peer)
 177                 sock_hold(peer);
 178         unix_state_unlock(s);
 179         return peer;
 180 }
 181
 182 static inline void unix_release_addr(struct unix_address *addr)
 183 {
 184         if (atomic_dec_and_test(&addr->refcnt))
 185                 kfree(addr);
 186 }
 187
 188 /*
 189  *      Check unix socket name:
 190  *              - should be not zero length.
 191  *              - if started by not zero, should be NULL terminated (FS object)
 192  *              - if started by zero, it is abstract name.
 193  */
 194
 195 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 196 {
 197         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 198                 return -EINVAL;
 199         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 200                 return -EINVAL;
 201         if (sunaddr->sun_path[0]) {
 202                 /*
 203                  * This may look like an off by one error but it is a bit more
 204                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 205                  * sun_path[108] doesnt as such exist.  However in kernel space
 206                  * we are guaranteed that it is a valid memory location in our
 207                  * kernel address buffer.
 208                  */
 209                 ((char *)sunaddr)[len]=0;
 210                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 211                 return len;
 212         }
 213
 214         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
 215         return len;
 216 }
 217
 218 static void __unix_remove_socket(struct sock *sk)
 219 {
 220         sk_del_node_init(sk);
 221 }
 222
 223 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 224 {
 225         BUG_TRAP(sk_unhashed(sk));
 226         sk_add_node(sk, list);
 227 }
 228
 229 static inline void unix_remove_socket(struct sock *sk)
 230 {
 231         spin_lock(&unix_table_lock);
 232         __unix_remove_socket(sk);
 233         spin_unlock(&unix_table_lock);
 234 }
 235
 236 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 237 {
 238         spin_lock(&unix_table_lock);
 239         __unix_insert_socket(list, sk);
 240         spin_unlock(&unix_table_lock);
 241 }
 242
 243 static struct sock *__unix_find_socket_byname(struct net *net,
 244                                               struct sockaddr_un *sunname,
 245                                               int len, int type, unsigned hash)
 246 {
 247         struct sock *s;
 248         struct hlist_node *node;
 249
 250         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 251                 struct unix_sock *u = unix_sk(s);
 252
 253                 if (!net_eq(sock_net(s), net))
 254                         continue;
 255
 256                 if (u->addr->len == len &&
 257                     !memcmp(u->addr->name, sunname, len))
 258                         goto found;
 259         }
 260         s = NULL;
 261 found:
 262         return s;
 263 }
 264
 265 static inline struct sock *unix_find_socket_byname(struct net *net,
 266                                                    struct sockaddr_un *sunname,
 267                                                    int len, int type,
 268                                                    unsigned hash)
 269 {
 270         struct sock *s;
 271
 272         spin_lock(&unix_table_lock);
 273         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 274         if (s)
 275                 sock_hold(s);
 276         spin_unlock(&unix_table_lock);
 277         return s;
 278 }
 279
 280 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 281 {
 282         struct sock *s;
 283         struct hlist_node *node;
 284
 285         spin_lock(&unix_table_lock);
 286         sk_for_each(s, node,
 287                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 288                 struct dentry *dentry = unix_sk(s)->dentry;
 289
 290                 if (!net_eq(sock_net(s), net))
 291                         continue;
 292
 293                 if(dentry && dentry->d_inode == i)
 294                 {
 295                         sock_hold(s);
 296                         goto found;
 297                 }
 298         }
 299         s = NULL;
 300 found:
 301         spin_unlock(&unix_table_lock);
 302         return s;
 303 }
 304
 305 static inline int unix_writable(struct sock *sk)
 306 {
 307         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 308 }
 309
 310 static void unix_write_space(struct sock *sk)
 311 {
 312         read_lock(&sk->sk_callback_lock);
 313         if (unix_writable(sk)) {
 314                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 315                         wake_up_interruptible_sync(sk->sk_sleep);
 316                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 317         }
 318         read_unlock(&sk->sk_callback_lock);
 319 }
 320
 321 /* When dgram socket disconnects (or changes its peer), we clear its receive
 322  * queue of packets arrived from previous peer. First, it allows to do
 323  * flow control based only on wmem_alloc; second, sk connected to peer
 324  * may receive messages only from that peer. */
 325 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 326 {
 327         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 328                 skb_queue_purge(&sk->sk_receive_queue);
 329                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 330
 331                 /* If one link of bidirectional dgram pipe is disconnected,
 332                  * we signal error. Messages are lost. Do not make this,
 333                  * when peer was not connected to us.
 334                  */
 335                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 336                         other->sk_err = ECONNRESET;
 337                         other->sk_error_report(other);
 338                 }
 339         }
 340 }
 341
 342 static void unix_sock_destructor(struct sock *sk)
 343 {
 344         struct unix_sock *u = unix_sk(sk);
 345
 346         skb_queue_purge(&sk->sk_receive_queue);
 347
 348         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
 349         BUG_TRAP(sk_unhashed(sk));
 350         BUG_TRAP(!sk->sk_socket);
 351         if (!sock_flag(sk, SOCK_DEAD)) {
 352                 printk("Attempt to release alive unix socket: %p\n", sk);
 353                 return;
 354         }
 355
 356         if (u->addr)
 357                 unix_release_addr(u->addr);
 358
 359         atomic_dec(&unix_nr_socks);
 360 #ifdef UNIX_REFCNT_DEBUG
 361         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
 362 #endif
 363 }
 364
 365 static int unix_release_sock (struct sock *sk, int embrion)
 366 {
 367         struct unix_sock *u = unix_sk(sk);
 368         struct dentry *dentry;
 369         struct vfsmount *mnt;
 370         struct sock *skpair;
 371         struct sk_buff *skb;
 372         int state;
 373
 374         unix_remove_socket(sk);
 375
 376         /* Clear state */
 377         unix_state_lock(sk);
 378         sock_orphan(sk);
 379         sk->sk_shutdown = SHUTDOWN_MASK;
 380         dentry       = u->dentry;
 381         u->dentry    = NULL;
 382         mnt          = u->mnt;
 383         u->mnt       = NULL;
 384         state = sk->sk_state;
 385         sk->sk_state = TCP_CLOSE;
 386         unix_state_unlock(sk);
 387
 388         wake_up_interruptible_all(&u->peer_wait);
 389
 390         skpair=unix_peer(sk);
 391
 392         if (skpair!=NULL) {
 393                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 394                         unix_state_lock(skpair);
 395                         /* No more writes */
 396                         skpair->sk_shutdown = SHUTDOWN_MASK;
 397                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 398                                 skpair->sk_err = ECONNRESET;
 399                         unix_state_unlock(skpair);
 400                         skpair->sk_state_change(skpair);
 401                         read_lock(&skpair->sk_callback_lock);
 402                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 403                         read_unlock(&skpair->sk_callback_lock);
 404                 }
 405                 sock_put(skpair); /* It may now die */
 406                 unix_peer(sk) = NULL;
 407         }
 408
 409         /* Try to flush out this socket. Throw out buffers at least */
 410
 411         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 412                 if (state==TCP_LISTEN)
 413                         unix_release_sock(skb->sk, 1);
 414                 /* passed fds are erased in the kfree_skb hook        */
 415                 kfree_skb(skb);
 416         }
 417
 418         if (dentry) {
 419                 dput(dentry);
 420                 mntput(mnt);
 421         }
 422
 423         sock_put(sk);
 424
 425         /* ---- Socket is dead now and most probably destroyed ---- */
 426
 427         /*
 428          * Fixme: BSD difference: In BSD all sockets connected to use get
 429          *        ECONNRESET and we die on the spot. In Linux we behave
 430          *        like files and pipes do and wait for the last
 431          *        dereference.
 432          *
 433          * Can't we simply set sock->err?
 434          *
 435          *        What the above comment does talk about? --ANK(980817)
 436          */
 437
 438         if (unix_tot_inflight)
 439                 unix_gc();              /* Garbage collect fds */
 440
 441         return 0;
 442 }
 443
 444 static int unix_listen(struct socket *sock, int backlog)
 445 {
 446         int err;
 447         struct sock *sk = sock->sk;
 448         struct unix_sock *u = unix_sk(sk);
 449
 450         err = -EOPNOTSUPP;
 451         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
 452                 goto out;                       /* Only stream/seqpacket sockets accept */
 453         err = -EINVAL;
 454         if (!u->addr)
 455                 goto out;                       /* No listens on an unbound socket */
 456         unix_state_lock(sk);
 457         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 458                 goto out_unlock;
 459         if (backlog > sk->sk_max_ack_backlog)
 460                 wake_up_interruptible_all(&u->peer_wait);
 461         sk->sk_max_ack_backlog  = backlog;
 462         sk->sk_state            = TCP_LISTEN;
 463         /* set credentials so connect can copy them */
 464         sk->sk_peercred.pid     = task_tgid_vnr(current);
 465         sk->sk_peercred.uid     = current->euid;
 466         sk->sk_peercred.gid     = current->egid;
 467         err = 0;
 468
 469 out_unlock:
 470         unix_state_unlock(sk);
 471 out:
 472         return err;
 473 }
 474
 475 static int unix_release(struct socket *);
 476 static int unix_bind(struct socket *, struct sockaddr *, int);
 477 static int unix_stream_connect(struct socket *, struct sockaddr *,
 478                                int addr_len, int flags);
 479 static int unix_socketpair(struct socket *, struct socket *);
 480 static int unix_accept(struct socket *, struct socket *, int);
 481 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 482 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 483 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 484 static int unix_shutdown(struct socket *, int);
 485 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 486                                struct msghdr *, size_t);
 487 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 488                                struct msghdr *, size_t, int);
 489 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 490                               struct msghdr *, size_t);
 491 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 492                               struct msghdr *, size_t, int);
 493 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 494                               int, int);
 495 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 496                                   struct msghdr *, size_t);
 497
 498 static const struct proto_ops unix_stream_ops = {
 499         .family =       PF_UNIX,
 500         .owner =        THIS_MODULE,
 501         .release =      unix_release,
 502         .bind =         unix_bind,
 503         .connect =      unix_stream_connect,
 504         .socketpair =   unix_socketpair,
 505         .accept =       unix_accept,
 506         .getname =      unix_getname,
 507         .poll =         unix_poll,
 508         .ioctl =        unix_ioctl,
 509         .listen =       unix_listen,
 510         .shutdown =     unix_shutdown,
 511         .setsockopt =   sock_no_setsockopt,
 512         .getsockopt =   sock_no_getsockopt,
 513         .sendmsg =      unix_stream_sendmsg,
 514         .recvmsg =      unix_stream_recvmsg,
 515         .mmap =         sock_no_mmap,
 516         .sendpage =     sock_no_sendpage,
 517 };
 518
 519 static const struct proto_ops unix_dgram_ops = {
 520         .family =       PF_UNIX,
 521         .owner =        THIS_MODULE,
 522         .release =      unix_release,
 523         .bind =         unix_bind,
 524         .connect =      unix_dgram_connect,
 525         .socketpair =   unix_socketpair,
 526         .accept =       sock_no_accept,
 527         .getname =      unix_getname,
 528         .poll =         datagram_poll,
 529         .ioctl =        unix_ioctl,
 530         .listen =       sock_no_listen,
 531         .shutdown =     unix_shutdown,
 532         .setsockopt =   sock_no_setsockopt,
 533         .getsockopt =   sock_no_getsockopt,
 534         .sendmsg =      unix_dgram_sendmsg,
 535         .recvmsg =      unix_dgram_recvmsg,
 536         .mmap =         sock_no_mmap,
 537         .sendpage =     sock_no_sendpage,
 538 };
 539
 540 static const struct proto_ops unix_seqpacket_ops = {
 541         .family =       PF_UNIX,
 542         .owner =        THIS_MODULE,
 543         .release =      unix_release,
 544         .bind =         unix_bind,
 545         .connect =      unix_stream_connect,
 546         .socketpair =   unix_socketpair,
 547         .accept =       unix_accept,
 548         .getname =      unix_getname,
 549         .poll =         datagram_poll,
 550         .ioctl =        unix_ioctl,
 551         .listen =       unix_listen,
 552         .shutdown =     unix_shutdown,
 553         .setsockopt =   sock_no_setsockopt,
 554         .getsockopt =   sock_no_getsockopt,
 555         .sendmsg =      unix_seqpacket_sendmsg,
 556         .recvmsg =      unix_dgram_recvmsg,
 557         .mmap =         sock_no_mmap,
 558         .sendpage =     sock_no_sendpage,
 559 };
 560
 561 static struct proto unix_proto = {
 562         .name     = "UNIX",
 563         .owner    = THIS_MODULE,
 564         .obj_size = sizeof(struct unix_sock),
 565 };
 566
 567 /*
 568  * AF_UNIX sockets do not interact with hardware, hence they
 569  * dont trigger interrupts - so it's safe for them to have
 570  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 571  * this special lock-class by reinitializing the spinlock key:
 572  */
 573 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 574
 575 static struct sock * unix_create1(struct net *net, struct socket *sock)
 576 {
 577         struct sock *sk = NULL;
 578         struct unix_sock *u;
 579
 580         atomic_inc(&unix_nr_socks);
 581         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
 582                 goto out;
 583
 584         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 585         if (!sk)
 586                 goto out;
 587
 588         sock_init_data(sock,sk);
 589         lockdep_set_class(&sk->sk_receive_queue.lock,
 590                                 &af_unix_sk_receive_queue_lock_key);
 591
 592         sk->sk_write_space      = unix_write_space;
 593         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 594         sk->sk_destruct         = unix_sock_destructor;
 595         u         = unix_sk(sk);
 596         u->dentry = NULL;
 597         u->mnt    = NULL;
 598         spin_lock_init(&u->lock);
 599         atomic_set(&u->inflight, 0);
 600         INIT_LIST_HEAD(&u->link);
 601         mutex_init(&u->readlock); /* single task reading lock */
 602         init_waitqueue_head(&u->peer_wait);
 603         unix_insert_socket(unix_sockets_unbound, sk);
 604 out:
 605         if (sk == NULL)
 606                 atomic_dec(&unix_nr_socks);
 607         return sk;
 608 }
 609
 610 static int unix_create(struct net *net, struct socket *sock, int protocol)
 611 {
 612         if (protocol && protocol != PF_UNIX)
 613                 return -EPROTONOSUPPORT;
 614
 615         sock->state = SS_UNCONNECTED;
 616
 617         switch (sock->type) {
 618         case SOCK_STREAM:
 619                 sock->ops = &unix_stream_ops;
 620                 break;
 621                 /*
 622                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 623                  *      nothing uses it.
 624                  */
 625         case SOCK_RAW:
 626                 sock->type=SOCK_DGRAM;
 627         case SOCK_DGRAM:
 628                 sock->ops = &unix_dgram_ops;
 629                 break;
 630         case SOCK_SEQPACKET:
 631                 sock->ops = &unix_seqpacket_ops;
 632                 break;
 633         default:
 634                 return -ESOCKTNOSUPPORT;
 635         }
 636
 637         return unix_create1(net, sock) ? 0 : -ENOMEM;
 638 }
 639
 640 static int unix_release(struct socket *sock)
 641 {
 642         struct sock *sk = sock->sk;
 643
 644         if (!sk)
 645                 return 0;
 646
 647         sock->sk = NULL;
 648
 649         return unix_release_sock (sk, 0);
 650 }
 651
 652 static int unix_autobind(struct socket *sock)
 653 {
 654         struct sock *sk = sock->sk;
 655         struct net *net = sock_net(sk);
 656         struct unix_sock *u = unix_sk(sk);
 657         static u32 ordernum = 1;
 658         struct unix_address * addr;
 659         int err;
 660
 661         mutex_lock(&u->readlock);
 662
 663         err = 0;
 664         if (u->addr)
 665                 goto out;
 666
 667         err = -ENOMEM;
 668         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 669         if (!addr)
 670                 goto out;
 671
 672         addr->name->sun_family = AF_UNIX;
 673         atomic_set(&addr->refcnt, 1);
 674
 675 retry:
 676         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 677         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
 678
 679         spin_lock(&unix_table_lock);
 680         ordernum = (ordernum+1)&0xFFFFF;
 681
 682         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 683                                       addr->hash)) {
 684                 spin_unlock(&unix_table_lock);
 685                 /* Sanity yield. It is unusual case, but yet... */
 686                 if (!(ordernum&0xFF))
 687                         yield();
 688                 goto retry;
 689         }
 690         addr->hash ^= sk->sk_type;
 691
 692         __unix_remove_socket(sk);
 693         u->addr = addr;
 694         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 695         spin_unlock(&unix_table_lock);
 696         err = 0;
 697
 698 out:    mutex_unlock(&u->readlock);
 699         return err;
 700 }
 701
 702 static struct sock *unix_find_other(struct net *net,
 703                                     struct sockaddr_un *sunname, int len,
 704                                     int type, unsigned hash, int *error)
 705 {
 706         struct sock *u;
 707         struct nameidata nd;
 708         int err = 0;
 709
 710         if (sunname->sun_path[0]) {
 711                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
 712                 if (err)
 713                         goto fail;
 714                 err = vfs_permission(&nd, MAY_WRITE);
 715                 if (err)
 716                         goto put_fail;
 717
 718                 err = -ECONNREFUSED;
 719                 if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
 720                         goto put_fail;
 721                 u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
 722                 if (!u)
 723                         goto put_fail;
 724
 725                 if (u->sk_type == type)
 726                         touch_atime(nd.path.mnt, nd.path.dentry);
 727
 728                 path_put(&nd.path);
 729
 730                 err=-EPROTOTYPE;
 731                 if (u->sk_type != type) {
 732                         sock_put(u);
 733                         goto fail;
 734                 }
 735         } else {
 736                 err = -ECONNREFUSED;
 737                 u=unix_find_socket_byname(net, sunname, len, type, hash);
 738                 if (u) {
 739                         struct dentry *dentry;
 740                         dentry = unix_sk(u)->dentry;
 741                         if (dentry)
 742                                 touch_atime(unix_sk(u)->mnt, dentry);
 743                 } else
 744                         goto fail;
 745         }
 746         return u;
 747
 748 put_fail:
 749         path_put(&nd.path);
 750 fail:
 751         *error=err;
 752         return NULL;
 753 }
 754
 755
 756 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 757 {
 758         struct sock *sk = sock->sk;
 759         struct net *net = sock_net(sk);
 760         struct unix_sock *u = unix_sk(sk);
 761         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 762         struct dentry * dentry = NULL;
 763         struct nameidata nd;
 764         int err;
 765         unsigned hash;
 766         struct unix_address *addr;
 767         struct hlist_head *list;
 768
 769         err = -EINVAL;
 770         if (sunaddr->sun_family != AF_UNIX)
 771                 goto out;
 772
 773         if (addr_len==sizeof(short)) {
 774                 err = unix_autobind(sock);
 775                 goto out;
 776         }
 777
 778         err = unix_mkname(sunaddr, addr_len, &hash);
 779         if (err < 0)
 780                 goto out;
 781         addr_len = err;
 782
 783         mutex_lock(&u->readlock);
 784
 785         err = -EINVAL;
 786         if (u->addr)
 787                 goto out_up;
 788
 789         err = -ENOMEM;
 790         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 791         if (!addr)
 792                 goto out_up;
 793
 794         memcpy(addr->name, sunaddr, addr_len);
 795         addr->len = addr_len;
 796         addr->hash = hash ^ sk->sk_type;
 797         atomic_set(&addr->refcnt, 1);
 798
 799         if (sunaddr->sun_path[0]) {
 800                 unsigned int mode;
 801                 err = 0;
 802                 /*
 803                  * Get the parent directory, calculate the hash for last
 804                  * component.
 805                  */
 806                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 807                 if (err)
 808                         goto out_mknod_parent;
 809
 810                 dentry = lookup_create(&nd, 0);
 811                 err = PTR_ERR(dentry);
 812                 if (IS_ERR(dentry))
 813                         goto out_mknod_unlock;
 814
 815                 /*
 816                  * All right, let's create it.
 817                  */
 818                 mode = S_IFSOCK |
 819                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
 820                 err = mnt_want_write(nd.path.mnt);
 821                 if (err)
 822                         goto out_mknod_dput;
 823                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
 824                 mnt_drop_write(nd.path.mnt);
 825                 if (err)
 826                         goto out_mknod_dput;
 827                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 828                 dput(nd.path.dentry);
 829                 nd.path.dentry = dentry;
 830
 831                 addr->hash = UNIX_HASH_SIZE;
 832         }
 833
 834         spin_lock(&unix_table_lock);
 835
 836         if (!sunaddr->sun_path[0]) {
 837                 err = -EADDRINUSE;
 838                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
 839                                               sk->sk_type, hash)) {
 840                         unix_release_addr(addr);
 841                         goto out_unlock;
 842                 }
 843
 844                 list = &unix_socket_table[addr->hash];
 845         } else {
 846                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 847                 u->dentry = nd.path.dentry;
 848                 u->mnt    = nd.path.mnt;
 849         }
 850
 851         err = 0;
 852         __unix_remove_socket(sk);
 853         u->addr = addr;
 854         __unix_insert_socket(list, sk);
 855
 856 out_unlock:
 857         spin_unlock(&unix_table_lock);
 858 out_up:
 859         mutex_unlock(&u->readlock);
 860 out:
 861         return err;
 862
 863 out_mknod_dput:
 864         dput(dentry);
 865 out_mknod_unlock:
 866         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 867         path_put(&nd.path);
 868 out_mknod_parent:
 869         if (err==-EEXIST)
 870                 err=-EADDRINUSE;
 871         unix_release_addr(addr);
 872         goto out_up;
 873 }
 874
 875 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 876 {
 877         if (unlikely(sk1 == sk2) || !sk2) {
 878                 unix_state_lock(sk1);
 879                 return;
 880         }
 881         if (sk1 < sk2) {
 882                 unix_state_lock(sk1);
 883                 unix_state_lock_nested(sk2);
 884         } else {
 885                 unix_state_lock(sk2);
 886                 unix_state_lock_nested(sk1);
 887         }
 888 }
 889
 890 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 891 {
 892         if (unlikely(sk1 == sk2) || !sk2) {
 893                 unix_state_unlock(sk1);
 894                 return;
 895         }
 896         unix_state_unlock(sk1);
 897         unix_state_unlock(sk2);
 898 }
 899
 900 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 901                               int alen, int flags)
 902 {
 903         struct sock *sk = sock->sk;
 904         struct net *net = sock_net(sk);
 905         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
 906         struct sock *other;
 907         unsigned hash;
 908         int err;
 909
 910         if (addr->sa_family != AF_UNSPEC) {
 911                 err = unix_mkname(sunaddr, alen, &hash);
 912                 if (err < 0)
 913                         goto out;
 914                 alen = err;
 915
 916                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 917                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 918                         goto out;
 919
 920 restart:
 921                 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 922                 if (!other)
 923                         goto out;
 924
 925                 unix_state_double_lock(sk, other);
 926
 927                 /* Apparently VFS overslept socket death. Retry. */
 928                 if (sock_flag(other, SOCK_DEAD)) {
 929                         unix_state_double_unlock(sk, other);
 930                         sock_put(other);
 931                         goto restart;
 932                 }
 933
 934                 err = -EPERM;
 935                 if (!unix_may_send(sk, other))
 936                         goto out_unlock;
 937
 938                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 939                 if (err)
 940                         goto out_unlock;
 941
 942         } else {
 943                 /*
 944                  *      1003.1g breaking connected state with AF_UNSPEC
 945                  */
 946                 other = NULL;
 947                 unix_state_double_lock(sk, other);
 948         }
 949
 950         /*
 951          * If it was connected, reconnect.
 952          */
 953         if (unix_peer(sk)) {
 954                 struct sock *old_peer = unix_peer(sk);
 955                 unix_peer(sk)=other;
 956                 unix_state_double_unlock(sk, other);
 957
 958                 if (other != old_peer)
 959                         unix_dgram_disconnected(sk, old_peer);
 960                 sock_put(old_peer);
 961         } else {
 962                 unix_peer(sk)=other;
 963                 unix_state_double_unlock(sk, other);
 964         }
 965         return 0;
 966
 967 out_unlock:
 968         unix_state_double_unlock(sk, other);
 969         sock_put(other);
 970 out:
 971         return err;
 972 }
 973
 974 static long unix_wait_for_peer(struct sock *other, long timeo)
 975 {
 976         struct unix_sock *u = unix_sk(other);
 977         int sched;
 978         DEFINE_WAIT(wait);
 979
 980         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 981
 982         sched = !sock_flag(other, SOCK_DEAD) &&
 983                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
 984                 (skb_queue_len(&other->sk_receive_queue) >
 985                  other->sk_max_ack_backlog);
 986
 987         unix_state_unlock(other);
 988
 989         if (sched)
 990                 timeo = schedule_timeout(timeo);
 991
 992         finish_wait(&u->peer_wait, &wait);
 993         return timeo;
 994 }
 995
 996 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 997                                int addr_len, int flags)
 998 {
 999         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1000         struct sock *sk = sock->sk;
1001         struct net *net = sock_net(sk);
1002         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1003         struct sock *newsk = NULL;
1004         struct sock *other = NULL;
1005         struct sk_buff *skb = NULL;
1006         unsigned hash;
1007         int st;
1008         int err;
1009         long timeo;
1010
1011         err = unix_mkname(sunaddr, addr_len, &hash);
1012         if (err < 0)
1013                 goto out;
1014         addr_len = err;
1015
1016         if (test_bit(SOCK_PASSCRED, &sock->flags)
1017                 && !u->addr && (err = unix_autobind(sock)) != 0)
1018                 goto out;
1019
1020         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1021
1022         /* First of all allocate resources.
1023            If we will make it after state is locked,
1024            we will have to recheck all again in any case.
1025          */
1026
1027         err = -ENOMEM;
1028
1029         /* create new sock for complete connection */
1030         newsk = unix_create1(sock_net(sk), NULL);
1031         if (newsk == NULL)
1032                 goto out;
1033
1034         /* Allocate skb for sending to listening sock */
1035         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1036         if (skb == NULL)
1037                 goto out;
1038
1039 restart:
1040         /*  Find listening sock. */
1041         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1042         if (!other)
1043                 goto out;
1044
1045         /* Latch state of peer */
1046         unix_state_lock(other);
1047
1048         /* Apparently VFS overslept socket death. Retry. */
1049         if (sock_flag(other, SOCK_DEAD)) {
1050                 unix_state_unlock(other);
1051                 sock_put(other);
1052                 goto restart;
1053         }
1054
1055         err = -ECONNREFUSED;
1056         if (other->sk_state != TCP_LISTEN)
1057                 goto out_unlock;
1058
1059         if (skb_queue_len(&other->sk_receive_queue) >
1060             other->sk_max_ack_backlog) {
1061                 err = -EAGAIN;
1062                 if (!timeo)
1063                         goto out_unlock;
1064
1065                 timeo = unix_wait_for_peer(other, timeo);
1066
1067                 err = sock_intr_errno(timeo);
1068                 if (signal_pending(current))
1069                         goto out;
1070                 sock_put(other);
1071                 goto restart;
1072         }
1073
1074         /* Latch our state.
1075
1076            It is tricky place. We need to grab write lock and cannot
1077            drop lock on peer. It is dangerous because deadlock is
1078            possible. Connect to self case and simultaneous
1079            attempt to connect are eliminated by checking socket
1080            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1081            check this before attempt to grab lock.
1082
1083            Well, and we have to recheck the state after socket locked.
1084          */
1085         st = sk->sk_state;
1086
1087         switch (st) {
1088         case TCP_CLOSE:
1089                 /* This is ok... continue with connect */
1090                 break;
1091         case TCP_ESTABLISHED:
1092                 /* Socket is already connected */
1093                 err = -EISCONN;
1094                 goto out_unlock;
1095         default:
1096                 err = -EINVAL;
1097                 goto out_unlock;
1098         }
1099
1100         unix_state_lock_nested(sk);
1101
1102         if (sk->sk_state != st) {
1103                 unix_state_unlock(sk);
1104                 unix_state_unlock(other);
1105                 sock_put(other);
1106                 goto restart;
1107         }
1108
1109         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1110         if (err) {
1111                 unix_state_unlock(sk);
1112                 goto out_unlock;
1113         }
1114
1115         /* The way is open! Fastly set all the necessary fields... */
1116
1117         sock_hold(sk);
1118         unix_peer(newsk)        = sk;
1119         newsk->sk_state         = TCP_ESTABLISHED;
1120         newsk->sk_type          = sk->sk_type;
1121         newsk->sk_peercred.pid  = task_tgid_vnr(current);
1122         newsk->sk_peercred.uid  = current->euid;
1123         newsk->sk_peercred.gid  = current->egid;
1124         newu = unix_sk(newsk);
1125         newsk->sk_sleep         = &newu->peer_wait;
1126         otheru = unix_sk(other);
1127
1128         /* copy address information from listening to new sock*/
1129         if (otheru->addr) {
1130                 atomic_inc(&otheru->addr->refcnt);
1131                 newu->addr = otheru->addr;
1132         }
1133         if (otheru->dentry) {
1134                 newu->dentry    = dget(otheru->dentry);
1135                 newu->mnt       = mntget(otheru->mnt);
1136         }
1137
1138         /* Set credentials */
1139         sk->sk_peercred = other->sk_peercred;
1140
1141         sock->state     = SS_CONNECTED;
1142         sk->sk_state    = TCP_ESTABLISHED;
1143         sock_hold(newsk);
1144
1145         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1146         unix_peer(sk)   = newsk;
1147
1148         unix_state_unlock(sk);
1149
1150         /* take ten and and send info to listening sock */
1151         spin_lock(&other->sk_receive_queue.lock);
1152         __skb_queue_tail(&other->sk_receive_queue, skb);
1153         spin_unlock(&other->sk_receive_queue.lock);
1154         unix_state_unlock(other);
1155         other->sk_data_ready(other, 0);
1156         sock_put(other);
1157         return 0;
1158
1159 out_unlock:
1160         if (other)
1161                 unix_state_unlock(other);
1162
1163 out:
1164         if (skb)
1165                 kfree_skb(skb);
1166         if (newsk)
1167                 unix_release_sock(newsk, 0);
1168         if (other)
1169                 sock_put(other);
1170         return err;
1171 }
1172
1173 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1174 {
1175         struct sock *ska=socka->sk, *skb = sockb->sk;
1176
1177         /* Join our sockets back to back */
1178         sock_hold(ska);
1179         sock_hold(skb);
1180         unix_peer(ska)=skb;
1181         unix_peer(skb)=ska;
1182         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1183         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1184         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1185
1186         if (ska->sk_type != SOCK_DGRAM) {
1187                 ska->sk_state = TCP_ESTABLISHED;
1188                 skb->sk_state = TCP_ESTABLISHED;
1189                 socka->state  = SS_CONNECTED;
1190                 sockb->state  = SS_CONNECTED;
1191         }
1192         return 0;
1193 }
1194
1195 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1196 {
1197         struct sock *sk = sock->sk;
1198         struct sock *tsk;
1199         struct sk_buff *skb;
1200         int err;
1201
1202         err = -EOPNOTSUPP;
1203         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1204                 goto out;
1205
1206         err = -EINVAL;
1207         if (sk->sk_state != TCP_LISTEN)
1208                 goto out;
1209
1210         /* If socket state is TCP_LISTEN it cannot change (for now...),
1211          * so that no locks are necessary.
1212          */
1213
1214         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1215         if (!skb) {
1216                 /* This means receive shutdown. */
1217                 if (err == 0)
1218                         err = -EINVAL;
1219                 goto out;
1220         }
1221
1222         tsk = skb->sk;
1223         skb_free_datagram(sk, skb);
1224         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1225
1226         /* attach accepted sock to socket */
1227         unix_state_lock(tsk);
1228         newsock->state = SS_CONNECTED;
1229         sock_graft(tsk, newsock);
1230         unix_state_unlock(tsk);
1231         return 0;
1232
1233 out:
1234         return err;
1235 }
1236
1237
1238 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1239 {
1240         struct sock *sk = sock->sk;
1241         struct unix_sock *u;
1242         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1243         int err = 0;
1244
1245         if (peer) {
1246                 sk = unix_peer_get(sk);
1247
1248                 err = -ENOTCONN;
1249                 if (!sk)
1250                         goto out;
1251                 err = 0;
1252         } else {
1253                 sock_hold(sk);
1254         }
1255
1256         u = unix_sk(sk);
1257         unix_state_lock(sk);
1258         if (!u->addr) {
1259                 sunaddr->sun_family = AF_UNIX;
1260                 sunaddr->sun_path[0] = 0;
1261                 *uaddr_len = sizeof(short);
1262         } else {
1263                 struct unix_address *addr = u->addr;
1264
1265                 *uaddr_len = addr->len;
1266                 memcpy(sunaddr, addr->name, *uaddr_len);
1267         }
1268         unix_state_unlock(sk);
1269         sock_put(sk);
1270 out:
1271         return err;
1272 }
1273
1274 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1275 {
1276         int i;
1277
1278         scm->fp = UNIXCB(skb).fp;
1279         skb->destructor = sock_wfree;
1280         UNIXCB(skb).fp = NULL;
1281
1282         for (i=scm->fp->count-1; i>=0; i--)
1283                 unix_notinflight(scm->fp->fp[i]);
1284 }
1285
1286 static void unix_destruct_fds(struct sk_buff *skb)
1287 {
1288         struct scm_cookie scm;
1289         memset(&scm, 0, sizeof(scm));
1290         unix_detach_fds(&scm, skb);
1291
1292         /* Alas, it calls VFS */
1293         /* So fscking what? fput() had been SMP-safe since the last Summer */
1294         scm_destroy(&scm);
1295         sock_wfree(skb);
1296 }
1297
1298 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1299 {
1300         int i;
1301         for (i=scm->fp->count-1; i>=0; i--)
1302                 unix_inflight(scm->fp->fp[i]);
1303         UNIXCB(skb).fp = scm->fp;
1304         skb->destructor = unix_destruct_fds;
1305         scm->fp = NULL;
1306 }
1307
1308 /*
1309  *      Send AF_UNIX data.
1310  */
1311
1312 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1313                               struct msghdr *msg, size_t len)
1314 {
1315         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1316         struct sock *sk = sock->sk;
1317         struct net *net = sock_net(sk);
1318         struct unix_sock *u = unix_sk(sk);
1319         struct sockaddr_un *sunaddr=msg->msg_name;
1320         struct sock *other = NULL;
1321         int namelen = 0; /* fake GCC */
1322         int err;
1323         unsigned hash;
1324         struct sk_buff *skb;
1325         long timeo;
1326         struct scm_cookie tmp_scm;
1327
1328         if (NULL == siocb->scm)
1329                 siocb->scm = &tmp_scm;
1330         err = scm_send(sock, msg, siocb->scm);
1331         if (err < 0)
1332                 return err;
1333
1334         err = -EOPNOTSUPP;
1335         if (msg->msg_flags&MSG_OOB)
1336                 goto out;
1337
1338         if (msg->msg_namelen) {
1339                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1340                 if (err < 0)
1341                         goto out;
1342                 namelen = err;
1343         } else {
1344                 sunaddr = NULL;
1345                 err = -ENOTCONN;
1346                 other = unix_peer_get(sk);
1347                 if (!other)
1348                         goto out;
1349         }
1350
1351         if (test_bit(SOCK_PASSCRED, &sock->flags)
1352                 && !u->addr && (err = unix_autobind(sock)) != 0)
1353                 goto out;
1354
1355         err = -EMSGSIZE;
1356         if (len > sk->sk_sndbuf - 32)
1357                 goto out;
1358
1359         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1360         if (skb==NULL)
1361                 goto out;
1362
1363         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1364         if (siocb->scm->fp)
1365                 unix_attach_fds(siocb->scm, skb);
1366         unix_get_secdata(siocb->scm, skb);
1367
1368         skb_reset_transport_header(skb);
1369         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1370         if (err)
1371                 goto out_free;
1372
1373         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1374
1375 restart:
1376         if (!other) {
1377                 err = -ECONNRESET;
1378                 if (sunaddr == NULL)
1379                         goto out_free;
1380
1381                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1382                                         hash, &err);
1383                 if (other==NULL)
1384                         goto out_free;
1385         }
1386
1387         unix_state_lock(other);
1388         err = -EPERM;
1389         if (!unix_may_send(sk, other))
1390                 goto out_unlock;
1391
1392         if (sock_flag(other, SOCK_DEAD)) {
1393                 /*
1394                  *      Check with 1003.1g - what should
1395                  *      datagram error
1396                  */
1397                 unix_state_unlock(other);
1398                 sock_put(other);
1399
1400                 err = 0;
1401                 unix_state_lock(sk);
1402                 if (unix_peer(sk) == other) {
1403                         unix_peer(sk)=NULL;
1404                         unix_state_unlock(sk);
1405
1406                         unix_dgram_disconnected(sk, other);
1407                         sock_put(other);
1408                         err = -ECONNREFUSED;
1409                 } else {
1410                         unix_state_unlock(sk);
1411                 }
1412
1413                 other = NULL;
1414                 if (err)
1415                         goto out_free;
1416                 goto restart;
1417         }
1418
1419         err = -EPIPE;
1420         if (other->sk_shutdown & RCV_SHUTDOWN)
1421                 goto out_unlock;
1422
1423         if (sk->sk_type != SOCK_SEQPACKET) {
1424                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1425                 if (err)
1426                         goto out_unlock;
1427         }
1428
1429         if (unix_peer(other) != sk &&
1430             (skb_queue_len(&other->sk_receive_queue) >
1431              other->sk_max_ack_backlog)) {
1432                 if (!timeo) {
1433                         err = -EAGAIN;
1434                         goto out_unlock;
1435                 }
1436
1437                 timeo = unix_wait_for_peer(other, timeo);
1438
1439                 err = sock_intr_errno(timeo);
1440                 if (signal_pending(current))
1441                         goto out_free;
1442
1443                 goto restart;
1444         }
1445
1446         skb_queue_tail(&other->sk_receive_queue, skb);
1447         unix_state_unlock(other);
1448         other->sk_data_ready(other, len);
1449         sock_put(other);
1450         scm_destroy(siocb->scm);
1451         return len;
1452
1453 out_unlock:
1454         unix_state_unlock(other);
1455 out_free:
1456         kfree_skb(skb);
1457 out:
1458         if (other)
1459                 sock_put(other);
1460         scm_destroy(siocb->scm);
1461         return err;
1462 }
1463
1464
1465 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1466                                struct msghdr *msg, size_t len)
1467 {
1468         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1469         struct sock *sk = sock->sk;
1470         struct sock *other = NULL;
1471         struct sockaddr_un *sunaddr=msg->msg_name;
1472         int err,size;
1473         struct sk_buff *skb;
1474         int sent=0;
1475         struct scm_cookie tmp_scm;
1476
1477         if (NULL == siocb->scm)
1478                 siocb->scm = &tmp_scm;
1479         err = scm_send(sock, msg, siocb->scm);
1480         if (err < 0)
1481                 return err;
1482
1483         err = -EOPNOTSUPP;
1484         if (msg->msg_flags&MSG_OOB)
1485                 goto out_err;
1486
1487         if (msg->msg_namelen) {
1488                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1489                 goto out_err;
1490         } else {
1491                 sunaddr = NULL;
1492                 err = -ENOTCONN;
1493                 other = unix_peer(sk);
1494                 if (!other)
1495                         goto out_err;
1496         }
1497
1498         if (sk->sk_shutdown & SEND_SHUTDOWN)
1499                 goto pipe_err;
1500
1501         while(sent < len)
1502         {
1503                 /*
1504                  *      Optimisation for the fact that under 0.01% of X
1505                  *      messages typically need breaking up.
1506                  */
1507
1508                 size = len-sent;
1509
1510                 /* Keep two messages in the pipe so it schedules better */
1511                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1512                         size = (sk->sk_sndbuf >> 1) - 64;
1513
1514                 if (size > SKB_MAX_ALLOC)
1515                         size = SKB_MAX_ALLOC;
1516
1517                 /*
1518                  *      Grab a buffer
1519                  */
1520
1521                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1522
1523                 if (skb==NULL)
1524                         goto out_err;
1525
1526                 /*
1527                  *      If you pass two values to the sock_alloc_send_skb
1528                  *      it tries to grab the large buffer with GFP_NOFS
1529                  *      (which can fail easily), and if it fails grab the
1530                  *      fallback size buffer which is under a page and will
1531                  *      succeed. [Alan]
1532                  */
1533                 size = min_t(int, size, skb_tailroom(skb));
1534
1535                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1536                 if (siocb->scm->fp)
1537                         unix_attach_fds(siocb->scm, skb);
1538
1539                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1540                         kfree_skb(skb);
1541                         goto out_err;
1542                 }
1543
1544                 unix_state_lock(other);
1545
1546                 if (sock_flag(other, SOCK_DEAD) ||
1547                     (other->sk_shutdown & RCV_SHUTDOWN))
1548                         goto pipe_err_free;
1549
1550                 skb_queue_tail(&other->sk_receive_queue, skb);
1551                 unix_state_unlock(other);
1552                 other->sk_data_ready(other, size);
1553                 sent+=size;
1554         }
1555
1556         scm_destroy(siocb->scm);
1557         siocb->scm = NULL;
1558
1559         return sent;
1560
1561 pipe_err_free:
1562         unix_state_unlock(other);
1563         kfree_skb(skb);
1564 pipe_err:
1565         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1566                 send_sig(SIGPIPE,current,0);
1567         err = -EPIPE;
1568 out_err:
1569         scm_destroy(siocb->scm);
1570         siocb->scm = NULL;
1571         return sent ? : err;
1572 }
1573
1574 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1575                                   struct msghdr *msg, size_t len)
1576 {
1577         int err;
1578         struct sock *sk = sock->sk;
1579
1580         err = sock_error(sk);
1581         if (err)
1582                 return err;
1583
1584         if (sk->sk_state != TCP_ESTABLISHED)
1585                 return -ENOTCONN;
1586
1587         if (msg->msg_namelen)
1588                 msg->msg_namelen = 0;
1589
1590         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1591 }
1592
1593 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1594 {
1595         struct unix_sock *u = unix_sk(sk);
1596
1597         msg->msg_namelen = 0;
1598         if (u->addr) {
1599                 msg->msg_namelen = u->addr->len;
1600                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1601         }
1602 }
1603
1604 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1605                               struct msghdr *msg, size_t size,
1606                               int flags)
1607 {
1608         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1609         struct scm_cookie tmp_scm;
1610         struct sock *sk = sock->sk;
1611         struct unix_sock *u = unix_sk(sk);
1612         int noblock = flags & MSG_DONTWAIT;
1613         struct sk_buff *skb;
1614         int err;
1615
1616         err = -EOPNOTSUPP;
1617         if (flags&MSG_OOB)
1618                 goto out;
1619
1620         msg->msg_namelen = 0;
1621
1622         mutex_lock(&u->readlock);
1623
1624         skb = skb_recv_datagram(sk, flags, noblock, &err);
1625         if (!skb) {
1626                 unix_state_lock(sk);
1627                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1628                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1629                     (sk->sk_shutdown & RCV_SHUTDOWN))
1630                         err = 0;
1631                 unix_state_unlock(sk);
1632                 goto out_unlock;
1633         }
1634
1635         wake_up_interruptible_sync(&u->peer_wait);
1636
1637         if (msg->msg_name)
1638                 unix_copy_addr(msg, skb->sk);
1639
1640         if (size > skb->len)
1641                 size = skb->len;
1642         else if (size < skb->len)
1643                 msg->msg_flags |= MSG_TRUNC;
1644
1645         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1646         if (err)
1647                 goto out_free;
1648
1649         if (!siocb->scm) {
1650                 siocb->scm = &tmp_scm;
1651                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1652         }
1653         siocb->scm->creds = *UNIXCREDS(skb);
1654         unix_set_secdata(siocb->scm, skb);
1655
1656         if (!(flags & MSG_PEEK))
1657         {
1658                 if (UNIXCB(skb).fp)
1659                         unix_detach_fds(siocb->scm, skb);
1660         }
1661         else
1662         {
1663                 /* It is questionable: on PEEK we could:
1664                    - do not return fds - good, but too simple 8)
1665                    - return fds, and do not return them on read (old strategy,
1666                      apparently wrong)
1667                    - clone fds (I chose it for now, it is the most universal
1668                      solution)
1669
1670                    POSIX 1003.1g does not actually define this clearly
1671                    at all. POSIX 1003.1g doesn't define a lot of things
1672                    clearly however!
1673
1674                 */
1675                 if (UNIXCB(skb).fp)
1676                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1677         }
1678         err = size;
1679
1680         scm_recv(sock, msg, siocb->scm, flags);
1681
1682 out_free:
1683         skb_free_datagram(sk,skb);
1684 out_unlock:
1685         mutex_unlock(&u->readlock);
1686 out:
1687         return err;
1688 }
1689
1690 /*
1691  *      Sleep until data has arrive. But check for races..
1692  */
1693
1694 static long unix_stream_data_wait(struct sock * sk, long timeo)
1695 {
1696         DEFINE_WAIT(wait);
1697
1698         unix_state_lock(sk);
1699
1700         for (;;) {
1701                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1702
1703                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1704                     sk->sk_err ||
1705                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1706                     signal_pending(current) ||
1707                     !timeo)
1708                         break;
1709
1710                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1711                 unix_state_unlock(sk);
1712                 timeo = schedule_timeout(timeo);
1713                 unix_state_lock(sk);
1714                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1715         }
1716
1717         finish_wait(sk->sk_sleep, &wait);
1718         unix_state_unlock(sk);
1719         return timeo;
1720 }
1721
1722
1723
1724 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1725                                struct msghdr *msg, size_t size,
1726                                int flags)
1727 {
1728         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1729         struct scm_cookie tmp_scm;
1730         struct sock *sk = sock->sk;
1731         struct unix_sock *u = unix_sk(sk);
1732         struct sockaddr_un *sunaddr=msg->msg_name;
1733         int copied = 0;
1734         int check_creds = 0;
1735         int target;
1736         int err = 0;
1737         long timeo;
1738
1739         err = -EINVAL;
1740         if (sk->sk_state != TCP_ESTABLISHED)
1741                 goto out;
1742
1743         err = -EOPNOTSUPP;
1744         if (flags&MSG_OOB)
1745                 goto out;
1746
1747         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1748         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1749
1750         msg->msg_namelen = 0;
1751
1752         /* Lock the socket to prevent queue disordering
1753          * while sleeps in memcpy_tomsg
1754          */
1755
1756         if (!siocb->scm) {
1757                 siocb->scm = &tmp_scm;
1758                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1759         }
1760
1761         mutex_lock(&u->readlock);
1762
1763         do
1764         {
1765                 int chunk;
1766                 struct sk_buff *skb;
1767
1768                 unix_state_lock(sk);
1769                 skb = skb_dequeue(&sk->sk_receive_queue);
1770                 if (skb==NULL)
1771                 {
1772                         if (copied >= target)
1773                                 goto unlock;
1774
1775                         /*
1776                          *      POSIX 1003.1g mandates this order.
1777                          */
1778
1779                         if ((err = sock_error(sk)) != 0)
1780                                 goto unlock;
1781                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1782                                 goto unlock;
1783
1784                         unix_state_unlock(sk);
1785                         err = -EAGAIN;
1786                         if (!timeo)
1787                                 break;
1788                         mutex_unlock(&u->readlock);
1789
1790                         timeo = unix_stream_data_wait(sk, timeo);
1791
1792                         if (signal_pending(current)) {
1793                                 err = sock_intr_errno(timeo);
1794                                 goto out;
1795                         }
1796                         mutex_lock(&u->readlock);
1797                         continue;
1798  unlock:
1799                         unix_state_unlock(sk);
1800                         break;
1801                 }
1802                 unix_state_unlock(sk);
1803
1804                 if (check_creds) {
1805                         /* Never glue messages from different writers */
1806                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1807                                 skb_queue_head(&sk->sk_receive_queue, skb);
1808                                 break;
1809                         }
1810                 } else {
1811                         /* Copy credentials */
1812                         siocb->scm->creds = *UNIXCREDS(skb);
1813                         check_creds = 1;
1814                 }
1815
1816                 /* Copy address just once */
1817                 if (sunaddr)
1818                 {
1819                         unix_copy_addr(msg, skb->sk);
1820                         sunaddr = NULL;
1821                 }
1822
1823                 chunk = min_t(unsigned int, skb->len, size);
1824                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1825                         skb_queue_head(&sk->sk_receive_queue, skb);
1826                         if (copied == 0)
1827                                 copied = -EFAULT;
1828                         break;
1829                 }
1830                 copied += chunk;
1831                 size -= chunk;
1832
1833                 /* Mark read part of skb as used */
1834                 if (!(flags & MSG_PEEK))
1835                 {
1836                         skb_pull(skb, chunk);
1837
1838                         if (UNIXCB(skb).fp)
1839                                 unix_detach_fds(siocb->scm, skb);
1840
1841                         /* put the skb back if we didn't use it up.. */
1842                         if (skb->len)
1843                         {
1844                                 skb_queue_head(&sk->sk_receive_queue, skb);
1845                                 break;
1846                         }
1847
1848                         kfree_skb(skb);
1849
1850                         if (siocb->scm->fp)
1851                                 break;
1852                 }
1853                 else
1854                 {
1855                         /* It is questionable, see note in unix_dgram_recvmsg.
1856                          */
1857                         if (UNIXCB(skb).fp)
1858                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1859
1860                         /* put message back and return */
1861                         skb_queue_head(&sk->sk_receive_queue, skb);
1862                         break;
1863                 }
1864         } while (size);
1865
1866         mutex_unlock(&u->readlock);
1867         scm_recv(sock, msg, siocb->scm, flags);
1868 out:
1869         return copied ? : err;
1870 }
1871
1872 static int unix_shutdown(struct socket *sock, int mode)
1873 {
1874         struct sock *sk = sock->sk;
1875         struct sock *other;
1876
1877         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1878
1879         if (mode) {
1880                 unix_state_lock(sk);
1881                 sk->sk_shutdown |= mode;
1882                 other=unix_peer(sk);
1883                 if (other)
1884                         sock_hold(other);
1885                 unix_state_unlock(sk);
1886                 sk->sk_state_change(sk);
1887
1888                 if (other &&
1889                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1890
1891                         int peer_mode = 0;
1892
1893                         if (mode&RCV_SHUTDOWN)
1894                                 peer_mode |= SEND_SHUTDOWN;
1895                         if (mode&SEND_SHUTDOWN)
1896                                 peer_mode |= RCV_SHUTDOWN;
1897                         unix_state_lock(other);
1898                         other->sk_shutdown |= peer_mode;
1899                         unix_state_unlock(other);
1900                         other->sk_state_change(other);
1901                         read_lock(&other->sk_callback_lock);
1902                         if (peer_mode == SHUTDOWN_MASK)
1903                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1904                         else if (peer_mode & RCV_SHUTDOWN)
1905                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1906                         read_unlock(&other->sk_callback_lock);
1907                 }
1908                 if (other)
1909                         sock_put(other);
1910         }
1911         return 0;
1912 }
1913
1914 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1915 {
1916         struct sock *sk = sock->sk;
1917         long amount=0;
1918         int err;
1919
1920         switch(cmd)
1921         {
1922                 case SIOCOUTQ:
1923                         amount = atomic_read(&sk->sk_wmem_alloc);
1924                         err = put_user(amount, (int __user *)arg);
1925                         break;
1926                 case SIOCINQ:
1927                 {
1928                         struct sk_buff *skb;
1929
1930                         if (sk->sk_state == TCP_LISTEN) {
1931                                 err = -EINVAL;
1932                                 break;
1933                         }
1934
1935                         spin_lock(&sk->sk_receive_queue.lock);
1936                         if (sk->sk_type == SOCK_STREAM ||
1937                             sk->sk_type == SOCK_SEQPACKET) {
1938                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1939                                         amount += skb->len;
1940                         } else {
1941                                 skb = skb_peek(&sk->sk_receive_queue);
1942                                 if (skb)
1943                                         amount=skb->len;
1944                         }
1945                         spin_unlock(&sk->sk_receive_queue.lock);
1946                         err = put_user(amount, (int __user *)arg);
1947                         break;
1948                 }
1949
1950                 default:
1951                         err = -ENOIOCTLCMD;
1952                         break;
1953         }
1954         return err;
1955 }
1956
1957 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1958 {
1959         struct sock *sk = sock->sk;
1960         unsigned int mask;
1961
1962         poll_wait(file, sk->sk_sleep, wait);
1963         mask = 0;
1964
1965         /* exceptional events? */
1966         if (sk->sk_err)
1967                 mask |= POLLERR;
1968         if (sk->sk_shutdown == SHUTDOWN_MASK)
1969                 mask |= POLLHUP;
1970         if (sk->sk_shutdown & RCV_SHUTDOWN)
1971                 mask |= POLLRDHUP;
1972
1973         /* readable? */
1974         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1975             (sk->sk_shutdown & RCV_SHUTDOWN))
1976                 mask |= POLLIN | POLLRDNORM;
1977
1978         /* Connection-based need to check for termination and startup */
1979         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1980                 mask |= POLLHUP;
1981
1982         /*
1983          * we set writable also when the other side has shut down the
1984          * connection. This prevents stuck sockets.
1985          */
1986         if (unix_writable(sk))
1987                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1988
1989         return mask;
1990 }
1991
1992
1993 #ifdef CONFIG_PROC_FS
1994 static struct sock *first_unix_socket(int *i)
1995 {
1996         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1997                 if (!hlist_empty(&unix_socket_table[*i]))
1998                         return __sk_head(&unix_socket_table[*i]);
1999         }
2000         return NULL;
2001 }
2002
2003 static struct sock *next_unix_socket(int *i, struct sock *s)
2004 {
2005         struct sock *next = sk_next(s);
2006         /* More in this chain? */
2007         if (next)
2008                 return next;
2009         /* Look for next non-empty chain. */
2010         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2011                 if (!hlist_empty(&unix_socket_table[*i]))
2012                         return __sk_head(&unix_socket_table[*i]);
2013         }
2014         return NULL;
2015 }
2016
2017 struct unix_iter_state {
2018         struct seq_net_private p;
2019         int i;
2020 };
2021 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2022 {
2023         struct unix_iter_state *iter = seq->private;
2024         loff_t off = 0;
2025         struct sock *s;
2026
2027         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2028                 if (sock_net(s) != seq_file_net(seq))
2029                         continue;
2030                 if (off == pos)
2031                         return s;
2032                 ++off;
2033         }
2034         return NULL;
2035 }
2036
2037
2038 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2039         __acquires(unix_table_lock)
2040 {
2041         spin_lock(&unix_table_lock);
2042         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2043 }
2044
2045 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2046 {
2047         struct unix_iter_state *iter = seq->private;
2048         struct sock *sk = v;
2049         ++*pos;
2050
2051         if (v == SEQ_START_TOKEN)
2052                 sk = first_unix_socket(&iter->i);
2053         else
2054                 sk = next_unix_socket(&iter->i, sk);
2055         while (sk && (sock_net(sk) != seq_file_net(seq)))
2056                 sk = next_unix_socket(&iter->i, sk);
2057         return sk;
2058 }
2059
2060 static void unix_seq_stop(struct seq_file *seq, void *v)
2061         __releases(unix_table_lock)
2062 {
2063         spin_unlock(&unix_table_lock);
2064 }
2065
2066 static int unix_seq_show(struct seq_file *seq, void *v)
2067 {
2068
2069         if (v == SEQ_START_TOKEN)
2070                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2071                          "Inode Path\n");
2072         else {
2073                 struct sock *s = v;
2074                 struct unix_sock *u = unix_sk(s);
2075                 unix_state_lock(s);
2076
2077                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2078                         s,
2079                         atomic_read(&s->sk_refcnt),
2080                         0,
2081                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2082                         s->sk_type,
2083                         s->sk_socket ?
2084                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2085                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2086                         sock_i_ino(s));
2087
2088                 if (u->addr) {
2089                         int i, len;
2090                         seq_putc(seq, ' ');
2091
2092                         i = 0;
2093                         len = u->addr->len - sizeof(short);
2094                         if (!UNIX_ABSTRACT(s))
2095                                 len--;
2096                         else {
2097                                 seq_putc(seq, '@');
2098                                 i++;
2099                         }
2100                         for ( ; i < len; i++)
2101                                 seq_putc(seq, u->addr->name->sun_path[i]);
2102                 }
2103                 unix_state_unlock(s);
2104                 seq_putc(seq, '\n');
2105         }
2106
2107         return 0;
2108 }
2109
2110 static const struct seq_operations unix_seq_ops = {
2111         .start  = unix_seq_start,
2112         .next   = unix_seq_next,
2113         .stop   = unix_seq_stop,
2114         .show   = unix_seq_show,
2115 };
2116
2117
2118 static int unix_seq_open(struct inode *inode, struct file *file)
2119 {
2120         return seq_open_net(inode, file, &unix_seq_ops,
2121                             sizeof(struct unix_iter_state));
2122 }
2123
2124 static const struct file_operations unix_seq_fops = {
2125         .owner          = THIS_MODULE,
2126         .open           = unix_seq_open,
2127         .read           = seq_read,
2128         .llseek         = seq_lseek,
2129         .release        = seq_release_net,
2130 };
2131
2132 #endif
2133
2134 static struct net_proto_family unix_family_ops = {
2135         .family = PF_UNIX,
2136         .create = unix_create,
2137         .owner  = THIS_MODULE,
2138 };
2139
2140
2141 static int unix_net_init(struct net *net)
2142 {
2143         int error = -ENOMEM;
2144
2145         net->unx.sysctl_max_dgram_qlen = 10;
2146         if (unix_sysctl_register(net))
2147                 goto out;
2148
2149 #ifdef CONFIG_PROC_FS
2150         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2151                 unix_sysctl_unregister(net);
2152                 goto out;
2153         }
2154 #endif
2155         error = 0;
2156 out:
2157         return 0;
2158 }
2159
2160 static void unix_net_exit(struct net *net)
2161 {
2162         unix_sysctl_unregister(net);
2163         proc_net_remove(net, "unix");
2164 }
2165
2166 static struct pernet_operations unix_net_ops = {
2167         .init = unix_net_init,
2168         .exit = unix_net_exit,
2169 };
2170
2171 static int __init af_unix_init(void)
2172 {
2173         int rc = -1;
2174         struct sk_buff *dummy_skb;
2175
2176         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2177
2178         rc = proto_register(&unix_proto, 1);
2179         if (rc != 0) {
2180                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2181                        __func__);
2182                 goto out;
2183         }
2184
2185         sock_register(&unix_family_ops);
2186         register_pernet_subsys(&unix_net_ops);
2187 out:
2188         return rc;
2189 }
2190
2191 static void __exit af_unix_exit(void)
2192 {
2193         sock_unregister(PF_UNIX);
2194         proto_unregister(&unix_proto);
2195         unregister_pernet_subsys(&unix_net_ops);
2196 }
2197
2198 /* Earlier than device_initcall() so that other drivers invoking
2199    request_module() don't end up in a loop when modprobe tries
2200    to use a UNIX socket. But later than subsys_initcall() because
2201    we depend on stuff initialised there */
2202 fs_initcall(af_unix_init);
2203 module_exit(af_unix_exit);
2204
2205 MODULE_LICENSE("GPL");
2206 MODULE_ALIAS_NETPROTO(PF_UNIX);