diff --git a/kernel/futex.c b/kernel/futex.c
index b364e0026191caee9dd0ebee3a657e2f65b5c52e..600bc9d801f2834c5e56203ccb614527c6b0d421 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
+#include <linux/module.h>
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
-/*
- * Futexes are matched on equal values of this key.
- * The key type depends on whether it's a shared or private mapping.
- * Don't rearrange members without looking at hash_futex().
- *
- * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
-union futex_key {
-       struct {
-               unsigned long pgoff;
-               struct inode *inode;
-               int offset;
-       } shared;
-       struct {
-               unsigned long address;
-               struct mm_struct *mm;
-               int offset;
-       } private;
-       struct {
-               unsigned long word;
-               void *ptr;
-               int offset;
-       } both;
-};
-
 /*
  * Priority Inheritance state:
  */
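
Note: the union futex_key removed above is what the hash and lookup code
compares, and the three helpers that manipulate it are exported later in this
diff, which suggests the definition now lives in a shared header rather than
being dropped. Matching itself is a plain member-wise comparison; a sketch
paraphrasing the match_futex() helper, which appears only as hunk context
below:

static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
        return (key1->both.word == key2->both.word
                && key1->both.ptr == key2->both.ptr
                && key1->both.offset == key2->both.offset);
}
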
@@ -166,7 +141,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 /*
  * Get parameters which are the keys for a futex.
  *
- * For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode,
+ * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
  * offset_within_page).  For private mappings, it's (uaddr, current->mm).
  * We can usually work out the index without swapping in the page.
  *
@@ -175,7 +150,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
  *
  * Should be called with &current->mm->mmap_sem but NOT any spinlocks.
  */
-static int get_futex_key(u32 __user *uaddr, union futex_key *key)
+int get_futex_key(u32 __user *uaddr, union futex_key *key)
 {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
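
For the private-mapping case described in the comment above, the key is built
directly from the mm and the user address. A simplified sketch of that branch
of get_futex_key() (not visible in this diff), assuming the usual VM_MAYSHARE
test on the vma found for the address:

        key->both.offset = address % PAGE_SIZE; /* bit 0 stays clear: not inode-based */
        if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
                key->private.mm = mm;
                key->private.address = address;
                return 0;
        }
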
@@ -223,7 +198,7 @@ static int get_futex_key(u32 __user *uaddr, union futex_key *key)
        /*
         * Linear file mappings are also simple.
         */
-       key->shared.inode = vma->vm_file->f_dentry->d_inode;
+       key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
        key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
        if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
                key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
@@ -246,6 +221,7 @@ static int get_futex_key(u32 __user *uaddr, union futex_key *key)
        }
        return err;
 }
+EXPORT_SYMBOL_GPL(get_futex_key);
 
 /*
  * Take a reference to the resource addressed by a key.
@@ -254,7 +230,7 @@ static int get_futex_key(u32 __user *uaddr, union futex_key *key)
  * NOTE: mmap_sem MUST be held between get_futex_key() and calling this
  * function, if it is called at all.  mmap_sem keeps key->shared.inode valid.
  */
-static inline void get_key_refs(union futex_key *key)
+inline void get_futex_key_refs(union futex_key *key)
 {
        if (key->both.ptr != 0) {
                if (key->both.offset & 1)
@@ -263,12 +239,13 @@ static inline void get_key_refs(union futex_key *key)
                        atomic_inc(&key->private.mm->mm_count);
        }
 }
+EXPORT_SYMBOL_GPL(get_futex_key_refs);
 
 /*
  * Drop a reference to the resource addressed by a key.
  * The hash bucket spinlock must not be held.
  */
-static void drop_key_refs(union futex_key *key)
+void drop_futex_key_refs(union futex_key *key)
 {
        if (key->both.ptr != 0) {
                if (key->both.offset & 1)
@@ -277,14 +254,15 @@ static void drop_key_refs(union futex_key *key)
                        mmdrop(key->private.mm);
        }
 }
+EXPORT_SYMBOL_GPL(drop_futex_key_refs);
 
 static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
 {
        int ret;
 
-       inc_preempt_count();
+       pagefault_disable();
        ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
-       dec_preempt_count();
+       pagefault_enable();
 
        return ret ? -EFAULT : 0;
 }
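
With pagefault_disable() in effect, __copy_from_user_inatomic() fails fast
instead of sleeping on a fault, so callers can retry outside their locks. A
minimal, simplified sketch of that retry idiom (the real callers in this file
keep hb->lock held after a successful read, and futex_read_retry is a
hypothetical name used only for illustration):

static int futex_read_retry(struct futex_hash_bucket *hb,
                            u32 __user *uaddr, u32 *uval)
{
        int ret;

 retry:
        spin_lock(&hb->lock);
        ret = get_futex_value_locked(uval, uaddr);
        spin_unlock(&hb->lock);

        if (unlikely(ret)) {
                /* Fault the page in with a sleeping access, then retry. */
                if (get_user(*uval, uaddr))
                        return -EFAULT;
                goto retry;
        }
        return 0;
}
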
@@ -324,12 +302,11 @@ static int refill_pi_state_cache(void)
        if (likely(current->pi_state_cache))
                return 0;
 
-       pi_state = kmalloc(sizeof(*pi_state), GFP_KERNEL);
+       pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
 
        if (!pi_state)
                return -ENOMEM;
 
-       memset(pi_state, 0, sizeof(*pi_state));
        INIT_LIST_HEAD(&pi_state->list);
        /* pi_mutex gets initialized later */
        pi_state->owner = NULL;
@@ -553,7 +530,7 @@ static void wake_futex(struct futex_q *q)
         * at the end of wake_up_all() does not prevent this store from
         * moving.
         */
-       wmb();
+       smp_wmb();
        q->lock_ptr = NULL;
 }
 
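
The store above pairs with the reader in unqueue_me(), not shown in this
diff: the waiter loads q->lock_ptr and, if it is still non-NULL, takes that
lock and re-checks it, since wake_futex() may have cleared it in the
meantime. Roughly, as a hedged paraphrase:

        spinlock_t *lock_ptr;
 retry:
        lock_ptr = q->lock_ptr;
        if (lock_ptr != NULL) {
                spin_lock(lock_ptr);
                /*
                 * lock_ptr can change between the unlocked read above and
                 * taking the lock; if it did, drop the lock and retry.
                 */
                if (unlikely(lock_ptr != q->lock_ptr)) {
                        spin_unlock(lock_ptr);
                        goto retry;
                }
                list_del(&q->list);
                spin_unlock(lock_ptr);
                ret = 1;
        }
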
@@ -566,6 +543,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        if (!pi_state)
                return -EINVAL;
 
+       spin_lock(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
@@ -585,9 +563,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        if (!(uval & FUTEX_OWNER_DIED)) {
                newval = FUTEX_WAITERS | new_owner->pid;
 
-               inc_preempt_count();
+               pagefault_disable();
                curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-               dec_preempt_count();
+               pagefault_enable();
                if (curval == -EFAULT)
                        return -EFAULT;
                if (curval != uval)
@@ -605,6 +583,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        pi_state->owner = new_owner;
        spin_unlock_irq(&new_owner->pi_lock);
 
+       spin_unlock(&pi_state->pi_mutex.wait_lock);
        rt_mutex_unlock(&pi_state->pi_mutex);
 
        return 0;
@@ -618,9 +597,9 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
         * There is no waiter, so we unlock the futex. The owner died
         * bit has not to be preserved here. We are the owner:
         */
-       inc_preempt_count();
+       pagefault_disable();
        oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
-       dec_preempt_count();
+       pagefault_enable();
 
        if (oldval == -EFAULT)
                return oldval;
@@ -872,7 +851,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
                                this->lock_ptr = &hb2->lock;
                        }
                        this->key = key2;
-                       get_key_refs(&key2);
+                       get_futex_key_refs(&key2);
                        drop_count++;
 
                        if (ret - nr_wake >= nr_requeue)
@@ -885,9 +864,9 @@ out_unlock:
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
 
-       /* drop_key_refs() must be called outside the spinlocks. */
+       /* drop_futex_key_refs() must be called outside the spinlocks. */
        while (--drop_count >= 0)
-               drop_key_refs(&key1);
+               drop_futex_key_refs(&key1);
 
 out:
        up_read(&current->mm->mmap_sem);
@@ -905,7 +884,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
 
        init_waitqueue_head(&q->waiters);
 
-       get_key_refs(&q->key);
+       get_futex_key_refs(&q->key);
        hb = hash_futex(&q->key);
        q->lock_ptr = &hb->lock;
 
@@ -924,7 +903,7 @@ static inline void
 queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
 {
        spin_unlock(&hb->lock);
-       drop_key_refs(&q->key);
+       drop_futex_key_refs(&q->key);
 }
 
 /*
@@ -979,7 +958,7 @@ static int unqueue_me(struct futex_q *q)
                ret = 1;
        }
 
-       drop_key_refs(&q->key);
+       drop_futex_key_refs(&q->key);
        return ret;
 }
 
@@ -998,15 +977,18 @@ static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
 
        spin_unlock(&hb->lock);
 
-       drop_key_refs(&q->key);
+       drop_futex_key_refs(&q->key);
 }
 
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+static long futex_wait_restart(struct restart_block *restart);
+static int futex_wait_abstime(u32 __user *uaddr, u32 val,
+                       int timed, unsigned long abs_time)
 {
        struct task_struct *curr = current;
        DECLARE_WAITQUEUE(wait, curr);
        struct futex_hash_bucket *hb;
        struct futex_q q;
+       unsigned long time_left = 0;
        u32 uval;
        int ret;
 
@@ -1086,8 +1068,21 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
         * !list_empty() is safe here without any lock.
         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
         */
-       if (likely(!list_empty(&q.list)))
-               time = schedule_timeout(time);
+       time_left = 0;
+       if (likely(!list_empty(&q.list))) {
+               unsigned long rel_time;
+
+               if (timed) {
+                       unsigned long now = jiffies;
+                       if (time_after(now, abs_time))
+                               rel_time = 0;
+                       else
+                               rel_time = abs_time - now;
+               } else
+                       rel_time = MAX_SCHEDULE_TIMEOUT;
+
+               time_left = schedule_timeout(rel_time);
+       }
        __set_current_state(TASK_RUNNING);
 
        /*
@@ -1098,13 +1093,25 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
        /* If we were woken (and unqueued), we succeeded, whatever. */
        if (!unqueue_me(&q))
                return 0;
-       if (time == 0)
+       if (time_left == 0)
                return -ETIMEDOUT;
+
        /*
         * We expect signal_pending(current), but another thread may
         * have handled it for us already.
         */
-       return -EINTR;
+       if (time_left == MAX_SCHEDULE_TIMEOUT)
+               return -ERESTARTSYS;
+       else {
+               struct restart_block *restart;
+               restart = &current_thread_info()->restart_block;
+               restart->fn = futex_wait_restart;
+               restart->arg0 = (unsigned long)uaddr;
+               restart->arg1 = (unsigned long)val;
+               restart->arg2 = (unsigned long)timed;
+               restart->arg3 = abs_time;
+               return -ERESTART_RESTARTBLOCK;
+       }
 
  out_unlock_release_sem:
        queue_unlock(&q, hb);
@@ -1114,6 +1121,24 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
        return ret;
 }
 
+static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time)
+{
+       int timed = (rel_time != MAX_SCHEDULE_TIMEOUT);
+       return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time);
+}
+
+static long futex_wait_restart(struct restart_block *restart)
+{
+       u32 __user *uaddr = (u32 __user *)restart->arg0;
+       u32 val = (u32)restart->arg1;
+       int timed = (int)restart->arg2;
+       unsigned long abs_time = restart->arg3;
+
+       restart->fn = do_no_restart_syscall;
+       return (long)futex_wait_abstime(uaddr, val, timed, abs_time);
+}
+
+
 /*
  * Userspace tried a 0 -> TID atomic transition of the futex value
  * and failed. The kernel side here does the whole locking operation:
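
For context on the -ERESTART_RESTARTBLOCK path added above: once the signal
has been handled, the architecture's signal code restarts the syscall through
sys_restart_syscall() (kernel/signal.c), which simply invokes the stored
callback, here futex_wait_restart():

asmlinkage long sys_restart_syscall(void)
{
        struct restart_block *restart = &current_thread_info()->restart_block;

        return restart->fn(restart);
}
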
@@ -1135,7 +1160,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
 
        if (sec != MAX_SCHEDULE_TIMEOUT) {
                to = &timeout;
-               hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+               hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                to->timer.expires = ktime_set(sec, nsec);
        }
@@ -1158,9 +1183,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
         */
        newval = current->pid;
 
-       inc_preempt_count();
+       pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
-       dec_preempt_count();
+       pagefault_enable();
 
        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
@@ -1183,9 +1208,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
        uval = curval;
        newval = uval | FUTEX_WAITERS;
 
-       inc_preempt_count();
+       pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-       dec_preempt_count();
+       pagefault_enable();
 
        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
@@ -1215,10 +1240,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
                        newval = current->pid |
                                FUTEX_OWNER_DIED | FUTEX_WAITERS;
 
-                       inc_preempt_count();
+                       pagefault_disable();
                        curval = futex_atomic_cmpxchg_inatomic(uaddr,
                                                               uval, newval);
-                       dec_preempt_count();
+                       pagefault_enable();
 
                        if (unlikely(curval == -EFAULT))
                                goto uaddr_faulted;
@@ -1390,9 +1415,9 @@ retry_locked:
         * anyone else up:
         */
        if (!(uval & FUTEX_OWNER_DIED)) {
-               inc_preempt_count();
+               pagefault_disable();
                uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-               dec_preempt_count();
+               pagefault_enable();
        }
 
        if (unlikely(uval == -EFAULT))
@@ -1493,7 +1518,7 @@ static unsigned int futex_poll(struct file *filp,
        return ret;
 }
 
-static struct file_operations futex_fops = {
+static const struct file_operations futex_fops = {
        .release        = futex_close,
        .poll           = futex_poll,
 };
@@ -1507,6 +1532,13 @@ static int futex_fd(u32 __user *uaddr, int signal)
        struct futex_q *q;
        struct file *filp;
        int ret, err;
+       static unsigned long printk_interval;
+
+       if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
+               printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
+                       "will be removed from the kernel in June 2007\n",
+                       current->comm);
+       }
 
        ret = -EINVAL;
        if (!valid_signal(signal))
@@ -1522,9 +1554,9 @@ static int futex_fd(u32 __user *uaddr, int signal)
                goto out;
        }
        filp->f_op = &futex_fops;
-       filp->f_vfsmnt = mntget(futex_mnt);
-       filp->f_dentry = dget(futex_mnt->mnt_root);
-       filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+       filp->f_path.mnt = mntget(futex_mnt);
+       filp->f_path.dentry = dget(futex_mnt->mnt_root);
+       filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 
        if (signal) {
                err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
@@ -1851,10 +1883,16 @@ static struct file_system_type futex_fs_type = {
 
 static int __init init(void)
 {
-       unsigned int i;
+       int i = register_filesystem(&futex_fs_type);
+
+       if (i)
+               return i;
 
-       register_filesystem(&futex_fs_type);
        futex_mnt = kern_mount(&futex_fs_type);
+       if (IS_ERR(futex_mnt)) {
+               unregister_filesystem(&futex_fs_type);
+               return PTR_ERR(futex_mnt);
+       }
 
        for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
                INIT_LIST_HEAD(&futex_queues[i].chain);