struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- if (attempt >= 2 || !(vma = find_vma(mm, address)) ||
+ if (attempt > 2 || !(vma = find_vma(mm, address)) ||
vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
return -EFAULT;
{
struct task_struct *p;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_task_by_pid(pid);
if (!p)
goto out_unlock;
p = NULL;
goto out_unlock;
}
- if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+ if (p->exit_state != 0) {
p = NULL;
goto out_unlock;
}
get_task_struct(p);
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return p;
}
*/
void exit_pi_state_list(struct task_struct *curr)
{
- struct futex_hash_bucket *hb;
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
+ struct futex_hash_bucket *hb;
union futex_key key;
/*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
- * versus waiters unqueueing themselfs
+ * versus waiters unqueueing themselves:
*/
spin_lock_irq(&curr->pi_lock);
while (!list_empty(head)) {
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
+ hb = hash_futex(&key);
spin_unlock_irq(&curr->pi_lock);
- hb = hash_futex(&key);
spin_lock(&hb->lock);
spin_lock_irq(&curr->pi_lock);
+ /*
+ * We dropped the pi-lock, so re-check whether this
+ * task still owns the PI-state:
+ */
if (head->next != next) {
spin_unlock(&hb->lock);
continue;
}
- list_del_init(&pi_state->list);
-
WARN_ON(pi_state->owner != curr);
-
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
pi_state->owner = NULL;
spin_unlock_irq(&curr->pi_lock);
head = &hb->chain;
list_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, &me->key)) {
+ if (match_futex(&this->key, &me->key)) {
/*
* Another waiter already exists - bump up
* the refcount and return its pi_state:
*/
pi_state = this->pi_state;
+ /*
+ * Userspace might have messed up non PI and PI futexes
+ */
+ if (unlikely(!pi_state))
+ return -EINVAL;
+
+ WARN_ON(!atomic_read(&pi_state->refcount));
+
atomic_inc(&pi_state->refcount);
me->pi_state = pi_state;
}
/*
- * We are the first waiter - try to look up the real owner and
- * attach the new pi_state to it:
+ * We are the first waiter - try to look up the real owner and attach
+ * the new pi_state to it, but bail out when the owner died bit is set
+ * and TID = 0:
*/
pid = uval & FUTEX_TID_MASK;
+ if (!pid && (uval & FUTEX_OWNER_DIED))
+ return -ESRCH;
p = futex_find_get_task(pid);
if (!p)
return -ESRCH;
pi_state->key = me->key;
spin_lock_irq(&p->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
spin_unlock_irq(&p->pi_lock);
* kept enabled while there is PI state around. We must also
* preserve the owner died bit.)
*/
- newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ newval = FUTEX_WAITERS | new_owner->pid;
- inc_preempt_count();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- dec_preempt_count();
+ inc_preempt_count();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ dec_preempt_count();
+ if (curval == -EFAULT)
+ return -EFAULT;
+ if (curval != uval)
+ return -EINVAL;
+ }
- if (curval == -EFAULT)
- return -EFAULT;
- if (curval != uval)
- return -EINVAL;
+ spin_lock_irq(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ spin_unlock_irq(&pi_state->owner->pi_lock);
- list_del_init(&pi_state->owner->pi_state_list);
+ spin_lock_irq(&new_owner->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
+ spin_unlock_irq(&new_owner->pi_lock);
+
rt_mutex_unlock(&pi_state->pi_mutex);
return 0;
return 0;
}
+/*
+ * Lock two futex hash buckets in a consistent order: the bucket with the
+ * lower address is locked first with a plain spin_lock(), the one with
+ * the higher address second via spin_lock_nested(SINGLE_DEPTH_NESTING),
+ * which expresses the locking dependency for lockdep.
+ *
+ * When hb1 and hb2 are the same bucket, its lock is taken exactly once.
+ */
+static inline void
+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+{
+	struct futex_hash_bucket *lower = hb1, *upper = hb2;
+
+	/* Order the pair by address, independent of argument order. */
+	if (hb1 > hb2) {
+		lower = hb2;
+		upper = hb1;
+	}
+
+	spin_lock(&lower->lock);
+	if (lower != upper)
+		spin_lock_nested(&upper->lock, SINGLE_DEPTH_NESTING);
+}
+
/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
hb2 = hash_futex(&key2);
retry:
- if (hb1 < hb2)
- spin_lock(&hb1->lock);
- spin_lock(&hb2->lock);
- if (hb1 > hb2)
- spin_lock(&hb1->lock);
+ double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
*/
if (attempt++) {
if (futex_handle_fault((unsigned long)uaddr2,
- attempt))
+ attempt)) {
+ ret = -EFAULT;
goto out;
+ }
goto retry;
}
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
- if (hb1 < hb2)
- spin_lock(&hb1->lock);
- spin_lock(&hb2->lock);
- if (hb1 > hb2)
- spin_lock(&hb1->lock);
+ double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
u32 curval;
/* In the common case we don't take the spinlock, which is nice. */
retry:
lock_ptr = q->lock_ptr;
+ barrier();
if (lock_ptr != 0) {
spin_lock(lock_ptr);
/*
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
- struct hrtimer_sleeper *to)
+static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
+ long nsec, int trylock)
{
+ struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
if (refill_pi_state_cache())
return -ENOMEM;
+ if (sec != MAX_SCHEDULE_TIMEOUT) {
+ to = &timeout;
+ hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+ hrtimer_init_sleeper(to, current);
+ to->timer.expires = ktime_set(sec, nsec);
+ }
+
q.pi_state = NULL;
retry:
down_read(&curr->mm->mmap_sem);
/* Owner died? */
if (q.pi_state->owner != NULL) {
spin_lock_irq(&q.pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&q.pi_state->list));
list_del_init(&q.pi_state->list);
spin_unlock_irq(&q.pi_state->owner->pi_lock);
} else
q.pi_state->owner = current;
spin_lock_irq(¤t->pi_lock);
+ WARN_ON(!list_empty(&q.pi_state->list));
list_add(&q.pi_state->list, ¤t->pi_state_list);
spin_unlock_irq(¤t->pi_lock);
if (!detect && ret == -EDEADLK && 0)
force_sig(SIGKILL, current);
- return ret;
+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
out_unlock_release_sem:
queue_unlock(&q, hb);
* still holding the mmap_sem.
*/
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt))
+ if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+ ret = -EFAULT;
goto out_unlock_release_sem;
-
+ }
goto retry_locked;
}
return ret;
}
-/*
- * Restart handler
- */
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
- struct hrtimer_sleeper timeout, *to = NULL;
- int ret;
-
- restart->fn = do_no_restart_syscall;
-
- if (restart->arg2 || restart->arg3) {
- to = &timeout;
- hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
- hrtimer_init_sleeper(to, current);
- to->timer.expires.tv64 = ((u64)restart->arg1 << 32) |
- (u64) restart->arg0;
- }
-
- pr_debug("lock_pi restart: %p, %d (%d)\n",
- (u32 __user *)restart->arg0, current->pid);
-
- ret = do_futex_lock_pi((u32 __user *)restart->arg0, restart->arg1,
- 0, to);
-
- if (ret != -EINTR)
- return ret;
-
- restart->fn = futex_lock_pi_restart;
-
- /* The other values are filled in */
- return -ERESTART_RESTARTBLOCK;
-}
-
-/*
- * Called from the syscall entry below.
- */
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
- long nsec, int trylock)
-{
- struct hrtimer_sleeper timeout, *to = NULL;
- struct restart_block *restart;
- int ret;
-
- if (sec != MAX_SCHEDULE_TIMEOUT) {
- to = &timeout;
- hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
- hrtimer_init_sleeper(to, current);
- to->timer.expires = ktime_set(sec, nsec);
- }
-
- ret = do_futex_lock_pi(uaddr, detect, trylock, to);
-
- if (ret != -EINTR)
- return ret;
-
- pr_debug("lock_pi interrupted: %p, %d (%d)\n", uaddr, current->pid);
-
- restart = ¤t_thread_info()->restart_block;
- restart->fn = futex_lock_pi_restart;
- restart->arg0 = (unsigned long) uaddr;
- restart->arg1 = detect;
- if (to) {
- restart->arg2 = to->timer.expires.tv64 & 0xFFFFFFFF;
- restart->arg3 = to->timer.expires.tv64 >> 32;
- } else
- restart->arg2 = restart->arg3 = 0;
-
- return -ERESTART_RESTARTBLOCK;
-}
-
/*
* Userspace attempted a TID -> 0 atomic transition, and failed.
* This is the in-kernel slowpath: we look up the PI state (if any),
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- inc_preempt_count();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
- dec_preempt_count();
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ inc_preempt_count();
+ uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+ dec_preempt_count();
+ }
if (unlikely(uval == -EFAULT))
goto pi_faulted;
/*
* No waiters - kernel unlocks the futex:
*/
- ret = unlock_futex_pi(uaddr, uval);
- if (ret == -EFAULT)
- goto pi_faulted;
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ ret = unlock_futex_pi(uaddr, uval);
+ if (ret == -EFAULT)
+ goto pi_faulted;
+ }
out_unlock:
spin_unlock(&hb->lock);
* still holding the mmap_sem.
*/
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt))
+ if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+ ret = -EFAULT;
goto out_unlock;
-
+ }
goto retry_locked;
}
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
if (signal) {
- err = f_setown(filp, current->pid, 1);
+ err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
if (err < 0) {
goto error;
}
struct task_struct *p;
ret = -ESRCH;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_task_by_pid(pid);
if (!p)
goto err_unlock;
!capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->robust_list;
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
}
if (put_user(sizeof(*head), len_ptr))
return put_user(head, head_ptr);
err_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
* Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so:
*/
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
- u32 uval, nval;
+ u32 uval, nval, mval;
retry:
if (get_user(uval, uaddr))
* thread-death.) The rest of the cleanup is done in
* userspace.
*/
- nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
- uval | FUTEX_OWNER_DIED);
+ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+ nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
if (nval == -EFAULT)
return -1;
if (nval != uval)
goto retry;
- if (uval & FUTEX_WAITERS)
- futex_wake(uaddr, 1);
+ /*
+ * Wake robust non-PI futexes here. The wakeup of
+ * PI futexes happens in exit_pi_state_list():
+ */
+ if (!pi) {
+ if (uval & FUTEX_WAITERS)
+ futex_wake(uaddr, 1);
+ }
}
return 0;
}
+/*
+ * Fetch a robust-list pointer from userspace. Bit 0 of the fetched word
+ * signals a PI futex: it is reported through *pi and masked out of the
+ * pointer stored in *entry.
+ *
+ * Returns 0 on success, or -EFAULT if the user word could not be read
+ * (in which case *entry and *pi are left untouched).
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	/*
+	 * head is read with get_user(), so keep the __user address-space
+	 * annotation in the cast instead of silently dropping it.
+	 */
+	if (get_user(uentry, (unsigned long __user *)head))
+		return -EFAULT;
+
+	/* The result is still a userspace pointer; only the PI bit is cleared. */
+	*entry = (void __user *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
/*
* Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters.
{
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
unsigned long futex_offset;
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
*/
- if (get_user(entry, &head->list.next))
+ if (fetch_robust_entry(&entry, &head->list.next, &pi))
return;
/*
* Fetch the relative futex offset:
* Fetch any possibly pending lock-add first, and handle it
* if it exists:
*/
- if (get_user(pending, &head->list_op_pending))
+ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
return;
+
if (pending)
- handle_futex_death((void *)pending + futex_offset, curr);
+ handle_futex_death((void *)pending + futex_offset, curr, pip);
while (entry != &head->list) {
/*
*/
if (entry != pending)
if (handle_futex_death((void *)entry + futex_offset,
- curr))
+ curr, pi))
return;
/*
* Fetch the next entry in the list:
*/
- if (get_user(entry, &entry->next))
+ if (fetch_robust_entry(&entry, &entry->next, &pi))
return;
/*
* Avoid excessively long or circular lists: