pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - kernel/hrtimer.c
cgroup: annotate cgroup_init_subsys with __init
[linux-2.6-omap-h63xx.git] / kernel / hrtimer.c
index 668f3967eb394e37fcecc65e47e6dce75173b5dc..dea4c9124ac808b872d43a3dd83c15fc7cbb38a9 100644 (file)
@@ -325,6 +325,23 @@ u64 ktime_divns(const ktime_t kt, s64 div)
 }
 #endif /* BITS_PER_LONG >= 64 */
 
+/*
+ * Add two ktime values, saturating at KTIME_SEC_MAX seconds on overflow.
+ */
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+       ktime_t res = ktime_add(lhs, rhs);
+
+       /*
+        * We use KTIME_SEC_MAX here, the maximum timeout which we can
+        * return to user space in a timespec:
+        */
+       if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+               res = ktime_set(KTIME_SEC_MAX, 0);
+
+       return res;
+}
+
 /*
  * Check, whether the timer is on the callback pending list
  */
@@ -425,6 +442,8 @@ static int hrtimer_reprogram(struct hrtimer *timer,
        ktime_t expires = ktime_sub(timer->expires, base->offset);
        int res;
 
+       WARN_ON_ONCE(timer->expires.tv64 < 0);
+
        /*
         * When the callback is running, we do not reprogram the clock event
         * device. The timer callback is either running on a different CPU or
@@ -435,6 +454,15 @@ static int hrtimer_reprogram(struct hrtimer *timer,
        if (hrtimer_callback_running(timer))
                return 0;
 
+       /*
+        * A CLOCK_REALTIME timer might be requested with an absolute
+        * expiry time which is less than base->offset. There is nothing
+        * wrong with that; just avoid calling into the tick code, which
+        * now objects to negative expiry values.
+        */
+       if (expires.tv64 < 0)
+               return -ETIME;
+
        if (expires.tv64 >= expires_next->tv64)
                return 0;
 
@@ -562,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
                        list_add_tail(&timer->cb_entry,
                                      &base->cpu_base->cb_pending);
                        timer->state = HRTIMER_STATE_PENDING;
-                       raise_softirq(HRTIMER_SOFTIRQ);
                        return 1;
                default:
                        BUG();
@@ -605,6 +632,11 @@ static int hrtimer_switch_to_hres(void)
        return 1;
 }
 
+static inline void hrtimer_raise_softirq(void)
+{
+       raise_softirq(HRTIMER_SOFTIRQ);
+}
+
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -623,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
 {
        return 0;
 }
+static inline void hrtimer_raise_softirq(void) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -682,13 +715,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
                 */
                orun++;
        }
-       timer->expires = ktime_add(timer->expires, interval);
-       /*
-        * Make sure, that the result did not wrap with a very large
-        * interval.
-        */
-       if (timer->expires.tv64 < 0)
-               timer->expires = ktime_set(KTIME_SEC_MAX, 0);
+       timer->expires = ktime_add_safe(timer->expires, interval);
 
        return orun;
 }
@@ -828,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 {
        struct hrtimer_clock_base *base, *new_base;
        unsigned long flags;
-       int ret;
+       int ret, raise;
 
        base = lock_hrtimer_base(timer, &flags);
 
@@ -839,7 +866,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
        new_base = switch_hrtimer_base(timer, base);
 
        if (mode == HRTIMER_MODE_REL) {
-               tim = ktime_add(tim, new_base->get_time());
+               tim = ktime_add_safe(tim, new_base->get_time());
                /*
                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
                 * to signal that they simply return xtime in
@@ -848,16 +875,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
                 * timeouts. This will go away with the GTOD framework.
                 */
 #ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add(tim, base->resolution);
+               tim = ktime_add_safe(tim, base->resolution);
 #endif
-               /*
-                * Careful here: User space might have asked for a
-                * very long sleep, so the add above might result in a
-                * negative number, which enqueues the timer in front
-                * of the queue.
-                */
-               if (tim.tv64 < 0)
-                       tim.tv64 = KTIME_MAX;
        }
        timer->expires = tim;
 
@@ -870,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
        enqueue_hrtimer(timer, new_base,
                        new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
+       /*
+        * The timer may be expired and moved to the cb_pending
+        * list. We cannot raise the softirq with the base lock held
+        * due to a possible deadlock with the runqueue lock.
+        */
+       raise = timer->state == HRTIMER_STATE_PENDING;
+
        unlock_hrtimer_base(timer, &flags);
 
+       if (raise)
+               hrtimer_raise_softirq();
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_start);
@@ -1066,8 +1095,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
                         * If the timer was rearmed on another CPU, reprogram
                         * the event device.
                         */
-                       if (timer->base->first == &timer->node)
-                               hrtimer_reprogram(timer, timer->base);
+                       struct hrtimer_clock_base *base = timer->base;
+
+                       if (base->first == &timer->node &&
+                           hrtimer_reprogram(timer, base)) {
+                               /*
+                                * Timer is expired. Thus move it from tree to
+                                * pending list again.
+                                */
+                               __remove_hrtimer(timer, base,
+                                                HRTIMER_STATE_PENDING, 0);
+                               list_add_tail(&timer->cb_entry,
+                                             &base->cpu_base->cb_pending);
+                       }
                }
        }
        spin_unlock_irq(&cpu_base->lock);
@@ -1224,51 +1264,50 @@ void hrtimer_run_pending(void)
 /*
  * Called from hardirq context every jiffy
  */
-static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
-                                    int index)
+void hrtimer_run_queues(void)
 {
        struct rb_node *node;
-       struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
+       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+       struct hrtimer_clock_base *base;
+       int index, gettime = 1;
 
-       if (!base->first)
+       if (hrtimer_hres_active())
                return;
 
-       if (base->get_softirq_time)
-               base->softirq_time = base->get_softirq_time();
-
-       spin_lock(&cpu_base->lock);
+       for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
+               base = &cpu_base->clock_base[index];
 
-       while ((node = base->first)) {
-               struct hrtimer *timer;
-
-               timer = rb_entry(node, struct hrtimer, node);
-               if (base->softirq_time.tv64 <= timer->expires.tv64)
-                       break;
-
-               if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-                       __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
-                       list_add_tail(&timer->cb_entry,
-                                       &base->cpu_base->cb_pending);
+               if (!base->first)
                        continue;
+
+               if (base->get_softirq_time)
+                       base->softirq_time = base->get_softirq_time();
+               else if (gettime) {
+                       hrtimer_get_softirq_time(cpu_base);
+                       gettime = 0;
                }
 
-               __run_hrtimer(timer);
-       }
-       spin_unlock(&cpu_base->lock);
-}
+               spin_lock(&cpu_base->lock);
 
-void hrtimer_run_queues(void)
-{
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-       int i;
+               while ((node = base->first)) {
+                       struct hrtimer *timer;
 
-       if (hrtimer_hres_active())
-               return;
+                       timer = rb_entry(node, struct hrtimer, node);
+                       if (base->softirq_time.tv64 <= timer->expires.tv64)
+                               break;
 
-       hrtimer_get_softirq_time(cpu_base);
+                       if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+                               __remove_hrtimer(timer, base,
+                                       HRTIMER_STATE_PENDING, 0);
+                               list_add_tail(&timer->cb_entry,
+                                       &base->cpu_base->cb_pending);
+                               continue;
+                       }
 
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
-               run_hrtimer_queue(cpu_base, i);
+                       __run_hrtimer(timer);
+               }
+               spin_unlock(&cpu_base->lock);
+       }
 }
 
 /*
@@ -1319,40 +1358,49 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
        return t->task == NULL;
 }
 
+static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
+{
+       struct timespec rmt;
+       ktime_t rem;
+
+       rem = ktime_sub(timer->expires, timer->base->get_time());
+       if (rem.tv64 <= 0)
+               return 0;
+       rmt = ktime_to_timespec(rem);
+
+       if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+               return -EFAULT;
+
+       return 1;
+}
+
 long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 {
        struct hrtimer_sleeper t;
-       struct timespec *rmtp;
-       ktime_t time;
-
-       restart->fn = do_no_restart_syscall;
+       struct timespec __user  *rmtp;
 
-       hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS);
-       t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2;
+       hrtimer_init(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS);
+       t.timer.expires.tv64 = restart->nanosleep.expires;
 
        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
                return 0;
 
-       rmtp = (struct timespec *)restart->arg1;
+       rmtp = restart->nanosleep.rmtp;
        if (rmtp) {
-               time = ktime_sub(t.timer.expires, t.timer.base->get_time());
-               if (time.tv64 <= 0)
-                       return 0;
-               *rmtp = ktime_to_timespec(time);
+               int ret = update_rmtp(&t.timer, rmtp);
+               if (ret <= 0)
+                       return ret;
        }
 
-       restart->fn = hrtimer_nanosleep_restart;
-
        /* The other values in restart are already filled in */
        return -ERESTART_RESTARTBLOCK;
 }
 
-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
                       const enum hrtimer_mode mode, const clockid_t clockid)
 {
        struct restart_block *restart;
        struct hrtimer_sleeper t;
-       ktime_t rem;
 
        hrtimer_init(&t.timer, clockid, mode);
        t.timer.expires = timespec_to_ktime(*rqtp);
@@ -1364,18 +1412,16 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
                return -ERESTARTNOHAND;
 
        if (rmtp) {
-               rem = ktime_sub(t.timer.expires, t.timer.base->get_time());
-               if (rem.tv64 <= 0)
-                       return 0;
-               *rmtp = ktime_to_timespec(rem);
+               int ret = update_rmtp(&t.timer, rmtp);
+               if (ret <= 0)
+                       return ret;
        }
 
        restart = &current_thread_info()->restart_block;
        restart->fn = hrtimer_nanosleep_restart;
-       restart->arg0 = (unsigned long) t.timer.base->index;
-       restart->arg1 = (unsigned long) rmtp;
-       restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF;
-       restart->arg3 = t.timer.expires.tv64 >> 32;
+       restart->nanosleep.index = t.timer.base->index;
+       restart->nanosleep.rmtp = rmtp;
+       restart->nanosleep.expires = t.timer.expires.tv64;
 
        return -ERESTART_RESTARTBLOCK;
 }
@@ -1383,8 +1429,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
 asmlinkage long
 sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
 {
-       struct timespec tu, rmt;
-       int ret;
+       struct timespec tu;
 
        if (copy_from_user(&tu, rqtp, sizeof(tu)))
                return -EFAULT;
@@ -1392,15 +1437,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
        if (!timespec_valid(&tu))
                return -EINVAL;
 
-       ret = hrtimer_nanosleep(&tu, rmtp ? &rmt : NULL, HRTIMER_MODE_REL,
-                               CLOCK_MONOTONIC);
-
-       if (ret && rmtp) {
-               if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
-                       return -EFAULT;
-       }
-
-       return ret;
+       return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
 /*
@@ -1412,7 +1449,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
        int i;
 
        spin_lock_init(&cpu_base->lock);
-       lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key);
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
                cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1453,16 +1489,16 @@ static void migrate_hrtimers(int cpu)
        tick_cancel_sched_timer(cpu);
 
        local_irq_disable();
-       double_spin_lock(&new_base->lock, &old_base->lock,
-                        smp_processor_id() < cpu);
+       spin_lock(&new_base->lock);
+       spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                migrate_hrtimer_list(&old_base->clock_base[i],
                                     &new_base->clock_base[i]);
        }
 
-       double_spin_unlock(&new_base->lock, &old_base->lock,
-                          smp_processor_id() < cpu);
+       spin_unlock(&old_base->lock);
+       spin_unlock(&new_base->lock);
        local_irq_enable();
        put_cpu_var(hrtimer_bases);
 }