#include <linux/reciprocal_div.h>
 #include <linux/unistd.h>
 #include <linux/pagemap.h>
+#include <linux/hrtimer.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
        struct list_head migration_queue;
 #endif
 
+#ifdef CONFIG_SCHED_HRTICK
+       unsigned long hrtick_flags;     /* HRTICK_SET / HRTICK_RESET bits */
+       ktime_t hrtick_expire;          /* absolute expiry of the pending hrtick */
+       struct hrtimer hrtick_timer;    /* per-rq high-resolution preemption timer */
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
        /* latency stats */
        struct sched_info rq_sched_info;
        SCHED_FEAT_START_DEBIT          = 4,
        SCHED_FEAT_TREE_AVG             = 8,
        SCHED_FEAT_APPROX_AVG           = 16,
+       SCHED_FEAT_HRTICK               = 32,
+       SCHED_FEAT_DOUBLE_TICK          = 64,
 };
 
 const_debug unsigned int sysctl_sched_features =
                SCHED_FEAT_WAKEUP_PREEMPT       * 1 |
                SCHED_FEAT_START_DEBIT          * 1 |
                SCHED_FEAT_TREE_AVG             * 0 |
-               SCHED_FEAT_APPROX_AVG           * 0;
+               SCHED_FEAT_APPROX_AVG           * 0 |
+               SCHED_FEAT_HRTICK               * 1 |
+               SCHED_FEAT_DOUBLE_TICK          * 0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
+static void __resched_task(struct task_struct *p, int tif_bit);
+
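+/*
+ * Plain resched_task() keeps the existing behaviour: set TIF_NEED_RESCHED
+ * and, via __resched_task(), kick the task's CPU if it is remote.
+ */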
+static inline void resched_task(struct task_struct *p)
+{
+       __resched_task(p, TIF_NEED_RESCHED);
+}
+
+#ifdef CONFIG_SCHED_HRTICK
+/*
+ * Use HR-timers to deliver accurate preemption points.
+ *
+ * It's all a bit involved since we cannot program an hrtimer while holding
+ * the rq->lock. So what we do is store state in rq->hrtick_* and ask for a
+ * reschedule event.
+ *
+ * When we get rescheduled we reprogram the hrtick_timer outside of the
+ * rq->lock.
+ */
+static inline void resched_hrt(struct task_struct *p)
+{
+       __resched_task(p, TIF_HRTICK_RESCHED);
+}
+
+static inline void resched_rq(struct rq *rq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rq->lock, flags);
+       resched_task(rq->curr);
+       spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+enum {
+       HRTICK_SET,             /* re-program hrtick_timer */
+       HRTICK_RESET,           /* not a new slice */
+};
+
+/*
+ * Use hrtick when:
+ *  - enabled by features
+ *  - the hrtimer is actually in high-res mode
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+       if (!sched_feat(HRTICK))
+               return 0;
+       return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+/*
+ * Called to set the hrtick timer state.
+ *
+ * Called with rq->lock held and IRQs disabled.
+ */
+static void hrtick_start(struct rq *rq, u64 delay, int reset)
+{
+       assert_spin_locked(&rq->lock);
+
+       /*
+        * preempt at: now + delay
+        */
+       rq->hrtick_expire =
+               ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
+       /*
+        * indicate we need to program the timer
+        */
+       __set_bit(HRTICK_SET, &rq->hrtick_flags);
+       if (reset)
+               __set_bit(HRTICK_RESET, &rq->hrtick_flags);
+
+       /*
+        * New slices are called from the schedule path and don't need a
+        * forced reschedule.
+        */
+       if (reset)
+               resched_hrt(rq->curr);
+}
+
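+/*
+ * Cancel a pending hrtick timer, if any.  Called from schedule() before a
+ * new task is picked.
+ */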
+static void hrtick_clear(struct rq *rq)
+{
+       if (hrtimer_active(&rq->hrtick_timer))
+               hrtimer_cancel(&rq->hrtick_timer);
+}
+
+/*
+ * Program or cancel the timer according to the pending state, if any.
+ */
+static void hrtick_set(struct rq *rq)
+{
+       ktime_t time;
+       int set, reset;
+       unsigned long flags;
+
+       WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+       spin_lock_irqsave(&rq->lock, flags);
+       set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
+       reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
+       time = rq->hrtick_expire;
+       clear_thread_flag(TIF_HRTICK_RESCHED);
+       spin_unlock_irqrestore(&rq->lock, flags);
+
+       if (set) {
+               hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
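+               /*
+                * If the timer did not become active (e.g. the expiry time
+                * already passed), fall back to a regular reschedule so the
+                * preemption point is not lost.
+                */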
+               if (reset && !hrtimer_active(&rq->hrtick_timer))
+                       resched_rq(rq);
+       } else
+               hrtick_clear(rq);
+}
+
+/*
+ * High-resolution timer tick.
+ * Runs from hardirq context with interrupts disabled.
+ */
+static enum hrtimer_restart hrtick(struct hrtimer *timer)
+{
+       struct rq *rq = container_of(timer, struct rq, hrtick_timer);
+
+       WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+       spin_lock(&rq->lock);
+       __update_rq_clock(rq);
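+       /* queued tick: programmed to hit the end of the current slice */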
+       rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+       spin_unlock(&rq->lock);
+
+       return HRTIMER_NORESTART;
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+       rq->hrtick_flags = 0;
+       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       rq->hrtick_timer.function = hrtick;
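+       /*
+        * Run the callback straight from the timer interrupt rather than
+        * deferring it to the hrtimer softirq; it emulates the scheduler
+        * tick and must run in hard IRQ context.
+        */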
+       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+}
+
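+/*
+ * Called once TIF_HRTICK_RESCHED has been noticed, with rq->lock no longer
+ * held: program the timer from the state stored by hrtick_start().
+ */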
+void hrtick_resched(void)
+{
+       struct rq *rq;
+       unsigned long flags;
+
+       if (!test_thread_flag(TIF_HRTICK_RESCHED))
+               return;
+
+       local_irq_save(flags);
+       rq = cpu_rq(smp_processor_id());
+       hrtick_set(rq);
+       local_irq_restore(flags);
+}
+#else
+static inline void hrtick_clear(struct rq *rq)
+{
+}
+
+static inline void hrtick_set(struct rq *rq)
+{
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+}
+
+void hrtick_resched(void)
+{
+}
+#endif
+
 /*
  * resched_task - mark a task 'to be rescheduled now'.
  *
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
-static void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
 {
        int cpu;
 
        assert_spin_locked(&task_rq(p)->lock);
 
-       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+       if (unlikely(test_tsk_thread_flag(p, tif_bit)))
                return;
 
-       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+       set_tsk_thread_flag(p, tif_bit);
 
        cpu = task_cpu(p);
        if (cpu == smp_processor_id())
        spin_unlock_irqrestore(&rq->lock, flags);
 }
 #else
-static inline void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
 {
        assert_spin_locked(&task_rq(p)->lock);
-       set_tsk_need_resched(p);
+       set_tsk_thread_flag(p, tif_bit);
 }
 #endif
 
        rq->tick_timestamp = rq->clock;
        update_cpu_load(rq);
        if (curr != rq->idle) /* FIXME: needed? */
-               curr->sched_class->task_tick(rq, curr);
+               curr->sched_class->task_tick(rq, curr, 0);
        spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 
        schedule_debug(prev);
 
+       hrtick_clear(rq);
+
        /*
         * Do the rq-clock update outside the rq lock:
         */
                ++*switch_count;
 
                context_switch(rq, prev, next); /* unlocks the rq */
+               /*
+                * the context switch might have flipped the stack from under
+                * us, hence refresh the local variables.
+                */
+               cpu = smp_processor_id();
+               rq = cpu_rq(cpu);
        } else
                spin_unlock_irq(&rq->lock);
 
-       if (unlikely(reacquire_kernel_lock(current) < 0)) {
-               cpu = smp_processor_id();
-               rq = cpu_rq(cpu);
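+       /*
+        * rq->lock has been released; program any hrtick state recorded
+        * while it was held.
+        */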
+       hrtick_set(rq);
+
+       if (unlikely(reacquire_kernel_lock(current) < 0))
                goto need_resched_nonpreemptible;
-       }
+
        preempt_enable_no_resched();
        if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
                goto need_resched;
                rq->rt.overloaded = 0;
                rq_attach_root(rq, &def_root_domain);
 #endif
+               init_rq_hrtick(rq);
+
                atomic_set(&rq->nr_iowait, 0);
 
                array = &rq->rt.active;
 
        cfs_rq->curr = NULL;
 }
 
-static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+static void
+entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 {
        /*
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
 
+#ifdef CONFIG_SCHED_HRTICK
+       /*
+        * queued ticks are scheduled to match the slice, so don't bother
+        * validating it and just reschedule.
+        */
+       if (queued)
+               return resched_task(rq_of(cfs_rq)->curr);
+       /*
+        * don't let the period tick interfere with the hrtick preemption
+        */
+       if (!sched_feat(DOUBLE_TICK) &&
+                       hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
+               return;
+#endif
+
        if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
                check_preempt_tick(cfs_rq, curr);
 }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_SCHED_HRTICK
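+/*
+ * Program the hrtick timer to fire when the running entity is expected to
+ * have consumed its slice, so preemption happens at slice boundaries
+ * instead of waiting for the next regular tick.
+ */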
+static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
+{
+       int requeue = rq->curr == p;
+       struct sched_entity *se = &p->se;
+       struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+       WARN_ON(task_rq(p) != rq);
+
+       if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
+               u64 slice = sched_slice(cfs_rq, se);
+               u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+               s64 delta = slice - ran;
+
+               if (delta < 0) {
+                       if (rq->curr == p)
+                               resched_task(p);
+                       return;
+               }
+
+               /*
+                * Don't schedule slices shorter than 10000ns; that just
+                * doesn't make sense. Rely on vruntime for fairness.
+                */
+               if (!requeue)
+                       delta = max(10000LL, delta);
+
+               hrtick_start(rq, delta, requeue);
+       }
+}
+#else
+static inline void
+hrtick_start_fair(struct rq *rq, struct task_struct *p)
+{
+}
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
         */
        if (incload)
                inc_cpu_load(rq, topse->load.weight);
+
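+       /* the queue changed; re-evaluate the running task's slice timer */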
+       hrtick_start_fair(rq, rq->curr);
 }
 
 /*
         */
        if (decload)
                dec_cpu_load(rq, topse->load.weight);
+
+       hrtick_start_fair(rq, rq->curr);
 }
 
 /*
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
 {
+       struct task_struct *p;
        struct cfs_rq *cfs_rq = &rq->cfs;
        struct sched_entity *se;
 
                cfs_rq = group_cfs_rq(se);
        } while (cfs_rq);
 
-       return task_of(se);
+       p = task_of(se);
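+       /* arm the slice timer for the task we are about to run */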
+       hrtick_start_fair(rq, p);
+
+       return p;
 }
 
 /*
 /*
  * scheduler tick hitting a task of our scheduling class:
  */
-static void task_tick_fair(struct rq *rq, struct task_struct *curr)
+static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 {
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &curr->se;
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
-               entity_tick(cfs_rq, se);
+               entity_tick(cfs_rq, se, queued);
        }
 }