--- /dev/null
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks go to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+/* rec->ip points just past the 5-byte mcount call; back up by this much */
+#define CALL_BACK              5
+
+/* bytes eb 03 (little-endian short): jmp over the call's last 3 bytes */
+#define JMPFWD                 0x03eb
+
+static unsigned short ftrace_jmp = JMPFWD;
+
+struct ftrace_record {
+       struct dyn_ftrace       rec;
+       int                     failed;
+} __attribute__((packed));
+
+struct ftrace_page {
+       struct ftrace_page      *next;
+       int                     index;
+       struct ftrace_record    records[];
+} __attribute__((packed));
+
+#define ENTRIES_PER_PAGE \
+  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT             10000
+
+#define MCOUNT_ADDR ((long)(&mcount))
+
+union ftrace_code_union {
+       char code[5];
+       struct {
+               char e8;
+               int offset;
+       } __attribute__((packed));
+};
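
A minimal user-space sketch of what this union encodes, with made-up
addresses (and assuming a little-endian host, as on x86): the 0xe8
opcode is followed by a rel32 displacement measured from the byte after
the call, and JMPFWD (0x03eb) stored as a short becomes the bytes
eb 03, a short jmp over the three bytes of the original call that the
two-byte jump does not overwrite.

	#include <stdio.h>
	#include <string.h>

	union code_union {                      /* mirrors ftrace_code_union */
		unsigned char code[5];
		struct {
			unsigned char e8;
			int offset;
		} __attribute__((packed));
	};

	int main(void)
	{
		unsigned long ip = 0xc0100005UL;          /* byte after the call (made up) */
		unsigned long mcount_addr = 0xc0400000UL; /* target address (made up) */
		union code_union c;
		unsigned short jmp = 0x03eb;
		unsigned char b[2];
		int i;

		c.e8 = 0xe8;
		c.offset = (int)(mcount_addr - ip);     /* as ftrace_calc_offset() */
		for (i = 0; i < 5; i++)
			printf("%02x ", c.code[i]);     /* e8 fb ff 2f 00 */
		printf("\n");

		memcpy(b, &jmp, 2);
		printf("%02x %02x\n", b[0], b[1]);      /* eb 03 */
		return 0;
	}
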
+
+static struct ftrace_page      *ftrace_pages_start;
+static struct ftrace_page      *ftrace_pages;
+
+notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip)
+{
+       struct ftrace_record *rec;
+       unsigned short save;
+
+       /* back up from the return address to the start of the call */
+       ip -= CALL_BACK;
+       save = *(short *)ip;
+
+       /* If this was already converted, skip it */
+       if (save == JMPFWD)
+               return NULL;
+
+       if (ftrace_pages->index == ENTRIES_PER_PAGE) {
+               if (!ftrace_pages->next)
+                       return NULL;
+               ftrace_pages = ftrace_pages->next;
+       }
+
+       rec = &ftrace_pages->records[ftrace_pages->index++];
+
+       return &rec->rec;
+}
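
The allocator above is a simple bump allocator over a chain of zeroed
pages: records are handed out of the current page until index reaches
ENTRIES_PER_PAGE, then it steps to the next page, preallocated by
ftrace_shutdown_replenish() below. The same pattern as a user-space
sketch, with calloc() standing in for get_zeroed_page() and stand-in
types:

	#include <stdlib.h>

	#define PG_SIZE 4096

	struct record { unsigned long ip; int failed; };

	struct page {
		struct page *next;
		int index;
		struct record records[];
	};

	#define PER_PAGE ((PG_SIZE - sizeof(struct page)) / sizeof(struct record))

	static struct page *cur_page;

	static struct record *alloc_record(void)
	{
		if (cur_page->index == PER_PAGE) {
			if (!cur_page->next)    /* replenish happens elsewhere */
				return NULL;
			cur_page = cur_page->next;
		}
		return &cur_page->records[cur_page->index++];
	}

	int main(void)
	{
		cur_page = calloc(1, PG_SIZE);
		return cur_page && alloc_record() ? 0 : 1;
	}
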
+
+static int notrace
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+                  unsigned char *new_code)
+{
+       unsigned short old = *(unsigned short *)old_code;
+       unsigned short new = *(unsigned short *)new_code;
+       unsigned short replaced;
+       int faulted = 0;
+
+       /*
+        * Note: due to modules and __init, code can
+        * disappear and change; we need to protect against faulting
+        * as well as against the code changing under us.
+        *
+        * No real locking is needed; this code is run through
+        * kstop_machine.
+        */
+       asm volatile (
+               "1: lock\n"
+               "   cmpxchg %w3, (%2)\n"
+               "2:\n"
+               ".section .fixup, \"ax\"\n"
+               "3:     movl $1, %0\n"
+               "       jmp 2b\n"
+               ".previous\n"
+               _ASM_EXTABLE(1b, 3b)
+               : "=r"(faulted), "=a"(replaced)
+               : "r"(ip), "r"(new), "0"(faulted), "a"(old)
+               : "memory");
+       sync_core();
+
+       if (replaced != old)
+               faulted = 2;
+
+       return faulted;
+}
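
Stripped of the fault fixup, the asm above is a 16-bit
compare-and-exchange: install the new first two bytes only if the old
ones are still in place, and report a mismatch otherwise. The same
logic in portable C on an ordinary buffer (no exception table is needed
in user space), using the GCC/Clang __atomic builtins:

	#include <stdio.h>

	/* 0 on success, 2 if the code had already changed under us,
	 * mirroring the "replaced != old" path above */
	static int modify_code(unsigned short *ip,
			       unsigned short old, unsigned short new)
	{
		unsigned short expected = old;

		if (!__atomic_compare_exchange_n(ip, &expected, new, 0,
						 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			return 2;
		return 0;
	}

	int main(void)
	{
		unsigned short insn = 0x15e8;   /* first two bytes of some call */

		printf("%d\n", modify_code(&insn, 0x15e8, 0x03eb)); /* 0 */
		printf("%d\n", modify_code(&insn, 0x15e8, 0x03eb)); /* 2 */
		return 0;
	}
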
+
+/* a call's rel32 is measured from the first byte after the instruction */
+static int notrace ftrace_calc_offset(long ip)
+{
+       return (int)(MCOUNT_ADDR - ip);
+}
+
+notrace void ftrace_code_disable(struct dyn_ftrace *rec)
+{
+       unsigned long ip;
+       union ftrace_code_union save;
+       struct ftrace_record *r =
+               container_of(rec, struct ftrace_record, rec);
+
+       ip = rec->ip;
+
+       save.e8         = 0xe8;
+       save.offset     = ftrace_calc_offset(ip);
+
+       /* move the IP back to the start of the call */
+       ip -= CALL_BACK;
+
+       r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp);
+}
+
+static void notrace ftrace_replace_code(int saved)
+{
+       unsigned char *new = NULL, *old = NULL;
+       struct ftrace_record *rec;
+       struct ftrace_page *pg;
+       unsigned long ip;
+       int i;
+
+       if (saved)
+               old = (char *)&ftrace_jmp;
+       else
+               new = (char *)&ftrace_jmp;
+
+       for (pg = ftrace_pages_start; pg; pg = pg->next) {
+               for (i = 0; i < pg->index; i++) {
+                       union ftrace_code_union calc;
+                       rec = &pg->records[i];
+
+                       /* don't modify code that has already faulted */
+                       if (rec->failed)
+                               continue;
+
+                       ip = rec->rec.ip;
+
+                       calc.e8         = 0xe8;
+                       calc.offset     = ftrace_calc_offset(ip);
+
+                       if (saved)
+                               new = calc.code;
+                       else
+                               old = calc.code;
+
+                       ip -= CALL_BACK;
+
+                       rec->failed = ftrace_modify_code(ip, old, new);
+               }
+       }
+}
+
+notrace void ftrace_startup_code(void)
+{
+       ftrace_replace_code(1);
+}
+
+notrace void ftrace_shutdown_code(void)
+{
+       ftrace_replace_code(0);
+}
+
+notrace void ftrace_shutdown_replenish(void)
+{
+       if (ftrace_pages->next)
+               return;
+
+       /* allocate another page */
+       ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
+}
+
+notrace int ftrace_shutdown_arch_init(void)
+{
+       struct ftrace_page *pg;
+       int cnt;
+       int i;
+
+       /* allocate the first page */
+       ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!ftrace_pages_start)
+               return -1;
+
+       /*
+        * Allocate a few more pages.
+        *
+        * TODO: have some parser search vmlinux before
+        *   final linking to find all calls to ftrace.
+        *   Then we can:
+        *    a) know how many pages to allocate.
+        *     and/or
+        *    b) set up the table then.
+        *
+        *  The dynamic code is still necessary for
+        *  modules.
+        */
+
+       pg = ftrace_pages = ftrace_pages_start;
+
+       cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+
+       for (i = 0; i < cnt; i++) {
+               pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+
+               /* If we fail, we'll try later anyway */
+               if (!pg->next)
+                       break;
+
+               pg = pg->next;
+       }
+
+       return 0;
+}
 
  *  Copyright (C) 2004 William Lee Irwin III
  */
 
-#include <linux/module.h>
+#include <linux/stop_machine.h>
+#include <linux/clocksource.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/hardirq.h>
 #include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+
+#include "trace.h"
 
-static DEFINE_SPINLOCK(ftrace_func_lock);
+static DEFINE_SPINLOCK(ftrace_lock);
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
        .func = ftrace_stub,
 };
 
 /**
- * register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
- *
- * Register a function to be called by all functions in the
- * kernel.
+ * clear_ftrace_function - reset the ftrace function
  *
- * Note: @ops->func and all the functions it calls must be labeled
- *       with "notrace", otherwise it will go into a
- *       recursive loop.
+ * This NULLs the ftrace function and in essence stops
+ * tracing. There may be some lag before every CPU stops
+ * calling the previously registered function.
  */
-int register_ftrace_function(struct ftrace_ops *ops)
+void clear_ftrace_function(void)
 {
-       unsigned long flags;
+       ftrace_trace_function = ftrace_stub;
+}
+
+static int notrace __register_ftrace_function(struct ftrace_ops *ops)
+{
+       /* Should never be called from interrupt context */
+       spin_lock(&ftrace_lock);
 
-       spin_lock_irqsave(&ftrace_func_lock, flags);
        ops->next = ftrace_list;
        /*
         * We are entering ops into the ftrace_list but another
         * CPU might be walking that list. We need to make sure
         * the ops->next pointer is valid before another CPU sees
         * the ops pointer included into the ftrace_list.
         */
        smp_wmb();
        ftrace_list = ops;
+
        /*
         * For one func, simply call it directly.
         * For more than one func, call the chain.
                ftrace_trace_function = ops->func;
        else
                ftrace_trace_function = ftrace_list_func;
-       spin_unlock_irqrestore(&ftrace_func_lock, flags);
+
+       spin_unlock(&ftrace_lock);
 
        return 0;
 }
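
The ordering above matters: ops->next must be globally visible before
ops itself becomes reachable through ftrace_list, or a CPU walking the
list could follow an uninitialized next pointer. A user-space sketch of
the publish side of that pattern, with a release fence standing in for
smp_wmb() (a real reader would need the matching read-side ordering):

	struct my_ops {
		void (*func)(void);
		struct my_ops *next;
	};

	static struct my_ops my_list_end;
	static struct my_ops *my_list = &my_list_end;

	static void publish(struct my_ops *ops)
	{
		ops->next = my_list;                     /* 1: link it in first  */
		__atomic_thread_fence(__ATOMIC_RELEASE); /* smp_wmb() stand-in   */
		my_list = ops;                           /* 2: then make visible */
	}

	int main(void)
	{
		static struct my_ops mine;

		publish(&mine);
		return my_list != &mine;                 /* 0: mine is the head */
	}
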
 
-/**
- * unregister_ftrace_function - unresgister a function for profiling.
- * @ops - ops structure that holds the function to unregister
- *
- * Unregister a function that was added to be called by ftrace profiling.
- */
-int unregister_ftrace_function(struct ftrace_ops *ops)
+static int notrace __unregister_ftrace_function(struct ftrace_ops *ops)
 {
-       unsigned long flags;
        struct ftrace_ops **p;
        int ret = 0;
 
-       spin_lock_irqsave(&ftrace_func_lock, flags);
+       spin_lock(&ftrace_lock);
 
        /*
-        * If we are the only function, then the ftrace pointer is
-        * pointing directly to that function.
+        * If we are removing the last function, then simply point
+        * to the ftrace_stub.
         */
        if (ftrace_list == ops && ops->next == &ftrace_list_end) {
                ftrace_trace_function = ftrace_stub;
        *p = (*p)->next;
 
        /* If we only have one func left, then call that directly */
-       if (ftrace_list->next == &ftrace_list_end)
+       if (ftrace_list == &ftrace_list_end ||
+           ftrace_list->next == &ftrace_list_end)
                ftrace_trace_function = ftrace_list->func;
 
  out:
-       spin_unlock_irqrestore(&ftrace_func_lock, flags);
+       spin_unlock(&ftrace_lock);
+
+       return ret;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
+
+static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
+
+static DEFINE_SPINLOCK(ftrace_shutdown_lock);
+static DEFINE_MUTEX(ftraced_lock);
+
+static int ftraced_trigger;
+static int ftraced_suspend;
+
+static int ftrace_record_suspend;
+
+static inline int
+notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key)
+{
+       struct dyn_ftrace *p;
+       struct hlist_node *t;
+       int found = 0;
+
+       hlist_for_each_entry(p, t, &ftrace_hash[key], node) {
+               if (p->ip == ip) {
+                       found = 1;
+                       break;
+               }
+       }
+
+       return found;
+}
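
hash_long() here is the kernel's multiplicative golden-ratio hash
folded down to FTRACE_HASHBITS bits, so the key is always a valid table
index (the WARN_ON_ONCE below is belt and braces). A quick user-space
illustration of the 32-bit variant; the constant is the one the
2.6-era <linux/hash.h> used, and FTRACE_HASHBITS is assumed here to
be 10:

	#include <stdio.h>
	#include <stdint.h>

	#define HASHBITS  10
	#define HASHSIZE  (1 << HASHBITS)

	static unsigned long hash32(uint32_t val, unsigned int bits)
	{
		/* GOLDEN_RATIO_PRIME for 32 bits, per 2.6-era <linux/hash.h> */
		return (uint32_t)(val * 0x9e370001UL) >> (32 - bits);
	}

	int main(void)
	{
		uint32_t ip = 0xc0123456u;
		unsigned long key = hash32(ip, HASHBITS);

		printf("key = %lu (always < %d)\n", key, HASHSIZE);
		return 0;
	}
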
+
+static inline void notrace
+ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
+{
+       hlist_add_head(&node->node, &ftrace_hash[key]);
+}
+
+static void notrace
+ftrace_record_ip(unsigned long ip, unsigned long parent_ip)
+{
+       struct dyn_ftrace *node;
+       unsigned long flags;
+       unsigned long key;
+       int resched;
+
+       resched = need_resched();
+       preempt_disable_notrace();
+
+       /* We simply need to protect against recursion */
+       __get_cpu_var(ftrace_shutdown_disable_cpu)++;
+       if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1)
+               goto out;
+
+       if (unlikely(ftrace_record_suspend))
+               goto out;
+
+       key = hash_long(ip, FTRACE_HASHBITS);
+
+       WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
+
+       if (ftrace_ip_in_hash(ip, key))
+               goto out;
+
+       spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+
+       /* This ip may have hit the hash before the lock */
+       if (ftrace_ip_in_hash(ip, key))
+               goto out_unlock;
+
+       /*
+        * There's a slight race: ftraced may have already
+        * converted this site and emptied the hash. The arch
+        * alloc is responsible for checking whether the code
+        * at this IP has already changed, and if it has, the
+        * alloc fails.
+        */
+       node = ftrace_alloc_shutdown_node(ip);
+       if (!node)
+               goto out_unlock;
+
+       node->ip = ip;
+
+       ftrace_add_hash(node, key);
+
+       ftraced_trigger = 1;
+
+ out_unlock:
+       spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+ out:
+       __get_cpu_var(ftrace_shutdown_disable_cpu)--;
+
+       /* prevent recursion with scheduler */
+       if (resched)
+               preempt_enable_no_resched_notrace();
+       else
+               preempt_enable_notrace();
+}
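
Two guards protect the hook above: a per-CPU depth counter, so that
anything it calls which is itself traced cannot recurse back into it,
and a lockless hash probe repeated under the spinlock, so the common
already-recorded case never takes the lock at all. A thread-local
user-space analogue of the recursion guard:

	#include <stdio.h>

	static __thread int trace_depth;        /* per-CPU counter stand-in */

	static void record_ip(unsigned long ip)
	{
		if (++trace_depth != 1)
			goto out;               /* re-entered: bail out */

		/* lockless check, then lock + re-check + insert, as above */
		printf("recording %#lx\n", ip);
	 out:
		trace_depth--;
	}

	int main(void)
	{
		record_ip(0xc0123456UL);
		return 0;
	}
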
+
+static struct ftrace_ops ftrace_shutdown_ops __read_mostly =
+{
+       .func = ftrace_record_ip,
+};
+
+static int notrace __ftrace_modify_code(void *data)
+{
+       void (*func)(void) = data;
+
+       func();
+       return 0;
+}
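
stop_machine_run() only accepts an int (*)(void *), so the
void-returning patch routines ride in through the data argument and are
recovered by this trampoline while all CPUs are held. The cast-and-call
shape in isolation (converting between function and object pointers is
a GCC/POSIX extension, which is what the kernel relies on here too):

	#include <stdio.h>

	static int trampoline(void *data)
	{
		void (*func)(void) = data;      /* recover the smuggled function */

		func();
		return 0;
	}

	static void patch_all(void) { puts("code patching would run here"); }

	int main(void)
	{
		return trampoline((void *)patch_all);
	}
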
+
+static void notrace ftrace_run_startup_code(void)
+{
+       stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS);
+}
+
+static void notrace ftrace_run_shutdown_code(void)
+{
+       stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS);
+}
+
+static void notrace ftrace_startup(void)
+{
+       mutex_lock(&ftraced_lock);
+       ftraced_suspend++;
+       if (ftraced_suspend != 1)
+               goto out;
+       __unregister_ftrace_function(&ftrace_shutdown_ops);
+
+       ftrace_run_startup_code();
+ out:
+       mutex_unlock(&ftraced_lock);
+}
+
+static void notrace ftrace_shutdown(void)
+{
+       mutex_lock(&ftraced_lock);
+       ftraced_suspend--;
+       if (ftraced_suspend)
+               goto out;
+
+       ftrace_run_shutdown_code();
+
+       __register_ftrace_function(&ftrace_shutdown_ops);
+ out:
+       mutex_unlock(&ftraced_lock);
+}
+
+static cycle_t         ftrace_update_time;
+static unsigned long   ftrace_update_cnt;
+unsigned long          ftrace_update_tot_cnt;
+
+static int notrace __ftrace_update_code(void *ignore)
+{
+       struct dyn_ftrace *p;
+       struct hlist_head head;
+       struct hlist_node *t;
+       cycle_t start, stop;
+       int i;
+
+       /* Don't let the recording hook run while we update */
+       __unregister_ftrace_function(&ftrace_shutdown_ops);
+
+       start = now(raw_smp_processor_id());
+       ftrace_update_cnt = 0;
+
+       /* No locks needed, the machine is stopped! */
+       for (i = 0; i < FTRACE_HASHSIZE; i++) {
+               if (hlist_empty(&ftrace_hash[i]))
+                       continue;
+
+               head = ftrace_hash[i];
+               INIT_HLIST_HEAD(&ftrace_hash[i]);
+
+               /* all CPUS are stopped, we are safe to modify code */
+               hlist_for_each_entry(p, t, &head, node) {
+                       ftrace_code_disable(p);
+                       ftrace_update_cnt++;
+               }
+       }
+
+       stop = now(raw_smp_processor_id());
+       ftrace_update_time = stop - start;
+       ftrace_update_tot_cnt += ftrace_update_cnt;
+
+       __register_ftrace_function(&ftrace_shutdown_ops);
 
        return 0;
 }
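
Because every other CPU is spinning inside stop_machine, each bucket
can be detached wholesale, the table slot reset, and the detached chain
walked with no locking. The detach-then-drain idiom on a plain singly
linked list (kernel hlists carry a pprev back-pointer, but the move is
the same):

	#include <stdio.h>

	struct node { unsigned long ip; struct node *next; };

	static struct node *bucket;             /* one hash bucket */

	static void drain_bucket(void)
	{
		struct node *head = bucket;     /* detach the whole chain */
		struct node *p;

		bucket = NULL;                  /* slot is empty again */
		for (p = head; p; p = p->next)
			printf("disable %#lx\n", p->ip); /* ftrace_code_disable() */
	}

	int main(void)
	{
		struct node n = { 0xc0123456UL, NULL };

		bucket = &n;
		drain_bucket();
		return 0;
	}
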
 
+static void notrace ftrace_update_code(void)
+{
+       stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+}
+
+static int notrace ftraced(void *ignore)
+{
+       unsigned long usecs;
+
+       set_current_state(TASK_INTERRUPTIBLE);
+
+       while (!kthread_should_stop()) {
+
+               /* check once a second */
+               schedule_timeout(HZ);
+
+               mutex_lock(&ftraced_lock);
+               if (ftraced_trigger && !ftraced_suspend) {
+                       ftrace_record_suspend++;
+                       ftrace_update_code();
+                       usecs = nsecs_to_usecs(ftrace_update_time);
+                       if (ftrace_update_tot_cnt > 100000) {
+                               ftrace_update_tot_cnt = 0;
+                               pr_info("hm, dftrace overflow: %lu change%s"
+                                        " (%lu total) in %lu usec%s\n",
+                                       ftrace_update_cnt,
+                                       ftrace_update_cnt != 1 ? "s" : "",
+                                       ftrace_update_tot_cnt,
+                                       usecs, usecs != 1 ? "s" : "");
+                               WARN_ON_ONCE(1);
+                       }
+                       ftraced_trigger = 0;
+                       ftrace_record_suspend--;
+               }
+               mutex_unlock(&ftraced_lock);
+
+               ftrace_shutdown_replenish();
+
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+       __set_current_state(TASK_RUNNING);
+       return 0;
+}
+
+static int __init notrace ftrace_shutdown_init(void)
+{
+       struct task_struct *p;
+       int ret;
+
+       ret = ftrace_shutdown_arch_init();
+       if (ret)
+               return ret;
+
+       p = kthread_run(ftraced, NULL, "ftraced");
+       if (IS_ERR(p))
+               return -1;
+
+       __register_ftrace_function(&ftrace_shutdown_ops);
+
+       return 0;
+}
+
+core_initcall(ftrace_shutdown_init);
+#else
+# define ftrace_startup()      do { } while (0)
+# define ftrace_shutdown()     do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 /**
- * clear_ftrace_function - reset the ftrace function
+ * register_ftrace_function - register a function for profiling
+ * @ops: ops structure that holds the function for profiling.
  *
- * This NULLs the ftrace function and in essence stops
- * tracing.  There may be lag
+ * Register a function to be called by all functions in the
+ * kernel.
+ *
+ * Note: @ops->func and all the functions it calls must be labeled
+ *       with "notrace", otherwise it will go into a
+ *       recursive loop.
  */
-void clear_ftrace_function(void)
+int register_ftrace_function(struct ftrace_ops *ops)
 {
-       ftrace_trace_function = ftrace_stub;
+       ftrace_startup();
+
+       return __register_ftrace_function(ops);
+}
+
+/**
+ * unregister_ftrace_function - unregister a function for profiling.
+ * @ops: ops structure that holds the function to unregister
+ *
+ * Unregister a function that was added to be called by ftrace profiling.
+ */
+int unregister_ftrace_function(struct ftrace_ops *ops)
+{
+       int ret;
+
+       ret = __unregister_ftrace_function(ops);
+
+       if (ftrace_list == &ftrace_list_end)
+               ftrace_shutdown();
+
+       return ret;
 }
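
A caller-side sketch against the API this patch exports; the counter
and all names here are invented for illustration, and, as the comment
above warns, the callback and everything it calls must be notrace:

	#include <linux/ftrace.h>

	static unsigned long my_hits;   /* racy counter, fine for a sketch */

	static void notrace my_trace_func(unsigned long ip,
					  unsigned long parent_ip)
	{
		my_hits++;              /* must not call traceable code */
	}

	static struct ftrace_ops my_ops = {
		.func = my_trace_func,
	};

	/* in some module's init and exit paths: */
	/*      register_ftrace_function(&my_ops);   */
	/*      unregister_ftrace_function(&my_ops); */
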