#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
+#include <asm/pgtable.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_alt_once;
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
-               memcpy(insns, noptable[noplen], noplen);
+               text_poke(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
                        continue;
                if (*ptr > text_end)
                        continue;
-               **ptr = 0xf0; /* lock prefix */
+               text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
        };
 }
 
                /* Pad the rest with nops */
                nop_out(p->instr + used, p->len - used);
        }
-
-       /* Sync to be conservative, in case we patched following
-        * instructions */
-       sync_core();
 }
 extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
        apply_paravirt(__parainstructions, __parainstructions_end);
        local_irq_restore(flags);
 }
+
+/*
+ * text_poke - write @len opcode bytes from @opcode over kernel text at @oaddr.
+ *
+ * If the page-table entry found for @oaddr is writable the bytes are copied
+ * in place.  Otherwise the page holding @oaddr and the page after it are
+ * mapped through a temporary vmap() alias created with PAGE_KERNEL, the copy
+ * goes through that alias, and the alias is removed again before returning.
+ * If lookup_address() returns NULL or the alias cannot be created, nothing
+ * is written and the function returns silently.
+ *
+ * Warning:
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these instructions.
+ * And on the local CPU you need to be protected against NMI or MCE handlers
+ * seeing an inconsistent instruction while you patch.
+ */
+void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
+{
+       u8 *addr = oaddr;
+       void *alias = NULL;     /* base returned by vmap(), if an alias was needed */
+       pte_t *pte = lookup_address((unsigned long)oaddr);
+
+       /* NOTE(review): assumes lookup_address() yields NULL for an unmapped
+          address -- confirm against its definition. */
+       if (!pte)
+               return;
+       if (!pte_write(*pte)) {
+               /* Two pages, so a write that runs past the end of the first
+                  page still lands inside the alias. */
+               struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
+               alias = vmap(p, 2, VM_MAP, PAGE_KERNEL);
+               if (!alias)
+                       return;
+               addr = (u8 *)alias + ((unsigned long)oaddr) % PAGE_SIZE;
+       }
+       memcpy(addr, opcode, len);
+       sync_core();
+       /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
+          case. */
+       if (cpu_has_clflush)
+               asm("clflush (%0) " :: "r" (oaddr) : "memory");
+       /* vunmap() has to be given the address vmap() returned, not the
+          offset pointer that was used for the copy. */
+       if (alias)
+               vunmap(alias);
+}
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/uaccess.h>
+#include <asm/alternative.h>
 
 void jprobe_return_end(void);
 
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-       *p->addr = BREAKPOINT_INSTRUCTION;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); /* write the one-byte breakpoint opcode at p->addr via text_poke() instead of a direct store */
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-       *p->addr = p->opcode;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, &p->opcode, 1); /* overwrite the byte at p->addr with p->opcode via text_poke() instead of a direct store */
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 
        return len;
 }
 
+struct branch { /* image of a 5-byte branch instruction, built on the stack and handed to text_poke() */
+       unsigned char opcode; /* first instruction byte; the visible users store 0xe8 (call) or 0xe9 (jmp) */
+       u32 delta; /* 32-bit displacement; paravirt_patch_call computes it as target minus (site + 5) */
+} __attribute__((packed)); /* packed so that sizeof is 5, which paravirt_patch_call checks with BUILD_BUG_ON */
+
 unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
                             void *site, u16 site_clobbers,
                             unsigned len)
 {
        unsigned char *call = site;
        unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
+       struct branch b; /* the call instruction is assembled here, then written in a single text_poke() */
 
        if (tgt_clobbers & ~site_clobbers)
                return len;     /* target would clobber too much for this site */
        if (len < 5)
                return len;     /* call too long for patch site */
 
-       *call++ = 0xe8;         /* call */
-       *(unsigned long *)call = delta;
+       b.opcode = 0xe8; /* call */
+       b.delta = delta; /* displacement measured from call+5, the byte after the instruction */
+       BUILD_BUG_ON(sizeof(b) != 5); /* the struct must be exactly the 5 bytes written below */
+       text_poke(call, (unsigned char *)&b, 5); /* write opcode and displacement over the patch site; call is no longer advanced */
 
        return 5;
 }
        if (len < 5)
                return len;     /* call too long for patch site */
 
-       *jmp++ = 0xe9;          /* jmp */
-       *(unsigned long *)jmp = delta;
+       b.opcode = 0xe9;        /* jmp */
+       b.delta = delta;
+       text_poke(call, (unsigned char *)&b, 5);
 
        return 5;
 }
 
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;
 
-#ifndef CONFIG_KPROBES
-#ifdef CONFIG_HOTPLUG_CPU
-       /* It must still be possible to apply SMP alternatives. */
-       if (num_possible_cpus() <= 1)
-#endif
-       {
-               change_page_attr(virt_to_page(start),
-                                size >> PAGE_SHIFT, PAGE_KERNEL_RX);
-               printk("Write protecting the kernel text: %luk\n", size >> 10);
-       }
-#endif
+       change_page_attr(virt_to_page(start),
+                        size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+       printk("Write protecting the kernel text: %luk\n", size >> 10);
        start += size;
        size = (unsigned long)__end_rodata - start;
        change_page_attr(virt_to_page(start),
 
 #include <linux/module.h>
 #include <linux/kdebug.h>
 
-#include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
+#include <asm/alternative.h>
 
 void jprobe_return_end(void);
 static void __kprobes arch_copy_kprobe(struct kprobe *p);
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-       *p->addr = BREAKPOINT_INSTRUCTION;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); /* write the one-byte breakpoint opcode at p->addr via text_poke() instead of a direct store */
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-       *p->addr = p->opcode;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, &p->opcode, 1); /* overwrite the byte at p->addr with p->opcode via text_poke() instead of a direct store */
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 
 {
        unsigned long start = (unsigned long)_stext, end;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       /* It must still be possible to apply SMP alternatives. */
-       if (num_possible_cpus() > 1)
-               start = (unsigned long)_etext;
-#endif
-
-#ifdef CONFIG_KPROBES
-       start = (unsigned long)__start_rodata;
-#endif
-       
        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
 
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
-static inline pte_t *lookup_address(unsigned long address) 
+pte_t *lookup_address(unsigned long address)
 { 
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
 
 #define __parainstructions_end NULL
 #endif
 
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
 #endif /* _I386_ALTERNATIVE_H */
 
 #define __parainstructions_end NULL
 #endif
 
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
 #endif /* _X86_64_ALTERNATIVE_H */
 
 
 extern int kern_addr_valid(unsigned long addr); 
 
+pte_t *lookup_address(unsigned long addr);
+
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
                remap_pfn_range(vma, vaddr, pfn, size, prot)