pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'linus' into cpumask-for-linus
author Ingo Molnar <mingo@elte.hu>
Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
Conflicts:
arch/x86/kernel/cpu/common.c

12 files changed:
arch/x86/include/asm/processor.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
arch/x86/kernel/process.c
arch/x86/kernel/tlb_uv.c
kernel/sched.c
mm/vmscan.c

index 9874dd98a29f41f1f0a53cf7f7af56f225f81c8b,ae85a8d66a30601a1a22c3b6b84f3df6d3056477..34c52370f2fe848a2e7c9dcb73fdc4a3048c087f
@@@ -75,9 -75,9 +75,9 @@@ struct cpuinfo_x86 
  #else
        /* Number of 4K pages in DTLB/ITLB combined (in pages): */
        int                     x86_tlbsize;
+ #endif
        __u8                    x86_virt_bits;
        __u8                    x86_phys_bits;
- #endif
        /* CPUID returned core id bits: */
        __u8                    x86_coreid_bits;
        /* Max extended CPUID function supported: */
@@@ -94,7 -94,7 +94,7 @@@
        unsigned long           loops_per_jiffy;
  #ifdef CONFIG_SMP
        /* cpus sharing the last level cache: */
 -      cpumask_t               llc_shared_map;
 +      cpumask_var_t           llc_shared_map;
  #endif
        /* cpuid returned max cores value: */
        u16                      x86_max_cores;
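
The llc_shared_map change above is the core of this branch: the fixed-size cpumask_t embedded in the struct becomes an off-stack cpumask_var_t, which must be allocated and freed explicitly. A minimal sketch of that discipline, using a hypothetical demo_mask rather than anything in this diff:

    #include <linux/cpumask.h>
    #include <linux/errno.h>
    #include <linux/slab.h>

    static cpumask_var_t demo_mask;     /* hypothetical example mask */

    static int demo_init(void)
    {
            /* With CONFIG_CPUMASK_OFFSTACK this allocates real memory;
             * otherwise cpumask_var_t is a plain struct cpumask and the
             * call always succeeds. */
            if (!alloc_cpumask_var(&demo_mask, GFP_KERNEL))
                    return -ENOMEM;
            cpumask_clear(demo_mask);
            cpumask_set_cpu(0, demo_mask);
            return 0;
    }

    static void demo_exit(void)
    {
            free_cpumask_var(demo_mask);
    }
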
@@@ -391,6 -391,9 +391,9 @@@ DECLARE_PER_CPU(union irq_stack_union, 
  DECLARE_INIT_PER_CPU(irq_stack_union);
  
  DECLARE_PER_CPU(char *, irq_stack_ptr);
+ DECLARE_PER_CPU(unsigned int, irq_count);
+ extern unsigned long kernel_eflags;
+ extern asmlinkage void ignore_sysret(void);
  #else /* X86_64 */
  #ifdef CONFIG_CC_STACKPROTECTOR
  DECLARE_PER_CPU(unsigned long, stack_canary);
@@@ -733,7 -736,6 +736,7 @@@ static inline void __sti_mwait(unsigne
  extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
  
  extern void select_idle_routine(const struct cpuinfo_x86 *c);
 +extern void init_c1e_mask(void);
  
  extern unsigned long          boot_option_idle_override;
  extern unsigned long          idle_halt;
index d7dd3c294e2a30669ae3cdf6faf3563d89a55c42,e2962cc1e27b742965f6af45a8cfdcf9b4c1a0b8..c4f667896c28f1a3eb7e6ae33341f5ba98f74c98
@@@ -1,50 -1,52 +1,50 @@@
- #include <linux/init.h>
- #include <linux/kernel.h>
- #include <linux/sched.h>
- #include <linux/string.h>
  #include <linux/bootmem.h>
+ #include <linux/linkage.h>
  #include <linux/bitops.h>
+ #include <linux/kernel.h>
  #include <linux/module.h>
- #include <linux/kgdb.h>
- #include <linux/topology.h>
+ #include <linux/percpu.h>
+ #include <linux/string.h>
  #include <linux/delay.h>
+ #include <linux/sched.h>
+ #include <linux/init.h>
+ #include <linux/kgdb.h>
  #include <linux/smp.h>
- #include <linux/percpu.h>
- #include <asm/i387.h>
- #include <asm/msr.h>
- #include <asm/io.h>
- #include <asm/linkage.h>
+ #include <linux/io.h>
+ #include <asm/stackprotector.h>
  #include <asm/mmu_context.h>
+ #include <asm/hypervisor.h>
+ #include <asm/processor.h>
+ #include <asm/sections.h>
+ #include <asm/topology.h>
+ #include <asm/cpumask.h>
+ #include <asm/pgtable.h>
+ #include <asm/atomic.h>
+ #include <asm/proto.h>
+ #include <asm/setup.h>
+ #include <asm/apic.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
  #include <asm/mtrr.h>
+ #include <asm/numa.h>
+ #include <asm/asm.h>
+ #include <asm/cpu.h>
  #include <asm/mce.h>
+ #include <asm/msr.h>
  #include <asm/pat.h>
- #include <asm/asm.h>
- #include <asm/numa.h>
  #include <asm/smp.h>
- #include <asm/cpu.h>
- #include <asm/cpumask.h>
- #include <asm/apic.h>
  
  #ifdef CONFIG_X86_LOCAL_APIC
  #include <asm/uv/uv.h>
  #endif
  
- #include <asm/pgtable.h>
- #include <asm/processor.h>
- #include <asm/desc.h>
- #include <asm/atomic.h>
- #include <asm/proto.h>
- #include <asm/sections.h>
- #include <asm/setup.h>
- #include <asm/hypervisor.h>
- #include <asm/stackprotector.h>
  #include "cpu.h"
  
 -#ifdef CONFIG_X86_64
 -
  /* all of these masks are initialized in setup_cpu_local_masks() */
- cpumask_var_t cpu_callin_mask;
- cpumask_var_t cpu_callout_mask;
  cpumask_var_t cpu_initialized_mask;
+ cpumask_var_t cpu_callout_mask;
+ cpumask_var_t cpu_callin_mask;
  
  /* representing cpus for which sibling maps can be computed */
  cpumask_var_t cpu_sibling_setup_mask;
@@@ -58,7 -60,17 +58,7 @@@ void __init setup_cpu_local_masks(void
        alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
  }
  
- static struct cpu_dev *this_cpu __cpuinitdata;
 -#else /* CONFIG_X86_32 */
 -
 -cpumask_t cpu_sibling_setup_map;
 -cpumask_t cpu_callout_map;
 -cpumask_t cpu_initialized;
 -cpumask_t cpu_callin_map;
 -
 -#endif /* CONFIG_X86_32 */
 -
 -
+ static const struct cpu_dev *this_cpu __cpuinitdata;
  
  DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
  #ifdef CONFIG_X86_64
         * IRET will check the segment types  kkeil 2000/10/28
         * Also sysret mandates a special GDT layout
         *
-        * The TLS descriptors are currently at a different place compared to i386.
+        * TLS descriptors are currently at a different place compared to i386.
         * Hopefully nobody expects them at a fixed place (Wine?)
         */
-       [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
-       [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
-       [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
-       [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
-       [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
-       [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+       [GDT_ENTRY_KERNEL32_CS]         = { { { 0x0000ffff, 0x00cf9b00 } } },
+       [GDT_ENTRY_KERNEL_CS]           = { { { 0x0000ffff, 0x00af9b00 } } },
+       [GDT_ENTRY_KERNEL_DS]           = { { { 0x0000ffff, 0x00cf9300 } } },
+       [GDT_ENTRY_DEFAULT_USER32_CS]   = { { { 0x0000ffff, 0x00cffb00 } } },
+       [GDT_ENTRY_DEFAULT_USER_DS]     = { { { 0x0000ffff, 0x00cff300 } } },
+       [GDT_ENTRY_DEFAULT_USER_CS]     = { { { 0x0000ffff, 0x00affb00 } } },
  #else
-       [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
-       [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
-       [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
-       [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
+       [GDT_ENTRY_KERNEL_CS]           = { { { 0x0000ffff, 0x00cf9a00 } } },
+       [GDT_ENTRY_KERNEL_DS]           = { { { 0x0000ffff, 0x00cf9200 } } },
+       [GDT_ENTRY_DEFAULT_USER_CS]     = { { { 0x0000ffff, 0x00cffa00 } } },
+       [GDT_ENTRY_DEFAULT_USER_DS]     = { { { 0x0000ffff, 0x00cff200 } } },
        /*
         * Segments used for calling PnP BIOS have byte granularity.
         * The code segments and data segments have fixed 64k limits,
         * the transfer segment sizes are set at run time.
         */
        /* 32-bit code */
-       [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+       [GDT_ENTRY_PNPBIOS_CS32]        = { { { 0x0000ffff, 0x00409a00 } } },
        /* 16-bit code */
-       [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+       [GDT_ENTRY_PNPBIOS_CS16]        = { { { 0x0000ffff, 0x00009a00 } } },
        /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_DS]          = { { { 0x0000ffff, 0x00009200 } } },
        /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_TS1]         = { { { 0x00000000, 0x00009200 } } },
        /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_TS2]         = { { { 0x00000000, 0x00009200 } } },
        /*
         * The APM segments have byte granularity and their bases
         * are set at run time.  All have 64k limits.
         */
        /* 32-bit code */
-       [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
+       [GDT_ENTRY_APMBIOS_BASE]        = { { { 0x0000ffff, 0x00409a00 } } },
        /* 16-bit code */
-       [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+       [GDT_ENTRY_APMBIOS_BASE+1]      = { { { 0x0000ffff, 0x00009a00 } } },
        /* data */
-       [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
+       [GDT_ENTRY_APMBIOS_BASE+2]      = { { { 0x0000ffff, 0x00409200 } } },
  
-       [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-       [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+       [GDT_ENTRY_ESPFIX_SS]           = { { { 0x00000000, 0x00c09200 } } },
+       [GDT_ENTRY_PERCPU]              = { { { 0x0000ffff, 0x00cf9200 } } },
        GDT_STACK_CANARY_INIT
  #endif
  } };
@@@ -152,16 -164,17 +152,17 @@@ static inline int flag_is_changeable_p(
         * the CPUID. Add "volatile" to not allow gcc to
         * optimize the subsequent calls to this function.
         */
-       asm volatile ("pushfl\n\t"
-                     "pushfl\n\t"
-                     "popl %0\n\t"
-                     "movl %0,%1\n\t"
-                     "xorl %2,%0\n\t"
-                     "pushl %0\n\t"
-                     "popfl\n\t"
-                     "pushfl\n\t"
-                     "popl %0\n\t"
-                     "popfl\n\t"
+       asm volatile ("pushfl           \n\t"
+                     "pushfl           \n\t"
+                     "popl %0          \n\t"
+                     "movl %0, %1      \n\t"
+                     "xorl %2, %0      \n\t"
+                     "pushl %0         \n\t"
+                     "popfl            \n\t"
+                     "pushfl           \n\t"
+                     "popl %0          \n\t"
+                     "popfl            \n\t"
                      : "=&r" (f1), "=&r" (f2)
                      : "ir" (flag));
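
For reference, the sequence above works by toggling the ID bit in EFLAGS and checking whether the change sticks, which is the classic 32-bit test for CPUID support. A standalone sketch, assuming 32-bit x86, GCC inline asm and the ID flag value 0x00200000:

    static int cpu_supports_cpuid(void)
    {
            unsigned long f1, f2;

            asm volatile("pushfl\n\t"          /* save original EFLAGS   */
                         "pushfl\n\t"
                         "popl %0\n\t"         /* f1 = EFLAGS            */
                         "movl %0, %1\n\t"     /* f2 = f1                */
                         "xorl %2, %0\n\t"     /* toggle the ID bit      */
                         "pushl %0\n\t"
                         "popfl\n\t"           /* write modified EFLAGS  */
                         "pushfl\n\t"
                         "popl %0\n\t"         /* read it back           */
                         "popfl\n\t"           /* restore original flags */
                         : "=&r" (f1), "=&r" (f2)
                         : "ir" (0x00200000));

            return ((f1 ^ f2) & 0x00200000) != 0;
    }
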
  
@@@ -176,18 -189,22 +177,22 @@@ static int __cpuinit have_cpuid_p(void
  
  static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
  {
-       if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
-               /* Disable processor serial number */
-               unsigned long lo, hi;
-               rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-               lo |= 0x200000;
-               wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-               printk(KERN_NOTICE "CPU serial number disabled.\n");
-               clear_cpu_cap(c, X86_FEATURE_PN);
-               /* Disabling the serial number may affect the cpuid level */
-               c->cpuid_level = cpuid_eax(0);
-       }
+       unsigned long lo, hi;
+       if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
+               return;
+       /* Disable processor serial number: */
+       rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+       lo |= 0x200000;
+       wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+       printk(KERN_NOTICE "CPU serial number disabled.\n");
+       clear_cpu_cap(c, X86_FEATURE_PN);
+       /* Disabling the serial number may affect the cpuid level */
+       c->cpuid_level = cpuid_eax(0);
  }
  
  static int __init x86_serial_nr_setup(char *s)
@@@ -220,6 -237,7 +225,7 @@@ struct cpuid_dependent_feature 
        u32 feature;
        u32 level;
  };
  static const struct cpuid_dependent_feature __cpuinitconst
  cpuid_dependent_features[] = {
        { X86_FEATURE_MWAIT,            0x00000005 },
  static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
  {
        const struct cpuid_dependent_feature *df;
        for (df = cpuid_dependent_features; df->feature; df++) {
+               if (!cpu_has(c, df->feature))
+                       continue;
                /*
                 * Note: cpuid_level is set to -1 if unavailable, but
                 * extended_cpuid_level is set to 0 if unavailable
                 * when signed; hence the weird messing around with
                 * signs here...
                 */
-               if (cpu_has(c, df->feature) &&
-                   ((s32)df->level < 0 ?
+               if (!((s32)df->level < 0 ?
                     (u32)df->level > (u32)c->extended_cpuid_level :
-                    (s32)df->level > (s32)c->cpuid_level)) {
-                       clear_cpu_cap(c, df->feature);
-                       if (warn)
-                               printk(KERN_WARNING
-                                      "CPU: CPU feature %s disabled "
-                                      "due to lack of CPUID level 0x%x\n",
-                                      x86_cap_flags[df->feature],
-                                      df->level);
-               }
+                    (s32)df->level > (s32)c->cpuid_level))
+                       continue;
+               clear_cpu_cap(c, df->feature);
+               if (!warn)
+                       continue;
+               printk(KERN_WARNING
+                      "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
+                               x86_cap_flags[df->feature], df->level);
        }
  }
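
The sign games above work because extended CPUID levels (0x8000xxxx) are negative when viewed as s32, while cpuid_level is -1 when CPUID itself is missing. Two worked cases, the second using a made-up extended level for illustration:

    /*
     * 1. df->level = 0x00000005 (MWAIT) with CPUID absent, so
     *    c->cpuid_level == -1: (s32)5 > (s32)-1, the feature is cleared.
     * 2. df->level = 0x80000008 (hypothetical extended requirement):
     *    negative as s32, so it is compared unsigned against
     *    c->extended_cpuid_level, which is 0 when no extended leaves
     *    exist; 0x80000008 > 0, the feature is cleared.
     */
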
  
  /*
   * Naming convention should be: <Name> [(<Codename>)]
   * This table is only used if init_<vendor>() below doesn't set it;
-  * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
-  *
+  * in particular, if CPUID levels 0x80000002..4 are supported, this
+  * isn't used
   */
  
  /* Look up CPU names by table lookup. */
- static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
  {
-       struct cpu_model_info *info;
+       const struct cpu_model_info *info;
  
        if (c->x86_model >= 16)
                return NULL;    /* Range check */
@@@ -295,8 -317,10 +305,10 @@@ void load_percpu_segment(int cpu
        load_stack_canary_segment();
  }
  
- /* Current gdt points %fs at the "master" per-cpu area: after this,
-  * it's on the real one. */
+ /*
+  * Current gdt points %fs at the "master" per-cpu area: after this,
+  * it's on the real one.
+  */
  void switch_to_new_gdt(int cpu)
  {
        struct desc_ptr gdt_descr;
        load_percpu_segment(cpu);
  }
  
- static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+ static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
  
  static void __cpuinit default_init(struct cpuinfo_x86 *c)
  {
  #endif
  }
  
- static struct cpu_dev __cpuinitdata default_cpu = {
+ static const struct cpu_dev __cpuinitconst default_cpu = {
        .c_init = default_init,
        .c_vendor = "Unknown",
        .c_x86_vendor = X86_VENDOR_UNKNOWN,
@@@ -342,22 -366,24 +354,24 @@@ static void __cpuinit get_model_name(st
        if (c->extended_cpuid_level < 0x80000004)
                return;
  
-       v = (unsigned int *) c->x86_model_id;
+       v = (unsigned int *)c->x86_model_id;
        cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
        cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
        cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
        c->x86_model_id[48] = 0;
  
-       /* Intel chips right-justify this string for some dumb reason;
-          undo that brain damage */
+       /*
+        * Intel chips right-justify this string for some dumb reason;
+        * undo that brain damage:
+        */
        p = q = &c->x86_model_id[0];
        while (*p == ' ')
-            p++;
+               p++;
        if (p != q) {
-            while (*p)
-                 *q++ = *p++;
-            while (q <= &c->x86_model_id[48])
-                 *q++ = '\0';  /* Zero-pad the rest */
+               while (*p)
+                       *q++ = *p++;
+               while (q <= &c->x86_model_id[48])
+                       *q++ = '\0';    /* Zero-pad the rest */
        }
  }
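
The reindented loop above is just an in-place left shift over the leading spaces followed by zero padding. An equivalent standalone helper (hypothetical, not part of the patch):

    #include <stddef.h>

    static void left_justify(char *s, size_t size)
    {
            char *p = s, *q = s;

            while (*p == ' ')               /* skip leading spaces   */
                    p++;
            if (p != q) {
                    while (*p)              /* shift the string left */
                            *q++ = *p++;
                    while (q < s + size)    /* zero-pad the tail     */
                            *q++ = '\0';
            }
    }
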
  
@@@ -426,27 -452,30 +440,30 @@@ void __cpuinit detect_ht(struct cpuinfo
  
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1) {
+               goto out;
+       }
  
-               if (smp_num_siblings > nr_cpu_ids) {
-                       printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
-                                       smp_num_siblings);
-                       smp_num_siblings = 1;
-                       return;
-               }
+       if (smp_num_siblings <= 1)
+               goto out;
  
-               index_msb = get_count_order(smp_num_siblings);
-               c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
+       if (smp_num_siblings > nr_cpu_ids) {
+               pr_warning("CPU: Unsupported number of siblings %d",
+                          smp_num_siblings);
+               smp_num_siblings = 1;
+               return;
+       }
  
-               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+       index_msb = get_count_order(smp_num_siblings);
+       c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
  
-               index_msb = get_count_order(smp_num_siblings);
+       smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  
-               core_bits = get_count_order(c->x86_max_cores);
+       index_msb = get_count_order(smp_num_siblings);
  
-               c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
-                                              ((1 << core_bits) - 1);
-       }
+       core_bits = get_count_order(c->x86_max_cores);
+       c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
+                                      ((1 << core_bits) - 1);
  
  out:
        if ((c->x86_max_cores * smp_num_siblings) > 1) {
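
The unindented flow above still decomposes the initial APIC ID into package and core fields using get_count_order(). A sketch of the same arithmetic, assuming apic->phys_pkg_id() is a plain right shift as on flat-APIC systems:

    #include <linux/bitops.h>       /* get_count_order() */

    static void decompose_apicid(unsigned int apicid, unsigned int siblings,
                                 unsigned int max_cores,
                                 unsigned int *pkg, unsigned int *core)
    {
            unsigned int index_msb = get_count_order(siblings);
            unsigned int core_bits = get_count_order(max_cores);

            *pkg = apicid >> index_msb;
            index_msb = get_count_order(siblings / max_cores);
            *core = (apicid >> index_msb) & ((1 << core_bits) - 1);
    }
    /* e.g. apicid 5 with 4 siblings and 2 cores: pkg = 1, core = 0 */
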
  static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
  {
        char *v = c->x86_vendor_id;
-       int i;
        static int printed;
+       int i;
  
        for (i = 0; i < X86_VENDOR_NUM; i++) {
                if (!cpu_devs[i])
                if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
                    (cpu_devs[i]->c_ident[1] &&
                     !strcmp(v, cpu_devs[i]->c_ident[1]))) {
                        this_cpu = cpu_devs[i];
                        c->x86_vendor = this_cpu->c_x86_vendor;
                        return;
  
        if (!printed) {
                printed++;
-               printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
+               printk(KERN_ERR
+                   "CPU: vendor_id '%s' unknown, using generic init.\n", v);
                printk(KERN_ERR "CPU: Your system may be unstable.\n");
        }
  
@@@ -499,14 -531,17 +519,17 @@@ void __cpuinit cpu_detect(struct cpuinf
        /* Intel-defined flags: level 0x00000001 */
        if (c->cpuid_level >= 0x00000001) {
                u32 junk, tfms, cap0, misc;
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86 = (tfms >> 8) & 0xf;
                c->x86_model = (tfms >> 4) & 0xf;
                c->x86_mask = tfms & 0xf;
                if (c->x86 == 0xf)
                        c->x86 += (tfms >> 20) & 0xff;
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xf) << 4;
                if (cap0 & (1<<19)) {
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
                        c->x86_cache_alignment = c->x86_clflush_size;
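
To make the shift/mask arithmetic above concrete, here is how one real signature decodes; 0x00010676 (an Intel Core 2 "Penryn") is used purely as an illustration:

    /*
     *   tfms     = 0x00010676
     *   family   = (tfms >> 8) & 0xf                    = 0x06
     *   model    = (((tfms >> 16) & 0xf) << 4)
     *            + ((tfms >> 4) & 0xf)                  = 0x17 (23)
     *   stepping =   tfms & 0xf                         = 0x06
     *
     * The extended family bits (tfms >> 20) are only added when the base
     * family is 0xf, and the extended model bits only when family >= 0x6.
     */
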
@@@ -522,6 -557,7 +545,7 @@@ static void __cpuinit get_cpu_cap(struc
        /* Intel-defined flags: level 0x00000001 */
        if (c->cpuid_level >= 0x00000001) {
                u32 capability, excap;
                cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
                c->x86_capability[0] = capability;
                c->x86_capability[4] = excap;
        /* AMD-defined flags: level 0x80000001 */
        xlvl = cpuid_eax(0x80000000);
        c->extended_cpuid_level = xlvl;
        if ((xlvl & 0xffff0000) == 0x80000000) {
                if (xlvl >= 0x80000001) {
                        c->x86_capability[1] = cpuid_edx(0x80000001);
                }
        }
  
- #ifdef CONFIG_X86_64
        if (c->extended_cpuid_level >= 0x80000008) {
                u32 eax = cpuid_eax(0x80000008);
  
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
        }
+ #ifdef CONFIG_X86_32
+       else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
+               c->x86_phys_bits = 36;
  #endif
  
        if (c->extended_cpuid_level >= 0x80000007)
@@@ -590,8 -629,12 +617,12 @@@ static void __init early_identify_cpu(s
  {
  #ifdef CONFIG_X86_64
        c->x86_clflush_size = 64;
+       c->x86_phys_bits = 36;
+       c->x86_virt_bits = 48;
  #else
        c->x86_clflush_size = 32;
+       c->x86_phys_bits = 32;
+       c->x86_virt_bits = 32;
  #endif
        c->x86_cache_alignment = c->x86_clflush_size;
  
  
  void __init early_cpu_init(void)
  {
-       struct cpu_dev **cdev;
+       const struct cpu_dev *const *cdev;
        int count = 0;
  
-       printk("KERNEL supported cpus:\n");
+       printk(KERN_INFO "KERNEL supported cpus:\n");
        for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
-               struct cpu_dev *cpudev = *cdev;
+               const struct cpu_dev *cpudev = *cdev;
                unsigned int j;
  
                if (count >= X86_VENDOR_NUM)
                for (j = 0; j < 2; j++) {
                        if (!cpudev->c_ident[j])
                                continue;
-                       printk("  %s %s\n", cpudev->c_vendor,
+                       printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
                                cpudev->c_ident[j]);
                }
        }
@@@ -714,9 -757,13 +745,13 @@@ static void __cpuinit identify_cpu(stru
        c->x86_coreid_bits = 0;
  #ifdef CONFIG_X86_64
        c->x86_clflush_size = 64;
+       c->x86_phys_bits = 36;
+       c->x86_virt_bits = 48;
  #else
        c->cpuid_level = -1;    /* CPUID not detected */
        c->x86_clflush_size = 32;
+       c->x86_phys_bits = 32;
+       c->x86_virt_bits = 32;
  #endif
        c->x86_cache_alignment = c->x86_clflush_size;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
        squash_the_stupid_serial_number(c);
  
        /*
-        * The vendor-specific functions might have changed features.  Now
-        * we do "generic changes."
+        * The vendor-specific functions might have changed features.
+        * Now we do "generic changes."
         */
  
        /* Filter out anything that depends on CPUID levels we don't have */
  
        /* If the model name is still unset, do table lookup. */
        if (!c->x86_model_id[0]) {
-               char *p;
+               const char *p;
                p = table_lookup_model(c);
                if (p)
                        strcpy(c->x86_model_id, p);
@@@ -812,7 -859,6 +847,7 @@@ static void vgetcpu_set_mode(void
  void __init identify_boot_cpu(void)
  {
        identify_cpu(&boot_cpu_data);
 +      init_c1e_mask();
  #ifdef CONFIG_X86_32
        sysenter_setup();
        enable_sep_cpu();
@@@ -832,11 -878,11 +867,11 @@@ void __cpuinit identify_secondary_cpu(s
  }
  
  struct msr_range {
-       unsigned min;
-       unsigned max;
+       unsigned        min;
+       unsigned        max;
  };
  
- static struct msr_range msr_range_array[] __cpuinitdata = {
+ static const struct msr_range msr_range_array[] __cpuinitconst = {
        { 0x00000000, 0x00000418},
        { 0xc0000000, 0xc000040b},
        { 0xc0010000, 0xc0010142},
  
  static void __cpuinit print_cpu_msr(void)
  {
+       unsigned index_min, index_max;
        unsigned index;
        u64 val;
        int i;
-       unsigned index_min, index_max;
  
        for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
                index_min = msr_range_array[i].min;
                index_max = msr_range_array[i].max;
                for (index = index_min; index < index_max; index++) {
                        if (rdmsrl_amd_safe(index, &val))
                                continue;
  }
  
  static int show_msr __cpuinitdata;
  static __init int setup_show_msr(char *arg)
  {
        int num;
@@@ -883,12 -931,14 +920,14 @@@ __setup("noclflush", setup_noclflush)
  
  void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
  {
-       char *vendor = NULL;
+       const char *vendor = NULL;
  
-       if (c->x86_vendor < X86_VENDOR_NUM)
+       if (c->x86_vendor < X86_VENDOR_NUM) {
                vendor = this_cpu->c_vendor;
-       else if (c->cpuid_level >= 0)
-               vendor = c->x86_vendor_id;
+       } else {
+               if (c->cpuid_level >= 0)
+                       vendor = c->x86_vendor_id;
+       }
  
        if (vendor && !strstr(c->x86_model_id, vendor))
                printk(KERN_CONT "%s ", vendor);
  static __init int setup_disablecpuid(char *arg)
  {
        int bit;
        if (get_option(&arg, &bit) && bit < NCAPINTS*32)
                setup_clear_cpu_cap(bit);
        else
                return 0;
        return 1;
  }
  __setup("clearcpuid=", setup_disablecpuid);
@@@ -928,6 -980,7 +969,7 @@@ struct desc_ptr idt_descr = { 256 * 16 
  
  DEFINE_PER_CPU_FIRST(union irq_stack_union,
                     irq_stack_union) __aligned(PAGE_SIZE);
  DEFINE_PER_CPU(char *, irq_stack_ptr) =
        init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
  
@@@ -937,12 -990,21 +979,21 @@@ EXPORT_PER_CPU_SYMBOL(kernel_stack)
  
  DEFINE_PER_CPU(unsigned int, irq_count) = -1;
  
+ /*
+  * Special IST stacks which the CPU switches to when it calls
+  * an IST-marked descriptor entry. Up to 7 stacks (hardware
+  * limit), all of them are 4K, except the debug stack which
+  * is 8K.
+  */
+ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+ };
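
The new exception_stack_sizes table relies on GCC's range designator; a later designator overrides an earlier one, so every stack defaults to EXCEPTION_STKSZ and only the debug stack differs. A tiny illustration with arbitrary values:

    static const unsigned int sizes[4] = {
            [0 ... 3] = 4096,       /* every entry defaults to 4 KiB */
            [2]       = 8192,       /* ...but index 2 is overridden  */
    };
    /* sizes == { 4096, 4096, 8192, 4096 } */
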
  static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
        [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
        __aligned(PAGE_SIZE);
  
- extern asmlinkage void ignore_sysret(void);
  /* May not be marked __init: used by software suspend */
  void syscall_init(void)
  {
@@@ -972,7 -1034,7 +1023,7 @@@ unsigned long kernel_eflags
   */
  DEFINE_PER_CPU(struct orig_ist, orig_ist);
  
- #else /* x86_64 */
+ #else /* CONFIG_X86_64 */
  
  #ifdef CONFIG_CC_STACKPROTECTOR
  DEFINE_PER_CPU(unsigned long, stack_canary);
@@@ -984,9 -1046,26 +1035,26 @@@ struct pt_regs * __cpuinit idle_regs(st
        memset(regs, 0, sizeof(struct pt_regs));
        regs->fs = __KERNEL_PERCPU;
        regs->gs = __KERNEL_STACK_CANARY;
        return regs;
  }
- #endif        /* x86_64 */
+ #endif        /* CONFIG_X86_64 */
+ /*
+  * Clear all 6 debug registers:
+  */
+ static void clear_all_debug_regs(void)
+ {
+       int i;
+       for (i = 0; i < 8; i++) {
+               /* Ignore db4, db5 */
+               if ((i == 4) || (i == 5))
+                       continue;
+               set_debugreg(0, i);
+       }
+ }
  
  /*
   * cpu_init() initializes state that is per-CPU. Some data is already
   * A lot of state is already set up in PDA init for 64 bit
   */
  #ifdef CONFIG_X86_64
  void __cpuinit cpu_init(void)
  {
-       int cpu = stack_smp_processor_id();
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
-       struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
-       unsigned long v;
+       struct orig_ist *orig_ist;
        struct task_struct *me;
+       struct tss_struct *t;
+       unsigned long v;
+       int cpu;
        int i;
  
+       cpu = stack_smp_processor_id();
+       t = &per_cpu(init_tss, cpu);
+       orig_ist = &per_cpu(orig_ist, cpu);
  #ifdef CONFIG_NUMA
        if (cpu != 0 && percpu_read(node_number) == 0 &&
            cpu_to_node(cpu) != NUMA_NO_NODE)
         * set up and load the per-CPU TSS
         */
        if (!orig_ist->ist[0]) {
-               static const unsigned int sizes[N_EXCEPTION_STACKS] = {
-                 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
-                 [DEBUG_STACK - 1] = DEBUG_STKSZ
-               };
                char *estacks = per_cpu(exception_stacks, cpu);
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-                       estacks += sizes[v];
+                       estacks += exception_stack_sizes[v];
                        orig_ist->ist[v] = t->x86_tss.ist[v] =
                                        (unsigned long)estacks;
                }
        }
  
        t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
        /*
         * <= is required because the CPU will access up to
         * 8 bits beyond the end of the IO permission bitmap.
  
        atomic_inc(&init_mm.mm_count);
        me->active_mm = &init_mm;
-       if (me->mm)
-               BUG();
+       BUG_ON(me->mm);
        enter_lazy_tlb(&init_mm, me);
  
        load_sp0(t, &current->thread);
                arch_kgdb_ops.correct_hw_break();
        else
  #endif
-       {
-               /*
-                * Clear all 6 debug registers:
-                */
-               set_debugreg(0UL, 0);
-               set_debugreg(0UL, 1);
-               set_debugreg(0UL, 2);
-               set_debugreg(0UL, 3);
-               set_debugreg(0UL, 6);
-               set_debugreg(0UL, 7);
-       }
+               clear_all_debug_regs();
  
        fpu_init();
  
@@@ -1118,7 -1189,8 +1178,8 @@@ void __cpuinit cpu_init(void
  
        if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
                printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
-               for (;;) local_irq_enable();
+               for (;;)
+                       local_irq_enable();
        }
  
        printk(KERN_INFO "Initializing CPU#%d\n", cpu);
         */
        atomic_inc(&init_mm.mm_count);
        curr->active_mm = &init_mm;
-       if (curr->mm)
-               BUG();
+       BUG_ON(curr->mm);
        enter_lazy_tlb(&init_mm, curr);
  
        load_sp0(t, thread);
        __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
  #endif
  
-       /* Clear all 6 debug registers: */
-       set_debugreg(0, 0);
-       set_debugreg(0, 1);
-       set_debugreg(0, 2);
-       set_debugreg(0, 3);
-       set_debugreg(0, 6);
-       set_debugreg(0, 7);
+       clear_all_debug_regs();
  
        /*
         * Force FPU initialization:
  
        xsave_init();
  }
  #endif
index d8341d17c1890e239c470f211efc9806d358d7b1,41ed94915f974ce0310510d9864b54d8e96f37df..6ac55bd341ae8802abf29c6180533d08ab68930c
  #include <linux/cpufreq.h>
  #include <linux/slab.h>
  #include <linux/cpumask.h>
+ #include <linux/timex.h>
  
  #include <asm/processor.h>
  #include <asm/msr.h>
- #include <asm/timex.h>
+ #include <asm/timer.h>
  
  #include "speedstep-lib.h"
  
  #define PFX   "p4-clockmod: "
- #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+               "p4-clockmod", msg)
  
  /*
   * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@@ -58,7 -60,8 +60,8 @@@ static int cpufreq_p4_setdc(unsigned in
  {
        u32 l, h;
  
-       if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+       if (!cpu_online(cpu) ||
+           (newstate > DC_DISABLE) || (newstate == DC_RESV))
                return -EINVAL;
  
        rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@@ -66,7 -69,8 +69,8 @@@
        if (l & 0x01)
                dprintk("CPU#%d currently thermal throttled\n", cpu);
  
-       if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+       if (has_N44_O17_errata[cpu] &&
+           (newstate == DC_25PT || newstate == DC_DFLT))
                newstate = DC_38PT;
  
        rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@@ -112,7 -116,8 +116,8 @@@ static int cpufreq_p4_target(struct cpu
        struct cpufreq_freqs freqs;
        int i;
  
-       if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+       if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+                               target_freq, relation, &newstate))
                return -EINVAL;
  
        freqs.old = cpufreq_p4_get(policy->cpu);
                cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        }
  
-       /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+       /* run on each logical CPU,
+        * see section 13.15.3 of IA32 Intel Architecture Software
         * Developer's Manual, Volume 3
         */
        for_each_cpu(i, policy->cpus)
@@@ -153,28 -159,30 +159,30 @@@ static unsigned int cpufreq_p4_get_freq
  {
        if (c->x86 == 0x06) {
                if (cpu_has(c, X86_FEATURE_EST))
-                       printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. "
-                              "The acpi-cpufreq module offers voltage scaling"
-                              " in addition of frequency scaling. You should use "
-                              "that instead of p4-clockmod, if possible.\n");
+                       printk(KERN_WARNING PFX "Warning: EST-capable CPU "
+                              "detected. The acpi-cpufreq module offers "
+                              "voltage scaling in addition of frequency "
+                              "scaling. You should use that instead of "
+                              "p4-clockmod, if possible.\n");
                switch (c->x86_model) {
                case 0x0E: /* Core */
                case 0x0F: /* Core Duo */
                case 0x16: /* Celeron Core */
                        p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-                       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
+                       return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
                case 0x0D: /* Pentium M (Dothan) */
                        p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
                        /* fall through */
                case 0x09: /* Pentium M (Banias) */
-                       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+                       return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
                }
        }
  
        if (c->x86 != 0xF) {
                if (!cpu_has(c, X86_FEATURE_EST))
-                       printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
-                               "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+                       printk(KERN_WARNING PFX "Unknown CPU. "
+                               "Please send an e-mail to "
+                               "<cpufreq@vger.kernel.org>\n");
                return 0;
        }
  
         * throttling is active or not. */
        p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
  
-       if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+       if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
                printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
                       "The speedstep-ich or acpi cpufreq modules offer "
                       "voltage scaling in addition of frequency scaling. "
                       "You should use either one instead of p4-clockmod, "
                       "if possible.\n");
-               return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+               return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
        }
  
-       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+       return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
  }
  
  
@@@ -203,7 -211,7 +211,7 @@@ static int cpufreq_p4_cpu_init(struct c
        unsigned int i;
  
  #ifdef CONFIG_SMP
 -      cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
 +      cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
  #endif
  
        /* Errata workaround */
                dprintk("has errata -- disabling low frequencies\n");
        }
  
+       if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+           c->x86_model < 2) {
+               /* switch to maximum frequency and measure result */
+               cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+               recalibrate_cpu_khz();
+       }
        /* get max frequency */
        stock_freq = cpufreq_p4_get_frequency(c);
        if (!stock_freq)
                return -EINVAL;
  
        /* table init */
-       for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-               if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+       for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+               if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
                        p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
                else
                        p4clockmod_table[i].frequency = (stock_freq * i)/8;
        cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
  
        /* cpuinfo and default policy values */
-       policy->cpuinfo.transition_latency = 1000000; /* assumed */
+       /* the transition latency is set to be 1 higher than the maximum
+        * transition latency of the ondemand governor */
+       policy->cpuinfo.transition_latency = 10000001;
        policy->cur = stock_freq;
  
        return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@@ -258,12 -275,12 +275,12 @@@ static unsigned int cpufreq_p4_get(unsi
                l = DC_DISABLE;
  
        if (l != DC_DISABLE)
-               return (stock_freq * l / 8);
+               return stock_freq * l / 8;
  
        return stock_freq;
  }
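
cpufreq_p4_get() above maps the 3-bit duty-cycle field back to a frequency in eighths of the stock clock. A minimal sketch of that arithmetic with a hypothetical helper, not part of the driver:

    static unsigned int duty_cycle_khz(unsigned int stock_khz, unsigned int dc)
    {
            /* dc in 1..7 selects dc/8 of the stock frequency, e.g.
             * 2800000 kHz at dc = 4 gives 1400000 kHz (50% throttling). */
            return stock_khz * dc / 8;
    }
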
  
- static struct freq_attr* p4clockmod_attr[] = {
+ static struct freq_attr *p4clockmod_attr[] = {
        &cpufreq_freq_attr_scaling_available_freqs,
        NULL,
  };
@@@ -298,9 -315,10 +315,10 @@@ static int __init cpufreq_p4_init(void
  
        ret = cpufreq_register_driver(&p4clockmod_driver);
        if (!ret)
-               printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+               printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+                               "Modulation available\n");
  
-       return (ret);
+       return ret;
  }
  
  
@@@ -310,9 -328,9 +328,9 @@@ static void __exit cpufreq_p4_exit(void
  }
  
  
- MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
- MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
- MODULE_LICENSE ("GPL");
+ MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+ MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+ MODULE_LICENSE("GPL");
  
  late_initcall(cpufreq_p4_init);
  module_exit(cpufreq_p4_exit);
index e8fd76f9888321e2843ba80214810f24f1fc02cc,a15ac94e0b9b8c2ad1ee540fc7bad1b5415aff36..4709ead2db526bfc59b803ea06791a0717d539c8
  #include <linux/string.h>
  #include <linux/cpumask.h>
  #include <linux/sched.h>      /* for current / set_cpus_allowed() */
+ #include <linux/io.h>
+ #include <linux/delay.h>
  
  #include <asm/msr.h>
- #include <asm/io.h>
- #include <asm/delay.h>
  
- #ifdef CONFIG_X86_POWERNOW_K8_ACPI
  #include <linux/acpi.h>
  #include <linux/mutex.h>
  #include <acpi/processor.h>
- #endif
  
  #define PFX "powernow-k8: "
  #define VERSION "version 2.20.00"
@@@ -56,10 -54,7 +54,10 @@@ static DEFINE_PER_CPU(struct powernow_k
  static int cpu_family = CPU_OPTERON;
  
  #ifndef CONFIG_SMP
 -DEFINE_PER_CPU(cpumask_t, cpu_core_map);
 +static inline const struct cpumask *cpu_core_mask(int cpu)
 +{
 +      return cpumask_of(0);
 +}
  #endif
  
  /* Return a frequency in MHz, given an input fid */
@@@ -74,7 -69,8 +72,8 @@@ static u32 find_khz_freq_from_fid(u32 f
        return 1000 * find_freq_from_fid(fid);
  }
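
find_khz_freq_from_fid() above only rescales megahertz to kilohertz; the fid-to-MHz step happens in find_freq_from_fid(), which falls outside this hunk. A sketch of the combined mapping, assuming the usual K8 encoding of 800 MHz plus 100 MHz per fid step:

    #include <linux/types.h>

    static u32 demo_khz_from_fid(u32 fid)
    {
            u32 mhz = 800 + fid * 100;      /* assumed fid encoding       */

            return 1000 * mhz;              /* e.g. fid 10 -> 1800000 kHz */
    }
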
  
- static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
+ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+               u32 pstate)
  {
        return data[pstate].frequency;
  }
@@@ -189,7 -185,9 +188,9 @@@ static int write_new_fid(struct powerno
                return 1;
        }
  
-       lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+       lo = fid;
+       lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+       lo |= MSR_C_LO_INIT_FID_VID;
  
        dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
                fid, lo, data->plllock * PLL_LOCK_CONVERSION);
        do {
                wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
                if (i++ > 100) {
-                       printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
+                       printk(KERN_ERR PFX
+                               "Hardware error - pending bit very stuck - "
+                               "no further pstate changes possible\n");
                        return 1;
                }
        } while (query_current_values_with_pending_wait(data));
        count_off_irt(data);
  
        if (savevid != data->currvid) {
-               printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
-                      savevid, data->currvid);
+               printk(KERN_ERR PFX
+                       "vid change on fid trans, old 0x%x, new 0x%x\n",
+                       savevid, data->currvid);
                return 1;
        }
  
        if (fid != data->currfid) {
-               printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
-                       data->currfid);
+               printk(KERN_ERR PFX
+                       "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+                       data->currfid);
                return 1;
        }
  
@@@ -231,7 -233,9 +236,9 @@@ static int write_new_vid(struct powerno
                return 1;
        }
  
-       lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+       lo = data->currfid;
+       lo |= (vid << MSR_C_LO_VID_SHIFT);
+       lo |= MSR_C_LO_INIT_FID_VID;
  
        dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
                vid, lo, STOP_GRANT_5NS);
        do {
                wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
                if (i++ > 100) {
-                       printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+                       printk(KERN_ERR PFX "internal error - pending bit "
+                                       "very stuck - no further pstate "
+                                       "changes possible\n");
                        return 1;
                }
        } while (query_current_values_with_pending_wait(data));
  
        if (savefid != data->currfid) {
-               printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+               printk(KERN_ERR PFX "fid changed on vid trans, old "
+                       "0x%x new 0x%x\n",
                       savefid, data->currfid);
                return 1;
        }
  
        if (vid != data->currvid) {
-               printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
-                               data->currvid);
+               printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+                               "curr 0x%x\n",
+                               vid, data->currvid);
                return 1;
        }
  
   * Decreasing vid codes represent increasing voltages:
   * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
   */
- static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+ static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+               u32 reqvid, u32 step)
  {
        if ((data->currvid - reqvid) > step)
                reqvid = data->currvid - step;
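
The VID comment above fixes two points of the encoding (vid 0 is 1.550 V, vid 0x1e is 0.800 V). Assuming the linear 25 mV-per-step scale that is consistent with both points, the mapping would be:

    static unsigned int vid_to_mv(unsigned int vid)
    {
            /* assumed linear scale: 1550 mV at vid 0, minus 25 mV per
             * step; vid 0x1e (30) gives 1550 - 750 = 800 mV. */
            return 1550 - vid * 25;
    }
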
@@@ -286,7 -295,8 +298,8 @@@ static int transition_pstate(struct pow
  }
  
  /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
- static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+ static int transition_fid_vid(struct powernow_k8_data *data,
+               u32 reqfid, u32 reqvid)
  {
        if (core_voltage_pre_transition(data, reqvid))
                return 1;
                return 1;
  
        if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
-               printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+               printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+                               "curr 0x%x 0x%x\n",
                                smp_processor_id(),
                                reqfid, reqvid, data->currfid, data->currvid);
                return 1;
  }
  
  /* Phase 1 - core voltage transition ... setup voltage */
- static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+ static int core_voltage_pre_transition(struct powernow_k8_data *data,
+               u32 reqvid)
  {
        u32 rvosteps = data->rvo;
        u32 savefid = data->currfid;
        u32 maxvid, lo;
  
-       dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+       dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+               "reqvid 0x%x, rvo 0x%x\n",
                smp_processor_id(),
                data->currfid, data->currvid, reqvid, data->rvo);
  
                } else {
                        dprintk("ph1: changing vid for rvo, req 0x%x\n",
                                data->currvid - 1);
-                       if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+                       if (decrease_vid_code_by_step(data, data->currvid-1, 1))
                                return 1;
                        rvosteps--;
                }
                return 1;
  
        if (savefid != data->currfid) {
-               printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+               printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+                               data->currfid);
                return 1;
        }
  
  /* Phase 2 - core frequency transition */
  static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
  {
-       u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
+       u32 vcoreqfid, vcocurrfid, vcofiddiff;
+       u32 fid_interval, savevid = data->currvid;
  
-       if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
-               printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
-                       reqfid, data->currfid);
+       if ((reqfid < HI_FID_TABLE_BOTTOM) &&
+           (data->currfid < HI_FID_TABLE_BOTTOM)) {
+               printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
+                               "0x%x 0x%x\n", reqfid, data->currfid);
                return 1;
        }
  
        if (data->currfid == reqfid) {
-               printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+               printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+                               data->currfid);
                return 0;
        }
  
-       dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+       dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+               "reqfid 0x%x\n",
                smp_processor_id(),
                data->currfid, data->currvid, reqfid);
  
  
                if (reqfid > data->currfid) {
                        if (data->currfid > LO_FID_TABLE_TOP) {
-                               if (write_new_fid(data, data->currfid + fid_interval)) {
+                               if (write_new_fid(data,
+                                               data->currfid + fid_interval))
                                        return 1;
-                               }
                        } else {
                                if (write_new_fid
-                                   (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+                                   (data,
+                                    2 + convert_fid_to_vco_fid(data->currfid)))
                                        return 1;
-                               }
                        }
                } else {
                        if (write_new_fid(data, data->currfid - fid_interval))
  
        if (data->currfid != reqfid) {
                printk(KERN_ERR PFX
-                       "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+                       "ph2: mismatch, failed fid transition, "
+                       "curr 0x%x, req 0x%x\n",
                        data->currfid, reqfid);
                return 1;
        }
  }
  
  /* Phase 3 - core voltage transition flow ... jump to the final vid. */
- static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+ static int core_voltage_post_transition(struct powernow_k8_data *data,
+               u32 reqvid)
  {
        u32 savefid = data->currfid;
        u32 savereqvid = reqvid;
  
                if (data->currvid != reqvid) {
                        printk(KERN_ERR PFX
-                              "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+                              "ph3: failed vid transition\n, "
+                              "req 0x%x, curr 0x%x",
                               reqvid, data->currvid);
                        return 1;
                }
@@@ -511,7 -532,8 +535,8 @@@ static int check_supported_cpu(unsigne
        if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
                if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
                    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
-                       printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+                       printk(KERN_INFO PFX
+                               "Processor cpuid %x not supported\n", eax);
                        goto out;
                }
  
                }
  
                cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-               if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
-                       printk(KERN_INFO PFX "Power state transitions not supported\n");
+               if ((edx & P_STATE_TRANSITION_CAPABLE)
+                       != P_STATE_TRANSITION_CAPABLE) {
+                       printk(KERN_INFO PFX
+                               "Power state transitions not supported\n");
                        goto out;
                }
        } else { /* must be a HW Pstate capable processor */
@@@ -542,7 -566,8 +569,8 @@@ out
        return rc;
  }
  
- static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+               u8 maxvid)
  {
        unsigned int j;
        u8 lastfid = 0xff;
                               j, pst[j].vid);
                        return -EINVAL;
                }
-               if (pst[j].vid < data->rvo) {   /* vid + rvo >= 0 */
+               if (pst[j].vid < data->rvo) {
+                       /* vid + rvo >= 0 */
                        printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
                               " %d\n", j);
                        return -ENODEV;
                }
-               if (pst[j].vid < maxvid + data->rvo) {  /* vid + rvo >= maxvid */
+               if (pst[j].vid < maxvid + data->rvo) {
+                       /* vid + rvo >= maxvid */
                        printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
                               " %d\n", j);
                        return -ENODEV;
                return -EINVAL;
        }
        if (lastfid > LO_FID_TABLE_TOP)
-               printk(KERN_INFO FW_BUG PFX  "first fid not from lo freq table\n");
+               printk(KERN_INFO FW_BUG PFX
+                       "first fid not from lo freq table\n");
  
        return 0;
  }
  
+ static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
+ {
+       data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+ }
  static void print_basics(struct powernow_k8_data *data)
  {
        int j;
        for (j = 0; j < data->numps; j++) {
-               if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+               if (data->powernow_table[j].frequency !=
+                               CPUFREQ_ENTRY_INVALID) {
                        if (cpu_family == CPU_HW_PSTATE) {
-                               printk(KERN_INFO PFX "   %d : pstate %d (%d MHz)\n",
-                                       j,
+                               printk(KERN_INFO PFX
+                                       "   %d : pstate %d (%d MHz)\n", j,
                                        data->powernow_table[j].index,
                                        data->powernow_table[j].frequency/1000);
                        } else {
-                               printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
+                               printk(KERN_INFO PFX
+                                       "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
                                        j,
                                        data->powernow_table[j].index & 0xff,
                                        data->powernow_table[j].frequency/1000,
                }
        }
        if (data->batps)
-               printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+               printk(KERN_INFO PFX "Only %d pstates on battery\n",
+                               data->batps);
  }
  
- static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+ static int fill_powernow_table(struct powernow_k8_data *data,
+               struct pst_s *pst, u8 maxvid)
  {
        struct cpufreq_frequency_table *powernow_table;
        unsigned int j;
  
-       if (data->batps) {    /* use ACPI support to get full speed on mains power */
-               printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+       if (data->batps) {
+               /* use ACPI support to get full speed on mains power */
+               printk(KERN_WARNING PFX
+                       "Only %d pstates usable (use ACPI driver for full "
+                       "range\n", data->batps);
                data->numps = data->batps;
        }
  
-       for ( j=1; j<data->numps; j++ ) {
+       for (j = 1; j < data->numps; j++) {
                if (pst[j-1].fid >= pst[j].fid) {
                        printk(KERN_ERR PFX "PST out of sequence\n");
                        return -EINVAL;
        }
  
        for (j = 0; j < data->numps; j++) {
+               int freq;
                powernow_table[j].index = pst[j].fid; /* lower 8 bits */
                powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
-               powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+               freq = find_khz_freq_from_fid(pst[j].fid);
+               powernow_table[j].frequency = freq;
        }
        powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
        powernow_table[data->numps].index = 0;
  
        dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
        data->powernow_table = powernow_table;
 -      if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 +      if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
                print_basics(data);
  
        for (j = 0; j < data->numps; j++)
-               if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+               if ((pst[j].fid == data->currfid) &&
+                   (pst[j].vid == data->currvid))
                        return 0;
  
        dprintk("currfid/vid do not match PST, ignoring\n");
@@@ -701,7 -744,8 +747,8 @@@ static int find_psb_table(struct powern
                }
  
                data->vstable = psb->vstable;
-               dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+               dprintk("voltage stabilization time: %d(*20us)\n",
+                               data->vstable);
  
                dprintk("flags2: 0x%x\n", psb->flags2);
                data->rvo = psb->flags2 & 3;
  
                dprintk("numpst: 0x%x\n", psb->num_tables);
                cpst = psb->num_tables;
-               if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+               if ((psb->cpuid == 0x00000fc0) ||
+                   (psb->cpuid == 0x00000fe0)) {
                        thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-                       if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+                       if ((thiscpuid == 0x00000fc0) ||
+                           (thiscpuid == 0x00000fe0))
                                cpst = 1;
-                       }
                }
                if (cpst != 1) {
                        printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
  
                data->numps = psb->numps;
                dprintk("numpstates: 0x%x\n", data->numps);
-               return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+               return fill_powernow_table(data,
+                               (struct pst_s *)(psb+1), maxvid);
        }
        /*
         * If you see this message, complain to BIOS manufacturer. If
         * BIOS and Kernel Developer's Guide, which is available on
         * www.amd.com
         */
-       printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+       printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
        return -ENODEV;
  }
  
- #ifdef CONFIG_X86_POWERNOW_K8_ACPI
- static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
              unsigned int index)
  {
+       acpi_integer control;
        if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
                return;
  
-       data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-       data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-       data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-       data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-       data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-       data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
- }
+       control = data->acpi_data.states[index].control;
+       data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+       data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+       data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+       data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+       data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+       data->vstable = (control >> VST_SHIFT) & VST_MASK;
+ }
  
  static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
  {
        struct cpufreq_frequency_table *powernow_table;
        int ret_val = -ENODEV;
+       acpi_integer space_id;
  
        if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
                dprintk("register performance failed: bad ACPI data\n");
                goto err_out;
        }
  
-       if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-               (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+       space_id = data->acpi_data.control_register.space_id;
+       if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+           (data->acpi_data.status_register.space_id !=
+                               ACPI_ADR_SPACE_FIXED_HARDWARE)) {
                dprintk("Invalid control/status registers (%x - %x)\n",
                        data->acpi_data.control_register.space_id,
                        data->acpi_data.status_register.space_id);
                goto err_out;
        }
  
        if (ret_val)
                goto err_out_mem;
  
-       powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+       powernow_table[data->acpi_data.state_count].frequency =
+               CPUFREQ_TABLE_END;
        powernow_table[data->acpi_data.state_count].index = 0;
        data->powernow_table = powernow_table;
  
        /* fill in data */
        data->numps = data->acpi_data.state_count;
 -      if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 +      if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
                print_basics(data);
        powernow_k8_acpi_pst_values(data, 0);
  
@@@ -833,13 -884,15 +887,15 @@@ err_out_mem
  err_out:
        acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
  
-       /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+       /* data->acpi_data.state_count informs us at ->exit()
+        * whether ACPI was used */
        data->acpi_data.state_count = 0;
  
        return ret_val;
  }
  
- static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+               struct cpufreq_frequency_table *powernow_table)
  {
        int i;
        u32 hi = 0, lo = 0;
  
                index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
                if (index > data->max_hw_pstate) {
-                       printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
-                       printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       printk(KERN_ERR PFX "invalid pstate %d - "
+                                       "bad value %d.\n", i, index);
+                       printk(KERN_ERR PFX "Please report to BIOS "
+                                       "manufacturer\n");
+                       invalidate_entry(data, i);
                        continue;
                }
                rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
                if (!(hi & HW_PSTATE_VALID_MASK)) {
                        dprintk("invalid pstate %d, ignoring\n", index);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       invalidate_entry(data, i);
                        continue;
                }
  
                powernow_table[i].index = index;
  
-               powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+               powernow_table[i].frequency =
+                       data->acpi_data.states[i].core_frequency * 1000;
        }
        return 0;
  }
  
- static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+               struct cpufreq_frequency_table *powernow_table)
  {
        int i;
        int cntlofreq = 0;
        for (i = 0; i < data->acpi_data.state_count; i++) {
                u32 fid;
                u32 vid;
+               u32 freq, index;
+               acpi_integer status, control;
  
                if (data->exttype) {
-                       fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-                       vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+                       status =  data->acpi_data.states[i].status;
+                       fid = status & EXT_FID_MASK;
+                       vid = (status >> VID_SHIFT) & EXT_VID_MASK;
                } else {
-                       fid = data->acpi_data.states[i].control & FID_MASK;
-                       vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+                       control =  data->acpi_data.states[i].control;
+                       fid = control & FID_MASK;
+                       vid = (control >> VID_SHIFT) & VID_MASK;
                }
  
                dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
  
-               powernow_table[i].index = fid; /* lower 8 bits */
-               powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-               powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+               index = fid | (vid<<8);
+               powernow_table[i].index = index;
+               freq = find_khz_freq_from_fid(fid);
+               powernow_table[i].frequency = freq;
  
                /* verify frequency is OK */
-               if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
-                       (powernow_table[i].frequency < (MIN_FREQ * 1000))) {
-                       dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+               if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+                       dprintk("invalid freq %u kHz, ignoring\n", freq);
+                       invalidate_entry(data, i);
                        continue;
                }
  
-               /* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+               /* verify voltage is OK -
+                * BIOSs are using "off" to indicate invalid */
                if (vid == VID_OFF) {
                        dprintk("invalid vid %u, ignoring\n", vid);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       invalidate_entry(data, i);
                        continue;
                }
  
                /* verify only 1 entry from the lo frequency table */
                if (fid < HI_FID_TABLE_BOTTOM) {
                        if (cntlofreq) {
-                               /* if both entries are the same, ignore this one ... */
-                               if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
-                                   (powernow_table[i].index != powernow_table[cntlofreq].index)) {
-                                       printk(KERN_ERR PFX "Too many lo freq table entries\n");
+                               /* if both entries are the same,
+                                * ignore this one ... */
+                               if ((freq != powernow_table[cntlofreq].frequency) ||
+                                   (index != powernow_table[cntlofreq].index)) {
+                                       printk(KERN_ERR PFX
+                                               "Too many lo freq table "
+                                               "entries\n");
                                        return 1;
                                }
  
-                               dprintk("double low frequency table entry, ignoring it.\n");
-                               powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                               dprintk("double low frequency table entry, "
+                                               "ignoring it.\n");
+                               invalidate_entry(data, i);
                                continue;
                        } else
                                cntlofreq = i;
                }
  
-               if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
-                       printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
-                               powernow_table[i].frequency,
-                               (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+               if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+                       printk(KERN_INFO PFX "invalid freq entries "
+                               "%u kHz vs. %u kHz\n", freq,
+                               (unsigned int)
+                               (data->acpi_data.states[i].core_frequency
+                                * 1000));
+                       invalidate_entry(data, i);
                        continue;
                }
        }
  static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
  {
        if (data->acpi_data.state_count)
-               acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+               acpi_processor_unregister_performance(&data->acpi_data,
+                               data->cpu);
        free_cpumask_var(data->acpi_data.shared_cpu_map);
  }
  
@@@ -956,15 -1027,9 +1030,9 @@@ static int get_transition_latency(struc
        return 1000 * max_latency;
  }
  
- #else
- static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
- static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
- static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
- static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
- #endif /* CONFIG_X86_POWERNOW_K8_ACPI */
  /* Take a frequency, and issue the fid/vid transition command */
- static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
+ static int transition_frequency_fidvid(struct powernow_k8_data *data,
+               unsigned int index)
  {
        u32 fid = 0;
        u32 vid = 0;
                return 0;
        }
  
-       if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+       if ((fid < HI_FID_TABLE_BOTTOM) &&
+           (data->currfid < HI_FID_TABLE_BOTTOM)) {
                printk(KERN_ERR PFX
                       "ignoring illegal change in lo freq table-%x to 0x%x\n",
                       data->currfid, fid);
  }
  
  /* Take a frequency, and issue the hardware pstate transition command */
- static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+ static int transition_frequency_pstate(struct powernow_k8_data *data,
+               unsigned int index)
  {
        u32 pstate = 0;
        int res, i;
        pstate = index & HW_PSTATE_MASK;
        if (pstate > data->max_hw_pstate)
                return 0;
-       freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+       freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+                       data->currpstate);
        freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
  
        for_each_cpu_mask_nr(i, *(data->available_cores)) {
  }
  
  /* Driver entry point to switch to the target frequency */
- static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+ static int powernowk8_target(struct cpufreq_policy *pol,
+               unsigned targfreq, unsigned relation)
  {
        cpumask_t oldmask;
        struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
                dprintk("targ: curr fid 0x%x, vid 0x%x\n",
                data->currfid, data->currvid);
  
-               if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+               if ((checkvid != data->currvid) ||
+                   (checkfid != data->currfid)) {
                        printk(KERN_INFO PFX
-                               "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
-                               checkfid, data->currfid, checkvid, data->currvid);
+                               "error - out of sync, fix 0x%x 0x%x, "
+                               "vid 0x%x 0x%x\n",
+                               checkfid, data->currfid,
+                               checkvid, data->currvid);
                }
        }
  
-       if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+       if (cpufreq_frequency_table_target(pol, data->powernow_table,
+                               targfreq, relation, &newstate))
                goto err_out;
  
        mutex_lock(&fidvid_mutex);
        mutex_unlock(&fidvid_mutex);
  
        if (cpu_family == CPU_HW_PSTATE)
-               pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
+               pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+                               newstate);
        else
                pol->cur = find_khz_freq_from_fid(data->currfid);
        ret = 0;
@@@ -1144,6 -1218,7 +1221,7 @@@ static int __cpuinit powernowk8_cpu_ini
        struct powernow_k8_data *data;
        cpumask_t oldmask;
        int rc;
+       static int print_once;
  
        if (!cpu_online(pol->cpu))
                return -ENODEV;
                 * an UP version, and is deprecated by AMD.
                 */
                if (num_online_cpus() != 1) {
- #ifndef CONFIG_ACPI_PROCESSOR
-                       printk(KERN_ERR PFX "ACPI Processor support is required "
-                              "for SMP systems but is absent. Please load the "
-                              "ACPI Processor module before starting this "
-                              "driver.\n");
- #else
-                       printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide"
-                              " ACPI _PSS objects in a way that Linux "
-                              "understands. Please report this to the Linux "
-                              "ACPI maintainers and complain to your BIOS "
-                              "vendor.\n");
- #endif
-                       kfree(data);
-                       return -ENODEV;
+                       /*
+                        * Replace this one with print_once as soon as such a
+                        * thing gets introduced
+                        */
+                       if (!print_once) {
+                               WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
+                                       "does not provide ACPI _PSS objects "
+                                       "in a way that Linux understands. "
+                                       "Please report this to the Linux ACPI"
+                                       " maintainers and complain to your "
+                                       "BIOS vendor.\n");
+                               print_once++;
+                       }
+                       goto err_out;
                }
                if (pol->cpu != 0) {
                        printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
                               "CPU other than CPU0. Complain to your BIOS "
                               "vendor.\n");
-                       kfree(data);
-                       return -ENODEV;
+                       goto err_out;
                }
                rc = find_psb_table(data);
-               if (rc) {
-                       kfree(data);
-                       return -ENODEV;
-               }
+               if (rc)
+                       goto err_out;
                /* Take a crude guess here.
                 * That guess was in microseconds, so multiply with 1000 */
                pol->cpuinfo.transition_latency = (
  
        if (smp_processor_id() != pol->cpu) {
                printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-               goto err_out;
+               goto err_out_unmask;
        }
  
        if (pending_bit_stuck()) {
                printk(KERN_ERR PFX "failing init, change pending bit set\n");
-               goto err_out;
+               goto err_out_unmask;
        }
  
        if (query_current_values_with_pending_wait(data))
-               goto err_out;
+               goto err_out_unmask;
  
        if (cpu_family == CPU_OPTERON)
                fidvid_msr_init();
        if (cpu_family == CPU_HW_PSTATE)
                cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
        else
 -              cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
 +              cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
        data->available_cores = pol->cpus;
  
        if (cpu_family == CPU_HW_PSTATE)
-               pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+               pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+                               data->currpstate);
        else
                pol->cur = find_khz_freq_from_fid(data->currfid);
        dprintk("policy current frequency %d kHz\n", pol->cur);
        cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
  
        if (cpu_family == CPU_HW_PSTATE)
-               dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
+               dprintk("cpu_init done, current pstate 0x%x\n",
+                               data->currpstate);
        else
                dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
                        data->currfid, data->currvid);
  
        return 0;
  
- err_out:
+ err_out_unmask:
        set_cpus_allowed_ptr(current, &oldmask);
        powernow_k8_cpu_exit_acpi(data);
  
+ err_out:
        kfree(data);
        return -ENODEV;
  }
  
- static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
  {
        struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
  
        return 0;
  }
  
- static unsigned int powernowk8_get (unsigned int cpu)
+ static unsigned int powernowk8_get(unsigned int cpu)
  {
        struct powernow_k8_data *data;
        cpumask_t oldmask = current->cpus_allowed;
        unsigned int khz = 0;
        unsigned int first;
  
 -      first = first_cpu(per_cpu(cpu_core_map, cpu));
 +      first = cpumask_first(cpu_core_mask(cpu));
        data = per_cpu(powernow_data, first);
  
        if (!data)
@@@ -1318,7 -1394,7 +1397,7 @@@ out
        return khz;
  }
  
- static struct freq_attr* powernow_k8_attr[] = {
+ static struct freq_attr *powernow_k8_attr[] = {
        &cpufreq_freq_attr_scaling_available_freqs,
        NULL,
  };
@@@ -1363,7 -1439,8 +1442,8 @@@ static void __exit powernowk8_exit(void
        cpufreq_unregister_driver(&cpufreq_amd64_driver);
  }
  
- MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
+ MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+               "Mark Langsdorf <mark.langsdorf@amd.com>");
  MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
  MODULE_LICENSE("GPL");
  
index 1f0ec83d343b76e62db2de80bcb7b306a786b038,8bbb11adb3157d1e0d436910dd6b2423cec025ef..016c1a4fa3fc5a88d8ab9950da8d4a92591894e1
@@@ -39,7 -39,7 +39,7 @@@ static struct pci_dev *speedstep_chipse
  
  /* speedstep_processor
   */
- static unsigned int speedstep_processor = 0;
+ static unsigned int speedstep_processor;
  
  static u32 pmbase;
  
@@@ -54,7 -54,8 +54,8 @@@ static struct cpufreq_frequency_table s
  };
  
  
- #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+               "speedstep-ich", msg)
  
  
  /**
@@@ -62,7 -63,7 +63,7 @@@
   *
   * Returns: -ENODEV if no register could be found
   */
- static int speedstep_find_register (void)
+ static int speedstep_find_register(void)
  {
        if (!speedstep_chipset_dev)
                return -ENODEV;
@@@ -90,7 -91,7 +91,7 @@@
   *
   *   Tries to change the SpeedStep state.
   */
- static void speedstep_set_state (unsigned int state)
+ static void speedstep_set_state(unsigned int state)
  {
        u8 pm2_blk;
        u8 value;
  
        dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
  
-       if (state == (value & 0x1)) {
-               dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
-       } else {
-               printk (KERN_ERR "cpufreq: change failed - I/O error\n");
-       }
+       if (state == (value & 0x1))
+               dprintk("change to %u MHz succeeded\n",
+                       speedstep_get_frequency(speedstep_processor) / 1000);
+       else
+               printk(KERN_ERR "cpufreq: change failed - I/O error\n");
  
        return;
  }
   *   Tries to activate the SpeedStep status and control registers.
   * Returns -EINVAL on an unsupported chipset, and zero on success.
   */
- static int speedstep_activate (void)
+ static int speedstep_activate(void)
  {
        u16 value = 0;
  
   * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
   * chipset, or zero on failure.
   */
- static unsigned int speedstep_detect_chipset (void)
+ static unsigned int speedstep_detect_chipset(void)
  {
        speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                              PCI_DEVICE_ID_INTEL_82801DB_12,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                              NULL);
        if (speedstep_chipset_dev)
                return 4; /* 4-M */
  
        speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                              PCI_DEVICE_ID_INTEL_82801CA_12,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                              NULL);
        if (speedstep_chipset_dev)
                return 3; /* 3-M */
  
        speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                              PCI_DEVICE_ID_INTEL_82801BA_10,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                              NULL);
        if (speedstep_chipset_dev) {
                /* speedstep.c causes lockups on Dell Inspirons 8000 and
  
                hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                              PCI_DEVICE_ID_INTEL_82815_MC,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                              NULL);
  
                if (!hostbridge)
@@@ -236,7 -233,7 +233,7 @@@ static unsigned int _speedstep_get(cons
  
        cpus_allowed = current->cpus_allowed;
        set_cpus_allowed_ptr(current, cpus);
-       speed = speedstep_get_processor_frequency(speedstep_processor);
+       speed = speedstep_get_frequency(speedstep_processor);
        set_cpus_allowed_ptr(current, &cpus_allowed);
        dprintk("detected %u kHz as current frequency\n", speed);
        return speed;
@@@ -251,11 -248,12 +248,12 @@@ static unsigned int speedstep_get(unsig
   * speedstep_target - set a new CPUFreq policy
   * @policy: new policy
   * @target_freq: the target frequency
-  * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+  * @relation: how that frequency relates to achieved frequency
+  *    (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
   *
   * Sets a new CPUFreq policy.
   */
- static int speedstep_target (struct cpufreq_policy *policy,
+ static int speedstep_target(struct cpufreq_policy *policy,
                             unsigned int target_freq,
                             unsigned int relation)
  {
        cpumask_t cpus_allowed;
        int i;
  
-       if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+       if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+                               target_freq, relation, &newstate))
                return -EINVAL;
  
        freqs.old = _speedstep_get(policy->cpus);
   * Limit must be within speedstep_low_freq and speedstep_high_freq, with
   * at least one border included.
   */
- static int speedstep_verify (struct cpufreq_policy *policy)
+ static int speedstep_verify(struct cpufreq_policy *policy)
  {
        return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
  }
@@@ -322,7 -321,7 +321,7 @@@ static int speedstep_cpu_init(struct cp
  
        /* only run on CPU to be set, or on its sibling */
  #ifdef CONFIG_SMP
 -      cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
 +      cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
  #endif
  
        cpus_allowed = current->cpus_allowed;
                return -EIO;
  
        dprintk("currently at %s speed setting - %i MHz\n",
-               (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+               (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+               ? "low" : "high",
                (speed / 1000));
  
        /* cpuinfo and default policy values */
  
        result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
        if (result)
-               return (result);
+               return result;
  
-         cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+       cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
  
        return 0;
  }
@@@ -366,7 -366,7 +366,7 @@@ static int speedstep_cpu_exit(struct cp
        return 0;
  }
  
- static struct freq_attr* speedstep_attr[] = {
+ static struct freq_attr *speedstep_attr[] = {
        &cpufreq_freq_attr_scaling_available_freqs,
        NULL,
  };
@@@ -396,13 -396,15 +396,15 @@@ static int __init speedstep_init(void
        /* detect processor */
        speedstep_processor = speedstep_detect_processor();
        if (!speedstep_processor) {
-               dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+               dprintk("Intel(R) SpeedStep(TM) capable processor "
+                               "not found\n");
                return -ENODEV;
        }
  
        /* detect chipset */
        if (!speedstep_detect_chipset()) {
-               dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+               dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+                               "(yet) available.\n");
                return -ENODEV;
        }
  
@@@ -431,9 -433,11 +433,11 @@@ static void __exit speedstep_exit(void
  }
  
  
- MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
- MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
- MODULE_LICENSE ("GPL");
+ MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+               "Dominik Brodowski <linux@brodo.de>");
+ MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+               "with ICH-M southbridges.");
+ MODULE_LICENSE("GPL");
  
  module_init(speedstep_init);
  module_exit(speedstep_exit);
index 8e6ce2c146d65559a11bbf4b43bd210c5c909cac,c471eb1a389cc02c4f788d5a828f3f4bfe56c93d..483eda96e102062b23f3e29820d911a9c7d6ab59
@@@ -32,7 -32,7 +32,7 @@@ struct _cache_tabl
  };
  
  /* all the cache descriptor types we care about (no TLB or trace cache entries) */
- static struct _cache_table cache_table[] __cpuinitdata =
+ static const struct _cache_table __cpuinitconst cache_table[] =
  {
        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
@@@ -159,7 -159,7 +159,7 @@@ struct _cpuid4_info_regs 
        unsigned long can_disable;
  };
  
 -#ifdef CONFIG_PCI
 +#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
  static struct pci_device_id k8_nb_id[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@@ -206,15 -206,15 +206,15 @@@ union l3_cache 
        unsigned val;
  };
  
- static unsigned short assocs[] __cpuinitdata = {
+ static const unsigned short __cpuinitconst assocs[] = {
        [1] = 1, [2] = 2, [4] = 4, [6] = 8,
        [8] = 16, [0xa] = 32, [0xb] = 48,
        [0xc] = 64,
        [0xf] = 0xffff // ??
  };
  
- static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
- static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
+ static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
+ static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
  
  static void __cpuinit
  amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
@@@ -324,6 -324,15 +324,6 @@@ __cpuinit cpuid4_cache_lookup_regs(int 
        return 0;
  }
  
 -static int
 -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 -{
 -      struct _cpuid4_info_regs *leaf_regs =
 -              (struct _cpuid4_info_regs *)this_leaf;
 -
 -      return cpuid4_cache_lookup_regs(index, leaf_regs);
 -}
 -
  static int __cpuinit find_num_cache_leaves(void)
  {
        unsigned int            eax, ebx, ecx, edx;
@@@ -499,8 -508,6 +499,8 @@@ unsigned int __cpuinit init_intel_cache
        return l2;
  }
  
 +#ifdef CONFIG_SYSFS
 +
  /* pointer to _cpuid4_info array (for each cache leaf) */
  static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
  #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
@@@ -564,15 -571,6 +564,15 @@@ static void __cpuinit free_cache_attrib
        per_cpu(cpuid4_info, cpu) = NULL;
  }
  
 +static int
 +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 +{
 +      struct _cpuid4_info_regs *leaf_regs =
 +              (struct _cpuid4_info_regs *)this_leaf;
 +
 +      return cpuid4_cache_lookup_regs(index, leaf_regs);
 +}
 +
  static void __cpuinit get_cpu_leaves(void *_retval)
  {
        int j, *retval = _retval, cpu = smp_processor_id();
@@@ -614,6 -612,8 +614,6 @@@ static int __cpuinit detect_cache_attri
        return retval;
  }
  
 -#ifdef CONFIG_SYSFS
 -
  #include <linux/kobject.h>
  #include <linux/sysfs.h>
  
index 1f429ee3477d7c9c10617c8c33c03e6e1604c185,7d01be868870d1a7922c7ec18fc7a8b666ddb25b..56dde9c4bc96b8fe3202a2051a500efe52853b0f
@@@ -92,7 -92,8 +92,8 @@@ struct thresh_restart 
  };
  
  /* must be called with correct cpu affinity */
- static long threshold_restart_bank(void *_tr)
+ /* Called via smp_call_function_single() */
+ static void threshold_restart_bank(void *_tr)
  {
        struct thresh_restart *tr = _tr;
        u32 mci_misc_hi, mci_misc_lo;
  
        mci_misc_hi |= MASK_COUNT_EN_HI;
        wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
-       return 0;
  }
  
  /* cpu init entry point, called from mce.c with preempt off */
@@@ -279,7 -279,7 +279,7 @@@ static ssize_t store_interrupt_enable(s
        tr.b = b;
        tr.reset = 0;
        tr.old_limit = 0;
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
  
        return end - buf;
  }
@@@ -301,23 -301,32 +301,32 @@@ static ssize_t store_threshold_limit(st
        tr.b = b;
        tr.reset = 0;
  
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
  
        return end - buf;
  }
  
- static long local_error_count(void *_b)
+ struct threshold_block_cross_cpu {
+       struct threshold_block *tb;
+       long retval;
+ };
+ static void local_error_count_handler(void *_tbcc)
  {
-       struct threshold_block *b = _b;
+       struct threshold_block_cross_cpu *tbcc = _tbcc;
+       struct threshold_block *b = tbcc->tb;
        u32 low, high;
  
        rdmsr(b->address, low, high);
-       return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+       tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
  }
  
  static ssize_t show_error_count(struct threshold_block *b, char *buf)
  {
-       return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
+       struct threshold_block_cross_cpu tbcc = { .tb = b, };
+       smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
+       return sprintf(buf, "%lx\n", tbcc.retval);
  }
  
  static ssize_t store_error_count(struct threshold_block *b,
  {
        struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
  
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
        return 1;
  }
  
@@@ -394,7 -403,7 +403,7 @@@ static __cpuinit int allocate_threshold
        if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
                return 0;
  
-       if (rdmsr_safe(address, &low, &high))
+       if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                return 0;
  
        if (!(high & MASK_VALID_HI)) {
@@@ -458,12 -467,11 +467,11 @@@ out_free
        return err;
  }
  
- static __cpuinit long local_allocate_threshold_blocks(void *_bank)
+ static __cpuinit long
+ local_allocate_threshold_blocks(int cpu, unsigned int bank)
  {
-       unsigned int *bank = _bank;
-       return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
-                                        MSR_IA32_MC0_MISC + *bank * 4);
+       return allocate_threshold_blocks(cpu, bank, 0,
+                                        MSR_IA32_MC0_MISC + bank * 4);
  }
  
  /* symlinks sibling shared banks to first core.  first core owns dir/files. */
@@@ -477,7 -485,7 +485,7 @@@ static __cpuinit int threshold_create_b
  
  #ifdef CONFIG_SMP
        if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {   /* symlink */
 -              i = cpumask_first(&per_cpu(cpu_core_map, cpu));
 +              i = cpumask_first(cpu_core_mask(cpu));
  
                /* first core not up yet */
                if (cpu_data(i).cpu_core_id)
                if (err)
                        goto out;
  
 -              cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
 +              cpumask_copy(b->cpus, cpu_core_mask(cpu));
                per_cpu(threshold_banks, cpu)[bank] = b;
                goto out;
        }
  #ifndef CONFIG_SMP
        cpumask_setall(b->cpus);
  #else
 -      cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
 +      cpumask_copy(b->cpus, cpu_core_mask(cpu));
  #endif
  
        per_cpu(threshold_banks, cpu)[bank] = b;
  
-       err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
+       err = local_allocate_threshold_blocks(cpu, bank);
        if (err)
                goto out_free;
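
The mce_amd_64.c hunks above replace work_on_cpu() with smp_call_function_single(). The callback type for smp_call_function_single() returns void, so any result has to travel back through the *info argument, which is what the threshold_block_cross_cpu wrapper does for show_error_count(). A self-contained sketch of that pattern; the struct and function names here are hypothetical, not part of the commit:

#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/types.h>

struct remote_read {
	u32 value;	/* filled in on the target CPU */
	int err;
};

/* Runs on the target CPU; the result travels back via the argument. */
static void do_remote_read(void *info)
{
	struct remote_read *rr = info;

	rr->value = 0x1234;	/* stand-in for an rdmsr() or similar */
	rr->err = 0;
}

static int read_on_cpu(int cpu, u32 *out)
{
	struct remote_read rr = { .err = -EIO };

	/* wait=1: do not return until the remote function has finished. */
	smp_call_function_single(cpu, do_remote_read, &rr, 1);
	*out = rr.value;
	return rr.err;
}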
  
index 96b2a85545aa8d6940775d54b191edce0aacc7dc,57df3d383470bb158a7d64bccb7948d45ec9e4ac..d6b72df89d697cb6b809cc1e3c68540d28cfe1f3
@@@ -249,7 -249,7 +249,7 @@@ void cmci_rediscover(int dying
        for_each_online_cpu (cpu) {
                if (cpu == dying)
                        continue;
 -              if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
 +              if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
                        continue;
                /* Recheck banks in case CPUs don't all have the same */
                if (cmci_supported(&banks))
@@@ -270,7 -270,7 +270,7 @@@ void cmci_reenable(void
                cmci_discover(banks, 0);
  }
  
- static __cpuinit void intel_init_cmci(void)
+ static void intel_init_cmci(void)
  {
        int banks;
  
index 78533a519d8f48e38d00d81bef94a87313c34249,156f87582c6cd5e0fcb677b7c685d92c68443d72..25e28087a3ee628e31569206868de49e183360d3
@@@ -65,11 -65,11 +65,11 @@@ void exit_thread(void
  {
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;
+       unsigned long *bp = t->io_bitmap_ptr;
  
-       if (me->thread.io_bitmap_ptr) {
+       if (bp) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
  
-               kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
@@@ -78,6 -78,7 +78,7 @@@
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
+               kfree(bp);
        }
  
        ds_exit_thread(current);
@@@ -324,7 -325,7 +325,7 @@@ void stop_this_cpu(void *dummy
        /*
         * Remove this CPU:
         */
 -      cpu_clear(smp_processor_id(), cpu_online_map);
 +      set_cpu_online(smp_processor_id(), false);
        disable_local_APIC();
  
        for (;;) {
@@@ -474,13 -475,12 +475,13 @@@ static int __cpuinit check_c1e_idle(con
        return 1;
  }
  
 -static cpumask_t c1e_mask = CPU_MASK_NONE;
 +static cpumask_var_t c1e_mask;
  static int c1e_detected;
  
  void c1e_remove_cpu(int cpu)
  {
 -      cpu_clear(cpu, c1e_mask);
 +      if (c1e_mask != NULL)
 +              cpumask_clear_cpu(cpu, c1e_mask);
  }
  
  /*
@@@ -509,8 -509,8 +510,8 @@@ static void c1e_idle(void
        if (c1e_detected) {
                int cpu = smp_processor_id();
  
 -              if (!cpu_isset(cpu, c1e_mask)) {
 -                      cpu_set(cpu, c1e_mask);
 +              if (!cpumask_test_cpu(cpu, c1e_mask)) {
 +                      cpumask_set_cpu(cpu, c1e_mask);
                        /*
                         * Force broadcast so ACPI can not interfere. Needs
                         * to run with interrupts enabled as it uses
@@@ -562,15 -562,6 +563,15 @@@ void __cpuinit select_idle_routine(cons
                pm_idle = default_idle;
  }
  
 +void __init init_c1e_mask(void)
 +{
 +      /* If we're using c1e_idle, we need to allocate c1e_mask. */
 +      if (pm_idle == c1e_idle) {
 +              alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 +              cpumask_clear(c1e_mask);
 +      }
 +}
 +
  static int __init idle_setup(char *str)
  {
        if (!str)
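
The process.c hunk above converts c1e_mask from a fixed-size cpumask_t to a cpumask_var_t that is allocated only once the C1E idle routine is selected, with every user guarded against the not-yet-allocated case. A minimal sketch of that allocate-on-demand pattern, with illustrative names:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static cpumask_var_t tracked_cpus;	/* empty/NULL until allocated */

static int tracked_cpus_init(void)
{
	/* Really allocates only with CONFIG_CPUMASK_OFFSTACK=y;
	 * otherwise this is a no-op that always succeeds. */
	if (!alloc_cpumask_var(&tracked_cpus, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(tracked_cpus);
	return 0;
}

static void tracked_cpus_mark(int cpu)
{
	if (tracked_cpus != NULL)	/* same guard as c1e_remove_cpu() */
		cpumask_set_cpu(cpu, tracked_cpus);
}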
diff --combined arch/x86/kernel/tlb_uv.c
index 8afb69180c9bb2bf49b87bfad9dcb3b599b31656,79c073247284cbdb334cf2a03b4e7ffaae8d0bcd..deb5ebb32c3b71a8a698df7b68ae5447e12f0cf8
@@@ -275,8 -275,6 +275,8 @@@ const struct cpumask *uv_flush_send_and
        return NULL;
  }
  
 +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 +
  /**
   * uv_flush_tlb_others - globally purge translation cache of a virtual
   * address or all TLB's
@@@ -306,7 -304,8 +306,7 @@@ const struct cpumask *uv_flush_tlb_othe
                                          struct mm_struct *mm,
                                          unsigned long va, unsigned int cpu)
  {
 -      static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
 -      struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
 +      struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
        int i;
        int bit;
        int blade;
@@@ -751,20 -750,17 +751,21 @@@ static int __init uv_bau_init(void
        int node;
        int nblades;
        int last_blade;
-       int cur_cpu = 0;
+       int cur_cpu;
  
        if (!is_uv_system())
                return 0;
  
 +      for_each_possible_cpu(cur_cpu)
 +              alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
 +                                     GFP_KERNEL, cpu_to_node(cur_cpu));
 +
        uv_bau_retry_limit = 1;
        uv_nshift = uv_hub_info->n_val;
        uv_mmask = (1UL << uv_hub_info->n_val) - 1;
        nblades = 0;
        last_blade = -1;
+       cur_cpu = 0;
        for_each_online_node(node) {
                blade = uv_node_to_blade_id(node);
                if (blade == last_blade)
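
The tlb_uv.c hunk above moves a per-CPU cpumask_t over to the cpumask_var_t API and allocates each CPU's mask on that CPU's home node at init time. A minimal sketch of the same DEFINE_PER_CPU(cpumask_var_t, ...) plus node-local allocation pattern, with made-up names:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/topology.h>	/* cpu_to_node() */

static DEFINE_PER_CPU(cpumask_var_t, scratch_mask);

static int __init scratch_mask_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		/* Place each mask on the memory node that owns the CPU. */
		if (!alloc_cpumask_var_node(&per_cpu(scratch_mask, cpu),
					    GFP_KERNEL, cpu_to_node(cpu)))
			return -ENOMEM;
	}
	return 0;
}

static void scratch_mask_use(void)
{
	int cpu = get_cpu();	/* disable preemption while using it */
	struct cpumask *mask = per_cpu(scratch_mask, cpu);

	cpumask_clear(mask);
	cpumask_set_cpu(cpu, mask);
	put_cpu();
}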
diff --combined kernel/sched.c
index 11dd52780adb556a5d825174620bac18a095fca3,5757e03cfac0bdf7cd50f3625a318645c562b973..0ad7c28b7236e1f9e9ea4226849fb9a94863bf76
@@@ -331,6 -331,13 +331,13 @@@ static DEFINE_PER_CPU(struct rt_rq, ini
   */
  static DEFINE_SPINLOCK(task_group_lock);
  
+ #ifdef CONFIG_SMP
+ static int root_task_group_empty(void)
+ {
+       return list_empty(&root_task_group.children);
+ }
+ #endif
  #ifdef CONFIG_FAIR_GROUP_SCHED
  #ifdef CONFIG_USER_SCHED
  # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
@@@ -391,6 -398,13 +398,13 @@@ static inline void set_task_rq(struct t
  
  #else
  
+ #ifdef CONFIG_SMP
+ static int root_task_group_empty(void)
+ {
+       return 1;
+ }
+ #endif
  static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
  static inline struct task_group *task_group(struct task_struct *p)
  {
@@@ -467,11 -481,17 +481,17 @@@ struct rt_rq 
        struct rt_prio_array active;
        unsigned long rt_nr_running;
  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       int highest_prio; /* highest queued rt task prio */
+       struct {
+               int curr; /* highest queued rt task prio */
+ #ifdef CONFIG_SMP
+               int next; /* next highest */
+ #endif
+       } highest_prio;
  #endif
  #ifdef CONFIG_SMP
        unsigned long rt_nr_migratory;
        int overloaded;
+       struct plist_head pushable_tasks;
  #endif
        int rt_throttled;
        u64 rt_time;
@@@ -549,7 -569,6 +569,6 @@@ struct rq 
        unsigned long nr_running;
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-       unsigned char idle_at_tick;
  #ifdef CONFIG_NO_HZ
        unsigned long last_tick_seen;
        unsigned char in_nohz_recently;
        struct root_domain *rd;
        struct sched_domain *sd;
  
+       unsigned char idle_at_tick;
        /* For active balancing */
        int active_balance;
        int push_cpu;
        /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
  
        /* sys_sched_yield() stats */
-       unsigned int yld_exp_empty;
-       unsigned int yld_act_empty;
-       unsigned int yld_both_empty;
        unsigned int yld_count;
  
        /* schedule() stats */
@@@ -1183,10 -1200,10 +1200,10 @@@ static void resched_task(struct task_st
  
        assert_spin_locked(&task_rq(p)->lock);
  
-       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+       if (test_tsk_need_resched(p))
                return;
  
-       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+       set_tsk_need_resched(p);
  
        cpu = task_cpu(p);
        if (cpu == smp_processor_id())
@@@ -1242,7 -1259,7 +1259,7 @@@ void wake_up_idle_cpu(int cpu
         * lockless. The worst case is that the other CPU runs the
         * idle task through an additional NOOP schedule()
         */
-       set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+       set_tsk_need_resched(rq->idle);
  
        /* NEED_RESCHED must be visible before we test polling */
        smp_mb();
@@@ -1610,21 -1627,42 +1627,42 @@@ static inline void update_shares_locked
  
  #endif
  
+ #ifdef CONFIG_PREEMPT
  /*
-  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+  * fair double_lock_balance: Safely acquires both rq->locks in a fair
+  * way at the expense of forcing extra atomic operations in all
+  * invocations.  This assures that the double_lock is acquired using the
+  * same underlying policy as the spinlock_t on this architecture, which
+  * reduces latency compared to the unfair variant below.  However, it
+  * also adds more overhead and therefore may reduce throughput.
   */
- static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(this_rq->lock)
+       __acquires(busiest->lock)
+       __acquires(this_rq->lock)
+ {
+       spin_unlock(&this_rq->lock);
+       double_rq_lock(this_rq, busiest);
+       return 1;
+ }
+ #else
+ /*
+  * Unfair double_lock_balance: Optimizes throughput at the expense of
+  * latency by eliminating extra atomic operations when the locks are
+  * already in proper order on entry.  This favors lower cpu-ids and will
+  * grant the double lock to lower cpus over higher ids under contention,
+  * regardless of entry order into the function.
+  */
+ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(this_rq->lock)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
  {
        int ret = 0;
  
-       if (unlikely(!irqs_disabled())) {
-               /* printk() doesn't work good under rq->lock */
-               spin_unlock(&this_rq->lock);
-               BUG_ON(1);
-       }
        if (unlikely(!spin_trylock(&busiest->lock))) {
                if (busiest < this_rq) {
                        spin_unlock(&this_rq->lock);
        return ret;
  }
  
+ #endif /* CONFIG_PREEMPT */
+ /*
+  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+  */
+ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ {
+       if (unlikely(!irqs_disabled())) {
+               /* printk() doesn't work good under rq->lock */
+               spin_unlock(&this_rq->lock);
+               BUG_ON(1);
+       }
+       return _double_lock_balance(this_rq, busiest);
+ }
  static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(busiest->lock)
  {
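
The unfair _double_lock_balance() above depends on a fixed lock ordering (the lower-addressed runqueue lock is taken first) so that two CPUs balancing against each other cannot deadlock. A generic sketch of that ordering rule, separate from the scheduler's own code and with generic names:

#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct node {
	spinlock_t lock;
	/* ... payload ... */
};

/*
 * Take both locks without risking an ABBA deadlock: always acquire the
 * lower-addressed lock first (callers must pass a != b).
 */
static void lock_pair(struct node *a, struct node *b)
{
	if (a < b) {
		spin_lock(&a->lock);
		spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&b->lock);
		spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
	}
}

static void unlock_pair(struct node *a, struct node *b)
{
	spin_unlock(&a->lock);
	spin_unlock(&b->lock);
}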
@@@ -1705,6 -1759,9 +1759,9 @@@ static void update_avg(u64 *avg, u64 sa
  
  static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
  {
+       if (wakeup)
+               p->se.start_runtime = p->se.sum_exec_runtime;
        sched_info_queued(p);
        p->sched_class->enqueue_task(rq, p, wakeup);
        p->se.on_rq = 1;
  
  static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
  {
-       if (sleep && p->se.last_wakeup) {
-               update_avg(&p->se.avg_overlap,
-                          p->se.sum_exec_runtime - p->se.last_wakeup);
-               p->se.last_wakeup = 0;
+       if (sleep) {
+               if (p->se.last_wakeup) {
+                       update_avg(&p->se.avg_overlap,
+                               p->se.sum_exec_runtime - p->se.last_wakeup);
+                       p->se.last_wakeup = 0;
+               } else {
+                       update_avg(&p->se.avg_wakeup,
+                               sysctl_sched_wakeup_granularity);
+               }
        }
  
        sched_info_dequeued(p);
@@@ -2017,7 -2079,7 +2079,7 @@@ unsigned long wait_task_inactive(struc
                 * it must be off the runqueue _entirely_, and not
                 * preempted!
                 *
-                * So if it wa still runnable (but just not actively
+                * So if it was still runnable (but just not actively
                 * running right now), it's preempted, and we should
                 * yield - it could be a while.
                 */
@@@ -2267,7 -2329,7 +2329,7 @@@ static int try_to_wake_up(struct task_s
                sync = 0;
  
  #ifdef CONFIG_SMP
-       if (sched_feat(LB_WAKEUP_UPDATE)) {
+       if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
                struct sched_domain *sd;
  
                this_cpu = raw_smp_processor_id();
@@@ -2345,6 -2407,22 +2407,22 @@@ out_activate
        activate_task(rq, p, 1);
        success = 1;
  
+       /*
+        * Only attribute actual wakeups done by this task.
+        */
+       if (!in_interrupt()) {
+               struct sched_entity *se = &current->se;
+               u64 sample = se->sum_exec_runtime;
+               if (se->last_wakeup)
+                       sample -= se->last_wakeup;
+               else
+                       sample -= se->start_runtime;
+               update_avg(&se->avg_wakeup, sample);
+               se->last_wakeup = se->sum_exec_runtime;
+       }
  out_running:
        trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, sync);
                p->sched_class->task_wake_up(rq, p);
  #endif
  out:
-       current->se.last_wakeup = current->se.sum_exec_runtime;
        task_rq_unlock(rq, &flags);
  
        return success;
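
The try_to_wake_up() hunk above feeds a wakeup-interval sample into update_avg() to maintain se->avg_wakeup. The helper's body is outside this excerpt; running averages of this kind are usually kept as a simple exponentially weighted mean, for example (the shift width here is an assumption):

#include <linux/types.h>

/* Sketch of a running average of the sort update_avg() maintains. */
static void ewma_update(u64 *avg, u64 sample)
{
	s64 diff = sample - *avg;

	*avg += diff >> 3;	/* new = old + (sample - old) / 8 */
}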
@@@ -2386,6 -2462,8 +2462,8 @@@ static void __sched_fork(struct task_st
        p->se.prev_sum_exec_runtime     = 0;
        p->se.last_wakeup               = 0;
        p->se.avg_overlap               = 0;
+       p->se.start_runtime             = 0;
+       p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
  
  #ifdef CONFIG_SCHEDSTATS
        p->se.wait_start                = 0;
@@@ -2448,6 -2526,8 +2526,8 @@@ void sched_fork(struct task_struct *p, 
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
  #endif
+       plist_node_init(&p->pushable_tasks, MAX_PRIO);
        put_cpu();
  }
  
@@@ -2491,7 -2571,7 +2571,7 @@@ void wake_up_new_task(struct task_struc
  #ifdef CONFIG_PREEMPT_NOTIFIERS
  
  /**
-  * preempt_notifier_register - tell me when current is being being preempted & rescheduled
+  * preempt_notifier_register - tell me when current is being preempted & rescheduled
   * @notifier: notifier struct to register
   */
  void preempt_notifier_register(struct preempt_notifier *notifier)
@@@ -2588,6 -2668,12 +2668,12 @@@ static void finish_task_switch(struct r
  {
        struct mm_struct *mm = rq->prev_mm;
        long prev_state;
+ #ifdef CONFIG_SMP
+       int post_schedule = 0;
+       if (current->sched_class->needs_post_schedule)
+               post_schedule = current->sched_class->needs_post_schedule(rq);
+ #endif
  
        rq->prev_mm = NULL;
  
        finish_arch_switch(prev);
        finish_lock_switch(rq, prev);
  #ifdef CONFIG_SMP
-       if (current->sched_class->post_schedule)
+       if (post_schedule)
                current->sched_class->post_schedule(rq);
  #endif
  
@@@ -2913,6 -2999,7 +2999,7 @@@ int can_migrate_task(struct task_struc
                     struct sched_domain *sd, enum cpu_idle_type idle,
                     int *all_pinned)
  {
+       int tsk_cache_hot = 0;
        /*
         * We do not migrate tasks that are:
         * 1) running (obviously), or
         * 2) too many balance attempts have failed.
         */
  
-       if (!task_hot(p, rq->clock, sd) ||
-                       sd->nr_balance_failed > sd->cache_nice_tries) {
+       tsk_cache_hot = task_hot(p, rq->clock, sd);
+       if (!tsk_cache_hot ||
+               sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
-               if (task_hot(p, rq->clock, sd)) {
+               if (tsk_cache_hot) {
                        schedstat_inc(sd, lb_hot_gained[idle]);
                        schedstat_inc(p, se.nr_forced_migrations);
                }
                return 1;
        }
  
-       if (task_hot(p, rq->clock, sd)) {
+       if (tsk_cache_hot) {
                schedstat_inc(p, se.nr_failed_migrations_hot);
                return 0;
        }
@@@ -2987,6 -3075,16 +3075,16 @@@ next
        pulled++;
        rem_load_move -= p->se.load.weight;
  
+ #ifdef CONFIG_PREEMPT
+       /*
+        * NEWIDLE balancing is a source of latency, so preemptible kernels
+        * will stop after the first task is pulled to minimize the critical
+        * section.
+        */
+       if (idle == CPU_NEWLY_IDLE)
+               goto out;
+ #endif
        /*
         * We only want to steal up to the prescribed amount of weighted load.
         */
@@@ -3033,9 -3131,15 +3131,15 @@@ static int move_tasks(struct rq *this_r
                                sd, idle, all_pinned, &this_best_prio);
                class = class->next;
  
+ #ifdef CONFIG_PREEMPT
+               /*
+                * NEWIDLE balancing is a source of latency, so preemptible
+                * kernels will stop after the first task is pulled to minimize
+                * the critical section.
+                */
                if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
                        break;
+ #endif
        } while (class && max_load_move > total_load_moved);
  
        return total_load_moved > 0;
@@@ -3085,246 -3189,480 +3189,480 @@@ static int move_one_task(struct rq *thi
  
        return 0;
  }
+ /********** Helpers for find_busiest_group ************************/
  /*
-  * find_busiest_group finds and returns the busiest CPU group within the
-  * domain. It calculates and returns the amount of weighted load which
-  * should be moved to restore balance via the imbalance parameter.
+  * sd_lb_stats - Structure to store the statistics of a sched_domain
+  *            during load balancing.
   */
- static struct sched_group *
- find_busiest_group(struct sched_domain *sd, int this_cpu,
-                  unsigned long *imbalance, enum cpu_idle_type idle,
-                  int *sd_idle, const struct cpumask *cpus, int *balance)
- {
-       struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
-       unsigned long max_load, avg_load, total_load, this_load, total_pwr;
-       unsigned long max_pull;
-       unsigned long busiest_load_per_task, busiest_nr_running;
-       unsigned long this_load_per_task, this_nr_running;
-       int load_idx, group_imb = 0;
+ struct sd_lb_stats {
+       struct sched_group *busiest; /* Busiest group in this sd */
+       struct sched_group *this;  /* Local group in this sd */
+       unsigned long total_load;  /* Total load of all groups in sd */
+       unsigned long total_pwr;   /*   Total power of all groups in sd */
+       unsigned long avg_load;    /* Average load across all groups in sd */
+       /** Statistics of this group */
+       unsigned long this_load;
+       unsigned long this_load_per_task;
+       unsigned long this_nr_running;
+       /* Statistics of the busiest group */
+       unsigned long max_load;
+       unsigned long busiest_load_per_task;
+       unsigned long busiest_nr_running;
+       int group_imb; /* Is there an imbalance in this sd? */
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-       int power_savings_balance = 1;
-       unsigned long leader_nr_running = 0, min_load_per_task = 0;
-       unsigned long min_nr_running = ULONG_MAX;
-       struct sched_group *group_min = NULL, *group_leader = NULL;
+       int power_savings_balance; /* Is powersave balance needed for this sd */
+       struct sched_group *group_min; /* Least loaded group in sd */
+       struct sched_group *group_leader; /* Group which relieves group_min */
+       unsigned long min_load_per_task; /* load_per_task in group_min */
+       unsigned long leader_nr_running; /* Nr running of group_leader */
+       unsigned long min_nr_running; /* Nr running of group_min */
  #endif
+ };
  
-       max_load = this_load = total_load = total_pwr = 0;
-       busiest_load_per_task = busiest_nr_running = 0;
-       this_load_per_task = this_nr_running = 0;
+ /*
+  * sg_lb_stats - stats of a sched_group required for load balancing
+  */
+ struct sg_lb_stats {
+       unsigned long avg_load; /* Avg load across the CPUs of the group */
+       unsigned long group_load; /* Total load over the CPUs of the group */
+       unsigned long sum_nr_running; /* Nr tasks running in the group */
+       unsigned long sum_weighted_load; /* Weighted load of group's tasks */
+       unsigned long group_capacity;
+       int group_imb; /* Is there an imbalance in the group? */
+ };
  
-       if (idle == CPU_NOT_IDLE)
+ /**
+  * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+  * @group: The group whose first cpu is to be returned.
+  */
+ static inline unsigned int group_first_cpu(struct sched_group *group)
+ {
+       return cpumask_first(sched_group_cpus(group));
+ }
+ /**
+  * get_sd_load_idx - Obtain the load index for a given sched domain.
+  * @sd: The sched_domain whose load_idx is to be obtained.
+  * @idle: The idle status of the CPU whose sched_domain's load_idx is obtained.
+  */
+ static inline int get_sd_load_idx(struct sched_domain *sd,
+                                       enum cpu_idle_type idle)
+ {
+       int load_idx;
+       switch (idle) {
+       case CPU_NOT_IDLE:
                load_idx = sd->busy_idx;
-       else if (idle == CPU_NEWLY_IDLE)
+               break;
+       case CPU_NEWLY_IDLE:
                load_idx = sd->newidle_idx;
-       else
+               break;
+       default:
                load_idx = sd->idle_idx;
+               break;
+       }
  
-       do {
-               unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
-               int local_group;
-               int i;
-               int __group_imb = 0;
-               unsigned int balance_cpu = -1, first_idle_cpu = 0;
-               unsigned long sum_nr_running, sum_weighted_load;
-               unsigned long sum_avg_load_per_task;
-               unsigned long avg_load_per_task;
+       return load_idx;
+ }
  
-               local_group = cpumask_test_cpu(this_cpu,
-                                              sched_group_cpus(group));
  
-               if (local_group)
-                       balance_cpu = cpumask_first(sched_group_cpus(group));
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ /**
+  * init_sd_power_savings_stats - Initialize power savings statistics for
+  * the given sched_domain, during load balancing.
+  *
+  * @sd: Sched domain whose power-savings statistics are to be initialized.
+  * @sds: Variable containing the statistics for sd.
+  * @idle: Idle status of the CPU at which we're performing load-balancing.
+  */
+ static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+       struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ {
+       /*
+        * Busy processors will not participate in power savings
+        * balance.
+        */
+       if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
+               sds->power_savings_balance = 0;
+       else {
+               sds->power_savings_balance = 1;
+               sds->min_nr_running = ULONG_MAX;
+               sds->leader_nr_running = 0;
+       }
+ }
  
-               /* Tally up the load of all CPUs in the group */
-               sum_weighted_load = sum_nr_running = avg_load = 0;
-               sum_avg_load_per_task = avg_load_per_task = 0;
+ /**
+  * update_sd_power_savings_stats - Update the power saving stats for a
+  * sched_domain while performing load balancing.
+  *
+  * @group: sched_group belonging to the sched_domain under consideration.
+  * @sds: Variable containing the statistics of the sched_domain
+  * @local_group: Does group contain the CPU for which we're performing
+  *            load balancing?
+  * @sgs: Variable containing the statistics of the group.
+  */
+ static inline void update_sd_power_savings_stats(struct sched_group *group,
+       struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ {
  
-               max_cpu_load = 0;
-               min_cpu_load = ~0UL;
+       if (!sds->power_savings_balance)
+               return;
  
-               for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-                       struct rq *rq = cpu_rq(i);
+       /*
+        * If the local group is idle or completely loaded
+        * no need to do power savings balance at this domain
+        */
+       if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
+                               !sds->this_nr_running))
+               sds->power_savings_balance = 0;
  
-                       if (*sd_idle && rq->nr_running)
-                               *sd_idle = 0;
+       /*
+        * If a group is already running at full capacity or idle,
+        * don't include that group in power savings calculations
+        */
+       if (!sds->power_savings_balance ||
+               sgs->sum_nr_running >= sgs->group_capacity ||
+               !sgs->sum_nr_running)
+               return;
  
-                       /* Bias balancing toward cpus of our domain */
-                       if (local_group) {
-                               if (idle_cpu(i) && !first_idle_cpu) {
-                                       first_idle_cpu = 1;
-                                       balance_cpu = i;
-                               }
+       /*
+        * Calculate the group which has the least non-idle load.
+        * This is the group from where we need to pick up the load
+        * for saving power
+        */
+       if ((sgs->sum_nr_running < sds->min_nr_running) ||
+           (sgs->sum_nr_running == sds->min_nr_running &&
+            group_first_cpu(group) > group_first_cpu(sds->group_min))) {
+               sds->group_min = group;
+               sds->min_nr_running = sgs->sum_nr_running;
+               sds->min_load_per_task = sgs->sum_weighted_load /
+                                               sgs->sum_nr_running;
+       }
  
-                               load = target_load(i, load_idx);
-                       } else {
-                               load = source_load(i, load_idx);
-                               if (load > max_cpu_load)
-                                       max_cpu_load = load;
-                               if (min_cpu_load > load)
-                                       min_cpu_load = load;
-                       }
+       /*
+        * Calculate the group which is running close to its capacity
+        * but still has some room to pick up load from other groups
+        * and save more power.
+        */
+       if (sgs->sum_nr_running > sgs->group_capacity - 1)
+               return;
  
-                       avg_load += load;
-                       sum_nr_running += rq->nr_running;
-                       sum_weighted_load += weighted_cpuload(i);
+       if (sgs->sum_nr_running > sds->leader_nr_running ||
+           (sgs->sum_nr_running == sds->leader_nr_running &&
+            group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
+               sds->group_leader = group;
+               sds->leader_nr_running = sgs->sum_nr_running;
+       }
+ }
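
To make the selection rules above concrete, here is a toy example with made-up group sizes (simplified types, no tie-breaking on group_first_cpu): group_min ends up as the least-loaded non-idle group, and group_leader as the fullest group that still has a free slot to absorb it.

#include <stdio.h>

struct grp { int id, nr_running, capacity; };

int main(void)
{
        struct grp g[] = {
                { 0, 1, 4 },            /* lightly loaded           */
                { 1, 3, 4 },            /* busy, one slot free      */
                { 2, 4, 4 },            /* full: not a candidate    */
                { 3, 0, 4 },            /* idle: not a candidate    */
        };
        int min = -1, leader = -1;

        for (unsigned int i = 0; i < sizeof(g) / sizeof(g[0]); i++) {
                if (!g[i].nr_running || g[i].nr_running >= g[i].capacity)
                        continue;               /* idle or full: skip */
                if (min < 0 || g[i].nr_running < g[min].nr_running)
                        min = i;                /* least non-idle load */
                if (g[i].nr_running <= g[i].capacity - 1 &&
                    (leader < 0 || g[i].nr_running > g[leader].nr_running))
                        leader = i;             /* fullest group with room */
        }
        printf("group_min = %d, group_leader = %d\n", g[min].id, g[leader].id);
        return 0;
}

With these numbers the single task in group 0 could be pushed into group 1, after which group 0's CPUs can be left idle.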
  
-                       sum_avg_load_per_task += cpu_avg_load_per_task(i);
-               }
+ /**
+  * check_power_save_busiest_group - see if there is potential for some power-savings balance
+  * @sds: Variable containing the statistics of the sched_domain
+  *    under consideration.
+  * @this_cpu: Cpu at which we're currently performing load-balancing.
+  * @imbalance: Variable to store the imbalance.
+  *
+  * Description:
+  * Check if we have potential to perform some power-savings balance.
+  * If yes, set the busiest group to be the least loaded group in the
+  * sched_domain, so that its CPUs can be put to idle.
+  *
+  * Returns 1 if there is potential to perform power-savings balance.
+  * Else returns 0.
+  */
+ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+                                       int this_cpu, unsigned long *imbalance)
+ {
+       if (!sds->power_savings_balance)
+               return 0;
  
-               /*
-                * First idle cpu or the first cpu(busiest) in this sched group
-                * is eligible for doing load balancing at this and above
-                * domains. In the newly idle case, we will allow all the cpu's
-                * to do the newly idle load balance.
-                */
-               if (idle != CPU_NEWLY_IDLE && local_group &&
-                   balance_cpu != this_cpu && balance) {
-                       *balance = 0;
-                       goto ret;
-               }
+       if (sds->this != sds->group_leader ||
+                       sds->group_leader == sds->group_min)
+               return 0;
  
-               total_load += avg_load;
-               total_pwr += group->__cpu_power;
+       *imbalance = sds->min_load_per_task;
+       sds->busiest = sds->group_min;
  
-               /* Adjust by relative CPU power of the group */
-               avg_load = sg_div_cpu_power(group,
-                               avg_load * SCHED_LOAD_SCALE);
+       if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+               cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+                       group_first_cpu(sds->group_leader);
+       }
  
+       return 1;
  
-               /*
-                * Consider the group unbalanced when the imbalance is larger
-                * than the average weight of two tasks.
-                *
-                * APZ: with cgroup the avg task weight can vary wildly and
-                *      might not be a suitable number - should we keep a
-                *      normalized nr_running number somewhere that negates
-                *      the hierarchy?
-                */
-               avg_load_per_task = sg_div_cpu_power(group,
-                               sum_avg_load_per_task * SCHED_LOAD_SCALE);
+ }
+ #else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+ static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+       struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ {
+       return;
+ }
  
-               if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-                       __group_imb = 1;
+ static inline void update_sd_power_savings_stats(struct sched_group *group,
+       struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ {
+       return;
+ }
+ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+                                       int this_cpu, unsigned long *imbalance)
+ {
+       return 0;
+ }
+ #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
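
The #else branch above is the usual configured-out pattern: empty static inline stubs keep every caller free of #ifdefs and cost nothing at run time. A self-contained sketch of the idea (hypothetical feature and names):

#include <stdio.h>

struct stats { int samples; };

#ifdef FEATURE_POWER_BALANCE
static void update_power_stats(struct stats *s) { s->samples++; }
#else
/* Feature disabled: an empty static inline is optimized away entirely. */
static inline void update_power_stats(struct stats *s) { (void)s; }
#endif

int main(void)
{
        struct stats s = { 0 };

        update_power_stats(&s);         /* no-op unless the feature is on */
        printf("samples = %d\n", s.samples);
        return 0;
}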
  
-               group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
  
+ /**
+  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+  * @group: sched_group whose statistics are to be updated.
+  * @this_cpu: Cpu for which load balance is currently performed.
+  * @idle: Idle status of this_cpu
+  * @load_idx: Load index of sched_domain of this_cpu for load calc.
+  * @sd_idle: Idle status of the sched_domain containing group.
+  * @local_group: Does group contain this_cpu.
+  * @cpus: Set of cpus considered for load balancing.
+  * @balance: Should we balance.
+  * @sgs: variable to hold the statistics for this group.
+  */
+ static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+                       enum cpu_idle_type idle, int load_idx, int *sd_idle,
+                       int local_group, const struct cpumask *cpus,
+                       int *balance, struct sg_lb_stats *sgs)
+ {
+       unsigned long load, max_cpu_load, min_cpu_load;
+       int i;
+       unsigned int balance_cpu = -1, first_idle_cpu = 0;
+       unsigned long sum_avg_load_per_task;
+       unsigned long avg_load_per_task;
+       if (local_group)
+               balance_cpu = group_first_cpu(group);
+       /* Tally up the load of all CPUs in the group */
+       sum_avg_load_per_task = avg_load_per_task = 0;
+       max_cpu_load = 0;
+       min_cpu_load = ~0UL;
+       for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+               struct rq *rq = cpu_rq(i);
+               if (*sd_idle && rq->nr_running)
+                       *sd_idle = 0;
+               /* Bias balancing toward cpus of our domain */
                if (local_group) {
-                       this_load = avg_load;
-                       this = group;
-                       this_nr_running = sum_nr_running;
-                       this_load_per_task = sum_weighted_load;
-               } else if (avg_load > max_load &&
-                          (sum_nr_running > group_capacity || __group_imb)) {
-                       max_load = avg_load;
-                       busiest = group;
-                       busiest_nr_running = sum_nr_running;
-                       busiest_load_per_task = sum_weighted_load;
-                       group_imb = __group_imb;
+                       if (idle_cpu(i) && !first_idle_cpu) {
+                               first_idle_cpu = 1;
+                               balance_cpu = i;
+                       }
+                       load = target_load(i, load_idx);
+               } else {
+                       load = source_load(i, load_idx);
+                       if (load > max_cpu_load)
+                               max_cpu_load = load;
+                       if (min_cpu_load > load)
+                               min_cpu_load = load;
                }
  
- #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-               /*
-                * Busy processors will not participate in power savings
-                * balance.
-                */
-               if (idle == CPU_NOT_IDLE ||
-                               !(sd->flags & SD_POWERSAVINGS_BALANCE))
-                       goto group_next;
+               sgs->group_load += load;
+               sgs->sum_nr_running += rq->nr_running;
+               sgs->sum_weighted_load += weighted_cpuload(i);
  
-               /*
-                * If the local group is idle or completely loaded
-                * no need to do power savings balance at this domain
-                */
-               if (local_group && (this_nr_running >= group_capacity ||
-                                   !this_nr_running))
-                       power_savings_balance = 0;
+               sum_avg_load_per_task += cpu_avg_load_per_task(i);
+       }
  
-               /*
-                * If a group is already running at full capacity or idle,
-                * don't include that group in power savings calculations
-                */
-               if (!power_savings_balance || sum_nr_running >= group_capacity
-                   || !sum_nr_running)
-                       goto group_next;
+       /*
+        * The first idle CPU, or the first CPU (the busiest) in this sched
+        * group, is eligible for doing load balancing at this and above
+        * domains. In the newly idle case, we allow all the CPUs to do the
+        * newly idle load balance.
+        */
+       if (idle != CPU_NEWLY_IDLE && local_group &&
+           balance_cpu != this_cpu && balance) {
+               *balance = 0;
+               return;
+       }
  
-               /*
-                * Calculate the group which has the least non-idle load.
-                * This is the group from where we need to pick up the load
-                * for saving power
-                */
-               if ((sum_nr_running < min_nr_running) ||
-                   (sum_nr_running == min_nr_running &&
-                    cpumask_first(sched_group_cpus(group)) >
-                    cpumask_first(sched_group_cpus(group_min)))) {
-                       group_min = group;
-                       min_nr_running = sum_nr_running;
-                       min_load_per_task = sum_weighted_load /
-                                               sum_nr_running;
-               }
+       /* Adjust by relative CPU power of the group */
+       sgs->avg_load = sg_div_cpu_power(group,
+                       sgs->group_load * SCHED_LOAD_SCALE);
  
-               /*
-                * Calculate the group which is almost near its
-                * capacity but still has some space to pick up some load
-                * from other group and save more power
-                */
-               if (sum_nr_running <= group_capacity - 1) {
-                       if (sum_nr_running > leader_nr_running ||
-                           (sum_nr_running == leader_nr_running &&
-                            cpumask_first(sched_group_cpus(group)) <
-                            cpumask_first(sched_group_cpus(group_leader)))) {
-                               group_leader = group;
-                               leader_nr_running = sum_nr_running;
-                       }
+       /*
+        * Consider the group unbalanced when the imbalance is larger
+        * than the average weight of two tasks.
+        *
+        * APZ: with cgroup the avg task weight can vary wildly and
+        *      might not be a suitable number - should we keep a
+        *      normalized nr_running number somewhere that negates
+        *      the hierarchy?
+        */
+       avg_load_per_task = sg_div_cpu_power(group,
+                       sum_avg_load_per_task * SCHED_LOAD_SCALE);
+       if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+               sgs->group_imb = 1;
+       sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+ }
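
The group-imbalance test above compares the spread between the most and least loaded CPU against twice the average task weight. A quick numeric check with made-up, SCHED_LOAD_SCALE-style figures:

#include <stdio.h>

int main(void)
{
        unsigned long avg_load_per_task = 1024;         /* one task's weight */
        unsigned long cases[][2] = {
                { 3072, 1024 },         /* spread == 2 tasks: still balanced */
                { 3584, 1024 },         /* spread >  2 tasks: imbalanced     */
        };

        for (unsigned int i = 0; i < 2; i++) {
                unsigned long max = cases[i][0], min = cases[i][1];
                int group_imb = (max - min) > 2 * avg_load_per_task;

                printf("max=%lu min=%lu -> group_imb=%d\n", max, min, group_imb);
        }
        return 0;
}

A spread of exactly two average tasks is still treated as balanced; only a larger spread sets group_imb.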
+ /**
+  * update_sd_lb_stats - Update the sched_domain's statistics for load balancing.
+  * @sd: sched_domain whose statistics are to be updated.
+  * @this_cpu: Cpu for which load balance is currently performed.
+  * @idle: Idle status of this_cpu
+  * @sd_idle: Idle status of the sched_domain containing group.
+  * @cpus: Set of cpus considered for load balancing.
+  * @balance: Should we balance.
+  * @sds: variable to hold the statistics for this sched_domain.
+  */
+ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
+                       enum cpu_idle_type idle, int *sd_idle,
+                       const struct cpumask *cpus, int *balance,
+                       struct sd_lb_stats *sds)
+ {
+       struct sched_group *group = sd->groups;
+       struct sg_lb_stats sgs;
+       int load_idx;
+       init_sd_power_savings_stats(sd, sds, idle);
+       load_idx = get_sd_load_idx(sd, idle);
+       do {
+               int local_group;
+               local_group = cpumask_test_cpu(this_cpu,
+                                              sched_group_cpus(group));
+               memset(&sgs, 0, sizeof(sgs));
+               update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+                               local_group, cpus, balance, &sgs);
+               if (local_group && balance && !(*balance))
+                       return;
+               sds->total_load += sgs.group_load;
+               sds->total_pwr += group->__cpu_power;
+               if (local_group) {
+                       sds->this_load = sgs.avg_load;
+                       sds->this = group;
+                       sds->this_nr_running = sgs.sum_nr_running;
+                       sds->this_load_per_task = sgs.sum_weighted_load;
+               } else if (sgs.avg_load > sds->max_load &&
+                          (sgs.sum_nr_running > sgs.group_capacity ||
+                               sgs.group_imb)) {
+                       sds->max_load = sgs.avg_load;
+                       sds->busiest = group;
+                       sds->busiest_nr_running = sgs.sum_nr_running;
+                       sds->busiest_load_per_task = sgs.sum_weighted_load;
+                       sds->group_imb = sgs.group_imb;
                }
- group_next:
- #endif
+               update_sd_power_savings_stats(group, sds, local_group, &sgs);
                group = group->next;
        } while (group != sd->groups);
  
-       if (!busiest || this_load >= max_load || busiest_nr_running == 0)
-               goto out_balanced;
-       avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
+ }
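
For intuition about how the per-group numbers roll up, here is a simplified standalone model (made-up figures; it ignores the local group, the balance_cpu check and the group_imb flag): each group's load is scaled by its power, the domain-wide average is SCHED_LOAD_SCALE * total_load / total_pwr, and the most loaded overloaded group is remembered as the busiest.

#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL

struct group_stats {
        unsigned long group_load;       /* sum of CPU loads in the group */
        unsigned long avg_load;         /* load adjusted by group power  */
        unsigned long nr_running;
        unsigned long capacity;
        unsigned long power;            /* __cpu_power analogue          */
};

int main(void)
{
        struct group_stats g[] = {
                { 2048, 0, 2, 2, 1024 },
                { 5120, 0, 5, 2, 1024 },        /* overloaded */
        };
        unsigned long total_load = 0, total_pwr = 0, max_load = 0;
        int busiest = -1;

        for (int i = 0; i < 2; i++) {
                g[i].avg_load = g[i].group_load * SCHED_LOAD_SCALE / g[i].power;
                total_load += g[i].group_load;
                total_pwr  += g[i].power;
                if (g[i].avg_load > max_load && g[i].nr_running > g[i].capacity) {
                        max_load = g[i].avg_load;
                        busiest = i;
                }
        }
        printf("domain avg_load = %lu, busiest group = %d (avg_load %lu)\n",
               SCHED_LOAD_SCALE * total_load / total_pwr, busiest, max_load);
        return 0;
}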
  
-       if (this_load >= avg_load ||
-                       100*max_load <= sd->imbalance_pct*this_load)
-               goto out_balanced;
+ /**
+  * fix_small_imbalance - Calculate the minor imbalance that exists
+  *                    amongst the groups of a sched_domain, during
+  *                    load balancing.
+  * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
+  * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+  * @imbalance: Variable to store the imbalance.
+  */
+ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
+                               int this_cpu, unsigned long *imbalance)
+ {
+       unsigned long tmp, pwr_now = 0, pwr_move = 0;
+       unsigned int imbn = 2;
+       if (sds->this_nr_running) {
+               sds->this_load_per_task /= sds->this_nr_running;
+               if (sds->busiest_load_per_task >
+                               sds->this_load_per_task)
+                       imbn = 1;
+       } else
+               sds->this_load_per_task =
+                       cpu_avg_load_per_task(this_cpu);
  
-       busiest_load_per_task /= busiest_nr_running;
-       if (group_imb)
-               busiest_load_per_task = min(busiest_load_per_task, avg_load);
+       if (sds->max_load - sds->this_load + sds->busiest_load_per_task >=
+                       sds->busiest_load_per_task * imbn) {
+               *imbalance = sds->busiest_load_per_task;
+               return;
+       }
  
        /*
-        * We're trying to get all the cpus to the average_load, so we don't
-        * want to push ourselves above the average load, nor do we wish to
-        * reduce the max loaded cpu below the average load, as either of these
-        * actions would just result in more rebalancing later, and ping-pong
-        * tasks around. Thus we look for the minimum possible imbalance.
-        * Negative imbalances (*we* are more loaded than anyone else) will
-        * be counted as no imbalance for these purposes -- we can't fix that
-        * by pulling tasks to us. Be careful of negative numbers as they'll
-        * appear as very large values with unsigned longs.
+        * OK, we don't have enough imbalance to justify moving tasks,
+        * however we may be able to increase total CPU power used by
+        * moving them.
         */
-       if (max_load <= busiest_load_per_task)
-               goto out_balanced;
  
+       pwr_now += sds->busiest->__cpu_power *
+                       min(sds->busiest_load_per_task, sds->max_load);
+       pwr_now += sds->this->__cpu_power *
+                       min(sds->this_load_per_task, sds->this_load);
+       pwr_now /= SCHED_LOAD_SCALE;
+       /* Amount of load we'd subtract */
+       tmp = sg_div_cpu_power(sds->busiest,
+                       sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+       if (sds->max_load > tmp)
+               pwr_move += sds->busiest->__cpu_power *
+                       min(sds->busiest_load_per_task, sds->max_load - tmp);
+       /* Amount of load we'd add */
+       if (sds->max_load * sds->busiest->__cpu_power <
+               sds->busiest_load_per_task * SCHED_LOAD_SCALE)
+               tmp = sg_div_cpu_power(sds->this,
+                       sds->max_load * sds->busiest->__cpu_power);
+       else
+               tmp = sg_div_cpu_power(sds->this,
+                       sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+       pwr_move += sds->this->__cpu_power *
+                       min(sds->this_load_per_task, sds->this_load + tmp);
+       pwr_move /= SCHED_LOAD_SCALE;
+       /* Move if we gain throughput */
+       if (pwr_move > pwr_now)
+               *imbalance = sds->busiest_load_per_task;
+ }
+ /**
+  * calculate_imbalance - Calculate the amount of imbalance present within the
+  *                     groups of a given sched_domain during load balancing.
+  * @sds: statistics of the sched_domain whose imbalance is to be calculated.
+  * @this_cpu: Cpu for which currently load balance is being performed.
+  * @imbalance: The variable to store the imbalance.
+  */
+ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
+               unsigned long *imbalance)
+ {
+       unsigned long max_pull;
        /*
         * In the presence of smp nice balancing, certain scenarios can have
         * max load less than avg load(as we skip the groups at or below
         * its cpu_power, while calculating max_load..)
         */
-       if (max_load < avg_load) {
+       if (sds->max_load < sds->avg_load) {
                *imbalance = 0;
-               goto small_imbalance;
+               return fix_small_imbalance(sds, this_cpu, imbalance);
        }
  
        /* Don't want to pull so many tasks that a group would go idle */
-       max_pull = min(max_load - avg_load, max_load - busiest_load_per_task);
+       max_pull = min(sds->max_load - sds->avg_load,
+                       sds->max_load - sds->busiest_load_per_task);
  
        /* How much load to actually move to equalise the imbalance */
-       *imbalance = min(max_pull * busiest->__cpu_power,
-                               (avg_load - this_load) * this->__cpu_power)
+       *imbalance = min(max_pull * sds->busiest->__cpu_power,
+               (sds->avg_load - sds->this_load) * sds->this->__cpu_power)
                        / SCHED_LOAD_SCALE;
  
        /*
         * a think about bumping its value to force at least one task to be
         * moved
         */
-       if (*imbalance < busiest_load_per_task) {
-               unsigned long tmp, pwr_now, pwr_move;
-               unsigned int imbn;
- small_imbalance:
-               pwr_move = pwr_now = 0;
-               imbn = 2;
-               if (this_nr_running) {
-                       this_load_per_task /= this_nr_running;
-                       if (busiest_load_per_task > this_load_per_task)
-                               imbn = 1;
-               } else
-                       this_load_per_task = cpu_avg_load_per_task(this_cpu);
+       if (*imbalance < sds->busiest_load_per_task)
+               return fix_small_imbalance(sds, this_cpu, imbalance);
  
-               if (max_load - this_load + busiest_load_per_task >=
-                                       busiest_load_per_task * imbn) {
-                       *imbalance = busiest_load_per_task;
-                       return busiest;
-               }
+ }
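
Working the two formulas above through with made-up numbers (both groups at __cpu_power == SCHED_LOAD_SCALE == 1024): max_pull caps the transfer so the busiest group is neither pulled below the domain average nor left idle, and the result is further limited by how much load this group can take before it exceeds the average.

#include <stdio.h>

#define SCALE 1024UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned long max_load = 3072, avg_load = 2048, this_load = 1024;
        unsigned long busiest_load_per_task = 1024;
        unsigned long busiest_power = 1024, this_power = 1024;

        unsigned long max_pull = min_ul(max_load - avg_load,
                                        max_load - busiest_load_per_task);

        unsigned long imbalance = min_ul(max_pull * busiest_power,
                                         (avg_load - this_load) * this_power)
                                  / SCALE;

        printf("max_pull = %lu, imbalance = %lu\n", max_pull, imbalance);
        return 0;
}

Here max_pull is 1024 and the imbalance works out to 1024, i.e. one average task's worth of weighted load.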
+ /******* find_busiest_group() helpers end here *********************/
  
-               /*
-                * OK, we don't have enough imbalance to justify moving tasks,
-                * however we may be able to increase total CPU power used by
-                * moving them.
-                */
+ /**
+  * find_busiest_group - Returns the busiest group within the sched_domain
+  * if there is an imbalance. If there isn't an imbalance, and
+  * the user has opted for power-savings, it returns a group whose
+  * CPUs can be put to idle by rebalancing those tasks elsewhere, if
+  * such a group exists.
+  *
+  * Also calculates the amount of weighted load which should be moved
+  * to restore balance.
+  *
+  * @sd: The sched_domain whose busiest group is to be returned.
+  * @this_cpu: The cpu for which load balancing is currently being performed.
+  * @imbalance: Variable which stores amount of weighted load which should
+  *            be moved to restore balance/put a group to idle.
+  * @idle: The idle status of this_cpu.
+  * @sd_idle: The idleness of sd
+  * @cpus: The set of CPUs under consideration for load-balancing.
+  * @balance: Pointer to a variable indicating if this_cpu
+  *    is the appropriate cpu to perform load balancing at this level.
+  *
+  * Returns:   - the busiest group if imbalance exists.
+  *            - If no imbalance and user has opted for power-savings balance,
+  *               return the least loaded group whose CPUs can be
+  *               put to idle by rebalancing its tasks onto our group.
+  */
+ static struct sched_group *
+ find_busiest_group(struct sched_domain *sd, int this_cpu,
+                  unsigned long *imbalance, enum cpu_idle_type idle,
+                  int *sd_idle, const struct cpumask *cpus, int *balance)
+ {
+       struct sd_lb_stats sds;
  
-               pwr_now += busiest->__cpu_power *
-                               min(busiest_load_per_task, max_load);
-               pwr_now += this->__cpu_power *
-                               min(this_load_per_task, this_load);
-               pwr_now /= SCHED_LOAD_SCALE;
-               /* Amount of load we'd subtract */
-               tmp = sg_div_cpu_power(busiest,
-                               busiest_load_per_task * SCHED_LOAD_SCALE);
-               if (max_load > tmp)
-                       pwr_move += busiest->__cpu_power *
-                               min(busiest_load_per_task, max_load - tmp);
-               /* Amount of load we'd add */
-               if (max_load * busiest->__cpu_power <
-                               busiest_load_per_task * SCHED_LOAD_SCALE)
-                       tmp = sg_div_cpu_power(this,
-                                       max_load * busiest->__cpu_power);
-               else
-                       tmp = sg_div_cpu_power(this,
-                               busiest_load_per_task * SCHED_LOAD_SCALE);
-               pwr_move += this->__cpu_power *
-                               min(this_load_per_task, this_load + tmp);
-               pwr_move /= SCHED_LOAD_SCALE;
+       memset(&sds, 0, sizeof(sds));
  
-               /* Move if we gain throughput */
-               if (pwr_move > pwr_now)
-                       *imbalance = busiest_load_per_task;
-       }
+       /*
+        * Compute the various statistics relevant for load balancing at
+        * this level.
+        */
+       update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
+                                       balance, &sds);
+       /*
+        * Cases where no imbalance exists from the POV of this_cpu:
+        * 1) this_cpu is not the appropriate cpu to perform load balancing
+        *    at this level.
+        * 2) There is no busy sibling group to pull from.
+        * 3) This group is the busiest group.
+        * 4) This group is busier than the average busyness at this
+        *    sched_domain.
+        * 5) The imbalance is within the specified limit.
+        * 6) Any rebalance would lead to ping-pong.
+        */
+       if (balance && !(*balance))
+               goto ret;
  
-       return busiest;
+       if (!sds.busiest || sds.busiest_nr_running == 0)
+               goto out_balanced;
  
- out_balanced:
- #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-       if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-               goto ret;
+       if (sds.this_load >= sds.max_load)
+               goto out_balanced;
  
-       if (this == group_leader && group_leader != group_min) {
-               *imbalance = min_load_per_task;
-               if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
-                       cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
-                               cpumask_first(sched_group_cpus(group_leader));
-               }
-               return group_min;
-       }
- #endif
+       sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
+       if (sds.this_load >= sds.avg_load)
+               goto out_balanced;
+       if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+               goto out_balanced;
+       sds.busiest_load_per_task /= sds.busiest_nr_running;
+       if (sds.group_imb)
+               sds.busiest_load_per_task =
+                       min(sds.busiest_load_per_task, sds.avg_load);
+       /*
+        * We're trying to get all the cpus to the average_load, so we don't
+        * want to push ourselves above the average load, nor do we wish to
+        * reduce the max loaded cpu below the average load, as either of these
+        * actions would just result in more rebalancing later, and ping-pong
+        * tasks around. Thus we look for the minimum possible imbalance.
+        * Negative imbalances (*we* are more loaded than anyone else) will
+        * be counted as no imbalance for these purposes -- we can't fix that
+        * by pulling tasks to us. Be careful of negative numbers as they'll
+        * appear as very large values with unsigned longs.
+        */
+       if (sds.max_load <= sds.busiest_load_per_task)
+               goto out_balanced;
+       /* Looks like there is an imbalance. Compute it */
+       calculate_imbalance(&sds, this_cpu, imbalance);
+       return sds.busiest;
+ out_balanced:
+       /*
+        * There is no obvious imbalance. But check if we can do some balancing
+        * to save power.
+        */
+       if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
+               return sds.busiest;
  ret:
        *imbalance = 0;
        return NULL;
@@@ -3448,23 -3818,19 +3818,23 @@@ find_busiest_queue(struct sched_group *
   */
  #define MAX_PINNED_INTERVAL   512
  
 +/* Working cpumask for load_balance and load_balance_newidle. */
 +static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 +
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
 -                      int *balance, struct cpumask *cpus)
 +                      int *balance)
  {
        int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
        unsigned long flags;
 +      struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
        cpumask_setall(cpus);
  
@@@ -3619,7 -3985,8 +3989,7 @@@ out
   * this_rq is locked.
   */
  static int
 -load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 -                      struct cpumask *cpus)
 +load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
  {
        struct sched_group *group;
        struct rq *busiest = NULL;
        int ld_moved = 0;
        int sd_idle = 0;
        int all_pinned = 0;
 +      struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
        cpumask_setall(cpus);
  
@@@ -3768,6 -4134,10 +4138,6 @@@ static void idle_balance(int this_cpu, 
        struct sched_domain *sd;
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
 -      cpumask_var_t tmpmask;
 -
 -      if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
 -              return;
  
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
                if (sd->flags & SD_BALANCE_NEWIDLE)
                        /* If we've pulled tasks over stop searching: */
                        pulled_task = load_balance_newidle(this_cpu, this_rq,
 -                                                         sd, tmpmask);
 +                                                         sd);
  
                interval = msecs_to_jiffies(sd->balance_interval);
                if (time_after(next_balance, sd->last_balance + interval))
                 */
                this_rq->next_balance = next_balance;
        }
 -      free_cpumask_var(tmpmask);
  }
  
  /*
@@@ -3942,6 -4313,11 +4312,6 @@@ static void rebalance_domains(int cpu, 
        unsigned long next_balance = jiffies + 60*HZ;
        int update_next_balance = 0;
        int need_serialize;
 -      cpumask_var_t tmp;
 -
 -      /* Fails alloc?  Rebalancing probably not a priority right now. */
 -      if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
 -              return;
  
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
                }
  
                if (time_after_eq(jiffies, sd->last_balance + interval)) {
 -                      if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
 +                      if (load_balance(cpu, rq, sd, idle, &balance)) {
                                /*
                                 * We've pulled tasks over so either we're no
                                 * longer idle, or one of our SMT siblings is
@@@ -4000,6 -4376,8 +4370,6 @@@ out
         */
        if (likely(update_next_balance))
                rq->next_balance = next_balance;
 -
 -      free_cpumask_var(tmp);
  }
  
  /*
@@@ -4049,6 -4427,11 +4419,11 @@@ static void run_rebalance_domains(struc
  #endif
  }
  
+ static inline int on_null_domain(int cpu)
+ {
+       return !rcu_dereference(cpu_rq(cpu)->sd);
+ }
  /*
   * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
   *
@@@ -4106,7 -4489,9 +4481,9 @@@ static inline void trigger_load_balance
            cpumask_test_cpu(cpu, nohz.cpu_mask))
                return;
  #endif
-       if (time_after_eq(jiffies, rq->next_balance))
+       /* Don't need to rebalance while attached to NULL domain */
+       if (time_after_eq(jiffies, rq->next_balance) &&
+           likely(!on_null_domain(cpu)))
                raise_softirq(SCHED_SOFTIRQ);
  }
  
@@@ -4500,11 -4885,33 +4877,33 @@@ static inline void schedule_debug(struc
  #endif
  }
  
+ static void put_prev_task(struct rq *rq, struct task_struct *prev)
+ {
+       if (prev->state == TASK_RUNNING) {
+               u64 runtime = prev->se.sum_exec_runtime;
+               runtime -= prev->se.prev_sum_exec_runtime;
+               runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+               /*
+                * In order to avoid avg_overlap growing stale when we are
+                * indeed overlapping and hence not getting put to sleep, grow
+                * the avg_overlap on preemption.
+                *
+                * We use the average preemption runtime because that
+                * correlates to the amount of cache footprint a task can
+                * build up.
+                */
+               update_avg(&prev->se.avg_overlap, runtime);
+       }
+       prev->sched_class->put_prev_task(rq, prev);
+ }
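
put_prev_task() now feeds the clamped preemption runtime into avg_overlap via update_avg(), which behaves roughly like an exponential moving average; the 1/8 step in the sketch below is my reading of that helper and should be treated as an assumption, while the clamp to twice sysctl_sched_migration_cost comes straight from the hunk above.

#include <stdint.h>
#include <stdio.h>

/* Assumed behaviour of update_avg(): move ~1/8 of the way to the sample. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
        int64_t diff = (int64_t)sample - (int64_t)*avg;

        *avg += diff >> 3;
}

int main(void)
{
        const uint64_t migration_cost = 500000;         /* 0.5 ms, made up */
        uint64_t avg_overlap = 0;
        uint64_t runtimes[] = { 200000, 5000000, 300000 };

        for (unsigned int i = 0; i < 3; i++) {
                uint64_t rt = runtimes[i];

                if (rt > 2 * migration_cost)            /* clamp long runs */
                        rt = 2 * migration_cost;
                update_avg(&avg_overlap, rt);
                printf("runtime %llu -> avg_overlap %llu\n",
                       (unsigned long long)runtimes[i],
                       (unsigned long long)avg_overlap);
        }
        return 0;
}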
  /*
   * Pick up the highest-prio task:
   */
  static inline struct task_struct *
- pick_next_task(struct rq *rq, struct task_struct *prev)
+ pick_next_task(struct rq *rq)
  {
        const struct sched_class *class;
        struct task_struct *p;
@@@ -4578,8 -4985,8 +4977,8 @@@ need_resched_nonpreemptible
        if (unlikely(!rq->nr_running))
                idle_balance(cpu, rq);
  
-       prev->sched_class->put_prev_task(rq, prev);
-       next = pick_next_task(rq, prev);
+       put_prev_task(rq, prev);
+       next = pick_next_task(rq);
  
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
@@@ -4634,7 -5041,7 +5033,7 @@@ asmlinkage void __sched preempt_schedul
                 * between schedule and now.
                 */
                barrier();
-       } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+       } while (need_resched());
  }
  EXPORT_SYMBOL(preempt_schedule);
  
@@@ -4663,7 -5070,7 +5062,7 @@@ asmlinkage void __sched preempt_schedul
                 * between schedule and now.
                 */
                barrier();
-       } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+       } while (need_resched());
  }
  
  #endif /* CONFIG_PREEMPT */
@@@ -5137,7 -5544,7 +5536,7 @@@ SYSCALL_DEFINE1(nice, int, increment
        if (increment > 40)
                increment = 40;
  
-       nice = PRIO_TO_NICE(current->static_prio) + increment;
+       nice = TASK_NICE(current) + increment;
        if (nice < -20)
                nice = -20;
        if (nice > 19)
@@@ -6410,7 -6817,7 +6809,7 @@@ static void migrate_dead_tasks(unsigne
                if (!rq->nr_running)
                        break;
                update_rq_clock(rq);
-               next = pick_next_task(rq, rq->curr);
+               next = pick_next_task(rq);
                if (!next)
                        break;
                next->sched_class->put_prev_task(rq, next);
@@@ -7241,7 -7648,7 +7640,7 @@@ cpu_to_core_group(int cpu, const struc
  {
        int group;
  
 -      cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
 +      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
        if (sg)
                *sg = &per_cpu(sched_group_core, group).sg;
@@@ -7270,7 -7677,7 +7669,7 @@@ cpu_to_phys_group(int cpu, const struc
        cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
        group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
 -      cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
 +      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
  #else
        group = cpu;
@@@ -7613,7 -8020,7 +8012,7 @@@ static int __build_sched_domains(const 
                SD_INIT(sd, SIBLING);
                set_domain_attribute(sd, attr);
                cpumask_and(sched_domain_span(sd),
 -                          &per_cpu(cpu_sibling_map, i), cpu_map);
 +                          topology_thread_cpumask(i), cpu_map);
                sd->parent = p;
                p->child = sd;
                cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
        /* Set up CPU (sibling) groups */
        for_each_cpu(i, cpu_map) {
                cpumask_and(this_sibling_map,
 -                          &per_cpu(cpu_sibling_map, i), cpu_map);
 +                          topology_thread_cpumask(i), cpu_map);
                if (i != cpumask_first(this_sibling_map))
                        continue;
  
@@@ -8205,11 -8612,15 +8604,15 @@@ static void init_rt_rq(struct rt_rq *rt
        __set_bit(MAX_RT_PRIO, array->bitmap);
  
  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       rt_rq->highest_prio = MAX_RT_PRIO;
+       rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ #ifdef CONFIG_SMP
+       rt_rq->highest_prio.next = MAX_RT_PRIO;
+ #endif
  #endif
  #ifdef CONFIG_SMP
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
+       plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
  #endif
  
        rt_rq->rt_time = 0;
@@@ -8295,9 -8706,6 +8698,9 @@@ void __init sched_init(void
  #endif
  #ifdef CONFIG_USER_SCHED
        alloc_size *= 2;
 +#endif
 +#ifdef CONFIG_CPUMASK_OFFSTACK
 +      alloc_size += num_possible_cpus() * cpumask_size();
  #endif
        /*
         * As sched_init() is called before page_alloc is setup,
                ptr += nr_cpu_ids * sizeof(void **);
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
 +#ifdef CONFIG_CPUMASK_OFFSTACK
 +              for_each_possible_cpu(i) {
 +                      per_cpu(load_balance_tmpmask, i) = (void *)ptr;
 +                      ptr += cpumask_size();
 +              }
 +#endif /* CONFIG_CPUMASK_OFFSTACK */
        }
  
  #ifdef CONFIG_SMP
@@@ -9594,7 -9996,7 +9997,7 @@@ static void cpuacct_charge(struct task_
        struct cpuacct *ca;
        int cpu;
  
-       if (!cpuacct_subsys.active)
+       if (unlikely(!cpuacct_subsys.active))
                return;
  
        cpu = task_cpu(tsk);
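
The added unlikely() only hints to the compiler that an inactive cpuacct subsystem is the cold case; in the kernel this is normally a thin wrapper around __builtin_expect(). A minimal standalone illustration:

#include <stdio.h>

#define unlikely(x) __builtin_expect(!!(x), 0)

static long charge(long cputime, int subsys_active)
{
        if (unlikely(!subsys_active))   /* cold path, laid out out-of-line */
                return 0;
        return cputime;                 /* hot path */
}

int main(void)
{
        printf("%ld\n", charge(42, 1));
        printf("%ld\n", charge(42, 0));
        return 0;
}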
diff --combined mm/vmscan.c
index 592bb9619f75ace42bb83aea694185000d499966,56ddf41149eb77a55158ced628c02f817b990618..1cdbf0b057278e8f157da93bb6eda0dc80644387
@@@ -1262,7 -1262,6 +1262,6 @@@ static void shrink_active_list(unsigne
         * Move the pages to the [file or anon] inactive list.
         */
        pagevec_init(&pvec, 1);
-       pgmoved = 0;
        lru = LRU_BASE + file * LRU_FILE;
  
        spin_lock_irq(&zone->lru_lock);
         */
        reclaim_stat->recent_rotated[!!file] += pgmoved;
  
+       pgmoved = 0;
        while (!list_empty(&l_inactive)) {
                page = lru_to_page(&l_inactive);
                prefetchw_prev_lru_page(page, &l_inactive, flags);
@@@ -1963,7 -1963,7 +1963,7 @@@ static int kswapd(void *p
        struct reclaim_state reclaim_state = {
                .reclaimed_slab = 0,
        };
 -      node_to_cpumask_ptr(cpumask, pgdat->node_id);
 +      const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
        if (!cpumask_empty(cpumask))
                set_cpus_allowed_ptr(tsk, cpumask);
@@@ -2198,9 -2198,7 +2198,9 @@@ static int __devinit cpu_callback(struc
        if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
                for_each_node_state(nid, N_HIGH_MEMORY) {
                        pg_data_t *pgdat = NODE_DATA(nid);
 -                      node_to_cpumask_ptr(mask, pgdat->node_id);
 +                      const struct cpumask *mask;
 +
 +                      mask = cpumask_of_node(pgdat->node_id);
  
                        if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
                                /* One of our CPUs online: restore mask */