* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (156 commits)
[PATCH] x86-64: Export smp_call_function_single
[PATCH] i386: Clean up smp_tune_scheduling()
[PATCH] unwinder: move .eh_frame to RODATA
[PATCH] unwinder: fully support linker generated .eh_frame_hdr section
[PATCH] x86-64: don't use set_irq_regs()
[PATCH] x86-64: check vector in setup_ioapic_dest to verify if need setup_IO_APIC_irq
[PATCH] x86-64: Make ix86 default to HIGHMEM4G instead of NOHIGHMEM
[PATCH] i386: replace kmalloc+memset with kzalloc
[PATCH] x86-64: remove remaining pc98 code
[PATCH] x86-64: remove unused variable
[PATCH] x86-64: Fix constraints in atomic_add_return()
[PATCH] x86-64: fix asm constraints in i386 atomic_add_return
[PATCH] x86-64: Correct documentation for bzImage protocol v2.05
[PATCH] x86-64: replace kmalloc+memset with kzalloc in MTRR code
[PATCH] x86-64: Fix numaq build error
[PATCH] x86-64: include/asm-x86_64/cpufeature.h isn't a userspace header
[PATCH] unwinder: Add debugging output to the Dwarf2 unwinder
[PATCH] x86-64: Clarify error message in GART code
[PATCH] x86-64: Fix interrupt race in idle callback (3rd try)
[PATCH] x86-64: Remove unwind stack pointer alignment forcing again
...
Fixed conflict in include/linux/uaccess.h manually
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages.
- noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing
-
i8042.direct [HW] Put keyboard port into non-translated mode
i8042.dumbkbd [HW] Pretend that controller can only read data from
keyboard and cannot control its state
idle= [HW]
Format: idle=poll or idle=halt
+ ignore_loglevel [KNL]
+ Ignore loglevel setting - this will print /all/
+ kernel messages to the console. Useful for debugging.
+
ihash_entries= [KNL]
Set number of hash buckets for inode cache.
Format: <RDP>,<reset>,<pci_scan>,<verbosity>
isolcpus= [KNL,SMP] Isolate CPUs from the general scheduler.
- Format: <cpu number>,...,<cpu number>
+ Format:
+ <cpu number>,...,<cpu number>
+ or
+ <cpu number>-<cpu number> (must be a positive range in ascending order)
+ or a mixture
+ <cpu number>,...,<cpu number>-<cpu number>
This option can be used to specify one or more CPUs
to isolate from the general SMP balancing and scheduling
algorithms. The only way to move a process onto or off
emulation library even if a 387 maths coprocessor
is present.
+ noaliencache [MM, NUMA] Disables the allcoation of alien caches in
+ the slab allocator. Saves per-node memory, but will
+ impact performance on real NUMA hardware.
+
noalign [KNL,ARM]
noapic [SMP,APIC] Tells the kernel to not make use of any
in certain environments such as networked servers or
real-time systems.
+ noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing
+
noirqdebug [IA-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
+ no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for
+ broken timer IRQ sources.
+
noisapnp [ISAPNP] Disables ISA PnP code.
noinitrd [RAM] Tells the kernel not to load any configured
Param: "schedule" - profile schedule points.
Param: <number> - step/bucket size as a power of 2 for
statistical time based profiling.
+ Param: "sleep" - profile D-state sleeping (millisecs)
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
resume= [SWSUSP]
Specify the partition device for software suspend
+ resume_offset= [SWSUSP]
+ Specify the offset from the beginning of the partition
+ given by "resume=" at which the swap header is located,
+ in <PAGE_SIZE> units (needed only for swap files).
+ See Documentation/power/swsusp-and-swap-files.txt
+
rhash_entries= [KNL,NET]
Set number of hash buckets for route cache
norandmaps Don't use address space randomization
Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
+ unwind_debug=N N > 0 will enable dwarf2 unwinder debugging
+ This is useful to get more information why
+ you got a "dwarf2 unwinder stuck"
______________________________________________________________________
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
- #include <linux/fs.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
return err;
}
-#ifdef CONFIG_HOTPLUG_CPU
static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
{
.notifier_call = cpuid_class_cpu_callback,
};
-#endif /* !CONFIG_HOTPLUG_CPU */
static int __init cpuid_init(void)
{
#include <linux/pci.h>
#include <linux/msi.h>
#include <linux/htirq.h>
+#include <linux/freezer.h>
#include <asm/io.h>
#include <asm/smp.h>
* the interrupt, and we need to make sure the entry is fully populated
* before that happens.
*/
- static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+ static void
+ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- unsigned long flags;
union entry_union eu;
eu.entry = e;
- spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ }
+
+ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+ {
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(apic, pin, e);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) (0)
- /* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
- #define default_NEC98_trigger(idx) (0)
- #define default_NEC98_polarity(idx) (0)
-
static int __init MPBIOS_polarity(int idx)
{
int bus = mp_irqs[idx].mpc_srcbus;
polarity = default_MCA_polarity(idx);
break;
}
- case MP_BUS_NEC98: /* NEC 98 pin */
- {
- polarity = default_NEC98_polarity(idx);
- break;
- }
default:
{
printk(KERN_WARNING "broken BIOS!!\n");
trigger = default_MCA_trigger(idx);
break;
}
- case MP_BUS_NEC98: /* NEC 98 pin */
- {
- trigger = default_NEC98_trigger(idx);
- break;
- }
default:
{
printk(KERN_WARNING "broken BIOS!!\n");
case MP_BUS_ISA: /* ISA pin */
case MP_BUS_EISA:
case MP_BUS_MCA:
- case MP_BUS_NEC98:
{
irq = mp_irqs[idx].mpc_srcbusirq;
break;
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
- u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
static int __assign_irq_vector(int irq)
{
if (!apic && (irq < 16))
disable_8259A_irq(irq);
}
- ioapic_write_entry(apic, pin, entry);
spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(apic, pin, entry);
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void __init setup_ioapic_ids_from_mpc(void) { }
#endif
+ static int no_timer_check __initdata;
+
+ static int __init notimercheck(char *s)
+ {
+ no_timer_check = 1;
+ return 1;
+ }
+ __setup("no_timer_check", notimercheck);
+
/*
* There is a nasty bug in some older SMP boards, their mptable lies
* about the timer IRQ. We do the following to work around the situation:
* - if this function detects that timer IRQs are defunct, then we fall
* back to ISA timer IRQs
*/
- static int __init timer_irq_works(void)
+ int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
+ if (no_timer_check)
+ return 1;
+
local_irq_enable();
/* Let ten ticks pass... */
mdelay((10 * 1000) / HZ);
unsigned char save_control, save_freq_select;
pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
apic = find_isa_irq_apic(8, mp_INT);
- if (pin == -1)
+ if (apic == -1) {
+ WARN_ON_ONCE(1);
return;
+ }
entry0 = ioapic_read_entry(apic, pin);
clear_IO_APIC_pin(apic, pin);
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
* fanatically on his truly buggy board.
*/
- static inline void check_timer(void)
+ static inline void __init check_timer(void)
{
int apic1, pin1, apic2, pin2;
int vector;
if (!ioapic && (irq < 16))
disable_8259A_irq(irq);
- ioapic_write_entry(ioapic, pin, entry);
spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(ioapic, pin, entry);
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
{
const u32 __user *tmp = (const u32 __user *)buf;
u32 data[2];
- size_t rv;
u32 reg = *ppos;
int cpu = iminor(file->f_dentry->d_inode);
int err;
if (count % 8)
return -EINVAL; /* Invalid chunk size */
- for (rv = 0; count; count -= 8) {
+ for (; count; count -= 8) {
if (copy_from_user(&data, tmp, 8))
return -EFAULT;
err = do_wrmsr(cpu, reg, data[0], data[1]);
return err;
}
-#ifdef CONFIG_HOTPLUG_CPU
static int msr_class_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
{
.notifier_call = msr_class_cpu_callback,
};
-#endif
static int __init msr_init(void)
{
#include <setup_arch.h>
#include <bios_ebda.h>
- /* Forward Declaration. */
- void __init find_max_pfn(void);
-
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
/*
* Machine setup..
*/
-
- #ifdef CONFIG_EFI
- int efi_enabled = 0;
- EXPORT_SYMBOL(efi_enabled);
- #endif
+ extern struct resource code_resource;
+ extern struct resource data_resource;
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;
- /* For PCI or other memory-mapped resources */
- unsigned long pci_mem_start = 0x10000000;
- #ifdef CONFIG_PCI
- EXPORT_SYMBOL(pci_mem_start);
- #endif
-
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;
defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
EXPORT_SYMBOL(ist_info);
#endif
- struct e820map e820;
extern void early_cpu_init(void);
extern int root_mountflags;
unsigned char __initdata boot_params[PARAM_SIZE];
- static struct resource data_resource = {
- .name = "Kernel data",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
- };
-
- static struct resource code_resource = {
- .name = "Kernel code",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
- };
-
- static struct resource system_rom_resource = {
- .name = "System ROM",
- .start = 0xf0000,
- .end = 0xfffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- };
-
- static struct resource extension_rom_resource = {
- .name = "Extension ROM",
- .start = 0xe0000,
- .end = 0xeffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- };
-
- static struct resource adapter_rom_resources[] = { {
- .name = "Adapter ROM",
- .start = 0xc8000,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- }, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- }, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- }, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- }, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- }, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- } };
-
- static struct resource video_rom_resource = {
- .name = "Video ROM",
- .start = 0xc0000,
- .end = 0xc7fff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
- };
-
- static struct resource video_ram_resource = {
- .name = "Video RAM area",
- .start = 0xa0000,
- .end = 0xbffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
- };
-
- static struct resource standard_io_resources[] = { {
- .name = "dma1",
- .start = 0x0000,
- .end = 0x001f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "pic1",
- .start = 0x0020,
- .end = 0x0021,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "timer0",
- .start = 0x0040,
- .end = 0x0043,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "timer1",
- .start = 0x0050,
- .end = 0x0053,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "keyboard",
- .start = 0x0060,
- .end = 0x006f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "dma page reg",
- .start = 0x0080,
- .end = 0x008f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "pic2",
- .start = 0x00a0,
- .end = 0x00a1,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "dma2",
- .start = 0x00c0,
- .end = 0x00df,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
- .name = "fpu",
- .start = 0x00f0,
- .end = 0x00ff,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
- } };
-
- #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
-
- static int __init romchecksum(unsigned char *rom, unsigned long length)
- {
- unsigned char *p, sum = 0;
-
- for (p = rom; p < rom + length; p++)
- sum += *p;
- return sum == 0;
- }
-
- static void __init probe_roms(void)
- {
- unsigned long start, length, upper;
- unsigned char *rom;
- int i;
-
- /* video rom */
- upper = adapter_rom_resources[0].start;
- for (start = video_rom_resource.start; start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- video_rom_resource.start = start;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* if checksum okay, trust length byte */
- if (length && romchecksum(rom, length))
- video_rom_resource.end = start + length - 1;
-
- request_resource(&iomem_resource, &video_rom_resource);
- break;
- }
-
- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
- if (start < upper)
- start = upper;
-
- /* system rom */
- request_resource(&iomem_resource, &system_rom_resource);
- upper = system_rom_resource.start;
-
- /* check for extension rom (ignore length byte!) */
- rom = isa_bus_to_virt(extension_rom_resource.start);
- if (romsignature(rom)) {
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
- if (romchecksum(rom, length)) {
- request_resource(&iomem_resource, &extension_rom_resource);
- upper = extension_rom_resource.start;
- }
- }
-
- /* check for adapter roms on 2k boundaries */
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* but accept any length that fits if checksum okay */
- if (!length || start + length > upper || !romchecksum(rom, length))
- continue;
-
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].end = start + length - 1;
- request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
- start = adapter_rom_resources[i++].end & ~2047UL;
- }
- }
-
- static void __init limit_regions(unsigned long long size)
- {
- unsigned long long current_addr = 0;
- int i;
-
- if (efi_enabled) {
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map, i = 0; p < memmap.map_end;
- p += memmap.desc_size, i++) {
- md = p;
- current_addr = md->phys_addr + (md->num_pages << 12);
- if (md->type == EFI_CONVENTIONAL_MEMORY) {
- if (current_addr >= size) {
- md->num_pages -=
- (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
- memmap.nr_map = i + 1;
- return;
- }
- }
- }
- }
- for (i = 0; i < e820.nr_map; i++) {
- current_addr = e820.map[i].addr + e820.map[i].size;
- if (current_addr < size)
- continue;
-
- if (e820.map[i].type != E820_RAM)
- continue;
-
- if (e820.map[i].addr >= size) {
- /*
- * This region starts past the end of the
- * requested size, skip it completely.
- */
- e820.nr_map = i;
- } else {
- e820.nr_map = i + 1;
- e820.map[i].size -= current_addr - size;
- }
- return;
- }
- }
-
- void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
- {
- int x;
-
- if (!efi_enabled) {
- x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
- }
- } /* add_memory_region */
-
- #define E820_DEBUG 1
-
- static void __init print_memory_map(char *who)
- {
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(" %s: %016Lx - %016Lx ", who,
- e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
- switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
- case E820_RESERVED:
- printk("(reserved)\n");
- break;
- case E820_ACPI:
- printk("(ACPI data)\n");
- break;
- case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %lu\n", e820.map[i].type);
- break;
- }
- }
- }
-
- /*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
- static struct change_member change_point_list[2*E820MAX] __initdata;
- static struct change_member *change_point[2*E820MAX] __initdata;
- static struct e820entry *overlap_list[E820MAX] __initdata;
- static struct e820entry new_bios[E820MAX] __initdata;
-
- int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
- {
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
-
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2)
- return -1;
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
- return -1;
-
- /* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i=0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx; /* true number of change-points */
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing=1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
- current_type = 0;
- for (i=0; i<overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
- if (new_bios[new_bios_entry].size != 0)
- if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- new_nr = new_bios_entry; /* retain count for new bios entries */
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
- }
-
- /*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
- int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
- {
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-
- do {
- unsigned long long start = biosmap->addr;
- unsigned long long size = biosmap->size;
- unsigned long long end = start + size;
- unsigned long type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
- /*
- * Some BIOSes claim RAM in the 640k - 1M region.
- * Not right. Fix it up.
- */
- if (type == E820_RAM) {
- if (start < 0x100000ULL && end > 0xA0000ULL) {
- if (start < 0xA0000ULL)
- add_memory_region(start, 0xA0000ULL-start, type);
- if (end <= 0x100000ULL)
- continue;
- start = 0x100000ULL;
- size = end - start;
- }
- }
- add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
- return 0;
- }
-
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
}
#endif
- static int __initdata user_defined_memmap = 0;
+ int __initdata user_defined_memmap = 0;
/*
* "mem=nopentium" disables the 4MB page tables.
}
early_param("mem", parse_mem);
- static int __init parse_memmap(char *arg)
- {
- if (!arg)
- return -EINVAL;
-
- if (strcmp(arg, "exactmap") == 0) {
- #ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- find_max_pfn();
- saved_max_pfn = max_pfn;
- #endif
- e820.nr_map = 0;
- user_defined_memmap = 1;
- } else {
- /* If the user specifies memory size, we
- * limit the BIOS-provided memory map to
- * that size. exactmap can be used to specify
- * the exact map. mem=number can be used to
- * trim the existing memory map.
- */
- unsigned long long start_at, mem_size;
-
- mem_size = memparse(arg, &arg);
- if (*arg == '@') {
- start_at = memparse(arg+1, &arg);
- add_memory_region(start_at, mem_size, E820_RAM);
- } else if (*arg == '#') {
- start_at = memparse(arg+1, &arg);
- add_memory_region(start_at, mem_size, E820_ACPI);
- } else if (*arg == '$') {
- start_at = memparse(arg+1, &arg);
- add_memory_region(start_at, mem_size, E820_RESERVED);
- } else {
- limit_regions(mem_size);
- user_defined_memmap = 1;
- }
- }
- return 0;
- }
- early_param("memmap", parse_memmap);
-
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
* stored by the crashed kernel.
}
early_param("reservetop", parse_reservetop);
- /*
- * Callback for efi_memory_walk.
- */
- static int __init
- efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
- {
- unsigned long *max_pfn = arg, pfn;
-
- if (start < end) {
- pfn = PFN_UP(end -1);
- if (pfn > *max_pfn)
- *max_pfn = pfn;
- }
- return 0;
- }
-
- static int __init
- efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
- {
- memory_present(0, PFN_UP(start), PFN_DOWN(end));
- return 0;
- }
-
- /*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
- int __init
- e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
- {
- u64 start = s;
- u64 end = e;
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- /* is the region (part) in overlap with the current region ?*/
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- /* if the region is at the beginning of <start,end> we move
- * start to the end of the region since it's ok until there
- */
- if (ei->addr <= start)
- start = ei->addr + ei->size;
- /* if start is now at or beyond end, we're done, full
- * coverage */
- if (start >= end)
- return 1; /* we're done */
- }
- return 0;
- }
-
- /*
- * Find the highest page frame number we have available
- */
- void __init find_max_pfn(void)
- {
- int i;
-
- max_pfn = 0;
- if (efi_enabled) {
- efi_memmap_walk(efi_find_max_pfn, &max_pfn);
- efi_memmap_walk(efi_memory_present_wrapper, NULL);
- return;
- }
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long start, end;
- /* RAM? */
- if (e820.map[i].type != E820_RAM)
- continue;
- start = PFN_UP(e820.map[i].addr);
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- if (start >= end)
- continue;
- if (end > max_pfn)
- max_pfn = end;
- memory_present(0, start, end);
- }
- }
-
/*
* Determine low and high memory ranges:
*/
return max_low_pfn;
}
- /*
- * Free all available memory for boot time allocation. Used
- * as a callback function by efi_memory_walk()
- */
-
- static int __init
- free_available_memory(unsigned long start, unsigned long end, void *arg)
- {
- /* check max_low_pfn */
- if (start >= (max_low_pfn << PAGE_SHIFT))
- return 0;
- if (end >= (max_low_pfn << PAGE_SHIFT))
- end = max_low_pfn << PAGE_SHIFT;
- if (start < end)
- free_bootmem(start, end - start);
-
- return 0;
- }
- /*
- * Register fully available low RAM pages with the bootmem allocator.
- */
- static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
- {
- int i;
-
- if (efi_enabled) {
- efi_memmap_walk(free_available_memory, NULL);
- return;
- }
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long curr_pfn, last_pfn, size;
- /*
- * Reserve usable low memory
- */
- if (e820.map[i].type != E820_RAM)
- continue;
- /*
- * We are rounding up the start address of usable memory:
- */
- curr_pfn = PFN_UP(e820.map[i].addr);
- if (curr_pfn >= max_low_pfn)
- continue;
- /*
- * ... and at the end of the usable range downwards:
- */
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
- if (last_pfn > max_low_pfn)
- last_pfn = max_low_pfn;
-
- /*
- * .. finally, did all the rounding and playing
- * around just make the area go away?
- */
- if (last_pfn <= curr_pfn)
- continue;
-
- size = last_pfn - curr_pfn;
- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
- }
- }
-
/*
* workaround for Dell systems that neglect to reserve EBDA
*/
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
+ reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
reserve_bootmem(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_start = INITRD_START + PAGE_OFFSET;
initrd_end = initrd_start+INITRD_SIZE;
}
else {
}
}
- /*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
- static void __init
- legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
- {
- int i;
-
- probe_roms();
- for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- #ifndef CONFIG_RESOURCES_64BIT
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
- #endif
- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- if (request_resource(&iomem_resource, res)) {
- kfree(res);
- continue;
- }
- if (e820.map[i].type == E820_RAM) {
- /*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, code_resource);
- request_resource(res, data_resource);
- #ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
- #endif
- }
- }
- }
-
- /*
- * Request address space for all standard resources
- *
- * This is called just before pcibios_init(), which is also a
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
- */
- static int __init request_standard_resources(void)
- {
- int i;
-
- printk("Setting up standard PCI resources\n");
- if (efi_enabled)
- efi_initialize_iomem_resources(&code_resource, &data_resource);
- else
- legacy_init_iomem_resources(&code_resource, &data_resource);
-
- /* EFI systems may still have VGA */
- request_resource(&iomem_resource, &video_ram_resource);
-
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
- return 0;
- }
-
- subsys_initcall(request_standard_resources);
-
- static void __init register_memory(void)
- {
- unsigned long gapstart, gapsize, round;
- unsigned long long last;
- int i;
-
- /*
- * Search for the bigest gap in the low 32 bits of the e820
- * memory space.
- */
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = e820.nr_map;
- while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- }
- }
- if (start < last)
- last = start;
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
- pci_mem_start, gapstart, gapsize);
- }
-
#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
static void set_mca_bus(int x) { }
#endif
+ /* Overridden in paravirt.c if CONFIG_PARAVIRT */
+ char * __attribute__((weak)) memory_setup(void)
+ {
+ return machine_specific_memory_setup();
+ }
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
efi_init();
else {
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- print_memory_map(machine_specific_memory_setup());
+ print_memory_map(memory_setup());
}
copy_edd();
fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
unsigned long cpu;
cpu = get_cpu();
smp_mb__after_clear_bit();
out:
put_cpu_no_resched();
- set_irq_regs(old_regs);
}
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
*/
fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
- set_irq_regs(old_regs);
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
int wait = call_data->wait;
mb();
atomic_inc(&call_data->finished);
}
- set_irq_regs(old_regs);
}
/*
put_cpu();
return -EBUSY;
}
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
spin_lock_bh(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait);
spin_unlock_bh(&call_lock);
* Dave Jones : Report invalid combinations of Athlon CPUs.
* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
+
+ /* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+ #define USE_REAL_TIME_DELAY
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/nmi.h>
+ #include <asm/pda.h>
+ #include <asm/genapic.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
static void __devinit start_secondary(void *unused)
{
/*
- * Dont put anything before smp_callin(), SMP
+ * Don't put *anything* before secondary_cpu_init(), SMP
* booting is too fragile that we want to limit the
* things done here to the most necessary things.
*/
- cpu_init();
+ secondary_cpu_init();
preempt_disable();
smp_callin();
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
"movl %0,%%esp\n\t"
"jmp *%1"
:
- :"r" (current->thread.esp),"r" (current->thread.eip));
+ :"m" (current->thread.esp),"m" (current->thread.eip));
}
+ /* Static state in head.S used to set up a CPU */
extern struct {
void * esp;
unsigned short ss;
} stack_start;
+ extern struct i386_pda *start_pda;
+ extern struct Xgt_desc_struct cpu_gdt_descr;
#ifdef CONFIG_NUMA
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- ++cpucount;
- alternatives_smp_switch(1);
-
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
+
+ /* Pre-allocate and initialize the CPU's GDT and PDA so it
+ doesn't have to do any memory allocation during the
+ delicate CPU-bringup phase. */
+ if (!init_gdt(cpu, idle)) {
+ printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+ return -1; /* ? */
+ }
+
idle->thread.eip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
+ ++cpucount;
+ alternatives_smp_switch(1);
+
/* So we see what's up */
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
/* Stack for startup_32 can be just as for start_secondary onwards */
stack_start.esp = (void *) idle->thread.esp;
+ start_pda = cpu_pda(cpu);
+ cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
irq_ctx_init(cpu);
x86_cpu_to_apicid[cpu] = apicid;
struct warm_boot_cpu_info {
struct completion *complete;
+ struct work_struct task;
int apicid;
int cpu;
};
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
{
- struct warm_boot_cpu_info *info = p;
+ struct warm_boot_cpu_info *info =
+ container_of(work, struct warm_boot_cpu_info, task);
do_boot_cpu(info->apicid, info->cpu);
complete(info->complete);
}
{
DECLARE_COMPLETION_ONSTACK(done);
struct warm_boot_cpu_info info;
- struct work_struct task;
int apicid, ret;
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
info.complete = &done;
info.apicid = apicid;
info.cpu = cpu;
- INIT_WORK(&task, do_warm_boot_cpu, &info);
+ INIT_WORK(&info.task, do_warm_boot_cpu);
tsc_sync_disabled = 1;
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
KERNEL_PGD_PTRS);
flush_tlb_all();
- schedule_work(&task);
+ schedule_work(&info.task);
wait_for_completion(&done);
tsc_sync_disabled = 0;
}
#endif
- static void smp_tune_scheduling (void)
+ static void smp_tune_scheduling(void)
{
unsigned long cachesize; /* kB */
- unsigned long bandwidth = 350; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- return;
- } else {
+ if (cpu_khz) {
cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
- max_cache_size = cachesize * 1024;
+
+ if (cachesize > 0)
+ max_cache_size = cachesize * 1024;
}
}
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
+
+ #ifdef CONFIG_X86_GENERICARCH
+ if (num_online_cpus() > 8 && genapic == &apic_default)
+ panic("Default flat APIC routing can't be used with > 8 cpus\n");
+ #endif
+
return 0;
}
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
+ #ifdef CONFIG_PARAVIRT
+ unsigned int __read_mostly vdso_enabled = 0;
+ #else
unsigned int __read_mostly vdso_enabled = 1;
+ #endif
EXPORT_SYMBOL_GPL(vdso_enabled);
goto up_fail;
}
- vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
ret = -ENOMEM;
goto up_fail;
#include <linux/kexec.h>
#include <linux/unwind.h>
#include <linux/uaccess.h>
+ #include <linux/nmi.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
asmlinkage int system_call(void);
- struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
- { 0, 0 }, { 0, 0 } };
-
/* Do we ignore FPU interrupts ? */
char ignore_fpu_irq = 0;
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
- static int kstack_depth_to_print = 24;
+ int kstack_depth_to_print = 24;
#ifdef CONFIG_STACK_UNWIND
static int call_trace = 1;
#else
{
struct ops_and_data *oad = (struct ops_and_data *)data;
int n = 0;
+ unsigned long sp = UNW_SP(info);
+ if (arch_unw_user_mode(info))
+ return -1;
while (unwind(info) == 0 && UNW_PC(info)) {
n++;
oad->ops->address(oad->data, UNW_PC(info));
if (arch_unw_user_mode(info))
break;
+ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+ && sp > UNW_SP(info))
+ break;
+ sp = UNW_SP(info);
}
return n;
}
+ #define MSG(msg) ops->warning(data, msg)
+
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack,
struct stacktrace_ops *ops, void *data)
if (unwind_init_frame_info(&info, task, regs) == 0)
unw_ret = dump_trace_unwind(&info, &oad);
} else if (task == current)
- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+ unw_ret = unwind_init_running(&info, dump_trace_unwind,
+ &oad);
else {
if (unwind_init_blocked(&info, task) == 0)
unw_ret = dump_trace_unwind(&info, &oad);
}
if (unw_ret > 0) {
if (call_trace == 1 && !arch_unw_user_mode(&info)) {
- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+ ops->warning_symbol(data,
+ "DWARF2 unwinder stuck at %s",
UNW_PC(&info));
if (UNW_SP(&info) >= PAGE_OFFSET) {
- ops->warning(data, "Leftover inexact backtrace:\n");
+ MSG("Leftover inexact backtrace:");
stack = (void *)UNW_SP(&info);
if (!stack)
return;
ebp = UNW_FP(&info);
} else
- ops->warning(data, "Full inexact backtrace again:\n");
+ MSG("Full inexact backtrace again:");
} else if (call_trace >= 1)
return;
else
- ops->warning(data, "Full inexact backtrace again:\n");
+ MSG("Full inexact backtrace again:");
} else
- ops->warning(data, "Inexact backtrace:\n");
+ MSG("Inexact backtrace:");
}
if (!stack) {
unsigned long dummy;
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
+ touch_nmi_watchdog();
}
}
EXPORT_SYMBOL(dump_trace);
* time of the fault..
*/
if (in_kernel) {
- u8 __user *eip;
+ u8 *eip;
int code_bytes = 64;
unsigned char c;
printk(KERN_EMERG "Code: ");
- eip = (u8 __user *)regs->eip - 43;
- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+ eip = (u8 *)regs->eip - 43;
+ if (eip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(eip, c)) {
/* try starting at EIP */
- eip = (u8 __user *)regs->eip;
+ eip = (u8 *)regs->eip;
code_bytes = 32;
}
for (i = 0; i < code_bytes; i++, eip++) {
- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+ if (eip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(eip, c)) {
printk(" Bad EIP value.");
break;
}
- if (eip == (u8 __user *)regs->eip)
+ if (eip == (u8 *)regs->eip)
printk("<%02x> ", c);
else
printk("%02x ", c);
if (eip < PAGE_OFFSET)
return;
- if (probe_kernel_address((unsigned short __user *)eip, ud2))
+ if (probe_kernel_address((unsigned short *)eip, ud2))
return;
if (ud2 != 0x0b0f)
return;
char *file;
char c;
- if (probe_kernel_address((unsigned short __user *)(eip + 2),
- line))
+ if (probe_kernel_address((unsigned short *)(eip + 2), line))
break;
- if (__get_user(file, (char * __user *)(eip + 4)) ||
- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+ if (probe_kernel_address((char **)(eip + 4), file) ||
+ (unsigned long)file < PAGE_OFFSET ||
+ probe_kernel_address(file, c))
file = "<bad filename>";
printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
u32 lock_owner;
int lock_owner_depth;
} die = {
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(die.lock),
.lock_owner = -1,
.lock_owner_depth = 0
};
{
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
"CPU %d.\n", reason, smp_processor_id());
- printk(KERN_EMERG "You probably have a hardware problem with your RAM "
- "chips\n");
+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
printk(" on CPU%d, eip %08lx, registers:\n",
smp_processor_id(), regs->eip);
show_registers(regs);
- printk(KERN_EMERG "console shuts up ...\n");
console_silent();
spin_unlock(&nmi_print_lock);
bust_spinlocks(0);
#endif
}
- fastcall void setup_x86_bogus_stack(unsigned char * stk)
- {
- unsigned long *switch16_ptr, *switch32_ptr;
- struct pt_regs *regs;
- unsigned long stack_top, stack_bot;
- unsigned short iret_frame16_off;
- int cpu = smp_processor_id();
- /* reserve the space on 32bit stack for the magic switch16 pointer */
- memmove(stk, stk + 8, sizeof(struct pt_regs));
- switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
- regs = (struct pt_regs *)stk;
- /* now the switch32 on 16bit stack */
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
- switch32_ptr = (unsigned long *)(stack_top - 8);
- iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
- /* copy iret frame on 16bit stack */
- memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20);
- /* fill in the switch pointers */
- switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
- switch16_ptr[1] = __ESPFIX_SS;
- switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
- 8 - CPU_16BIT_STACK_SIZE;
- switch32_ptr[1] = __KERNEL_DS;
- }
-
- fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+ fastcall unsigned long patch_espfix_desc(unsigned long uesp,
+ unsigned long kesp)
{
- unsigned long *switch32_ptr;
- unsigned char *stack16, *stack32;
- unsigned long stack_top, stack_bot;
- int len;
int cpu = smp_processor_id();
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
- switch32_ptr = (unsigned long *)(stack_top - 8);
- /* copy the data from 16bit stack to 32bit stack */
- len = CPU_16BIT_STACK_SIZE - 8 - sp;
- stack16 = (unsigned char *)(stack_bot + sp);
- stack32 = (unsigned char *)
- (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
- memcpy(stack32, stack16, len);
- return stack32;
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+ unsigned long new_kesp = kesp - base;
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
+ /* Set up base for espfix segment */
+ desc &= 0x00f0ff0000000000ULL;
+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
+ (lim_pages & 0xffff);
+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
+ return new_kesp;
}
/*
* Must be called with kernel preemption disabled (in this case,
* local interrupts are disabled at the call-site in entry.S).
*/
- asmlinkage void math_state_restore(struct pt_regs regs)
+ asmlinkage void math_state_restore(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
init_fpu(tsk);
restore_fpu(tsk);
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
}
#ifndef CONFIG_MATH_EMULATION
#include <asm/delay.h>
#include <asm/tsc.h>
- #include <asm/delay.h>
#include <asm/io.h>
#include "mach_timer.h"
static unsigned int cpufreq_init = 0;
static struct work_struct cpufreq_delayed_get_work;
-static void handle_cpufreq_delayed_get(void *v)
+static void handle_cpufreq_delayed_get(struct work_struct *work)
{
unsigned int cpu;
{
int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
if (!ret)
return 0;
}
- extern int is_available_memory(efi_memory_desc_t *);
-
int page_is_ram(unsigned long pagenr)
{
int i;
#endif
#endif
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
+struct kmem_cache *pgd_cache;
+struct kmem_cache *pmd_cache;
void __init pgtable_cache_init(void)
{
return;
}
pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
+ if (pgprot_val(flags))
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte(pfn, flags));
+ else
+ pte_clear(&init_mm, vaddr, pte);
/*
* It's enough to flush this one mapping.
return pte;
}
-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
{
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
set_page_private(next, (unsigned long)pprev);
}
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags;
}
/* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
*/
static int check_interval = 5 * 60; /* 5 minutes */
-static void mcheck_timer(void *data);
-static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
+static void mcheck_timer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
static void mcheck_check_cpu(void *info)
{
do_machine_check(NULL, 0);
}
-static void mcheck_timer(void *data)
+static void mcheck_timer(struct work_struct *work)
{
on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
schedule_delayed_work(&mcheck_work, check_interval * HZ);
return err;
}
-#ifdef CONFIG_HOTPLUG_CPU
static void mce_remove_device(unsigned int cpu)
{
int i;
sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
sysdev_unregister(&per_cpu(device_mce,cpu));
+ memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static struct notifier_block mce_cpu_notifier = {
.notifier_call = mce_cpu_callback,
};
-#endif
static __init int mce_init_device(void)
{
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_start = INITRD_START + PAGE_OFFSET;
initrd_end = initrd_start+INITRD_SIZE;
}
else {
/* Fix cpuid4 emulation for more */
num_cache_leaves = 3;
- /* When there is only one core no need to synchronize RDTSC */
- if (num_possible_cpus() == 1)
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- else
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ /* RDTSC can be speculated around */
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
}
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
}
+ if (cpu_has_ds) {
+ unsigned int l1, l2;
+ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+ if (!(l1 & (1<<11)))
+ set_bit(X86_FEATURE_BTS, c->x86_capability);
+ if (!(l1 & (1<<12)))
+ set_bit(X86_FEATURE_PEBS, c->x86_capability);
+ }
+
n = c->extended_cpuid_level;
if (n >= 0x80000008) {
unsigned eax = cpuid_eax(0x80000008);
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
if (c->x86 == 6)
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ if (c->x86 == 15)
+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ else
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
c->x86_max_cores = intel_num_cpu_cores(c);
srat_detect_node();
put_cpu();
return 0;
}
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
spin_lock_bh(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait);
spin_unlock_bh(&call_lock);
put_cpu();
return 0;
}
+ EXPORT_SYMBOL(smp_call_function_single);
/*
* this function sends a 'generic call function' IPI to all other CPUs
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/numa.h>
+ #include <asm/genapic.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
}
struct create_idle {
+ struct work_struct work;
struct task_struct *idle;
struct completion done;
int cpu;
};
-void do_fork_idle(void *_c_idle)
+void do_fork_idle(struct work_struct *work)
{
- struct create_idle *c_idle = _c_idle;
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
c_idle->idle = fork_idle(c_idle->cpu);
complete(&c_idle->done);
int timeout;
unsigned long start_rip;
struct create_idle c_idle = {
+ .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- DECLARE_WORK(work, do_fork_idle, &c_idle);
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
* thread.
*/
if (!keventd_up() || current_is_keventd())
- work.func(work.data);
+ c_idle.work.func(&c_idle.work);
else {
- schedule_work(&work);
+ schedule_work(&c_idle.work);
wait_for_completion(&c_idle.done);
}
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
+
+ if (num_online_cpus() > 8 && genapic == &apic_flat) {
+ printk(KERN_WARNING
+ "flat APIC routing can't be used with > 8 cpus\n");
+ BUG();
+ }
+
err = 0;
return err;
#include <asm/topology.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __syscall_clobber "r11","rcx","memory"
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
vsyscall_set_cpu(raw_smp_processor_id());
}
-#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
return NOTIFY_DONE;
}
-#endif
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+ /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
#include <linux/dmi.h>
#include <linux/delay.h>
#include <linux/acpi.h>
+#include <linux/freezer.h>
#include <asm/page.h>
#include <asm/desc.h>
if (check_legacy_ioport(PNPBIOS_BASE))
return -ENODEV;
#endif
- if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table)) {
+ if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) ||
+ paravirt_enabled()) {
printk(KERN_INFO "PnPBIOS: Disabled\n");
return -ENODEV;
}
up_native =
compat_alloc_user_space(sizeof(struct video_still_picture));
- put_user(compat_ptr(fp), &up_native->iFrame);
- put_user(size, &up_native->size);
+ err = put_user(compat_ptr(fp), &up_native->iFrame);
+ err |= put_user(size, &up_native->size);
+ if (err)
+ return -EFAULT;
err = sys_ioctl(fd, cmd, (unsigned long) up_native);
err |= get_user(length, &up->length);
up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
- put_user(compat_ptr(palp), &up_native->palette);
- put_user(length, &up_native->length);
+ err = put_user(compat_ptr(palp), &up_native->palette);
+ err |= put_user(length, &up_native->length);
+ if (err)
+ return -EFAULT;
err = sys_ioctl(fd, cmd, (unsigned long) up_native);
struct serial_struct ss;
mm_segment_t oldseg = get_fs();
__u32 udata;
+ unsigned int base;
if (cmd == TIOCSSERIAL) {
if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
return -EFAULT;
if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
return -EFAULT;
- __get_user(udata, &ss32->iomem_base);
+ if (__get_user(udata, &ss32->iomem_base))
+ return -EFAULT;
ss.iomem_base = compat_ptr(udata);
- __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
- __get_user(ss.port_high, &ss32->port_high);
+ if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+ __get_user(ss.port_high, &ss32->port_high))
+ return -EFAULT;
ss.iomap_base = 0UL;
}
set_fs(KERNEL_DS);
return -EFAULT;
if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
return -EFAULT;
- __put_user((unsigned long)ss.iomem_base >> 32 ?
- 0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
- &ss32->iomem_base);
- __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
- __put_user(ss.port_high, &ss32->port_high);
-
+ base = (unsigned long)ss.iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
+ if (__put_user(base, &ss32->iomem_base) ||
+ __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+ __put_user(ss.port_high, &ss32->port_high))
+ return -EFAULT;
}
return err;
}
HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
+ HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
/* ioctls used by appletalk ddp.c */
HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
* on us. We need to use _exactly_ the address the user gave us,
* not some alias that contains the same information.
*/
-typedef struct { volatile int counter; } atomic_t;
+typedef struct { int counter; } atomic_t;
#define ATOMIC_INIT(i) { (i) }
/* Modern 486+ processor */
__i = i;
__asm__ __volatile__(
- LOCK_PREFIX "xaddl %0, %1;"
- :"=r"(i)
- :"m"(v->counter), "0"(i));
+ LOCK_PREFIX "xaddl %0, %1"
+ :"+r" (i), "+m" (v->counter)
+ : : "memory");
return i + __i;
#ifdef CONFIG_M386
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
+ #include <asm/paravirt.h>
#ifndef _I386_BITOPS_H
#include <asm/bitops.h>
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
-extern kmem_cache_t *pgd_cache;
-extern kmem_cache_t *pmd_cache;
+extern struct kmem_cache *pgd_cache;
+extern struct kmem_cache *pmd_cache;
extern spinlock_t pgd_lock;
extern struct page *pgd_list;
-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pmd_ctor(void *, struct kmem_cache *, unsigned long);
+void pgd_ctor(void *, struct kmem_cache *, unsigned long);
+void pgd_dtor(void *, struct kmem_cache *, unsigned long);
void pgtable_cache_init(void);
void paging_init(void);
# include <asm/pgtable-2level.h>
#endif
+ #ifndef CONFIG_PARAVIRT
/*
* Rules for using pte_update - it must be called after any PTE update which
* has not been done using the set_pte / clear_pte interfaces. It is used by
*/
#define pte_update(mm, addr, ptep) do { } while (0)
#define pte_update_defer(mm, addr, ptep) do { } while (0)
-
+ #endif
/*
* We only update the dirty/accessed state if we set
do { \
if (dirty) { \
(ptep)->pte_low = (entry).pte_low; \
- pte_update_defer((vma)->vm_mm, (addr), (ptep)); \
+ pte_update_defer((vma)->vm_mm, (address), (ptep)); \
flush_tlb_page(vma, address); \
} \
} while (0)
__dirty = pte_dirty(*(ptep)); \
if (__dirty) { \
clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low); \
- pte_update_defer((vma)->vm_mm, (addr), (ptep)); \
+ pte_update_defer((vma)->vm_mm, (address), (ptep)); \
flush_tlb_page(vma, address); \
} \
__dirty; \
__young = pte_young(*(ptep)); \
if (__young) { \
clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low); \
- pte_update_defer((vma)->vm_mm, (addr), (ptep)); \
+ pte_update_defer((vma)->vm_mm, (address), (ptep)); \
flush_tlb_page(vma, address); \
} \
__young; \
})
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = raw_ptep_get_and_clear(ptep);
+ pte_update(mm, addr, ptep);
+ return pte;
+ }
+
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
#ifndef _i386_SETUP_H
#define _i386_SETUP_H
+#define COMMAND_LINE_SIZE 256
+
#ifdef __KERNEL__
#include <linux/pfn.h>
*/
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN (1 << 20)
-#endif
#define PARAM_SIZE 4096
-#define COMMAND_LINE_SIZE 256
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
struct e820entry;
char * __init machine_specific_memory_setup(void);
+ char *memory_setup(void);
int __init copy_e820_map(struct e820entry * biosmap, int nr_map);
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map);
#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
#endif /* _i386_SETUP_H */
#include <asm/desc.h>
#include <asm/i387.h>
-static inline int
-arch_prepare_suspend(void)
-{
- /* If you want to make non-PSE machine work, turn off paging
- in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so
- it could work... */
- if (!cpu_has_pse) {
- printk(KERN_ERR "PSE is required for swsusp.\n");
- return -EPERM;
- }
- return 0;
-}
+static inline int arch_prepare_suspend(void) { return 0; }
/* image of the saved processor state */
struct saved_context {
u16 es, fs, gs, ss;
unsigned long cr0, cr2, cr3, cr4;
- u16 gdt_pad;
- u16 gdt_limit;
- unsigned long gdt_base;
- u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct Xgt_desc_struct gdt;
+ struct Xgt_desc_struct idt;
u16 ldt;
u16 tss;
unsigned long tr;
header-y += boot.h
header-y += bootsetup.h
- header-y += cpufeature.h
header-y += debugreg.h
header-y += ldt.h
header-y += msr.h
header-y += prctl.h
header-y += ptrace-abi.h
-header-y += setup.h
header-y += sigcontext32.h
header-y += ucontext.h
header-y += vsyscall32.h
* on us. We need to use _exactly_ the address the user gave us,
* not some alias that contains the same information.
*/
-typedef struct { volatile int counter; } atomic_t;
+typedef struct { int counter; } atomic_t;
#define ATOMIC_INIT(i) { (i) }
{
int __i = i;
__asm__ __volatile__(
- LOCK_PREFIX "xaddl %0, %1;"
- :"=r"(i)
- :"m"(v->counter), "0"(i));
+ LOCK_PREFIX "xaddl %0, %1"
+ :"+r" (i), "+m" (v->counter)
+ : : "memory");
return i + __i;
}
extern u8 x86_cpu_to_log_apicid[NR_CPUS];
extern u8 bios_cpu_apicid[];
- static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
- {
- return cpus_addr(cpumask)[0];
- }
-
static inline int cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < NR_CPUS)
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
#else
#define cpu_physical_id(cpu) boot_cpu_id
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int retry, int wait)
-{
- /* Disable interrupts here? */
- func(info);
- return 0;
-}
#endif /* !CONFIG_SMP */
#endif
#include <linux/compiler.h>
#include <linux/cpumask.h>
#include <asm/semaphore.h>
+#include <linux/mutex.h>
struct cpu {
int node_id; /* The node which contains the CPU */
- int no_control; /* Should the sysfs control file be created? */
+ int hotpluggable; /* creates sysfs control file if hotpluggable */
struct sys_device sysdev;
};
#ifdef CONFIG_HOTPLUG_CPU
/* Stop CPUs going up and down. */
+
+static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex)
+{
+ mutex_lock(cpu_hp_mutex);
+}
+
+static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex)
+{
+ mutex_unlock(cpu_hp_mutex);
+}
+
extern void lock_cpu_hotplug(void);
extern void unlock_cpu_hotplug(void);
#define hotcpu_notifier(fn, pri) { \
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
int cpu_down(unsigned int cpu);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
-#else
+
+#else /* CONFIG_HOTPLUG_CPU */
+
+static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex)
+{ }
+static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex)
+{ }
+
#define lock_cpu_hotplug() do { } while (0)
#define unlock_cpu_hotplug() do { } while (0)
#define lock_cpu_hotplug_interruptible() 0
-#define hotcpu_notifier(fn, pri) do { } while (0)
-#define register_hotcpu_notifier(nb) do { } while (0)
-#define unregister_hotcpu_notifier(nb) do { } while (0)
+#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
+#define register_hotcpu_notifier(nb) do { (void)(nb); } while (0)
+#define unregister_hotcpu_notifier(nb) do { (void)(nb); } while (0)
/* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */
static inline int cpu_is_offline(int cpu) { return 0; }
-#endif
+#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_SUSPEND_SMP
extern int disable_nonboot_cpus(void);
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__
+#include <linux/preempt.h>
#include <asm/uaccess.h>
+/*
+ * These routines enable/disable the pagefault handler in that
+ * it will not take any locks and go straight to the fixup table.
+ *
+ * They have great resemblance to the preempt_disable/enable calls
+ * and in fact they are identical; this is because currently there is
+ * no other way to make the pagefault handlers do this. So we do
+ * disable preemption but we don't necessarily care about that.
+ */
+static inline void pagefault_disable(void)
+{
+ inc_preempt_count();
+ /*
+ * make sure to have issued the store before a pagefault
+ * can hit.
+ */
+ barrier();
+}
+
+static inline void pagefault_enable(void)
+{
+ /*
+ * make sure to issue those last loads/stores before enabling
+ * the pagefault handler again.
+ */
+ barrier();
+ dec_preempt_count();
+ /*
+ * make sure we do..
+ */
+ barrier();
+ preempt_check_resched();
+}
+
#ifndef ARCH_HAS_NOCACHE_UACCESS
static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
* do_page_fault() doesn't attempt to take mmap_sem. This makes
* probe_kernel_address() suitable for use within regions where the caller
* already holds mmap_sem, or other locks which nest inside mmap_sem.
+ * This must be a macro because __get_user() needs to know the types of the
+ * args.
+ *
+ * We don't include enough header files to be able to do the set_fs(). We
+ * require that the probe_kernel_address() caller will do that.
*/
#define probe_kernel_address(addr, retval) \
({ \
long ret; \
+ mm_segment_t old_fs = get_fs(); \
\
- inc_preempt_count(); \
- ret = __get_user(retval, (__force typeof(*addr) __user *)addr);\
- dec_preempt_count(); \
+ set_fs(KERNEL_DS); \
+ pagefault_disable(); \
- ret = __get_user(retval, addr); \
++ ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \
+ pagefault_enable(); \
+ set_fs(old_fs); \
ret; \
})
#include <linux/percpu.h>
#include <linux/kmod.h>
#include <linux/kernel_stat.h>
+ #include <linux/start_kernel.h>
#include <linux/security.h>
#include <linux/workqueue.h>
#include <linux/profile.h>
#error Sorry, your GCC is too old. It builds incorrect kernels.
#endif
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
+#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended.
+#endif
+
static int init(void *);
extern void init_IRQ(void);
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
-static kmem_cache_t *task_struct_cachep;
+static struct kmem_cache *task_struct_cachep;
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
-static kmem_cache_t *signal_cachep;
+static struct kmem_cache *signal_cachep;
/* SLAB cache for sighand_struct structures (tsk->sighand) */
-kmem_cache_t *sighand_cachep;
+struct kmem_cache *sighand_cachep;
/* SLAB cache for files_struct structures (tsk->files) */
-kmem_cache_t *files_cachep;
+struct kmem_cache *files_cachep;
/* SLAB cache for fs_struct structures (tsk->fs) */
-kmem_cache_t *fs_cachep;
+struct kmem_cache *fs_cachep;
/* SLAB cache for vm_area_struct structures */
-kmem_cache_t *vm_area_cachep;
+struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
-static kmem_cache_t *mm_cachep;
+static struct kmem_cache *mm_cachep;
void free_task(struct task_struct *tsk)
{
goto fail_nomem;
charge = len;
}
- tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!tmp)
goto fail_nomem;
*tmp = *mpnt;
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
-#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
#include <linux/init_task.h>
tsk->vfork_done = NULL;
complete(vfork_done);
}
- if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+
+ /*
+ * If we're exiting normally, clear a user-space tid field if
+ * requested. We leave this alone when dying by signal, to leave
+ * the value intact in a core dump, and to save the unnecessary
+ * trouble otherwise. Userland only wants this done for a sys_exit.
+ */
+ if (tsk->clear_child_tid
+ && !(tsk->flags & PF_SIGNALED)
+ && atomic_read(&mm->mm_users) > 1) {
u32 __user * tidptr = tsk->clear_child_tid;
tsk->clear_child_tid = NULL;
memcpy(mm, oldmm, sizeof(*mm));
+ /* Initializing for Swap token stuff */
+ mm->token_priority = 0;
+ mm->last_interval = 0;
+
if (!mm_init(mm))
goto fail_nomem;
goto fail_nomem;
good_mm:
+ /* Initializing for Swap token stuff */
+ mm->token_priority = 0;
+ mm->last_interval = 0;
+
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
struct files_struct *newf;
struct fdtable *fdt;
- newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+ newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
if (!newf)
goto out;
if (clone_flags & CLONE_THREAD) {
atomic_inc(¤t->signal->count);
atomic_inc(¤t->signal->live);
- taskstats_tgid_alloc(current);
return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
return ERR_PTR(retval);
}
- struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+ noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
return regs;
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
-static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags)
{
struct sighand_struct *sighand = data;
EXPORT_SYMBOL(lockdep_on);
-int lockdep_internal(void)
-{
- return current->lockdep_recursion != 0;
-}
-
-EXPORT_SYMBOL(lockdep_internal);
-
/*
* Debugging switches:
*/
trace->skip = 3;
trace->all_contexts = 0;
- /* Make sure to not recurse in case the the unwinder needs to tak
- e locks. */
- lockdep_off();
save_stack_trace(trace, NULL);
- lockdep_on();
trace->max_entries = trace->nr_entries;
nr_stack_trace_entries += trace->nr_entries;
- if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
+ if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) {
+ __raw_spin_unlock(&hash_lock);
return 0;
+ }
if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
__raw_spin_unlock(&hash_lock);
static void print_lock_name(struct lock_class *class)
{
- char str[128], c1, c2, c3, c4;
+ char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4;
const char *name;
get_usage_chars(class, &c1, &c2, &c3, &c4);
static void print_lockdep_cache(struct lockdep_map *lock)
{
const char *name;
- char str[128];
+ char str[KSYM_NAME_LEN + 1];
name = lock->name;
if (!name)
print_lock_class_header(class, depth);
list_for_each_entry(entry, &class->locks_after, entry) {
- DEBUG_LOCKS_WARN_ON(!entry->class);
+ if (DEBUG_LOCKS_WARN_ON(!entry->class))
+ return;
+
print_lock_dependencies(entry->class, depth + 1);
printk("%*s ... acquired at:\n",depth,"");
return 0;
entry->class = this;
- save_trace(&entry->trace);
+ if (!save_trace(&entry->trace))
+ return 0;
/*
* Since we never remove from the dependency list, the list can
if (debug_locks_silent)
return 0;
+ /* hash_lock unlocked by the header */
+ __raw_spin_lock(&hash_lock);
this.class = check_source->class;
- save_trace(&this.trace);
+ if (!save_trace(&this.trace))
+ return 0;
+ __raw_spin_unlock(&hash_lock);
print_circular_bug_entry(&this, 0);
printk("\nother info that might help us debug this:\n\n");
&prev->class->locks_after, next->acquire_ip);
if (!ret)
return 0;
- /*
- * Return value of 2 signals 'dependency already added',
- * in that case we dont have to add the backlink either.
- */
- if (ret == 2)
- return 2;
+
ret = add_lock_to_list(next->class, prev->class,
&next->class->locks_before, next->acquire_ip);
+ if (!ret)
+ return 0;
/*
* Debugging printouts:
* added:
*/
if (hlock->read != 2) {
- check_prev_add(curr, hlock, next);
+ if (!check_prev_add(curr, hlock, next))
+ return 0;
/*
* Stop after the first non-trylock entry,
* as non-trylock entries have added their
struct lockdep_subclass_key *key;
struct list_head *hash_head;
struct lock_class *class;
+ unsigned long flags;
class = look_up_lock_class(lock, subclass);
if (likely(class))
key = lock->key->subkeys + subclass;
hash_head = classhashentry(key);
+ raw_local_irq_save(flags);
__raw_spin_lock(&hash_lock);
/*
* We have to do the hash-walk again, to avoid races
*/
if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
__raw_spin_unlock(&hash_lock);
+ raw_local_irq_restore(flags);
debug_locks_off();
printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
printk("turning off the locking correctness validator.\n");
if (verbose(class)) {
__raw_spin_unlock(&hash_lock);
+ raw_local_irq_restore(flags);
printk("\nnew class %p: %s", class->key, class->name);
if (class->name_version > 1)
printk("#%d", class->name_version);
printk("\n");
dump_stack();
+ raw_local_irq_save(flags);
__raw_spin_lock(&hash_lock);
}
out_unlock_set:
__raw_spin_unlock(&hash_lock);
+ raw_local_irq_restore(flags);
if (!subclass || force)
lock->class_cache = class;
debug_atomic_dec(&nr_unused_locks);
break;
default:
+ __raw_spin_unlock(&hash_lock);
debug_locks_off();
WARN_ON(1);
return 0;
}
local_irq_restore(flags);
}
+EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
static void print_held_locks_bug(struct task_struct *curr)
{
#ifdef CONFIG_X86
#include <asm/nmi.h>
+ #include <asm/stacktrace.h>
#endif
#if defined(CONFIG_SYSCTL)
static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
static int proc_opensys(struct inode *, struct file *);
-struct file_operations proc_sys_file_operations = {
+const struct file_operations proc_sys_file_operations = {
.open = proc_opensys,
.read = proc_readsys,
.write = proc_writesys,
.mode = 0444,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "kstack_depth_to_print",
+ .data = &kstack_depth_to_print,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
.extra1 = &zero,
},
#endif
-#ifdef CONFIG_SWAP
- {
- .ctl_name = VM_SWAP_TOKEN_TIMEOUT,
- .procname = "swap_token_timeout",
- .data = &swap_token_default_timeout,
- .maxlen = sizeof(swap_token_default_timeout),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
- },
-#endif
#ifdef CONFIG_NUMA
{
.ctl_name = VM_ZONE_RECLAIM_MODE,
p = buf;
if (*p == '-' && left > 1) {
neg = 1;
- left--, p++;
+ p++;
}
if (*p < '0' || *p > '9')
break;
p = buf;
if (*p == '-' && left > 1) {
neg = 1;
- left--, p++;
+ p++;
}
if (*p < '0' || *p > '9')
break;