2 * linux/arch/x86-64/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code.
13 * This file handles the architecture-dependent parts of initialization
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/initrd.h>
32 #include <linux/highmem.h>
33 #include <linux/bootmem.h>
34 #include <linux/module.h>
35 #include <asm/processor.h>
36 #include <linux/console.h>
37 #include <linux/seq_file.h>
38 #include <linux/crash_dump.h>
39 #include <linux/root_dev.h>
40 #include <linux/pci.h>
41 #include <linux/acpi.h>
42 #include <linux/kallsyms.h>
43 #include <linux/edd.h>
44 #include <linux/mmzone.h>
45 #include <linux/kexec.h>
46 #include <linux/cpufreq.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
55 #include <video/edid.h>
58 #include <asm/mpspec.h>
59 #include <asm/mmu_context.h>
60 #include <asm/bootsetup.h>
61 #include <asm/proto.h>
62 #include <asm/setup.h>
63 #include <asm/mach_apic.h>
65 #include <asm/sections.h>
71 struct cpuinfo_x86 boot_cpu_data __read_mostly;
73 unsigned long mmu_cr4_features;
76 EXPORT_SYMBOL(acpi_disabled);
78 extern int __initdata acpi_ht;
79 extern acpi_interrupt_flags acpi_sci_flags;
80 int __initdata acpi_force = 0;
83 int acpi_numa __initdata;
85 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
88 unsigned long saved_video_mode;
92 EXPORT_SYMBOL(swiotlb);
98 struct drive_info_struct { char dummy[32]; } drive_info;
99 struct screen_info screen_info;
100 struct sys_desc_table_struct {
101 unsigned short length;
102 unsigned char table[0];
105 struct edid_info edid_info;
108 extern int root_mountflags;
110 char command_line[COMMAND_LINE_SIZE];
112 struct resource standard_io_resources[] = {
113 { .name = "dma1", .start = 0x00, .end = 0x1f,
114 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
115 { .name = "pic1", .start = 0x20, .end = 0x21,
116 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
117 { .name = "timer0", .start = 0x40, .end = 0x43,
118 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
119 { .name = "timer1", .start = 0x50, .end = 0x53,
120 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
121 { .name = "keyboard", .start = 0x60, .end = 0x6f,
122 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
123 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
124 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
125 { .name = "pic2", .start = 0xa0, .end = 0xa1,
126 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
127 { .name = "dma2", .start = 0xc0, .end = 0xdf,
128 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
129 { .name = "fpu", .start = 0xf0, .end = 0xff,
130 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
133 #define STANDARD_IO_RESOURCES \
134 (sizeof standard_io_resources / sizeof standard_io_resources[0])
136 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
138 struct resource data_resource = {
139 .name = "Kernel data",
142 .flags = IORESOURCE_RAM,
144 struct resource code_resource = {
145 .name = "Kernel code",
148 .flags = IORESOURCE_RAM,
151 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
153 static struct resource system_rom_resource = {
154 .name = "System ROM",
157 .flags = IORESOURCE_ROM,
160 static struct resource extension_rom_resource = {
161 .name = "Extension ROM",
164 .flags = IORESOURCE_ROM,
167 static struct resource adapter_rom_resources[] = {
168 { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
169 .flags = IORESOURCE_ROM },
170 { .name = "Adapter ROM", .start = 0, .end = 0,
171 .flags = IORESOURCE_ROM },
172 { .name = "Adapter ROM", .start = 0, .end = 0,
173 .flags = IORESOURCE_ROM },
174 { .name = "Adapter ROM", .start = 0, .end = 0,
175 .flags = IORESOURCE_ROM },
176 { .name = "Adapter ROM", .start = 0, .end = 0,
177 .flags = IORESOURCE_ROM },
178 { .name = "Adapter ROM", .start = 0, .end = 0,
179 .flags = IORESOURCE_ROM }
182 #define ADAPTER_ROM_RESOURCES \
183 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
185 static struct resource video_rom_resource = {
189 .flags = IORESOURCE_ROM,
192 static struct resource video_ram_resource = {
193 .name = "Video RAM area",
196 .flags = IORESOURCE_RAM,
199 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
201 static int __init romchecksum(unsigned char *rom, unsigned long length)
203 unsigned char *p, sum = 0;
205 for (p = rom; p < rom + length; p++)
210 static void __init probe_roms(void)
212 unsigned long start, length, upper;
217 upper = adapter_rom_resources[0].start;
218 for (start = video_rom_resource.start; start < upper; start += 2048) {
219 rom = isa_bus_to_virt(start);
220 if (!romsignature(rom))
223 video_rom_resource.start = start;
225 /* 0 < length <= 0x7f * 512, historically */
226 length = rom[2] * 512;
228 /* if checksum okay, trust length byte */
229 if (length && romchecksum(rom, length))
230 video_rom_resource.end = start + length - 1;
232 request_resource(&iomem_resource, &video_rom_resource);
236 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
241 request_resource(&iomem_resource, &system_rom_resource);
242 upper = system_rom_resource.start;
244 /* check for extension rom (ignore length byte!) */
245 rom = isa_bus_to_virt(extension_rom_resource.start);
246 if (romsignature(rom)) {
247 length = extension_rom_resource.end - extension_rom_resource.start + 1;
248 if (romchecksum(rom, length)) {
249 request_resource(&iomem_resource, &extension_rom_resource);
250 upper = extension_rom_resource.start;
254 /* check for adapter roms on 2k boundaries */
255 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
256 rom = isa_bus_to_virt(start);
257 if (!romsignature(rom))
260 /* 0 < length <= 0x7f * 512, historically */
261 length = rom[2] * 512;
263 /* but accept any length that fits if checksum okay */
264 if (!length || start + length > upper || !romchecksum(rom, length))
267 adapter_rom_resources[i].start = start;
268 adapter_rom_resources[i].end = start + length - 1;
269 request_resource(&iomem_resource, &adapter_rom_resources[i]);
271 start = adapter_rom_resources[i++].end & ~2047UL;
275 static __init void parse_cmdline_early (char ** cmdline_p)
277 char c = ' ', *to = command_line, *from = COMMAND_LINE;
281 /* Save unparsed command line copy for /proc/cmdline */
282 memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
283 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
291 * If the BIOS enumerates physical processors before logical,
292 * maxcpus=N at enumeration-time can be used to disable HT.
294 else if (!memcmp(from, "maxcpus=", 8)) {
295 extern unsigned int maxcpus;
297 maxcpus = simple_strtoul(from + 8, NULL, 0);
301 /* "acpi=off" disables both ACPI table parsing and interpreter init */
302 if (!memcmp(from, "acpi=off", 8))
305 if (!memcmp(from, "acpi=force", 10)) {
306 /* add later when we do DMI horrors: */
311 /* acpi=ht just means: do ACPI MADT parsing
312 at bootup, but don't enable the full ACPI interpreter */
313 if (!memcmp(from, "acpi=ht", 7)) {
318 else if (!memcmp(from, "pci=noacpi", 10))
320 else if (!memcmp(from, "acpi=noirq", 10))
323 else if (!memcmp(from, "acpi_sci=edge", 13))
324 acpi_sci_flags.trigger = 1;
325 else if (!memcmp(from, "acpi_sci=level", 14))
326 acpi_sci_flags.trigger = 3;
327 else if (!memcmp(from, "acpi_sci=high", 13))
328 acpi_sci_flags.polarity = 1;
329 else if (!memcmp(from, "acpi_sci=low", 12))
330 acpi_sci_flags.polarity = 3;
332 /* acpi=strict disables out-of-spec workarounds */
333 else if (!memcmp(from, "acpi=strict", 11)) {
336 #ifdef CONFIG_X86_IO_APIC
337 else if (!memcmp(from, "acpi_skip_timer_override", 24))
338 acpi_skip_timer_override = 1;
342 if (!memcmp(from, "disable_timer_pin_1", 19))
343 disable_timer_pin_1 = 1;
344 if (!memcmp(from, "enable_timer_pin_1", 18))
345 disable_timer_pin_1 = -1;
347 if (!memcmp(from, "nolapic", 7) ||
348 !memcmp(from, "disableapic", 11))
351 if (!memcmp(from, "noapic", 6))
352 skip_ioapic_setup = 1;
354 /* Make sure to not confuse with apic= */
355 if (!memcmp(from, "apic", 4) &&
356 (from[4] == ' ' || from[4] == 0)) {
357 skip_ioapic_setup = 0;
361 if (!memcmp(from, "mem=", 4))
362 parse_memopt(from+4, &from);
364 if (!memcmp(from, "memmap=", 7)) {
365 /* exactmap option is for user defined memory */
366 if (!memcmp(from+7, "exactmap", 8)) {
367 #ifdef CONFIG_CRASH_DUMP
368 /* If we are doing a crash dump, we
369 * still need to know the real mem
370 * size before original memory map is
373 saved_max_pfn = e820_end_of_ram();
381 parse_memmapopt(from+7, &from);
387 if (!memcmp(from, "numa=", 5))
391 #ifdef CONFIG_GART_IOMMU
392 if (!memcmp(from,"iommu=",6)) {
397 if (!memcmp(from,"oops=panic", 10))
400 if (!memcmp(from, "noexec=", 7))
401 nonx_setup(from + 7);
404 /* crashkernel=size@addr specifies the location to reserve for
405 * a crash kernel. By reserving this memory we guarantee
406 * that linux never sets it up as a DMA target.
407 * Useful for holding code to do something appropriate
408 * after a kernel panic.
410 else if (!memcmp(from, "crashkernel=", 12)) {
411 unsigned long size, base;
412 size = memparse(from+12, &from);
414 base = memparse(from+1, &from);
415 /* FIXME: Do I want a sanity check
416 * to validate the memory range?
418 crashk_res.start = base;
419 crashk_res.end = base + size - 1;
424 #ifdef CONFIG_PROC_VMCORE
425 /* elfcorehdr= specifies the location of elf core header
426 * stored by the crashed kernel. This option will be passed
427 * by kexec loader to the capture kernel.
429 else if(!memcmp(from, "elfcorehdr=", 11))
430 elfcorehdr_addr = memparse(from+11, &from);
436 if (COMMAND_LINE_SIZE <= ++len)
441 printk(KERN_INFO "user-defined physical RAM map:\n");
442 e820_print_map("user");
445 *cmdline_p = command_line;
450 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
452 unsigned long bootmap_size, bootmap;
454 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
455 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
457 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
458 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
459 e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
460 reserve_bootmem(bootmap, bootmap_size);
464 /* Use inline assembly to define this because the nops are defined
465 as inline assembly strings in the include files and we cannot
466 get them easily into strings. */
467 asm("\t.data\nk8nops: "
468 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
471 extern unsigned char k8nops[];
472 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
478 k8nops + 1 + 2 + 3 + 4,
479 k8nops + 1 + 2 + 3 + 4 + 5,
480 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
481 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
484 /* Replace instructions with better alternatives for this CPU type.
486 This runs before SMP is initialized to avoid SMP problems with
487 self modifying code. This implies that asymmetric systems where
488 APs have fewer capabilities than the boot processor are not handled.
489 In this case boot with "noreplacement". */
490 void apply_alternatives(void *start, void *end)
494 for (a = start; (void *)a < end; a++) {
495 if (!boot_cpu_has(a->cpuid))
498 BUG_ON(a->replacementlen > a->instrlen);
499 __inline_memcpy(a->instr, a->replacement, a->replacementlen);
500 diff = a->instrlen - a->replacementlen;
502 /* Pad the rest with nops */
503 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
507 __inline_memcpy(a->instr + i, k8_nops[k], k);
512 static int no_replacement __initdata = 0;
514 void __init alternative_instructions(void)
516 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
519 apply_alternatives(__alt_instructions, __alt_instructions_end);
522 static int __init noreplacement_setup(char *s)
528 __setup("noreplacement", noreplacement_setup);
530 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
532 #ifdef CONFIG_EDD_MODULE
536 * copy_edd() - Copy the BIOS EDD information
537 * from boot_params into a safe place.
540 static inline void copy_edd(void)
542 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
543 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
544 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
545 edd.edd_info_nr = EDD_NR;
548 static inline void copy_edd(void)
553 #define EBDA_ADDR_POINTER 0x40E
554 static void __init reserve_ebda_region(void)
558 * there is a real-mode segmented pointer pointing to the
559 * 4K EBDA area at 0x40E
561 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
564 reserve_bootmem_generic(addr, PAGE_SIZE);
567 void __init setup_arch(char **cmdline_p)
569 unsigned long kernel_end;
571 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
572 drive_info = DRIVE_INFO;
573 screen_info = SCREEN_INFO;
574 edid_info = EDID_INFO;
575 saved_video_mode = SAVED_VIDEO_MODE;
576 bootloader_type = LOADER_TYPE;
578 #ifdef CONFIG_BLK_DEV_RAM
579 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
580 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
581 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
583 setup_memory_region();
586 if (!MOUNT_ROOT_RDONLY)
587 root_mountflags &= ~MS_RDONLY;
588 init_mm.start_code = (unsigned long) &_text;
589 init_mm.end_code = (unsigned long) &_etext;
590 init_mm.end_data = (unsigned long) &_edata;
591 init_mm.brk = (unsigned long) &_end;
593 code_resource.start = virt_to_phys(&_text);
594 code_resource.end = virt_to_phys(&_etext)-1;
595 data_resource.start = virt_to_phys(&_etext);
596 data_resource.end = virt_to_phys(&_edata)-1;
598 parse_cmdline_early(cmdline_p);
600 early_identify_cpu(&boot_cpu_data);
603 * partially used pages are not usable - thus
604 * we are rounding upwards:
606 end_pfn = e820_end_of_ram();
610 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
616 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
617 * Call this early for SRAT node setup.
619 acpi_boot_table_init();
622 #ifdef CONFIG_ACPI_NUMA
624 * Parse SRAT to discover nodes.
630 numa_initmem_init(0, end_pfn);
632 contig_initmem_init(0, end_pfn);
635 /* Reserve direct mapping */
636 reserve_bootmem_generic(table_start << PAGE_SHIFT,
637 (table_end - table_start) << PAGE_SHIFT);
640 kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
641 reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
644 * reserve physical page 0 - it's a special BIOS page on many boxes,
645 * enabling clean reboots, SMP operation, laptop functions.
647 reserve_bootmem_generic(0, PAGE_SIZE);
649 /* reserve ebda region */
650 reserve_ebda_region();
654 * But first pinch a few for the stack/trampoline stuff
655 * FIXME: Don't need the extra page at 4K, but need to fix
656 * trampoline before removing it. (see the GDT stuff)
658 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
660 /* Reserve SMP trampoline */
661 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
664 #ifdef CONFIG_ACPI_SLEEP
666 * Reserve low memory region for sleep support.
668 acpi_reserve_bootmem();
670 #ifdef CONFIG_X86_LOCAL_APIC
672 * Find and reserve possible boot-time SMP configuration:
676 #ifdef CONFIG_BLK_DEV_INITRD
677 if (LOADER_TYPE && INITRD_START) {
678 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
679 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
681 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
682 initrd_end = initrd_start+INITRD_SIZE;
685 printk(KERN_ERR "initrd extends beyond end of memory "
686 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
687 (unsigned long)(INITRD_START + INITRD_SIZE),
688 (unsigned long)(end_pfn << PAGE_SHIFT));
694 if (crashk_res.start != crashk_res.end) {
695 reserve_bootmem(crashk_res.start,
696 crashk_res.end - crashk_res.start + 1);
706 * Read APIC and some other early information from ACPI tables.
711 #ifdef CONFIG_X86_LOCAL_APIC
713 * get boot-time SMP configuration:
715 if (smp_found_config)
717 init_apic_mappings();
721 * Request address space for all standard RAM and ROM resources
722 * and also for regions reported as reserved by the e820.
725 e820_reserve_resources();
727 request_resource(&iomem_resource, &video_ram_resource);
731 /* request I/O space for devices used on all i[345]86 PCs */
732 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
733 request_resource(&ioport_resource, &standard_io_resources[i]);
738 #ifdef CONFIG_GART_IOMMU
743 #if defined(CONFIG_VGA_CONSOLE)
744 conswitchp = &vga_con;
745 #elif defined(CONFIG_DUMMY_CONSOLE)
746 conswitchp = &dummy_con;
751 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
755 if (c->extended_cpuid_level < 0x80000004)
758 v = (unsigned int *) c->x86_model_id;
759 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
760 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
761 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
762 c->x86_model_id[48] = 0;
767 static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
769 unsigned int n, dummy, eax, ebx, ecx, edx;
771 n = c->extended_cpuid_level;
773 if (n >= 0x80000005) {
774 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
775 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
776 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
777 c->x86_cache_size=(ecx>>24)+(edx>>24);
778 /* On K8 L1 TLB is inclusive, so don't count it */
782 if (n >= 0x80000006) {
783 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
784 ecx = cpuid_ecx(0x80000006);
785 c->x86_cache_size = ecx >> 16;
786 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
788 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
789 c->x86_cache_size, ecx & 0xFF);
793 cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
794 if (n >= 0x80000008) {
795 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
796 c->x86_virt_bits = (eax >> 8) & 0xff;
797 c->x86_phys_bits = eax & 0xff;
802 static int nearby_node(int apicid)
805 for (i = apicid - 1; i >= 0; i--) {
806 int node = apicid_to_node[i];
807 if (node != NUMA_NO_NODE && node_online(node))
810 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
811 int node = apicid_to_node[i];
812 if (node != NUMA_NO_NODE && node_online(node))
815 return first_node(node_online_map); /* Shouldn't happen */
820 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
821 * Assumes number of cores is a power of two.
823 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
826 int cpu = smp_processor_id();
830 unsigned apicid = phys_proc_id[cpu];
834 while ((1 << bits) < c->x86_max_cores)
837 /* Low order bits define the core id (index of core in socket) */
838 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
839 /* Convert the APIC ID into the socket ID */
840 phys_proc_id[cpu] >>= bits;
843 node = phys_proc_id[cpu];
844 if (apicid_to_node[apicid] != NUMA_NO_NODE)
845 node = apicid_to_node[apicid];
846 if (!node_online(node)) {
847 /* Two possibilities here:
848 - The CPU is missing memory and no node was created.
849 In that case try picking one from a nearby CPU
850 - The APIC IDs differ from the HyperTransport node IDs
851 which the K8 northbridge parsing fills in.
852 Assume they are all increased by a constant offset,
853 but in the same order as the HT nodeids.
854 If that doesn't result in a usable node fall back to the
855 path for the previous case. */
856 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
857 if (ht_nodeid >= 0 &&
858 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
859 node = apicid_to_node[ht_nodeid];
860 /* Pick a nearby node */
861 if (!node_online(node))
862 node = nearby_node(apicid);
864 numa_set_node(cpu, node);
866 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
867 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
872 static int __init init_amd(struct cpuinfo_x86 *c)
881 * Disable TLB flush filter by setting HWCR.FFDIS on K8
882 * bit 6 of msr C001_0015
884 * Errata 63 for SH-B3 steppings
885 * Errata 122 for all steppings (F+ have it disabled by default)
888 rdmsrl(MSR_K8_HWCR, value);
890 wrmsrl(MSR_K8_HWCR, value);
894 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
895 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
896 clear_bit(0*32+31, &c->x86_capability);
899 level = cpuid_eax(1);
900 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
901 set_bit(X86_FEATURE_K8_C, &c->x86_capability);
903 r = get_model_name(c);
907 /* Should distinguish Models here, but this is only
908 a fallback anyways. */
909 strcpy(c->x86_model_id, "Hammer");
913 display_cacheinfo(c);
915 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
916 if (c->x86_power & (1<<8))
917 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
919 if (c->extended_cpuid_level >= 0x80000008) {
920 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
921 if (c->x86_max_cores & (c->x86_max_cores - 1))
922 c->x86_max_cores = 1;
930 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
933 u32 eax, ebx, ecx, edx;
934 int index_msb, core_bits;
935 int cpu = smp_processor_id();
937 cpuid(1, &eax, &ebx, &ecx, &edx);
939 c->apicid = phys_pkg_id(0);
941 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
944 smp_num_siblings = (ebx & 0xff0000) >> 16;
946 if (smp_num_siblings == 1) {
947 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
948 } else if (smp_num_siblings > 1 ) {
950 if (smp_num_siblings > NR_CPUS) {
951 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
952 smp_num_siblings = 1;
956 index_msb = get_count_order(smp_num_siblings);
957 phys_proc_id[cpu] = phys_pkg_id(index_msb);
959 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
962 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
964 index_msb = get_count_order(smp_num_siblings) ;
966 core_bits = get_count_order(c->x86_max_cores);
968 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
969 ((1 << core_bits) - 1);
971 if (c->x86_max_cores > 1)
972 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
979 * find out the number of processor cores on the die
981 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
985 if (c->cpuid_level < 4)
994 return ((eax >> 26) + 1);
999 static void srat_detect_node(void)
1003 int cpu = smp_processor_id();
1005 /* Don't do the funky fallback heuristics the AMD version employs
1007 node = apicid_to_node[hard_smp_processor_id()];
1008 if (node == NUMA_NO_NODE)
1010 numa_set_node(cpu, node);
1013 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
1017 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1022 init_intel_cacheinfo(c);
1023 n = c->extended_cpuid_level;
1024 if (n >= 0x80000008) {
1025 unsigned eax = cpuid_eax(0x80000008);
1026 c->x86_virt_bits = (eax >> 8) & 0xff;
1027 c->x86_phys_bits = eax & 0xff;
1028 /* CPUID workaround for Intel 0F34 CPU */
1029 if (c->x86_vendor == X86_VENDOR_INTEL &&
1030 c->x86 == 0xF && c->x86_model == 0x3 &&
1032 c->x86_phys_bits = 36;
1036 c->x86_cache_alignment = c->x86_clflush_size * 2;
1037 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1038 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1039 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1040 c->x86_max_cores = intel_num_cpu_cores(c);
1045 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
1047 char *v = c->x86_vendor_id;
1049 if (!strcmp(v, "AuthenticAMD"))
1050 c->x86_vendor = X86_VENDOR_AMD;
1051 else if (!strcmp(v, "GenuineIntel"))
1052 c->x86_vendor = X86_VENDOR_INTEL;
1054 c->x86_vendor = X86_VENDOR_UNKNOWN;
1057 struct cpu_model_info {
1060 char *model_names[16];
1063 /* Do some early cpuid on the boot CPU to get some parameters that are
1064 needed before check_bugs. Everything advanced is in identify_cpu
1066 void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1070 c->loops_per_jiffy = loops_per_jiffy;
1071 c->x86_cache_size = -1;
1072 c->x86_vendor = X86_VENDOR_UNKNOWN;
1073 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1074 c->x86_vendor_id[0] = '\0'; /* Unset */
1075 c->x86_model_id[0] = '\0'; /* Unset */
1076 c->x86_clflush_size = 64;
1077 c->x86_cache_alignment = c->x86_clflush_size;
1078 c->x86_max_cores = 1;
1079 c->extended_cpuid_level = 0;
1080 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1082 /* Get vendor name */
1083 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1084 (unsigned int *)&c->x86_vendor_id[0],
1085 (unsigned int *)&c->x86_vendor_id[8],
1086 (unsigned int *)&c->x86_vendor_id[4]);
1090 /* Initialize the standard set of capabilities */
1091 /* Note that the vendor-specific code below might override */
1093 /* Intel-defined flags: level 0x00000001 */
1094 if (c->cpuid_level >= 0x00000001) {
1096 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1097 &c->x86_capability[0]);
1098 c->x86 = (tfms >> 8) & 0xf;
1099 c->x86_model = (tfms >> 4) & 0xf;
1100 c->x86_mask = tfms & 0xf;
1102 c->x86 += (tfms >> 20) & 0xff;
1104 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1105 if (c->x86_capability[0] & (1<<19))
1106 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1108 /* Have CPUID level 0 only - unheard of */
1113 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
1118 * This does the hard work of actually picking apart the CPU stuff...
1120 void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1125 early_identify_cpu(c);
1127 /* AMD-defined flags: level 0x80000001 */
1128 xlvl = cpuid_eax(0x80000000);
1129 c->extended_cpuid_level = xlvl;
1130 if ((xlvl & 0xffff0000) == 0x80000000) {
1131 if (xlvl >= 0x80000001) {
1132 c->x86_capability[1] = cpuid_edx(0x80000001);
1133 c->x86_capability[6] = cpuid_ecx(0x80000001);
1135 if (xlvl >= 0x80000004)
1136 get_model_name(c); /* Default name */
1139 /* Transmeta-defined flags: level 0x80860001 */
1140 xlvl = cpuid_eax(0x80860000);
1141 if ((xlvl & 0xffff0000) == 0x80860000) {
1142 /* Don't set x86_cpuid_level here for now to not confuse. */
1143 if (xlvl >= 0x80860001)
1144 c->x86_capability[2] = cpuid_edx(0x80860001);
1148 * Vendor-specific initialization. In this section we
1149 * canonicalize the feature flags, meaning if there are
1150 * features a certain CPU supports which CPUID doesn't
1151 * tell us, CPUID claiming incorrect flags, or other bugs,
1152 * we handle them here.
1154 * At the end of this section, c->x86_capability better
1155 * indicate the features this CPU genuinely supports!
1157 switch (c->x86_vendor) {
1158 case X86_VENDOR_AMD:
1162 case X86_VENDOR_INTEL:
1166 case X86_VENDOR_UNKNOWN:
1168 display_cacheinfo(c);
1172 select_idle_routine(c);
1176 * On SMP, boot_cpu_data holds the common feature set between
1177 * all CPUs; so make sure that we indicate which features are
1178 * common between the CPUs. The first time this routine gets
1179 * executed, c == &boot_cpu_data.
1181 if (c != &boot_cpu_data) {
1182 /* AND the already accumulated flags with these */
1183 for (i = 0 ; i < NCAPINTS ; i++)
1184 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1187 #ifdef CONFIG_X86_MCE
1190 if (c == &boot_cpu_data)
1195 numa_add_cpu(smp_processor_id());
1200 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1202 if (c->x86_model_id[0])
1203 printk("%s", c->x86_model_id);
1205 if (c->x86_mask || c->cpuid_level >= 0)
1206 printk(" stepping %02x\n", c->x86_mask);
1212 * Get CPU information for use by the procfs.
1215 static int show_cpuinfo(struct seq_file *m, void *v)
1217 struct cpuinfo_x86 *c = v;
1220 * These flag bits must match the definitions in <asm/cpufeature.h>.
1221 * NULL means this bit is undefined or reserved; either way it doesn't
1222 * have meaning as far as Linux is concerned. Note that it's important
1223 * to realize there is a difference between this table and CPUID -- if
1224 * applications want to get the raw CPUID data, they should access
1225 * /dev/cpu/<cpu_nr>/cpuid instead.
1227 static char *x86_cap_flags[] = {
1229 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1230 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1231 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1232 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1235 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1236 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1237 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1238 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
1240 /* Transmeta-defined */
1241 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1242 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1243 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1244 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1246 /* Other (Linux-defined) */
1247 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1248 "constant_tsc", NULL, NULL,
1249 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1250 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1251 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1253 /* Intel-defined (#2) */
1254 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1255 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1256 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1257 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1259 /* VIA/Cyrix/Centaur-defined */
1260 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1261 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1262 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1263 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1265 /* AMD-defined (#2) */
1266 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
1267 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1268 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1269 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1271 static char *x86_power_flags[] = {
1272 "ts", /* temperature sensor */
1273 "fid", /* frequency id control */
1274 "vid", /* voltage id control */
1275 "ttp", /* thermal trip */
1279 /* nothing */ /* constant_tsc - moved to flags */
1284 if (!cpu_online(c-cpu_data))
1288 seq_printf(m,"processor\t: %u\n"
1290 "cpu family\t: %d\n"
1292 "model name\t: %s\n",
1293 (unsigned)(c-cpu_data),
1294 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
1297 c->x86_model_id[0] ? c->x86_model_id : "unknown");
1299 if (c->x86_mask || c->cpuid_level >= 0)
1300 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
1302 seq_printf(m, "stepping\t: unknown\n");
1304 if (cpu_has(c,X86_FEATURE_TSC)) {
1305 unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
1308 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
1309 freq / 1000, (freq % 1000));
1313 if (c->x86_cache_size >= 0)
1314 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1317 if (smp_num_siblings * c->x86_max_cores > 1) {
1318 int cpu = c - cpu_data;
1319 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1320 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1321 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1322 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1328 "fpu_exception\t: yes\n"
1329 "cpuid level\t: %d\n"
1336 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1337 if ( test_bit(i, &c->x86_capability) &&
1338 x86_cap_flags[i] != NULL )
1339 seq_printf(m, " %s", x86_cap_flags[i]);
1342 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
1343 c->loops_per_jiffy/(500000/HZ),
1344 (c->loops_per_jiffy/(5000/HZ)) % 100);
1346 if (c->x86_tlbsize > 0)
1347 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
1348 seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
1349 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
1351 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
1352 c->x86_phys_bits, c->x86_virt_bits);
1354 seq_printf(m, "power management:");
1357 for (i = 0; i < 32; i++)
1358 if (c->x86_power & (1 << i)) {
1359 if (i < ARRAY_SIZE(x86_power_flags) &&
1361 seq_printf(m, "%s%s",
1362 x86_power_flags[i][0]?" ":"",
1363 x86_power_flags[i]);
1365 seq_printf(m, " [%d]", i);
1369 seq_printf(m, "\n\n");
1374 static void *c_start(struct seq_file *m, loff_t *pos)
1376 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1379 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1382 return c_start(m, pos);
1385 static void c_stop(struct seq_file *m, void *v)
1389 struct seq_operations cpuinfo_op = {
1393 .show = show_cpuinfo,