Fix SMP booting: x86/64 startup initialisation fixes and so on.
author: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Thu, 25 Aug 2005 13:27:10 +0000 (13:27 +0000)
committer: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Thu, 25 Aug 2005 13:27:10 +0000 (13:27 +0000)
Signed-off-by: Keir Fraser <keir@xensource.com>
linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c

index 7a072454bf46308138ed443c746df4cf3a41469e..31ded2cf5803f9b078c2f0bd8ab052dddeb79490 100644 (file)
 
 #include "cpu.h"
 
+#ifndef CONFIG_XEN
 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
 
 static int cachesize_override __initdata = -1;
 static int disable_x86_fxsr __initdata = 0;
index 96c4317c71475faa65aa08380bb1f3f23d527d45..9e3efcd0adbb948b21b13180e4c91c75561b438c 100644 (file)
@@ -131,15 +131,7 @@ static void map_cpu_to_logical_apicid(void);
  */
 void __init smp_alloc_memory(void)
 {
-#if 1
-       int cpu;
-
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
-               cpu_gdt_descr[cpu].address = (unsigned long)
-                       alloc_bootmem_low_pages(PAGE_SIZE);
-               /* XXX free unused pages later */
-       }
-#else
+#if 0
        trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
        /*
         * Has to be in very low memory so we can execute
@@ -861,8 +853,8 @@ static int __init do_boot_cpu(int apicid)
        atomic_set(&init_deasserted, 0);
 
 #if 1
-       if (cpu_gdt_descr[0].size > PAGE_SIZE)
-               BUG();
+       cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+       BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
        cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
        printk("GDT: copying %d bytes from %lx to %lx\n",
                cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
index d34ca827e623c6413534eb3b631c00e63b239b87..80775ba7aeb6a119e4f2999f84d4c4201a53cc21 100644 (file)
@@ -871,6 +871,7 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
        }
 }
 
+#ifndef CONFIG_XEN
 fastcall void setup_x86_bogus_stack(unsigned char * stk)
 {
        unsigned long *switch16_ptr, *switch32_ptr;
@@ -915,6 +916,7 @@ fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
        memcpy(stack32, stack16, len);
        return stack32;
 }
+#endif
 
 /*
  *  'math_state_restore()' saves the current math information in the
index 7bc0773a2d6e6b9e206766ae504bf0c3bf26fe60..56dfeef889da7502a38a5c4f8da15413c42bc687 100644 (file)
@@ -6,6 +6,8 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 
+#ifndef CONFIG_XEN
+
 /* Simple VGA output */
 
 #ifdef __i386__
@@ -59,7 +61,6 @@ static struct console early_vga_console = {
        .index =        -1,
 };
 
-#ifndef CONFIG_XEN
 /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ 
 
 static int early_serial_base = 0x3f8;  /* ttyS0 */
@@ -148,7 +149,8 @@ static __init void early_serial_init(char *s)
        outb((divisor >> 8) & 0xff, early_serial_base + DLH); 
        outb(c & ~DLAB, early_serial_base + LCR);
 }
-#else
+
+#else /* CONFIG_XEN */
 
 static void
 early_serial_write(struct console *con, const char *s, unsigned count)
@@ -167,6 +169,13 @@ early_serial_write(struct console *con, const char *s, unsigned count)
 static __init void early_serial_init(char *s)
 {
 }
+
+/*
+ * No early VGA console on Xen, as we do not have convenient ISA-space
+ * mappings. Someone should fix this for domain 0. For now, use fake serial.
+ */
+#define early_vga_console early_serial_console
+
 #endif
 
 static struct console early_serial_console = {
index e1f03535de3d26d09581d1a64aac368c6a998da1..252af49d555818b7aab3038143b71a7f9656b635 100644 (file)
@@ -206,11 +206,13 @@ ENTRY(cpu_gdt_table)
        .quad   0,0,0                   /* three TLS descriptors */ 
        .quad   0                       /* unused now?   __KERNEL16_CS - 16bit PM for S3 wakeup. */
 
-gdt_end:       
+gdt_end:
+#if 0
        /* asm/segment.h:GDT_ENTRIES must match this */ 
        /* This should be a multiple of the cache line size */
        /* GDTs of other CPUs: */       
        .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
+#endif
 
 .org 0x8000
 ENTRY(empty_zero_page)
index b7f5de7cbeb58adb9ab53b4159807f2e2e8829ba..73674406ba5063a0e1ff498a746cae6307f10ccf 100644 (file)
@@ -536,48 +536,7 @@ static inline void copy_edd(void)
 }
 #endif
 
-#ifdef CONFIG_XEN
-#define reserve_ebda_region() void(0)
-
-static void __init print_memory_map(char *who)
-{
-        int i;
-
-        for (i = 0; i < e820.nr_map; i++) {
-                early_printk(" %s: %016Lx - %016Lx ", who,
-                        e820.map[i].addr,
-                        e820.map[i].addr + e820.map[i].size);
-                switch (e820.map[i].type) {
-                case E820_RAM:  early_printk("(usable)\n");
-                                break;
-                case E820_RESERVED:
-                                early_printk("(reserved)\n");
-                                break;
-                case E820_ACPI:
-                                early_printk("(ACPI data)\n");
-                                break;
-                case E820_NVS:
-                                early_printk("(ACPI NVS)\n");
-                                break;
-                default:        early_printk("type %u\n", e820.map[i].type);
-                                break;
-                }
-        }
-}
-
-void __init smp_alloc_memory(void)
-{
-       int cpu;
-
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
-               cpu_gdt_descr[cpu].address = (unsigned long)
-                       alloc_bootmem_low_pages(PAGE_SIZE);
-               /* XXX free unused pages later */
-       }
-}
-
-
-#else
+#ifndef CONFIG_XEN
 #define EBDA_ADDR_POINTER 0x40E
 static void __init reserve_ebda_region(void)
 {
@@ -628,7 +587,6 @@ void __init setup_arch(char **cmdline_p)
                             VMASST_TYPE_writable_pagetables);
 
         ARCH_SETUP
-        print_memory_map(machine_specific_memory_setup());
 #else
        ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
        drive_info = DRIVE_INFO;
@@ -744,9 +702,6 @@ void __init setup_arch(char **cmdline_p)
                }
        }
 #endif
-#ifdef CONFIG_SMP
-       smp_alloc_memory();
-#endif
 #else  /* CONFIG_XEN */
 #ifdef CONFIG_BLK_DEV_INITRD
        if (LOADER_TYPE && INITRD_START) {
index bb2f550330d2d869f34fd89579164b81e5b289db..6d2e43d67e6196313cfc372ba318b79f615e48b9 100644 (file)
@@ -286,10 +286,10 @@ void __init cpu_init (void)
 
        memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
 #else
-       memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
+       memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
            GDT_ENTRY_TLS_ENTRIES * 8);
 
-    cpu_gdt_init(&cpu_gdt_descr[cpu]);
+       cpu_gdt_init(&cpu_gdt_descr[cpu]);
 #endif
        
        /*
index a8f95211b68bc4bf5c2023987e0ff6d3502bd43a..4245ac202dab016b726f27b94376083383ade726 100644 (file)
@@ -739,8 +739,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
        atomic_set(&init_deasserted, 0);
 
 #ifdef CONFIG_XEN
-       if (cpu_gdt_descr[0].size > PAGE_SIZE)
-               BUG();
+       cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+       BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
        cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
        memcpy((void *)cpu_gdt_descr[cpu].address,
                (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
@@ -798,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
        ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
        boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+       if (boot_error)
+               printk("boot error: %ld\n", boot_error);
 
        if (!boot_error) {
                /*
index 0618209c9ff3d092c6084cd5dd710b390d99e058..74ee49b2de2a2bd5005659b7ab5873be4066e382 100644 (file)
@@ -536,70 +536,38 @@ static void __init find_early_table_space(unsigned long end)
                          round_up(ptes * 8, PAGE_SIZE); 
 }
 
-static void xen_copy_pt(void)
-{
-       unsigned long va = __START_KERNEL_map;
-       unsigned long addr, *pte_page;
-       int i;
-       pud_t *pud; pmd_t *pmd; pte_t *pte;
-       unsigned long *page = (unsigned long *) init_level4_pgt;
-
-       addr = (unsigned long) page[pgd_index(va)];
-       addr_to_page(addr, page);
-
-       pud = (pud_t *) &page[pud_index(va)];
-       addr = page[pud_index(va)];
-       addr_to_page(addr, page);
-
-       level3_kernel_pgt[pud_index(va)] = 
-               __pud(__pa_symbol(level2_kernel_pgt) | _KERNPG_TABLE | _PAGE_USER);
-
-       for (;;) {
-               pmd = (pmd_t *) &page[pmd_index(va)];
-               if (pmd_present(*pmd)) {
-                       level2_kernel_pgt[pmd_index(va)] = *pmd;
-                       /*
-                        * if pmd is valid, check pte.
-                        */
-                       addr = page[pmd_index(va)];
-                       addr_to_page(addr, pte_page);
-                       
-                       for (i = 0; i < PTRS_PER_PTE; i++) {
-                               pte = (pte_t *) &pte_page[pte_index(va)];
-                               if (pte_present(*pte))
-                                       va += PAGE_SIZE;
-                               else
-                                   break;
-                       }
-
-               } else
-                   break;
-       }
-
-       init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
-               mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
-}
-
 void __init xen_init_pt(void)
 {
+       unsigned long addr, *page;
        int i;
 
        for (i = 0; i < NR_CPUS; i++)
                per_cpu(cur_pgd, i) = init_mm.pgd;
 
-       memcpy((void *)init_level4_pgt, 
-              (void *)xen_start_info.pt_base, PAGE_SIZE);
-
+       memset((void *)init_level4_pgt,   0, PAGE_SIZE);
        memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
        memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
 
-       xen_copy_pt();
+       /* Find the initial pte page that was built for us. */
+       page = (unsigned long *)xen_start_info.pt_base;
+       addr = page[pgd_index(__START_KERNEL_map)];
+       addr_to_page(addr, page);
+       addr = page[pud_index(__START_KERNEL_map)];
+       addr_to_page(addr, page);
+
+       /* Construct mapping of initial pte page in our own directories. */
+       init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
+               mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
+       level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
+               __pud(__pa_symbol(level2_kernel_pgt) |
+                     _KERNPG_TABLE | _PAGE_USER);
+        memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
 
        make_page_readonly(init_level4_pgt);
+       make_page_readonly(init_level4_user_pgt);
        make_page_readonly(level3_kernel_pgt);
+       make_page_readonly(level3_user_pgt);
        make_page_readonly(level2_kernel_pgt);
-       make_page_readonly(init_level4_user_pgt);
-       make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
 
        xen_pgd_pin(__pa_symbol(init_level4_pgt));
        xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
@@ -609,7 +577,6 @@ void __init xen_init_pt(void)
 
        set_pgd((pgd_t *)(init_level4_user_pgt + 511), 
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
-
 }
 
 /*
@@ -617,69 +584,58 @@ void __init xen_init_pt(void)
  * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
  * mapping for early initialization.
  */
-
-#define MIN_INIT_SIZE  0x800000
 static unsigned long current_size, extended_size;
 
 void __init extend_init_mapping(void) 
 {
        unsigned long va = __START_KERNEL_map;
-       unsigned long addr, *pte_page;
-
-       unsigned long phys;
+       unsigned long phys, addr, *pte_page;
         pmd_t *pmd;
        pte_t *pte, new_pte;
        unsigned long *page = (unsigned long *) init_level4_pgt;
        int i;
 
-       addr = (unsigned long) page[pgd_index(va)];
+       addr = page[pgd_index(va)];
        addr_to_page(addr, page);
-
        addr = page[pud_index(va)];
        addr_to_page(addr, page);
 
        for (;;) {
-               pmd = (pmd_t *) &page[pmd_index(va)];
-               if (pmd_present(*pmd)) {
-                       /*
-                        * if pmd is valid, check pte.
-                        */
-                       addr = page[pmd_index(va)];
-                       addr_to_page(addr, pte_page);
-                       
-                       for (i = 0; i < PTRS_PER_PTE; i++) {
-                               pte = (pte_t *) &pte_page[pte_index(va)];
-                               
-                               if (pte_present(*pte)) {
-                                       va += PAGE_SIZE;
-                                       current_size += PAGE_SIZE;
-                               } else
-                                   break;
-                       }
-
-               } else
-                   break;
+               pmd = (pmd_t *)&page[pmd_index(va)];
+               if (!pmd_present(*pmd))
+                       break;
+               addr = page[pmd_index(va)];
+               addr_to_page(addr, pte_page);
+               for (i = 0; i < PTRS_PER_PTE; i++) {
+                       pte = (pte_t *) &pte_page[pte_index(va)];
+                       if (!pte_present(*pte))
+                               break;
+                       va += PAGE_SIZE;
+                       current_size += PAGE_SIZE;
+               }
        }
 
-       for (; va < __START_KERNEL_map + current_size + tables_space; ) {
+       while (va < __START_KERNEL_map + current_size + tables_space) {
                pmd = (pmd_t *) &page[pmd_index(va)];
-
-               if (pmd_none(*pmd)) {
-                       pte_page = (unsigned long *) alloc_static_page(&phys);
-                       make_page_readonly(pte_page);
-                       xen_pte_pin(phys);
-                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
-
-                       for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
-                               new_pte = pfn_pte((va -  __START_KERNEL_map) >> PAGE_SHIFT, 
-                                                 __pgprot(_KERNPG_TABLE | _PAGE_USER));
-
-                               pte = (pte_t *) &pte_page[pte_index(va)];
-                               xen_l1_entry_update(pte, new_pte);
-                               extended_size += PAGE_SIZE;
-                       }
-               } 
+               if (!pmd_none(*pmd))
+                       continue;
+               pte_page = (unsigned long *) alloc_static_page(&phys);
+               make_page_readonly(pte_page);
+               xen_pte_pin(phys);
+               set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+               for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+                       new_pte = pfn_pte(
+                               (va - __START_KERNEL_map) >> PAGE_SHIFT, 
+                               __pgprot(_KERNPG_TABLE | _PAGE_USER));
+                       pte = (pte_t *)&pte_page[pte_index(va)];
+                       xen_l1_entry_update(pte, new_pte);
+                       extended_size += PAGE_SIZE;
+               }
        }
+
+       /* Kill mapping of low 1MB. */
+       for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += PAGE_SIZE)
+               HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
 }
 
 
@@ -720,10 +676,6 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
 
         start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
 
-        /*
-         * TBD: Need to calculate at runtime
-         */
-
        __flush_tlb_all();
         init_mapping_done = 1;
 }