x86: debugging code for testing 16Tb support on smaller memory systems
authorJan Beulich <jbeulich@suse.com>
Fri, 8 Feb 2013 10:06:04 +0000 (11:06 +0100)
committerJan Beulich <jbeulich@suse.com>
Fri, 8 Feb 2013 10:06:04 +0000 (11:06 +0100)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
docs/misc/xen-command-line.markdown
xen/arch/x86/domain_page.c
xen/arch/x86/mm.c
xen/arch/x86/setup.c
xen/common/page_alloc.c
xen/include/asm-x86/setup.h

index 6ad63094b2a01a1300afc8e3d9c0424341d33ed4..32443b0d0c87d31ccd10efee1d6872f84e114895 100644 (file)
@@ -546,6 +546,12 @@ Paging (HAP).
 ### hvm\_port80
 > `= <boolean>`
 
+### highmem-start
+> `= <size>`
+
+Specify the memory boundary past which memory will be treated as highmem (x86
+debug hypervisor only).
+
 ### idle\_latency\_factor
 > `= <integer>`
 
index 1beae8c1601dccd86dac19de0d8c3dfacb2f8f0b..acc948698113929f80c769af48d56eeaacde78bf 100644 (file)
@@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn)
     struct mapcache_vcpu *vcache;
     struct vcpu_maphash_entry *hashent;
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     v = mapcache_current_vcpu();
     if ( !v || is_hvm_vcpu(v) )
@@ -249,8 +251,10 @@ int mapcache_domain_init(struct domain *d)
     if ( is_hvm_domain(d) || is_idle_domain(d) )
         return 0;
 
+#ifdef NDEBUG
     if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return 0;
+#endif
 
     dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
     d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf);
@@ -418,8 +422,10 @@ void *map_domain_page_global(unsigned long mfn)
 
     ASSERT(!in_irq() && local_irq_is_enabled());
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     spin_lock(&globalmap_lock);
 
index aefac6d959daf9faa2a908c485a0f9d3a842c0ee..fce73e159a381e72b70c71fea9ae25ad7119d71b 100644 (file)
@@ -250,6 +250,14 @@ void __init init_frametable(void)
         init_spagetable();
 }
 
+#ifndef NDEBUG
+static unsigned int __read_mostly root_pgt_pv_xen_slots
+    = ROOT_PAGETABLE_PV_XEN_SLOTS;
+static l4_pgentry_t __read_mostly split_l4e;
+#else
+#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
+#endif
+
 void __init arch_init_memory(void)
 {
     unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
@@ -344,6 +352,40 @@ void __init arch_init_memory(void)
     efi_init_memory();
 
     mem_sharing_init();
+
+#ifndef NDEBUG
+    if ( highmem_start )
+    {
+        unsigned long split_va = (unsigned long)__va(highmem_start);
+
+        if ( split_va < HYPERVISOR_VIRT_END &&
+             split_va - 1 == (unsigned long)__va(highmem_start - 1) )
+        {
+            root_pgt_pv_xen_slots = l4_table_offset(split_va) -
+                                    ROOT_PAGETABLE_FIRST_XEN_SLOT;
+            ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS);
+            if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) )
+            {
+                l3_pgentry_t *l3tab = alloc_xen_pagetable();
+
+                if ( l3tab )
+                {
+                    const l3_pgentry_t *l3idle =
+                        l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]);
+
+                    for ( i = 0; i < l3_table_offset(split_va); ++i )
+                        l3tab[i] = l3idle[i];
+                    for ( ; i <= L3_PAGETABLE_ENTRIES; ++i )
+                        l3tab[i] = l3e_empty();
+                    split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
+                                             __PAGE_HYPERVISOR);
+                }
+                else
+                    ++root_pgt_pv_xen_slots;
+            }
+        }
+    }
+#endif
 }
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
@@ -1320,7 +1362,12 @@ void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
-           ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));
+           root_pgt_pv_xen_slots * sizeof(l4_pgentry_t));
+#ifndef NDEBUG
+    if ( l4e_get_intpte(split_l4e) )
+        l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] =
+            split_l4e;
+#endif
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
index 2b9dbe3e887b4a46373c7dd3679e2249a2a40e65..7437c84db593fc8eaa3cfd5999957ff923c168c1 100644 (file)
@@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setup);
 s8 __read_mostly xen_cpuidle = -1;
 boolean_param("cpuidle", xen_cpuidle);
 
+#ifndef NDEBUG
+unsigned long __initdata highmem_start;
+size_param("highmem-start", highmem_start);
+#endif
+
 cpumask_t __read_mostly cpu_present_map;
 
 unsigned long __read_mostly xen_phys_start;
@@ -788,6 +793,14 @@ void __init __start_xen(unsigned long mbi_p)
     modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
     bootstrap_map(NULL);
 
+#ifndef highmem_start
+    /* Don't allow split below 4Gb. */
+    if ( highmem_start < GB(4) )
+        highmem_start = 0;
+    else /* align to L3 entry boundary */
+        highmem_start &= ~((1UL << L3_PAGETABLE_SHIFT) - 1);
+#endif
+
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -916,6 +929,9 @@ void __init __start_xen(unsigned long mbi_p)
             /* Don't overlap with other modules. */
             end = consider_modules(s, e, size, mod, mbi->mods_count, j);
 
+            if ( highmem_start && end > highmem_start )
+                continue;
+
             if ( s < end &&
                  (headroom ||
                   ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
@@ -957,6 +973,8 @@ void __init __start_xen(unsigned long mbi_p)
     kexec_reserve_area(&boot_e820);
 
     setup_max_pdx();
+    if ( highmem_start )
+        xenheap_max_mfn(PFN_DOWN(highmem_start));
 
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
@@ -1128,7 +1146,8 @@ void __init __start_xen(unsigned long mbi_p)
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
         uint64_t mask = PAGE_SIZE - 1;
 
-        xenheap_max_mfn(limit);
+        if ( !highmem_start )
+            xenheap_max_mfn(limit);
 
         /* Pass the remaining memory to the allocator. */
         for ( i = 0; i < boot_e820.nr_map; i++ )
index 9593743ef5eac05a62771e72755c080b0a96d503..6b8bc398974925fb92f0a89087dd7ad190bb1672 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/flushtlb.h>
 #ifdef CONFIG_X86
 #include <asm/p2m.h>
+#include <asm/setup.h> /* for highmem_start only */
 #else
 #define p2m_pod_offline_or_broken_hit(pg) 0
 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL)
@@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages(
         pg = (r->e - nr_pfns) & ~(pfn_align - 1);
         if ( pg < r->s )
             continue;
+
+#if defined(CONFIG_X86) && !defined(NDEBUG)
+        /*
+         * Filtering pfn_align == 1 since the only allocations using a bigger
+         * alignment are the ones used for setting up the frame table chunks.
+         * Those allocations get remapped anyway, i.e. them not having 1:1
+         * mappings always accessible is not a problem.
+         */
+        if ( highmem_start && pfn_align == 1 &&
+             r->e > PFN_DOWN(highmem_start) )
+        {
+            pg = r->s;
+            if ( pg + nr_pfns > PFN_DOWN(highmem_start) )
+                continue;
+            r->s = pg + nr_pfns;
+            return pg;
+        }
+#endif
+
         _e = r->e;
         r->e = pg;
         bootmem_region_add(pg + nr_pfns, _e);
index 4580f807c876696b9d19c2f151ef4d3ced467877..3039e1b49b265dc67ced064626a48fdd6732ac6a 100644 (file)
@@ -43,4 +43,10 @@ void microcode_grab_module(
 
 extern uint8_t kbd_shift_flags;
 
+#ifdef NDEBUG
+# define highmem_start 0
+#else
+extern unsigned long highmem_start;
+#endif
+
 #endif