From: Keir Fraser Date: Thu, 27 May 2010 08:04:46 +0000 (+0100) Subject: x86: Speed up PV-guest superpage mapping X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~12076 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=06ef473029dcd540b7e4fb76f89cc54fd53a840e;p=xen.git x86: Speed up PV-guest superpage mapping The current version of superpage mapping takes a PGT_writable reference to every page in a superpage each time it is mapped. This is extremely slow, so slow that applications become unusable. My solution for this is to introduce a superpage table in the hypervisor, similar to the frametable structure for pages. Currently this table only has a type_info element. There are three types a superpage can have: SGT_mark, SGT_dynamic, or SGT_none. In normal operation, the first time a superpage is mapped, a PGT_writable reference is taken to each page in the superpage, and the superpage is set to type SGT_dynamic and the superpage typecount is incremented. On subsequent mappings and unmappings, only the superpage typecount changes. On the last unmap, the PGT_writable reference on each page is removed. The SGT_mark type is set and cleared through two new MMUEXT hypercalls, mark_super and unmark_super. When the mark_super hypercall is made, the superpage's type is set to SGT_mark and a PGT_writable reference is taken to its pages. On unmark, the type is cleared and the reference removed. If a superpage is already set to SGT_dynamic when mark_super is called, the type is changed to SGT_mark and no additional PGT_writable reference is taken. If there are still outstanding mappings of this superpage when unmark_super is called, the type is set to SGT_dynamic and the PGT_writable reference is not removed. Fast superpage mapping is only supported on 64-bit hypervisors. For 32-bit hypervisors, superpage mapping is supported but will be extremely slow. 
Signed-off-by: Dave McCracken --- diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 311c014e26..b483612a4f 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1739,6 +1739,8 @@ static int relinquish_memory( BUG(); } + clear_superpage_mark(page); + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index bebc9c58d8..fcb00f737b 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -151,8 +151,11 @@ unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS( #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) -int opt_allow_hugepage; -boolean_param("allowhugepage", opt_allow_hugepage); +int opt_allow_superpage; +boolean_param("allowsuperpage", opt_allow_superpage); + +static int get_superpage(unsigned long mfn, struct domain *d); +static void put_superpage(unsigned long mfn); #define l1_disallow_mask(d) \ ((d != dom_io) && \ @@ -171,6 +174,30 @@ l2_pgentry_t *compat_idle_pg_table_l2 = NULL; #define l3_disallow_mask(d) L3_DISALLOW_MASK #endif +#ifdef __x86_64__ +static void __init init_spagetable(void) +{ + unsigned long s, start = SPAGETABLE_VIRT_START; + unsigned long end = SPAGETABLE_VIRT_END; + unsigned long step, mfn; + unsigned int max_entries; + + step = 1UL << PAGETABLE_ORDER; + max_entries = (max_pdx + ((1UL<> SUPERPAGE_ORDER; + end = start + (((max_entries * sizeof(*spage_table)) + + ((1UL< mfn ) - put_data_page(mfn_to_page(m), writeable); - return -EINVAL; - } - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - rc = 1; + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn); + return -EINVAL; } - return rc; + return get_superpage(mfn, d); } @@ -1100,19 +1113,9 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) return 1; if ( l2e_get_flags(l2e) & _PAGE_PSE ) - { - unsigned long mfn = l2e_get_pfn(l2e), m = mfn; - int writeable = l2e_get_flags(l2e) & _PAGE_RW; - - ASSERT(!(mfn & 
(L1_PAGETABLE_ENTRIES-1))); - do { - put_data_page(mfn_to_page(m), writeable); - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - } + put_superpage(l2e_get_pfn(l2e)); else - { put_page_and_type(l2e_get_page(l2e)); - } return 0; } @@ -2445,6 +2448,209 @@ int get_page_type_preemptible(struct page_info *page, unsigned long type) return __get_page_type(page, type, 1); } +static int get_spage_pages(struct page_info *page, struct domain *d) +{ + int i; + + for (i = 0; i < (1<= 0) + put_page_and_type(--page); + return 0; + } + } + return 1; +} + +static void put_spage_pages(struct page_info *page) +{ + int i; + + for (i = 0; i < (1<type_info; + int pages_done = 0; + + ASSERT(opt_allow_superpage); + + do { + x = y; + nx = x + 1; + if ( (x & SGT_type_mask) == SGT_mark ) + { + MEM_LOG("Duplicate superpage mark attempt mfn %lx", + spage_to_mfn(spage)); + if ( pages_done ) + put_spage_pages(spage_to_page(spage)); + return -EINVAL; + } + if ( (x & SGT_type_mask) == SGT_dynamic ) + { + if ( pages_done ) + { + put_spage_pages(spage_to_page(spage)); + pages_done = 0; + } + } + else if ( !pages_done ) + { + if ( !get_spage_pages(spage_to_page(spage), d) ) + { + MEM_LOG("Superpage type conflict in mark attempt mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + } + nx = (nx & ~SGT_type_mask) | SGT_mark; + + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + return 0; +} + +static int unmark_superpage(struct spage_info *spage) +{ + unsigned long x, nx, y = spage->type_info; + unsigned long do_pages = 0; + + ASSERT(opt_allow_superpage); + + do { + x = y; + nx = x - 1; + if ( (x & SGT_type_mask) != SGT_mark ) + { + MEM_LOG("Attempt to unmark unmarked superpage mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + if ( (nx & SGT_count_mask) == 0 ) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + else + { + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + if ( 
do_pages ) + put_spage_pages(spage_to_page(spage)); + + return 0; +} + +void clear_superpage_mark(struct page_info *page) +{ + struct spage_info *spage; + + if ( !opt_allow_superpage ) + return; + + spage = page_to_spage(page); + if ((spage->type_info & SGT_type_mask) == SGT_mark) + unmark_superpage(spage); + +} + +static int get_superpage(unsigned long mfn, struct domain *d) +{ + struct spage_info *spage; + unsigned long x, nx, y; + int pages_done = 0; + + ASSERT(opt_allow_superpage); + + spage = mfn_to_spage(mfn); + y = spage->type_info; + do { + x = y; + nx = x + 1; + if ( (x & SGT_type_mask) != SGT_none ) + { + if ( pages_done ) + { + put_spage_pages(spage_to_page(spage)); + pages_done = 0; + } + } + else + { + if ( !get_spage_pages(spage_to_page(spage), d) ) + { + MEM_LOG("Type conflict on superpage mapping mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + + return 0; +} + +static void put_superpage(unsigned long mfn) +{ + struct spage_info *spage; + unsigned long x, nx, y; + unsigned long do_pages = 0; + + ASSERT(opt_allow_superpage); + + spage = mfn_to_spage(mfn); + y = spage->type_info; + do { + x = y; + nx = x - 1; + if ((x & SGT_type_mask) == SGT_dynamic) + { + if ((nx & SGT_count_mask) == 0) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + } + + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + + if (do_pages) + put_spage_pages(spage_to_page(spage)); + + return; +} + +#else /* __i386__ */ + +void clear_superpage_mark(struct page_info *page) +{ +} + +static int get_superpage(unsigned long mfn, struct domain *d) +{ + return get_spage_pages(mfn_to_page(mfn), d); +} + +static void put_superpage(unsigned long mfn) +{ + put_spage_pages(mfn_to_page(mfn)); +} + +#endif + void cleanup_page_cacheattr(struct page_info *page) { uint32_t cacheattr = @@ -3002,6 +3208,60 @@ int do_mmuext_op( break; } +#ifdef 
__x86_64__ + case MMUEXT_MARK_SUPER: + { + unsigned long mfn; + struct spage_info *spage; + + mfn = op.arg1.mfn; + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + + if ( !opt_allow_superpage ) + { + MEM_LOG("Superpages disallowed"); + okay = 0; + rc = -ENOSYS; + break; + } + + spage = mfn_to_spage(mfn); + okay = (mark_superpage(spage, d) >= 0); + break; + } + + case MMUEXT_UNMARK_SUPER: + { + unsigned long mfn; + struct spage_info *spage; + + mfn = op.arg1.mfn; + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + + if ( !opt_allow_superpage ) + { + MEM_LOG("Superpages disallowed"); + okay = 0; + rc = -ENOSYS; + break; + } + + spage = mfn_to_spage(mfn); + okay = (unmark_superpage(spage) >= 0); + break; + } +#endif + default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); rc = -ENOSYS; diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index fc540b6c9c..4235c3524f 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -225,6 +225,11 @@ extern unsigned int video_mode, video_flags; /* Slot 261: xen text, static data and bss (1GB). */ #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + GB(1)) +/* Slot 261: superpage information array (20MB). */ +#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START +#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \ + sizeof(struct spage_info)) +#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) /* Slot 261: page-frame information array (40GB). 
*/ #define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START #define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \ diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h index e7814a9da2..34c6575bd1 100644 --- a/xen/include/asm-x86/guest_pt.h +++ b/xen/include/asm-x86/guest_pt.h @@ -187,7 +187,7 @@ guest_supports_superpages(struct vcpu *v) * CR4.PSE is set or the guest is in PAE or long mode. * It's also used in the dummy PT for vcpus with CR4.PG cleared. */ return (!is_hvm_vcpu(v) - ? opt_allow_hugepage + ? opt_allow_superpage : (GUEST_PAGING_LEVELS != 2 || !hvm_paging_enabled(v) || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 26a1a680f2..f1d6c8dc1c 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -214,6 +214,23 @@ struct page_info #define PGC_count_width PG_shift(9) #define PGC_count_mask ((1UL<> PAGE_SHIFT)) +/* Convert between machine frame numbers and spage-info structures. */ +#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn)) +#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table)) + +/* Convert between page-info structures and spage-info structures. */ +#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))) +#define spage_to_page(spage) (frame_table+(((spage)-spage_table)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) + /* * We define non-underscored wrappers for above conversion functions. These are * overridden in various source files while underscored versions remain intact. 
@@ -251,6 +259,8 @@ void copy_page_sse2(void *, const void *); #define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma)) #define mfn_to_page(mfn) __mfn_to_page(mfn) #define page_to_mfn(pg) __page_to_mfn(pg) +#define mfn_to_spage(mfn) __mfn_to_spage(mfn) +#define spage_to_mfn(pg) __spage_to_mfn(pg) #define maddr_to_page(ma) __maddr_to_page(ma) #define page_to_maddr(pg) __page_to_maddr(pg) #define virt_to_page(va) __virt_to_page(va) diff --git a/xen/include/asm-x86/x86_32/page.h b/xen/include/asm-x86/x86_32/page.h index bae4d8ea2d..98c20ef27b 100644 --- a/xen/include/asm-x86/x86_32/page.h +++ b/xen/include/asm-x86/x86_32/page.h @@ -6,6 +6,7 @@ #define L2_PAGETABLE_SHIFT 21 #define L3_PAGETABLE_SHIFT 30 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -13,6 +14,7 @@ #define L2_PAGETABLE_ENTRIES (1<>(SUPERPAGE_SHIFT-PAGE_SHIFT)) +#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)) + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END); diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h index f1448c12a8..2e61709e29 100644 --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -7,6 +7,7 @@ #define L3_PAGETABLE_SHIFT 30 #define L4_PAGETABLE_SHIFT 39 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -15,6 +16,7 @@ #define L3_PAGETABLE_ENTRIES (1<>(SUPERPAGE_SHIFT-PAGE_SHIFT)) - spage_table) +#define pdx_to_spage(pdx) (spage_table + ((pdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) /* * Note: These are solely for the use by page_{get,set}_owner(), and * therefore don't need to handle the XEN_VIRT_{START,END} range. 
@@ -64,6 +68,16 @@ static inline unsigned long pdx_to_pfn(unsigned long pdx) ((pdx << pfn_pdx_hole_shift) & pfn_top_mask); } +static inline unsigned long pfn_to_sdx(unsigned long pfn) +{ + return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT); +} + +static inline unsigned long sdx_to_pfn(unsigned long sdx) +{ + return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT)); +} + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= XEN_VIRT_START);