From: Keir Fraser
Date: Fri, 20 Jun 2008 17:40:32 +0000 (+0100)
Subject: Out-of-sync L1 shadows: Fixup Tables
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14192^2~35
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=7ee9469202cc089d284d303c0bd0a4d9d709790b;p=xen.git

Out-of-sync L1 shadows: Fixup Tables

This patch implements a very simple, non-complete reverse map of the
writable mappings of OOS pages, to avoid a brute-force search of the
shadows on resyncs.

Signed-off-by: Gianluca Guida
---
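[Illustration, not part of the patch: the structure introduced below is a
small, fixed-size hash table keyed by guest mfn, in which each entry
remembers one writable mapping of an out-of-sync page (the shadow L1 that
contains the mapping, plus the slot offset within it).  The standalone C
sketch below models that behaviour; all names and constants in it
(FT_HASH, FT_ENTRIES, ft_add, ...) are illustrative stand-ins for the
SHADOW_OOS_FT_* machinery, and the bucket indexing is the conventional
bucket*entries layout rather than the patch's _FIXUP_IDX().]

#include <stdio.h>

#define FT_HASH     13          /* hash buckets, cf. SHADOW_OOS_FT_HASH */
#define FT_ENTRIES   4          /* slots per bucket (simplified) */
#define INVALID_MFN (~0UL)

struct fixup {
    unsigned long gmfn;         /* out-of-sync guest page */
    unsigned long smfn;         /* shadow L1 holding a writable mapping */
    unsigned long off;          /* entry offset within that shadow L1 */
};

static struct fixup ft[FT_HASH * FT_ENTRIES];

static void ft_init(void)
{
    for ( int i = 0; i < FT_HASH * FT_ENTRIES; i++ )
        ft[i].gmfn = INVALID_MFN;
}

/* Record "smfn[off] maps gmfn writably".  Like oos_fixup_add(), this can
 * fail when the bucket is full: the table is a best-effort cache, and the
 * resync path falls back to a brute-force search of the shadows then. */
static int ft_add(unsigned long gmfn, unsigned long smfn, unsigned long off)
{
    int base = (int)(gmfn % FT_HASH) * FT_ENTRIES;

    for ( int i = 0; i < FT_ENTRIES; i++ )
    {
        struct fixup *f = &ft[base + i];
        if ( f->gmfn == gmfn && f->smfn == smfn && f->off == off )
            return 1;                   /* already recorded */
        if ( f->gmfn == INVALID_MFN )
        {
            f->gmfn = gmfn; f->smfn = smfn; f->off = off;
            return 1;
        }
    }
    return 0;                           /* bucket full */
}

/* Drop every recorded mapping of gmfn, invoking a callback for each one:
 * the analogue of oos_fixup_flush_gmfn() calling
 * sh_remove_write_access_from_sl1p() on resync. */
static void ft_flush_gmfn(unsigned long gmfn,
                          void (*unmap)(unsigned long, unsigned long))
{
    int base = (int)(gmfn % FT_HASH) * FT_ENTRIES;

    for ( int i = 0; i < FT_ENTRIES; i++ )
    {
        struct fixup *f = &ft[base + i];
        if ( f->gmfn != gmfn )
            continue;
        unmap(f->smfn, f->off);
        f->gmfn = INVALID_MFN;
    }
}

static void print_unmap(unsigned long smfn, unsigned long off)
{
    printf("clear _PAGE_RW in shadow %#lx, slot %lu\n", smfn, off);
}

int main(void)
{
    ft_init();
    ft_add(0x1234, 0xbeef, 42);
    ft_add(0x1234, 0xcafe, 7);
    ft_flush_gmfn(0x1234, print_unmap);
    return 0;
}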
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index dabfee29ae..be22e8e7cb 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -580,25 +580,126 @@ static inline void _sh_resync_l1(struct vcpu *v, mfn_t gmfn)
 #endif
 }
 
-/* Pull all the entries on an out-of-sync page back into sync. */
-static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
+#define _FIXUP_IDX(_b, _i) ((_b) * SHADOW_OOS_FT_HASH + (_i))
+
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn,
+                   mfn_t smfn, unsigned long off)
 {
-    struct page_info *pg = mfn_to_page(gmfn);
+    int idx, i, free = 0, free_slot = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
 
-    ASSERT(shadow_locked_by_me(v->domain));
-    ASSERT(mfn_is_out_of_sync(gmfn));
-    /* Guest page must be shadowed *only* as L1 when out of sync. */
-    ASSERT(!(mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask
-             & ~SHF_L1_ANY));
-    ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( !mfn_valid(fixups[_FIXUP_IDX(idx, i)].gmfn)
+             || !mfn_is_out_of_sync(fixups[_FIXUP_IDX(idx, i)].gmfn) )
+        {
+            free = 1;
+            free_slot = _FIXUP_IDX(idx, i);
+        }
+        else if ( (mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn))
+                  && (mfn_x(fixups[_FIXUP_IDX(idx, i)].smfn) == mfn_x(smfn))
+                  && (fixups[_FIXUP_IDX(idx, i)].off == off) )
+        {
+            perfc_incr(shadow_oos_fixup_no_add);
+            return;
+        }
+    }
 
-    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
-                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+    if ( free )
+    {
+        if ( !v->arch.paging.shadow.oos_fixup_used )
+            v->arch.paging.shadow.oos_fixup_used = 1;
+        fixups[free_slot].gmfn = gmfn;
+        fixups[free_slot].smfn = smfn;
+        fixups[free_slot].off = off;
+        perfc_incr(shadow_oos_fixup_add_ok);
+        return;
+    }
+
+
+    perfc_incr(shadow_oos_fixup_add_fail);
+}
+
+void oos_fixup_remove(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_remove);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn) )
+                fixups[_FIXUP_IDX(idx, i)].gmfn = _mfn(INVALID_MFN);
+    }
+}
+
+int oos_fixup_flush(struct vcpu *v)
+{
+    int i, rc = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+    perfc_incr(shadow_oos_fixup_flush);
+
+    if ( !v->arch.paging.shadow.oos_fixup_used )
+        return 0;
+
+    for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( mfn_valid(fixups[i].gmfn) )
+        {
+            if ( mfn_is_out_of_sync(fixups[i].gmfn) )
+                rc |= sh_remove_write_access_from_sl1p(v, fixups[i].gmfn,
+                                                       fixups[i].smfn,
+                                                       fixups[i].off);
+            fixups[i].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    v->arch.paging.shadow.oos_fixup_used = 0;
+
+    return rc;
+}
+
+int oos_fixup_flush_gmfn(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i, rc = 0;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_flush_gmfn);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+        {
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) != mfn_x(gmfn) )
+                continue;
+
+            rc |= sh_remove_write_access_from_sl1p(v,
+                                                   fixups[_FIXUP_IDX(idx,i)].gmfn,
+                                                   fixups[_FIXUP_IDX(idx,i)].smfn,
+                                                   fixups[_FIXUP_IDX(idx,i)].off);
+
+            fixups[_FIXUP_IDX(idx,i)].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    return rc;
+}
+
+static int oos_remove_write_access(struct vcpu *v, mfn_t gmfn, unsigned long va)
+{
+    int ftlb = 0;
+
+    ftlb |= oos_fixup_flush_gmfn(v, gmfn);
 
-    /* Need to pull write access so the page *stays* in sync.
-     * This might be rather slow but we hope that in the common case
-     * we're handling this pagetable after a guest walk has pulled
-     * write access the fast way. */
     switch ( sh_remove_write_access(v, gmfn, 0, va) )
     {
     default:
@@ -606,15 +707,45 @@ static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
         break;
 
     case 1:
-        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+        ftlb |= 1;
         break;
 
     case -1:
         /* An unfindable writeable typecount has appeared, probably via a
          * grant table entry: can't shoot the mapping, so try to unshadow
          * the page. If that doesn't work either, the guest is granting
-         * his pagetables and must be killed after all. */
+         * his pagetables and must be killed after all.
+         * This will flush the tlb, so we can return with no worries. */
         sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+        return 1;
+    }
+
+    if ( ftlb )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
+    return 0;
+}
+
+
+/* Pull all the entries on an out-of-sync page back into sync. */
+static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
+{
+    struct page_info *pg = mfn_to_page(gmfn);
+
+    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(mfn_is_out_of_sync(gmfn));
+    /* Guest page must be shadowed *only* as L1 when out of sync. */
+    ASSERT(!(mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask
+             & ~SHF_L1_ANY));
+    ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
+                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+
+    /* Need to pull write access so the page *stays* in sync. */
+    if ( oos_remove_write_access(v, gmfn, va) )
+    {
+        /* Page has been unshadowed. */
         return;
     }
 
@@ -753,6 +884,9 @@ void sh_resync_all(struct vcpu *v, int skip, int this, int others, int do_lockin
     if ( do_locking )
         shadow_lock(v->domain);
 
+    if ( oos_fixup_flush(v) )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
     /* First: resync all of this vcpu's oos pages */
     for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ )
         if ( mfn_valid(oos[idx]) )
@@ -882,7 +1016,10 @@ void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         /* Was the page out of sync? */
         if ( page_is_out_of_sync(page) )
+        {
             oos_hash_remove(v, gmfn);
+            oos_fixup_remove(v, gmfn);
+        }
 #endif
         clear_bit(_PGC_page_table, &page->count_info);
     }
@@ -2224,7 +2361,10 @@ int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
 
     /* Brute-force search of all the shadows, by walking the hash */
-    perfc_incr(shadow_writeable_bf);
+    if ( level == 0 )
+        perfc_incr(shadow_writeable_bf_1);
+    else
+        perfc_incr(shadow_writeable_bf);
     hash_foreach(v, callback_mask, callbacks, gmfn);
 
     /* If that didn't catch the mapping, then there's some non-pagetable
@@ -2244,7 +2384,34 @@ int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
     return 1;
 }
 
-
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long off)
+{
+    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+
+    ASSERT(mfn_valid(smfn));
+    ASSERT(mfn_valid(gmfn));
+
+    if ( sp->type == SH_type_l1_32_shadow )
+    {
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
+            (v, gmfn, smfn, off);
+    }
+#if CONFIG_PAGING_LEVELS >= 3
+    else if ( sp->type == SH_type_l1_pae_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
+            (v, gmfn, smfn, off);
+#if CONFIG_PAGING_LEVELS >= 4
+    else if ( sp->type == SH_type_l1_64_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
+            (v, gmfn, smfn, off);
+#endif
+#endif
+
+    return 0;
+}
+#endif
 
 /**************************************************************************/
 /* Remove all mappings of a guest frame from the shadow tables.
@@ -2581,6 +2748,25 @@ static void sh_update_paging_modes(struct vcpu *v)
     }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+    if ( v->arch.paging.shadow.oos_fixups == NULL )
+    {
+        int i;
+        v->arch.paging.shadow.oos_fixups =
+            alloc_xenheap_pages(SHADOW_OOS_FT_ORDER);
+        if ( v->arch.paging.shadow.oos_fixups == NULL )
+        {
+            SHADOW_ERROR("Could not allocate OOS fixup table"
+                         " for dom %u vcpu %u\n",
+                         v->domain->domain_id, v->vcpu_id);
+            domain_crash(v->domain);
+            return;
+        }
+        for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+            v->arch.paging.shadow.oos_fixups[i].gmfn = _mfn(INVALID_MFN);
+    }
+#endif /* OOS */
+
     // Valid transitions handled by this function:
     // - For PV guests:
     //     - after a shadow mode has been changed
@@ -2908,18 +3094,28 @@ void shadow_teardown(struct domain *d)
         }
     }
 
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
+#if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
     /* Free the virtual-TLB array attached to each vcpu */
     for_each_vcpu(d, v)
     {
+#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
         if ( v->arch.paging.vtlb )
         {
             xfree(v->arch.paging.vtlb);
             v->arch.paging.vtlb = NULL;
         }
-    }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+        if ( v->arch.paging.shadow.oos_fixups )
+        {
+            free_xenheap_pages(v->arch.paging.shadow.oos_fixups,
+                               SHADOW_OOS_FT_ORDER);
+        }
+#endif /* OOS */
+    }
+#endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
+
     list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
     {
         list_del(entry);
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 996595f073..08a3350197 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1409,6 +1409,9 @@ static int shadow_set_l1e(struct vcpu *v,
     int flags = 0;
     struct domain *d = v->domain;
     shadow_l1e_t old_sl1e;
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
+#endif
     ASSERT(sl1e != NULL);
     old_sl1e = *sl1e;
 
@@ -1425,8 +1428,18 @@ static int shadow_set_l1e(struct vcpu *v,
                 /* Doesn't look like a pagetable. */
                 flags |= SHADOW_SET_ERROR;
                 new_sl1e = shadow_l1e_empty();
-            } else {
+            }
+            else
+            {
                 shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+                if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn)
+                     && (shadow_l1e_get_flags(new_sl1e) & _PAGE_RW) )
+                {
+                    oos_fixup_add(v, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
+                }
+#endif
+            }
         }
     }
 
@@ -4238,6 +4251,56 @@ sh_update_cr3(struct vcpu *v, int do_locking)
 /**************************************************************************/
 /* Functions to revoke guest rights */
 
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+int sh_rm_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                 mfn_t smfn, unsigned long off)
+{
+    int r;
+    shadow_l1e_t *sl1p, sl1e;
+    struct shadow_page_info *sp;
+
+    ASSERT(mfn_valid(gmfn));
+    ASSERT(mfn_valid(smfn));
+
+    sp = mfn_to_shadow_page(smfn);
+
+    if ( sp->mbz != 0 ||
+#if GUEST_PAGING_LEVELS == 4
+         (sp->type != SH_type_l1_64_shadow)
+#elif GUEST_PAGING_LEVELS == 3
+         (sp->type != SH_type_l1_pae_shadow)
+#elif GUEST_PAGING_LEVELS == 2
+         (sp->type != SH_type_l1_32_shadow)
+#endif
+       )
+        goto fail;
+
+    sl1p = sh_map_domain_page(smfn);
+    sl1p += off;
+    sl1e = *sl1p;
+    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
+          != (_PAGE_PRESENT|_PAGE_RW))
+         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
+    {
+        sh_unmap_domain_page(sl1p);
+        goto fail;
+    }
+
+    /* Found it! Need to remove its write permissions. */
+    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
+    r = shadow_set_l1e(v, sl1p, sl1e, smfn);
+    ASSERT( !(r & SHADOW_SET_ERROR) );
+
+    sh_unmap_domain_page(sl1p);
+    perfc_incr(shadow_writeable_h_7);
+    return 1;
+
+ fail:
+    perfc_incr(shadow_writeable_h_8);
+    return 0;
+}
+#endif /* OOS */
+
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
 static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
 /* Look up this vaddr in the current shadow and see if it's a writeable
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 4f7e5b0320..0edb6f1f95 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -124,4 +124,8 @@ SHADOW_INTERNAL_NAME(sh_resync_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, GUEST_LEVELS)
      (struct vcpu*v, mfn_t gmfn);
+
+extern int
+SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
 #endif
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 327a5137d1..d02690b18c 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -321,6 +321,16 @@ static inline int sh_type_is_pinnable(struct vcpu *v, unsigned int t)
  */
 #define SHF_out_of_sync (1u<<30)
 #define SHF_oos_may_write (1u<<29)
+
+/* Fixup tables are a non-complete writable-mappings reverse map for
+   OOS pages. This lets us quickly resync pages (avoiding brute-force
+   search of the shadows) when the va hint is not sufficient (i.e.,
+   the pagetable is mapped in multiple places and in multiple
+   shadows.) */
+#define SHADOW_OOS_FT_ENTRIES                           \
+    ((PAGE_SIZE << SHADOW_OOS_FT_ORDER)                 \
+     / (SHADOW_OOS_FT_HASH * sizeof(struct oos_fixup)))
+
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
 static inline int sh_page_has_multiple_shadows(struct page_info *pg)
@@ -415,6 +425,11 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn, unsigned long va);
 
 /* Pull an out-of-sync page back into sync. */
 void sh_resync(struct vcpu *v, mfn_t gmfn);
 
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
+
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long offset);
+
 /* Pull all out-of-sync shadows back into sync.  If skip != 0, we try
  * to avoid resyncing where we think we can get away with it. */
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 64f3fb7945..440d2d31fb 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -441,6 +441,7 @@ struct shadow_walk_t
 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
 #define sh_resync_l1 INTERNAL_NAME(sh_resync_l1)
 #define sh_safe_not_to_sync INTERNAL_NAME(sh_safe_not_to_sync)
+#define sh_rm_write_access_from_sl1p INTERNAL_NAME(sh_rm_write_access_from_sl1p)
 #endif
 
 /* The sh_guest_(map|get)_* functions depends on Xen's paging levels */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 35232a6f59..7b9d0cd359 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -129,6 +129,12 @@ struct shadow_vcpu {
     /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
     mfn_t oos[SHADOW_OOS_PAGES];
     unsigned long oos_va[SHADOW_OOS_PAGES];
+    struct oos_fixup {
+        mfn_t gmfn;
+        mfn_t smfn;
+        unsigned long off;
+    } *oos_fixups;
+    int oos_fixup_used;
 };
 
 /************************************************/
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 86a9da074d..74ac964b80 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -131,7 +131,12 @@ static inline u32 pickle_domptr(struct domain *domain)
 #define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
 
 /* The number of out-of-sync shadows we allow per vcpu (prime, please) */
-#define SHADOW_OOS_PAGES 7
+#define SHADOW_OOS_PAGES 3
+
+/* The allocation order of the OOS fixup tables per vcpu */
+#define SHADOW_OOS_FT_ORDER 1
+/* The number of hash buckets in the OOS fixup tables */
+#define SHADOW_OOS_FT_HASH 13
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff --git a/xen/include/asm-x86/perfc_defn.h b/xen/include/asm-x86/perfc_defn.h
index ca23a2d21e..424d927fef 100644
--- a/xen/include/asm-x86/perfc_defn.h
+++ b/xen/include/asm-x86/perfc_defn.h
@@ -81,7 +81,10 @@ PERFCOUNTER(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
 PERFCOUNTER(shadow_writeable_h_4,  "shadow writeable: linux low/solaris")
 PERFCOUNTER(shadow_writeable_h_5,  "shadow writeable: linux high")
 PERFCOUNTER(shadow_writeable_h_6,  "shadow writeable: unsync va")
+PERFCOUNTER(shadow_writeable_h_7,  "shadow writeable: sl1p")
+PERFCOUNTER(shadow_writeable_h_8,  "shadow writeable: sl1p failed")
 PERFCOUNTER(shadow_writeable_bf,   "shadow writeable brute-force")
+PERFCOUNTER(shadow_writeable_bf_1, "shadow writeable resync bf")
 PERFCOUNTER(shadow_mappings,       "shadow removes all mappings")
 PERFCOUNTER(shadow_mappings_bf,    "shadow rm-mappings brute-force")
 PERFCOUNTER(shadow_early_unshadow, "shadow unshadows for fork/exit")
@@ -102,6 +105,13 @@ PERFCOUNTER(shadow_em_ex_pt,       "shadow extra pt write")
write") PERFCOUNTER(shadow_em_ex_non_pt, "shadow extra non-pt-write op") PERFCOUNTER(shadow_em_ex_fail, "shadow extra emulation failed") +PERFCOUNTER(shadow_oos_fixup_add_ok, "shadow OOS fixups adds") +PERFCOUNTER(shadow_oos_fixup_no_add, "shadow OOS fixups no adds") +PERFCOUNTER(shadow_oos_fixup_add_fail, "shadow OOS fixups adds failed") +PERFCOUNTER(shadow_oos_fixup_remove, "shadow OOS fixups removes") +PERFCOUNTER(shadow_oos_fixup_flush, "shadow OOS fixups flushes") +PERFCOUNTER(shadow_oos_fixup_flush_gmfn,"shadow OOS fixups gmfn flushes") + PERFCOUNTER(shadow_unsync, "shadow OOS unsyncs") PERFCOUNTER(shadow_unsync_evict, "shadow OOS evictions") PERFCOUNTER(shadow_resync, "shadow OOS resyncs")