[Xen-changelog] [xen-unstable] Out-of-sync L1 shadows: Fixup Tables
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1213983632 -3600
# Node ID f178082cce0a4de9516e2ed51fcb0f5f5739f710
# Parent 597058a3b619742e4a37581fabb4d35f87279282
Out-of-sync L1 shadows: Fixup Tables

This patch implements a very simple, non-complete reverse map of the
writable mappings of OOS pages, to avoid a brute-force search of the
shadows on resync.

Signed-off-by: Gianluca Guida <gianluca.guida@xxxxxxxxxxxxx>
---
 xen/arch/x86/mm/shadow/common.c  |  244 +++++++++++++++++++++++++++++++++++----
 xen/arch/x86/mm/shadow/multi.c   |   65 ++++++++++
 xen/arch/x86/mm/shadow/multi.h   |    4 
 xen/arch/x86/mm/shadow/private.h |   15 ++
 xen/arch/x86/mm/shadow/types.h   |    1 
 xen/include/asm-x86/domain.h     |    6 
 xen/include/asm-x86/mm.h         |    7 -
 xen/include/asm-x86/perfc_defn.h |   10 +
 8 files changed, 326 insertions(+), 26 deletions(-)

diff -r 597058a3b619 -r f178082cce0a xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Fri Jun 20 18:40:32 2008 +0100
@@ -580,6 +580,153 @@ static inline void _sh_resync_l1(struct
 #endif
 }
 
+#define _FIXUP_IDX(_b, _i) ((_b) * SHADOW_OOS_FT_HASH + (_i))
+
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn,
+                   mfn_t smfn, unsigned long off)
+{
+    int idx, i, free = 0, free_slot = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( !mfn_valid(fixups[_FIXUP_IDX(idx, i)].gmfn)
+             || !mfn_is_out_of_sync(fixups[_FIXUP_IDX(idx, i)].gmfn) )
+        {
+            free = 1;
+            free_slot = _FIXUP_IDX(idx, i);
+        }
+        else if ( (mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn))
+                  && (mfn_x(fixups[_FIXUP_IDX(idx, i)].smfn) == mfn_x(smfn))
+                  && (fixups[_FIXUP_IDX(idx, i)].off == off) )
+        {
+            perfc_incr(shadow_oos_fixup_no_add);
+            return;
+        }
+    }
+
+    if ( free )
+    {
+        if ( !v->arch.paging.shadow.oos_fixup_used )
+            v->arch.paging.shadow.oos_fixup_used = 1;
+        fixups[free_slot].gmfn = gmfn;
+        fixups[free_slot].smfn = smfn;
+        fixups[free_slot].off = off;
+        perfc_incr(shadow_oos_fixup_add_ok);
+        return;
+    }
+
+
+    perfc_incr(shadow_oos_fixup_add_fail);
+}
+
+void oos_fixup_remove(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_remove);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn) )
+                fixups[_FIXUP_IDX(idx, i)].gmfn = _mfn(INVALID_MFN);
+    }
+}
+
+int oos_fixup_flush(struct vcpu *v)
+{
+    int i, rc = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+    perfc_incr(shadow_oos_fixup_flush);
+
+    if ( !v->arch.paging.shadow.oos_fixup_used )
+        return 0;
+
+    for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( mfn_valid(fixups[i].gmfn) )
+        {
+            if ( mfn_is_out_of_sync(fixups[i].gmfn) )
+                rc |= sh_remove_write_access_from_sl1p(v, fixups[i].gmfn,
+                                                       fixups[i].smfn,
+                                                       fixups[i].off);
+            fixups[i].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    v->arch.paging.shadow.oos_fixup_used = 0;
+
+    return rc;
+}
+
+int oos_fixup_flush_gmfn(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i, rc = 0;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_flush_gmfn);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+        {
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) != mfn_x(gmfn) )
+                continue;
+
+            rc |= sh_remove_write_access_from_sl1p(v,
+                                                   fixups[_FIXUP_IDX(idx,i)].gmfn,
+                                                   fixups[_FIXUP_IDX(idx,i)].smfn,
+                                                   fixups[_FIXUP_IDX(idx,i)].off);
+
+            fixups[_FIXUP_IDX(idx,i)].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    return rc;
+}
+
+static int oos_remove_write_access(struct vcpu *v, mfn_t gmfn, unsigned long va)
+{
+    int ftlb = 0;
+
+    ftlb |= oos_fixup_flush_gmfn(v, gmfn);
+
+    switch ( sh_remove_write_access(v, gmfn, 0, va) )
+    {
+    default:
+    case 0:
+        break;
+
+    case 1:
+        ftlb |= 1;
+        break;
+
+    case -1:
+        /* An unfindable writeable typecount has appeared, probably via a
+         * grant table entry: can't shoot the mapping, so try to unshadow
+         * the page.  If that doesn't work either, the guest is granting
+         * his pagetables and must be killed after all.
+         * This will flush the tlb, so we can return with no worries. */
+        sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+        return 1;
+    }
+
+    if ( ftlb )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
+    return 0;
+}
+
+
 /* Pull all the entries on an out-of-sync page back into sync. */
 static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
 {
@@ -595,26 +742,10 @@ static void _sh_resync(struct vcpu *v, m
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
 
-    /* Need to pull write access so the page *stays* in sync.
-     * This might be rather slow but we hope that in the common case
-     * we're handling this pagetable after a guest walk has pulled
-     * write access the fast way. */
-    switch ( sh_remove_write_access(v, gmfn, 0, va) )
-    {
-    default:
-    case 0:
-        break;
-
-    case 1:
-        flush_tlb_mask(v->domain->domain_dirty_cpumask);
-        break;
-
-    case -1:
-        /* An unfindable writeable typecount has appeared, probably via a
-         * grant table entry: can't shoot the mapping, so try to unshadow
-         * the page.  If that doesn't work either, the guest is granting
-         * his pagetables and must be killed after all. */
-        sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+    /* Need to pull write access so the page *stays* in sync. */
+    if ( oos_remove_write_access(v, gmfn, va) )
+    {
+        /* Page has been unshadowed. */
         return;
     }
 
@@ -753,6 +884,9 @@ void sh_resync_all(struct vcpu *v, int s
     if ( do_locking )
        shadow_lock(v->domain);
 
+    if ( oos_fixup_flush(v) )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
     /* First: resync all of this vcpu's oos pages */
     for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ )
         if ( mfn_valid(oos[idx]) )
@@ -882,7 +1016,10 @@ void shadow_demote(struct vcpu *v, mfn_t
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         /* Was the page out of sync? */
         if ( page_is_out_of_sync(page) )
+        {
             oos_hash_remove(v, gmfn);
+            oos_fixup_remove(v, gmfn);
+        }
 #endif
         clear_bit(_PGC_page_table, &page->count_info);
     }
@@ -2224,7 +2361,10 @@ int sh_remove_write_access(struct vcpu *
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
 
     /* Brute-force search of all the shadows, by walking the hash */
-    perfc_incr(shadow_writeable_bf);
+    if ( level == 0 )
+        perfc_incr(shadow_writeable_bf_1);
+    else
+        perfc_incr(shadow_writeable_bf);
     hash_foreach(v, callback_mask, callbacks, gmfn);
 
     /* If that didn't catch the mapping, then there's some non-pagetable
@@ -2244,7 +2384,34 @@ int sh_remove_write_access(struct vcpu *
     return 1;
 }
 
-
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long off)
+{
+    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+
+    ASSERT(mfn_valid(smfn));
+    ASSERT(mfn_valid(gmfn));
+
+    if ( sp->type == SH_type_l1_32_shadow )
+    {
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
+            (v, gmfn, smfn, off);
+    }
+#if CONFIG_PAGING_LEVELS >= 3
+    else if ( sp->type == SH_type_l1_pae_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
+            (v, gmfn, smfn, off);
+#if CONFIG_PAGING_LEVELS >= 4
+    else if ( sp->type == SH_type_l1_64_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
+            (v, gmfn, smfn, off);
+#endif
+#endif
+
+    return 0;
+}
+#endif
 
 /**************************************************************************/
 /* Remove all mappings of a guest frame from the shadow tables.
@@ -2581,6 +2748,25 @@ static void sh_update_paging_modes(struc
     }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+    if ( v->arch.paging.shadow.oos_fixups == NULL )
+    {
+        int i;
+        v->arch.paging.shadow.oos_fixups =
+            alloc_xenheap_pages(SHADOW_OOS_FT_ORDER);
+        if ( v->arch.paging.shadow.oos_fixups == NULL )
+        {
+            SHADOW_ERROR("Could not allocate OOS fixup table"
+                         " for dom %u vcpu %u\n",
+                         v->domain->domain_id, v->vcpu_id);
+            domain_crash(v->domain);
+            return;
+        }
+        for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+            v->arch.paging.shadow.oos_fixups[i].gmfn = _mfn(INVALID_MFN);
+    }
+#endif /* OOS */
+
     // Valid transitions handled by this function:
     // - For PV guests:
     //     - after a shadow mode has been changed
@@ -2908,17 +3094,27 @@ void shadow_teardown(struct domain *d)
         }
     }
 
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
+#if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
     /* Free the virtual-TLB array attached to each vcpu */
     for_each_vcpu(d, v)
     {
+#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
         if ( v->arch.paging.vtlb )
         {
             xfree(v->arch.paging.vtlb);
             v->arch.paging.vtlb = NULL;
         }
-    }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+        if ( v->arch.paging.shadow.oos_fixups )
+        {
+            free_xenheap_pages(v->arch.paging.shadow.oos_fixups,
+                               SHADOW_OOS_FT_ORDER);
+        }
+#endif /* OOS */
+    }
+#endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
 
     list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
     {
diff -r 597058a3b619 -r f178082cce0a xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Fri Jun 20 18:40:32 2008 +0100
@@ -1409,6 +1409,9 @@ static int shadow_set_l1e(struct vcpu *v
     int flags = 0;
     struct domain *d = v->domain;
     shadow_l1e_t old_sl1e;
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
+#endif
     ASSERT(sl1e != NULL);
     old_sl1e = *sl1e;
 
@@ -1425,8 +1428,18 @@ static int shadow_set_l1e(struct vcpu *v
             /* Doesn't look like a pagetable. */
             flags |= SHADOW_SET_ERROR;
             new_sl1e = shadow_l1e_empty();
-        } else {
+        }
+        else
+        {
             shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+            if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn)
+                 && (shadow_l1e_get_flags(new_sl1e) & _PAGE_RW) )
+            {
+                oos_fixup_add(v, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
+            }
+#endif
+        }
         }
     }
 
@@ -4238,6 +4251,56 @@ sh_update_cr3(struct vcpu *v, int do_loc
 /**************************************************************************/
 /* Functions to revoke guest rights */
 
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+int sh_rm_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                 mfn_t smfn, unsigned long off)
+{
+    int r;
+    shadow_l1e_t *sl1p, sl1e;
+    struct shadow_page_info *sp;
+
+    ASSERT(mfn_valid(gmfn));
+    ASSERT(mfn_valid(smfn));
+
+    sp = mfn_to_shadow_page(smfn);
+
+    if ( sp->mbz != 0 ||
+#if GUEST_PAGING_LEVELS == 4
+         (sp->type != SH_type_l1_64_shadow)
+#elif GUEST_PAGING_LEVELS == 3
+         (sp->type != SH_type_l1_pae_shadow)
+#elif GUEST_PAGING_LEVELS == 2
+         (sp->type != SH_type_l1_32_shadow)
+#endif
+       )
+        goto fail;
+
+    sl1p = sh_map_domain_page(smfn);
+    sl1p += off;
+    sl1e = *sl1p;
+    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
+          != (_PAGE_PRESENT|_PAGE_RW))
+         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
+    {
+        sh_unmap_domain_page(sl1p);
+        goto fail;
+    }
+
+    /* Found it!  Need to remove its write permissions. */
+    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
+    r = shadow_set_l1e(v, sl1p, sl1e, smfn);
+    ASSERT( !(r & SHADOW_SET_ERROR) );
+
+    sh_unmap_domain_page(sl1p);
+    perfc_incr(shadow_writeable_h_7);
+    return 1;
+
+ fail:
+    perfc_incr(shadow_writeable_h_8);
+    return 0;
+}
+#endif /* OOS */
+
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
 static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
 /* Look up this vaddr in the current shadow and see if it's a writeable
diff -r 597058a3b619 -r f178082cce0a xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h	Fri Jun 20 18:40:32 2008 +0100
@@ -124,4 +124,8 @@ extern int
 extern int
 SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, GUEST_LEVELS)
     (struct vcpu*v, mfn_t gmfn);
+
+extern int
+SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
 #endif
diff -r 597058a3b619 -r f178082cce0a xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/arch/x86/mm/shadow/private.h	Fri Jun 20 18:40:32 2008 +0100
@@ -321,6 +321,16 @@ static inline int sh_type_is_pinnable(st
  */
 #define SHF_out_of_sync (1u<<30)
 #define SHF_oos_may_write (1u<<29)
+
+/* Fixup tables are a non-complete writable-mappings reverse map for
+   OOS pages. This let us quickly resync pages (avoiding brute-force
+   search of the shadows) when the va hint is not sufficient (i.e.,
+   the pagetable is mapped in multiple places and in multiple
+   shadows.) */
+#define SHADOW_OOS_FT_ENTRIES                           \
+    ((PAGE_SIZE << SHADOW_OOS_FT_ORDER)                 \
+     / (SHADOW_OOS_FT_HASH * sizeof(struct oos_fixup)))
+
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
 static inline int sh_page_has_multiple_shadows(struct page_info *pg)
@@ -414,6 +424,11 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
 
 /* Pull an out-of-sync page back into sync. */
 void sh_resync(struct vcpu *v, mfn_t gmfn);
+
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
+
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long offset);
 
 /* Pull all out-of-sync shadows back into sync.  If skip != 0, we try
  * to avoid resyncing where we think we can get away with it. */
diff -r 597058a3b619 -r f178082cce0a xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/arch/x86/mm/shadow/types.h	Fri Jun 20 18:40:32 2008 +0100
@@ -441,6 +441,7 @@ struct shadow_walk_t
 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
 #define sh_resync_l1 INTERNAL_NAME(sh_resync_l1)
 #define sh_safe_not_to_sync INTERNAL_NAME(sh_safe_not_to_sync)
+#define sh_rm_write_access_from_sl1p INTERNAL_NAME(sh_rm_write_access_from_sl1p)
 #endif
 
 /* The sh_guest_(map|get)_* functions depends on Xen's paging levels */
diff -r 597058a3b619 -r f178082cce0a xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/include/asm-x86/domain.h	Fri Jun 20 18:40:32 2008 +0100
@@ -129,6 +129,12 @@ struct shadow_vcpu {
     /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
     mfn_t oos[SHADOW_OOS_PAGES];
     unsigned long oos_va[SHADOW_OOS_PAGES];
+    struct oos_fixup {
+        mfn_t gmfn;
+        mfn_t smfn;
+        unsigned long off;
+    } *oos_fixups;
+    int oos_fixup_used;
 };
 
 /************************************************/
diff -r 597058a3b619 -r f178082cce0a xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/include/asm-x86/mm.h	Fri Jun 20 18:40:32 2008 +0100
@@ -131,7 +131,12 @@ static inline u32 pickle_domptr(struct d
 #define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
 
 /* The number of out-of-sync shadows we allow per vcpu (prime, please) */
-#define SHADOW_OOS_PAGES 7
+#define SHADOW_OOS_PAGES 3
+
+/* The order OOS fixup tables per vcpu */
+#define SHADOW_OOS_FT_ORDER 1
+/* OOS fixup tables hash entries */
+#define SHADOW_OOS_FT_HASH 13
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r 597058a3b619 -r f178082cce0a xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h	Fri Jun 20 18:39:45 2008 +0100
+++ b/xen/include/asm-x86/perfc_defn.h	Fri Jun 20 18:40:32 2008 +0100
@@ -81,7 +81,10 @@ PERFCOUNTER(shadow_writeable_h_4,  "shad
 PERFCOUNTER(shadow_writeable_h_4,  "shadow writeable: linux low/solaris")
 PERFCOUNTER(shadow_writeable_h_5,  "shadow writeable: linux high")
 PERFCOUNTER(shadow_writeable_h_6,  "shadow writeable: unsync va")
+PERFCOUNTER(shadow_writeable_h_7,  "shadow writeable: sl1p")
+PERFCOUNTER(shadow_writeable_h_8,  "shadow writeable: sl1p failed")
 PERFCOUNTER(shadow_writeable_bf,   "shadow writeable brute-force")
+PERFCOUNTER(shadow_writeable_bf_1, "shadow writeable resync bf")
 PERFCOUNTER(shadow_mappings,       "shadow removes all mappings")
 PERFCOUNTER(shadow_mappings_bf,    "shadow rm-mappings brute-force")
 PERFCOUNTER(shadow_early_unshadow, "shadow unshadows for fork/exit")
@@ -102,6 +105,13 @@ PERFCOUNTER(shadow_em_ex_non_pt, "shad
 PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
 PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
 
+PERFCOUNTER(shadow_oos_fixup_add_ok,    "shadow OOS fixups adds")
+PERFCOUNTER(shadow_oos_fixup_no_add,    "shadow OOS fixups no adds")
+PERFCOUNTER(shadow_oos_fixup_add_fail,  "shadow OOS fixups adds failed")
+PERFCOUNTER(shadow_oos_fixup_remove,    "shadow OOS fixups removes")
+PERFCOUNTER(shadow_oos_fixup_flush,     "shadow OOS fixups flushes")
+PERFCOUNTER(shadow_oos_fixup_flush_gmfn,"shadow OOS fixups gmfn flushes")
+
 PERFCOUNTER(shadow_unsync,         "shadow OOS unsyncs")
 PERFCOUNTER(shadow_unsync_evict,   "shadow OOS evictions")
 PERFCOUNTER(shadow_resync,         "shadow OOS resyncs")
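For readers who want the scheme rather than the diff, here is a minimal
stand-alone sketch of the fixup table: a small fixed-size hash table, keyed
by guest frame number, whose entries record where a writable shadow PTE for
an out-of-sync page lives (which shadow L1 page, which slot). The constants,
entry layout and names (FT_HASH, FT_ENTRIES, fixup_add, fixup_flush_gmfn,
write_protect) are illustrative only, not Xen's: in the patch the table is
per-vcpu, entries hold mfn_t values, the slot count is derived from the
allocation order, and sh_remove_write_access_from_sl1p() performs the actual
write-protection.

/* Minimal sketch of the OOS fixup-table idea; all names and sizes
 * here are hypothetical stand-ins for the structures in the patch. */

#define FT_HASH       13      /* buckets, cf. SHADOW_OOS_FT_HASH */
#define FT_ENTRIES     4      /* slots per bucket (illustrative) */
#define INVALID_FRAME (~0UL)  /* stands in for _mfn(INVALID_MFN) */

struct fixup {
    unsigned long gmfn;   /* out-of-sync guest frame */
    unsigned long smfn;   /* shadow L1 page holding a writable PTE for it */
    unsigned long off;    /* slot of that PTE within the shadow page */
};

static struct fixup table[FT_HASH * FT_ENTRIES];

/* Must run before use: a zeroed array is not "all invalid". */
static void fixup_init(void)
{
    int i;
    for ( i = 0; i < FT_HASH * FT_ENTRIES; i++ )
        table[i].gmfn = INVALID_FRAME;
}

/* Record one writable mapping of gmfn, deduplicating against existing
 * entries.  If the bucket is full the entry is simply dropped: the map
 * is allowed to be incomplete, and resync then falls back to the
 * brute-force walk of all shadows. */
static void fixup_add(unsigned long gmfn, unsigned long smfn,
                      unsigned long off)
{
    struct fixup *b = &table[(gmfn % FT_HASH) * FT_ENTRIES];
    int i, free_slot = -1;

    for ( i = 0; i < FT_ENTRIES; i++ )
    {
        if ( b[i].gmfn == gmfn && b[i].smfn == smfn && b[i].off == off )
            return;                  /* already recorded */
        if ( b[i].gmfn == INVALID_FRAME )
            free_slot = i;           /* remember a usable slot */
    }

    if ( free_slot >= 0 )
    {
        b[free_slot].gmfn = gmfn;
        b[free_slot].smfn = smfn;
        b[free_slot].off  = off;
    }
    /* else: bucket full; safe, but this frame may need a slow resync */
}

/* On resync of gmfn, write-protect only the recorded mappings instead
 * of searching every shadow.  write_protect() stands in for Xen's
 * sh_remove_write_access_from_sl1p(). */
static void fixup_flush_gmfn(unsigned long gmfn,
                             void (*write_protect)(unsigned long smfn,
                                                   unsigned long off))
{
    struct fixup *b = &table[(gmfn % FT_HASH) * FT_ENTRIES];
    int i;

    for ( i = 0; i < FT_ENTRIES; i++ )
        if ( b[i].gmfn == gmfn )
        {
            write_protect(b[i].smfn, b[i].off);
            b[i].gmfn = INVALID_FRAME;
        }
}

The key design point, visible in the patch itself, is that the map is
deliberately lossy: oos_fixup_add() refuses to add when its bucket is full
(counted by shadow_oos_fixup_add_fail), and oos_remove_write_access() still
calls sh_remove_write_access(), whose brute-force hash walk catches anything
the table and the va hint missed, so correctness never depends on the table
being complete.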
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog