[Xen-changelog] Because of a bug with reference counting against the target guest page
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b41e51ffa5eacf07ed7843e5f85ae1748e19c370
# Parent  3fd239d8b640968c56fbd5aacfe3564f12203f54

Because of a reference-counting bug against the target guest page, the code
always scanned _all_ the entries in the hash list when searching for the L1
shadow pages that write-protect that page (in shadow_promote(), which is
called by alloc_shadow_page()). The hash list can hold more than 500 entries
for L1 shadow pages, and every PTE in each of those pages had to be checked.

This patch does the following:

- Correct the reference count for the target guest page so that the loop
  can exit as soon as all the L1 shadow pages to modify have been found.
  Even with this, the entire list may still be searched if the page happens
  to be at the end.
- Try to avoid the hash-list search by keeping a back pointer (as a hint)
  to the shadow page pfn. In most cases there is a single translation for
  the guest page in the shadow. (A simplified sketch of this fast path
  appears after the diff below.)
- Cleanups: remove the nested function fix_entry.

With these changes, kernel build performance improved by approximately 20%
on 32-bit and 40% on 64-bit unmodified Linux guests. Log-dirty mode was also
tested for plain 32-bit.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Asit Mallick <asit.k.mallick@xxxxxxxxx>

diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow.c     Thu Oct 13 16:58:01 2005
@@ -616,13 +616,6 @@
     pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
     spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
 
-    /*
-    gpl1e = &(linear_pg_table[l1_linear_offset(va) &
-              ~(L1_PAGETABLE_ENTRIES-1)]);
-
-    spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
-              ~(L1_PAGETABLE_ENTRIES-1)]);*/
-
     for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
     {
         l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
@@ -645,7 +638,8 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
-        }
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
+        }
 
     frame_table[sl1mfn].tlbflush_timestamp =
         SHADOW_ENCODE_MIN_MAX(min, max);
@@ -716,8 +710,8 @@
         }
     }
 
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     __shadow_set_l1e(v, va, &new_spte);
-    shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
@@ -1133,6 +1127,24 @@
         delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
         perfc_decr(writable_pte_predictions);
     }
+}
+
+static int fix_entry(
+    struct domain *d,
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -1156,49 +1168,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) &&
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
     }
 
     for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
     return found;
-#undef MATCH_ENTRY
 }
 
 static int remove_all_write_access(
@@ -1206,8 +1197,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -1238,26 +1229,18 @@
         return 1;
     }
 
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0)
+            return 0;
+
+        // Use the back pointer to locate the shadow page that can contain
+        // the PTE of interest
+        if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+            found += remove_all_write_access_in_ptpage(
+                d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+            if ( found == write_refs )
+                return 0;
+        }
     }
 
     // Search all the shadow L1 page tables...
@@ -1276,7 +1259,7 @@
         {
            found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
            if ( found == write_refs )
-                return 1;
+                return 0;
         }
 
         a = a->next;
@@ -1376,7 +1359,7 @@
                  guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
             {
                 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
-
+                set_guest_back_ptr(d, shadow1[i], smfn, i);
                 // can't update snapshots of linear page tables -- they
                 // are used multiple times...
                 //
@@ -1604,6 +1587,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow32.c   Thu Oct 13 16:58:01 2005
@@ -1032,7 +1032,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as back pointer to smfn, and need to
+             * clean up it.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
@@ -1390,18 +1395,6 @@
     return rc;
 }
 
-/*
- * XXX KAF: Why is this VMX specific?
- */
-void vmx_shadow_clear_state(struct domain *d)
-{
-    SH_VVLOG("%s:", __func__);
-    shadow_lock(d);
-    free_shadow_pages(d);
-    shadow_unlock(d);
-    update_pagetables(d->vcpu[0]);
-}
-
 unsigned long
 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
 {
@@ -1462,14 +1455,10 @@
 
     hl2 = map_domain_page(hl2mfn);
 
-#ifdef __i386__
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
-#endif
 
     memset(hl2, 0, limit * sizeof(l1_pgentry_t));
@@ -1665,6 +1654,7 @@
                 min = i;
             if ( likely(i > max) )
                 max = i;
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
         }
 
     frame_table[sl1mfn].tlbflush_timestamp =
@@ -2070,6 +2060,24 @@
 
         xfree(gpfn_list);
     }
+}
+
+static int fix_entry(
+    struct domain *d,
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
 }
 
 static u32 remove_all_write_access_in_ptpage(
@@ -2088,49 +2096,28 @@
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
-
-    i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
-    }
-
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
+            >> PGT_va_shift;
+
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) &&
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
+    }
+
     for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
            break;
     }
 
+out:
     unmap_domain_page(pt);
     return found;
-#undef MATCH_ENTRY
 }
 
 int shadow_remove_all_write_access(
@@ -2138,8 +2125,8 @@
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -2169,27 +2156,19 @@
         perfc_incrc(remove_write_no_work);
         return 1;
     }
-
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0)
+            return 0;
+
+        // Use the back pointer to locate the shadow page that can contain
+        // the PTE of interest
+        if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+            found += remove_all_write_access_in_ptpage(
+                d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+            if ( found == write_refs )
+                return 0;
+        }
     }
 
     // Search all the shadow L1 page tables...
@@ -2203,7 +2182,7 @@
         {
            found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
            if ( found == write_refs )
-                return 1;
+                return 0;
         }
 
         a = a->next;
@@ -2376,12 +2355,12 @@
                  l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
             {
                 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+                set_guest_back_ptr(d, shadow1[i], smfn, i);
 
                 // can't update snapshots of linear page tables -- they
                 // are used multiple times...
                 //
                 // snapshot[i] = new_pte;
-
                 changed++;
             }
         }
@@ -2530,6 +2509,8 @@
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Oct 13 16:55:15 2005
+++ b/xen/arch/x86/shadow_public.c      Thu Oct 13 16:58:01 2005
@@ -1095,7 +1095,12 @@
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as back pointer to smfn, and need to
+             * clean up it.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
diff -r 3fd239d8b640 -r b41e51ffa5ea xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Oct 13 16:55:15 2005
+++ b/xen/include/asm-x86/shadow.h      Thu Oct 13 16:58:01 2005
@@ -718,6 +718,23 @@
 
     put_shadow_ref(smfn);
 }
+/*
+ * SMP issue. The following code assumes the shadow lock is held. Re-visit
+ * when working on finer-gained locks for shadow.
+ */
+static inline void set_guest_back_ptr(
+    struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
+{
+    if ( shadow_mode_external(d) ) {
+        unsigned long gmfn;
+
+        ASSERT(shadow_lock_is_acquired(d));
+        gmfn = l1e_get_pfn(spte);
+        frame_table[gmfn].tlbflush_timestamp = smfn;
+        frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
+        frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
+    }
+}
 
 /************************************************************************/
 
 #if CONFIG_PAGING_LEVELS <= 2
@@ -1611,10 +1628,11 @@
             if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
                 shadow_put_page_from_l1e(old_spte, d);
         }
-    }
-
+
+    }
+
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
-    shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
 #endif
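For readers who want the idea behind the back pointer without digging through
the shadow code, here is a minimal, self-contained C sketch of the fast path.
It deliberately uses simplified stand-in types and names (pte_t, shadows[],
back_smfn/back_index, remove_write_access()) rather than Xen's real
frame_table, l1_pgentry_t and shadow-status hash, so treat it as a conceptual
model of what the patch does, not the actual implementation: each guest page
remembers the last shadow L1 page and slot that mapped it, and the
write-protect path tries that single entry before falling back to a full scan.

#include <stdint.h>
#include <stdio.h>

#define L1_ENTRIES   512    /* entries per (simplified) L1 page table        */
#define NUM_SHADOWS  8      /* shadow L1 pages that would hang off one bucket */
#define PTE_RW       0x2u   /* stand-in for _PAGE_RW                         */

typedef struct { uint32_t frame; uint32_t flags; } pte_t;

/* Simplified shadow L1 tables plus a single per-guest-page back pointer:
 * back_smfn/back_index remember the last shadow slot that mapped the guest
 * page (the role played by tlbflush_timestamp and the PGT_va bits above). */
static pte_t shadows[NUM_SHADOWS][L1_ENTRIES];
static int back_smfn  = -1;
static int back_index = -1;

static void set_guest_back_ptr(int smfn, int index)
{
    back_smfn  = smfn;
    back_index = index;
}

/* Clear the RW bit in one PTE; return 1 once max_refs entries are fixed. */
static int fix_entry(pte_t *pt, unsigned int *found, unsigned int max_refs)
{
    pt->flags &= ~PTE_RW;
    return (++(*found) == max_refs);
}

/* Write-protect every mapping of guest frame gmfn.  The hinted entry is
 * tried first; the full scan only runs if the hint misses or further
 * writable references remain. */
static unsigned int remove_write_access(uint32_t gmfn, unsigned int write_refs)
{
    unsigned int found = 0;

    if ( back_smfn >= 0 )
    {
        pte_t *pte = &shadows[back_smfn][back_index];
        if ( pte->frame == gmfn && (pte->flags & PTE_RW) &&
             fix_entry(pte, &found, write_refs) )
            return found;                      /* fast path: no scan needed */
    }

    for ( int s = 0; s < NUM_SHADOWS; s++ )    /* slow path: full scan */
        for ( int i = 0; i < L1_ENTRIES; i++ )
            if ( shadows[s][i].frame == gmfn &&
                 (shadows[s][i].flags & PTE_RW) &&
                 fix_entry(&shadows[s][i], &found, write_refs) )
                return found;

    return found;
}

int main(void)
{
    uint32_t gmfn = 42;

    /* One writable mapping of the guest page, recorded with a back pointer. */
    shadows[5][7].frame = gmfn;
    shadows[5][7].flags = PTE_RW;
    set_guest_back_ptr(5, 7);

    printf("fixed %u writable PTE(s)\n", remove_write_access(gmfn, 1));
    return 0;
}

The design choice in the real patch is the same, except that no new fields
are added to the frame table: the hint is squeezed into the existing
tlbflush_timestamp field (shadow pfn) and the PGT_va bits of type_info (slot
index), which is why the free-page paths in the diff have to clear
tlbflush_timestamp when a page is released.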