[Xen-changelog] [xen-unstable] [XEN] Clean up the shadow interface
# HG changeset patch # User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> # Date 1166616187 0 # Node ID c75d6f2aad7a64b66b814ade1af9669ea456a69a # Parent b258c7587d8ddf5dbdae872ea54e74a119dbfd1f [XEN] Clean up the shadow interface Remove a lot of unneccesary things from shadow.h, and move the shadow lock entirely inside the shadow code. Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> --- xen/arch/x86/domain.c | 19 - xen/arch/x86/domain_build.c | 2 xen/arch/x86/mm.c | 201 +++++++-------- xen/arch/x86/mm/shadow/common.c | 292 +++++++++++++--------- xen/arch/x86/mm/shadow/multi.c | 53 ++-- xen/arch/x86/mm/shadow/multi.h | 4 xen/arch/x86/mm/shadow/private.h | 160 ++++++++++-- xen/arch/x86/mm/shadow/types.h | 4 xen/include/asm-x86/mm.h | 2 xen/include/asm-x86/shadow.h | 506 ++++++++++----------------------------- 10 files changed, 602 insertions(+), 641 deletions(-) diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/domain.c Wed Dec 20 12:03:07 2006 +0000 @@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d) { #ifdef __x86_64__ struct page_info *pg; + int i; #endif l1_pgentry_t gdt_l1e; int vcpuid, pdpt_order; - int i, rc = -ENOMEM; + int rc = -ENOMEM; pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order); @@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d) #endif /* __x86_64__ */ - shadow_lock_init(d); - for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); + shadow_domain_init(d); if ( !is_idle_domain(d) ) { @@ -365,15 +361,6 @@ int arch_set_info_guest( v->arch.guest_table = pagetable_from_pfn(cr3_pfn); } - - /* Shadow: make sure the domain has enough shadow memory to - * boot another vcpu */ - if ( shadow_mode_enabled(d) - && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) ) - { - destroy_gdt(v); - return -ENOMEM; - } if ( v->vcpu_id == 0 ) update_domain_wallclock_time(d); diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/domain_build.c Wed Dec 20 12:03:07 2006 +0000 @@ -827,7 +827,7 @@ int construct_dom0(struct domain *d, regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) - if ( shadow_test_enable(d) == 0 ) + if ( shadow_enable(d, SHM2_enable) == 0 ) shadow_update_paging_modes(v); if ( supervisor_mode_kernel ) diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm.c Wed Dec 20 12:03:07 2006 +0000 @@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v) write_cr3(v->arch.cr3); } +/* Should be called after CR3 is updated. + * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. + * + * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, + * shadow_vtable, etc). + * + * Uses values found in vcpu->arch.(guest_table and guest_table_user), and + * for HVM guests, arch.monitor_table and hvm's guest CR3. + * + * Update ref counts to shadow tables appropriately. 
+ */ +void update_cr3(struct vcpu *v) +{ + unsigned long cr3_mfn=0; + + if ( shadow_mode_enabled(v->domain) ) + { + shadow_update_cr3(v); + return; + } + +#if CONFIG_PAGING_LEVELS == 4 + if ( !(v->arch.flags & TF_kernel_mode) ) + cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); + else +#endif + cr3_mfn = pagetable_get_pfn(v->arch.guest_table); + + make_cr3(v, cr3_mfn); +} + + void invalidate_shadow_ldt(struct vcpu *v) { int i; @@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_in #endif -static inline int update_l1e(l1_pgentry_t *pl1e, - l1_pgentry_t ol1e, - l1_pgentry_t nl1e, - unsigned long gl1mfn, - struct vcpu *v) + +/* How to write an entry to the guest pagetables. + * Returns 0 for failure (pointer not valid), 1 for success. */ +static inline int update_intpte(intpte_t *p, + intpte_t old, + intpte_t new, + unsigned long mfn, + struct vcpu *v) { int rv = 1; +#ifndef PTE_UPDATE_WITH_CMPXCHG if ( unlikely(shadow_mode_enabled(v->domain)) ) - shadow_lock(v->domain); -#ifndef PTE_UPDATE_WITH_CMPXCHG - rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); + rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); + else + rv = (!__copy_to_user(p, &new, sizeof(new))); #else { - intpte_t o = l1e_get_intpte(ol1e); - intpte_t n = l1e_get_intpte(nl1e); - + intpte_t t = old; for ( ; ; ) { - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + if ( unlikely(shadow_mode_enabled(v->domain)) ) + rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); + else + rv = (!cmpxchg_user(p, t, new)); + + if ( unlikely(rv == 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte, - l1e_get_intpte(ol1e), - l1e_get_intpte(nl1e), - o); - rv = 0; + ": saw %" PRIpte, old, new, t); break; } - if ( o == l1e_get_intpte(ol1e) ) + if ( t == old ) break; /* Allowed to change in Accessed/Dirty flags only. */ - BUG_ON((o ^ l1e_get_intpte(ol1e)) & - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); - ol1e = l1e_from_intpte(o); + BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY)); + + old = t; } } #endif - if ( unlikely(shadow_mode_enabled(v->domain)) && rv ) - { - shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); - shadow_unlock(v->domain); - } return rv; } +/* Macro that wraps the appropriate type-changes around update_intpte(). + * Arguments are: type, ptr, old, new, mfn, vcpu */ +#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \ + update_intpte((intpte_t *)(_p), \ + _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ + (_m), (_v)) /* Update the L1 entry at pl1e to new value nl1e. */ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, @@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl return 0; if ( unlikely(shadow_mode_refcounts(d)) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { @@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl /* Fast path for identical mapping, r/w and presence. 
*/ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) return 0; - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) { put_page_from_l1e(nl1e, d); return 0; @@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl } else { - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) return 0; } @@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl return 1; } -#ifndef PTE_UPDATE_WITH_CMPXCHG -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) -#else -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ - for ( ; ; ) \ - { \ - intpte_t __o = cmpxchg((intpte_t *)(_p), \ - _t ## e_get_intpte(_o), \ - _t ## e_get_intpte(_n)); \ - if ( __o == _t ## e_get_intpte(_o) ) \ - break; \ - /* Allowed to change in Accessed/Dirty flags only. */ \ - BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ - _o = _t ## e_from_intpte(__o); \ - } \ - 1; }) -#endif -#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ - int rv; \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - shadow_lock(current->domain); \ - rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - { \ - shadow_validate_guest_entry(current, _mfn(_m), (_p)); \ - shadow_unlock(current->domain); \ - } \ - rv; \ -}) /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, @@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); + return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { put_page_from_l2e(nl2e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { return 0; } @@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl /* Fast path for identical mapping and presence. */ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); + return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { put_page_from_l3e(nl3e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { return 0; } @@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl /* Fast path for identical mapping and presence. 
*/ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); + return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current); if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { put_page_from_l4e(nl4e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { return 0; } @@ -2292,15 +2298,11 @@ int do_mmu_update( break; if ( unlikely(shadow_mode_enabled(d)) ) - shadow_lock(d); - - *(intpte_t *)va = req.val; - okay = 1; - - if ( unlikely(shadow_mode_enabled(d)) ) + okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); + else { - shadow_validate_guest_entry(v, _mfn(mfn), va); - shadow_unlock(d); + *(intpte_t *)va = req.val; + okay = 1; } put_page_type(page); @@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping( } ol1e = *(l1_pgentry_t *)va; - if ( !update_l1e(va, ol1e, nl1e, mfn, v) ) + if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) ) { put_page_type(page); rc = GNTST_general_error; @@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e( + if ( unlikely(!UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) ) { @@ -2515,7 +2517,7 @@ static int create_grant_va_mapping( return GNTST_general_error; } ol1e = *pl1e; - okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); + okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v); guest_unmap_l1e(v, pl1e); pl1e = NULL; @@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) { MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); rc = GNTST_general_error; @@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HA UNLOCK_BIGLOCK(d); - /* If we're doing FAST_FAULT_PATH, then shadow mode may have - cached the fact that this is an mmio region in the shadow - page tables. Blow the tables away to remove the cache. - This is pretty heavy handed, but this is a rare operation - (it might happen a dozen times during boot and then never - again), so it doesn't matter too much. 
*/ - shadow_lock(d); - shadow_blow_tables(d); - shadow_unlock(d); - put_domain(d); break; @@ -3188,27 +3180,30 @@ static int ptwr_emulated_update( pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); if ( do_cmpxchg ) { + int okay; + ol1e = l1e_from_intpte(old); + if ( shadow_mode_enabled(d) ) - shadow_lock(d); - ol1e = l1e_from_intpte(old); - if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) - { - if ( shadow_mode_enabled(d) ) - shadow_unlock(d); + { + intpte_t t = old; + okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, + &t, val, _mfn(mfn)); + okay = (okay && t == old); + } + else + okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); + + if ( !okay ) + { unmap_domain_page(pl1e); put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d); return X86EMUL_CMPXCHG_FAILED; } - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); - shadow_unlock(d); - } } else { ol1e = *pl1e; - if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) + if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) ) BUG(); } diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm/shadow/common.c Wed Dec 20 12:03:07 2006 +0000 @@ -38,6 +38,21 @@ #include <asm/shadow.h> #include "private.h" + +/* Set up the shadow-specific parts of a domain struct at start of day. + * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d) +{ + int i; + shadow_lock_init(d); + for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) + INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); + INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); + INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); + INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); +} + + #if SHADOW_AUDIT int shadow_audit_enable = 0; @@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_ ASSERT(mfn_valid(gmfn)); /* We should never try to promote a gmfn that has writeable mappings */ - ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0); + ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0); /* Is the page already shadowed? */ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) @@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t * Returns a bitmask of SHADOW_SET_* flags. */ int -__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size) { int result = 0; struct page_info *page = mfn_to_page(gmfn); @@ -546,22 +560,9 @@ __shadow_validate_guest_entry(struct vcp } -int -shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry) -/* This is the entry point from hypercalls. It returns a bitmask of all the - * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */ -{ - int rc; - - ASSERT(shadow_locked_by_me(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t)); - shadow_audit_tables(v); - return rc; -} - void -shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size) /* This is the entry point for emulated writes to pagetables in HVM guests and * PV translated guests. 
*/ @@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vc int rc; ASSERT(shadow_locked_by_me(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, size); + rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ flush_tlb_mask(d->domain_dirty_cpumask); @@ -583,6 +584,38 @@ shadow_validate_guest_pt_write(struct vc * unshadow the page. */ sh_remove_shadows(v, gmfn, 0, 0); } +} + +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn) +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. */ +{ + int failed; + shadow_lock(v->domain); + failed = __copy_to_user(p, &new, sizeof(new)); + if ( failed != sizeof(new) ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + shadow_unlock(v->domain); + return (failed == 0); +} + +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn) +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +{ + int failed; + intpte_t t = *old; + shadow_lock(v->domain); + failed = cmpxchg_user(p, t, new); + if ( t == *old ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + *old = t; + shadow_unlock(v->domain); + return (failed == 0); } @@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, u /* Deliberately free all the memory we can: this will tear down all of * this domain's shadows */ -void shadow_blow_tables(struct domain *d) +static void shadow_blow_tables(struct domain *d) { struct list_head *l, *t; struct shadow_page_info *sp; @@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t * Also, we only ever allocate a max-order chunk, so as to preserve * the invariant that shadow_prealloc() always works. * Returns 0 iff it can't get a chunk (the caller should then - * free up some pages in domheap and call set_sh_allocation); + * free up some pages in domheap and call sh_set_allocation); * returns non-zero on success. */ static int @@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t * if ( pagetable_get_pfn(v->arch.guest_table) == pagetable_get_pfn(d->arch.phys_table) && v->arch.shadow.mode != NULL ) - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); } } #endif /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); + (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, + p2m_entry, sizeof *p2m_entry); } *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); next = sh_map_domain_page(*table_mfn); @@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, u /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry( - d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry)); + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, + p2m_entry, sizeof(*p2m_entry)); /* Success */ rv = 1; @@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct d * Input will be rounded up to at least shadow_min_acceptable_pages(), * plus space for the p2m table. * Returns 0 for success, non-zero for failure. 
*/ -static unsigned int set_sh_allocation(struct domain *d, - unsigned int pages, - int *preempted) +static unsigned int sh_set_allocation(struct domain *d, + unsigned int pages, + int *preempted) { struct shadow_page_info *sp; unsigned int lower_bound; @@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(st return 0; } -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted) -/* Hypercall interface to set the shadow memory allocation */ -{ - unsigned int rv; - shadow_lock(d); - rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); - SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n", - d->domain_id, - d->arch.shadow.total_pages, - shadow_get_allocation(d)); - shadow_unlock(d); - return rv; +/* Return the size of the shadow pool, rounded up to the nearest MB */ +static unsigned int shadow_get_allocation(struct domain *d) +{ + unsigned int pg = d->arch.shadow.total_pages; + return ((pg >> (20 - PAGE_SHIFT)) + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); } /**************************************************************************/ @@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, m * level and fault_addr desribe how we found this to be a pagetable; * level==0 means we have some other reason for revoking write access.*/ -int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, - unsigned int level, - unsigned long fault_addr) +int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, + unsigned int level, + unsigned long fault_addr) { /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vc NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */ #else NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2077,25 +2102,25 @@ int shadow_remove_write_access(struct vc /* Remove all mappings of a guest frame from the shadow tables. * Returns non-zero if we need to flush TLBs. 
*/ -int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) +int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn) { struct page_info *page = mfn_to_page(gmfn); - int expected_count; + int expected_count, do_locking; /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vc NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */ #else NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vc if ( (page->count_info & PGC_count_mask) == 0 ) return 0; - ASSERT(shadow_locked_by_me(v->domain)); + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. */ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); /* XXX TODO: * Heuristics for finding the (probably) single mapping of this gmfn */ @@ -2153,6 +2183,8 @@ int shadow_remove_all_mappings(struct vc page->count_info, page->u.inuse.type_info); } } + + if ( do_locking ) shadow_unlock(v->domain); /* We killed at least one mapping, so must flush TLBs. */ return 1; @@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, m * (all != 0 implies fast == 0) */ { - struct page_info *pg; + struct page_info *pg = mfn_to_page(gmfn); mfn_t smfn; u32 sh_flags; + int do_locking; unsigned char t; /* Dispatch table for getting per-type functions: each level must @@ -2296,14 +2329,18 @@ void sh_remove_shadows(struct vcpu *v, m 0 /* unused */ }; - ASSERT(shadow_locked_by_me(v->domain)); ASSERT(!(all && fast)); - - pg = mfn_to_page(gmfn); /* Bail out now if the page is not shadowed */ if ( (pg->count_info & PGC_page_table) == 0 ) return; + + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. 
*/ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n", v->domain->domain_id, v->vcpu_id, mfn_x(gmfn)); @@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, m /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. */ flush_tlb_mask(v->domain->domain_dirty_cpumask); -} - -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) + + if ( do_locking ) shadow_unlock(v->domain); +} + +static void +sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) /* Even harsher: this is a HVM page that we thing is no longer a pagetable. * Unshadow it, and recursively unshadow pages that reference it. */ { - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0, 1); /* XXX TODO: * Rework this hashtable walker to return a linked-list of all * the shadows it modified, then do breadth-first recursion @@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(st /**************************************************************************/ -void sh_update_paging_modes(struct vcpu *v) +static void sh_update_paging_modes(struct vcpu *v) { struct domain *d = v->domain; struct shadow_paging_mode *old_mode = v->arch.shadow.mode; @@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu // First, tear down any old shadow tables held by this vcpu. // - shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); if ( !is_hvm_domain(d) ) { @@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu /// PV guest /// #if CONFIG_PAGING_LEVELS == 4 - if ( pv_32bit_guest(v) ) - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3); - else - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); + /* When 32-on-64 PV guests are supported, they must choose + * a different mode here */ + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #elif CONFIG_PAGING_LEVELS == 2 @@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu if ( pagetable_is_null(v->arch.monitor_table) ) { - mfn_t mmfn = shadow_make_monitor_table(v); + mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); hvm_update_host_cr3(v); @@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu old_mfn = pagetable_get_mfn(v->arch.monitor_table); v->arch.monitor_table = pagetable_null(); - new_mfn = v->arch.shadow.mode->make_monitor_table(v); + new_mfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(new_mfn); SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", mfn_x(new_mfn)); @@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu // This *does* happen, at least for CR4.PGE... 
} - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); +} + +void shadow_update_paging_modes(struct vcpu *v) +{ + shadow_lock(v->domain); + sh_update_paging_modes(v); + shadow_unlock(v->domain); } /**************************************************************************/ @@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 /* Init the shadow memory allocation if the user hasn't done so */ old_pages = d->arch.shadow.total_pages; if ( old_pages == 0 ) - if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ - { - set_sh_allocation(d, 0, NULL); + if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ + { + sh_set_allocation(d, 0, NULL); rv = -ENOMEM; goto out; } @@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 /* Init the hash table */ if ( shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); rv = -ENOMEM; goto out; } @@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 if ( !shadow_alloc_p2m_table(d) ) { shadow_hash_teardown(d); - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); shadow_p2m_teardown(d); rv = -ENOMEM; goto out; @@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d) /* Release the shadow and monitor tables held by each vcpu */ for_each_vcpu(d, v) { - shadow_detach_old_tables(v); - if ( shadow_mode_external(d) ) + if ( v->arch.shadow.mode ) { - mfn = pagetable_get_mfn(v->arch.monitor_table); - if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) - shadow_destroy_monitor_table(v, mfn); - v->arch.monitor_table = pagetable_null(); + v->arch.shadow.mode->detach_old_tables(v); + if ( shadow_mode_external(d) ) + { + mfn = pagetable_get_mfn(v->arch.monitor_table); + if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) + v->arch.shadow.mode->destroy_monitor_table(v, mfn); + v->arch.monitor_table = pagetable_null(); + } } } } @@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d) d->arch.shadow.free_pages, d->arch.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); /* Release the hash table back to xenheap */ if (d->arch.shadow.hash_table) shadow_hash_teardown(d); @@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct if ( d->arch.shadow.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ - if ( set_sh_allocation(d, 1, NULL) != 0 + if ( sh_set_allocation(d, 1, NULL) != 0 || shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); return -ENOMEM; } } @@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct d->arch.shadow.p2m_pages); for_each_vcpu(d, v) { - shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); #if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); @@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct } /* Pull down the memory allocation */ - if ( set_sh_allocation(d, 0, NULL) != 0 ) + if ( sh_set_allocation(d, 0, NULL) != 0 ) { // XXX - How can this occur? 
// Seems like a bug to return an error now that we've @@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct } /* Enable/disable ops for the "test" and "log-dirty" modes */ -int shadow_test_enable(struct domain *d) +static int shadow_test_enable(struct domain *d) { int ret; @@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d) return ret; } -int shadow_test_disable(struct domain *d) +static int shadow_test_disable(struct domain *d) { int ret; @@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, uns if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( shadow_remove_all_mappings(v, _mfn(mfn)) ) + sh_remove_all_shadows_and_parents(v, _mfn(mfn)); + if ( sh_remove_all_mappings(v, _mfn(mfn)) ) flush_tlb_mask(d->domain_dirty_cpumask); } @@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct dom v = d->vcpu[0]; if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, omfn); - if ( shadow_remove_all_mappings(v, omfn) ) + sh_remove_all_shadows_and_parents(v, omfn); + if ( sh_remove_all_mappings(v, omfn) ) flush_tlb_mask(d->domain_dirty_cpumask); } set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); @@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct dom shadow_set_p2m_entry(d, gfn, _mfn(mfn)); set_gpfn_from_mfn(mfn, gfn); + +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) + /* If we're doing FAST_FAULT_PATH, then shadow mode may have + cached the fact that this is an mmio region in the shadow + page tables. Blow the tables away to remove the cache. + This is pretty heavy handed, but this is a rare operation + (it might happen a dozen times during boot and then never + again), so it doesn't matter too much. */ + shadow_blow_tables(d); +#endif + shadow_audit_p2m(d); shadow_unlock(d); } @@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op( /* Mark a page as dirty */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) +void sh_mark_dirty(struct domain *d, mfn_t gmfn) { unsigned long pfn; ASSERT(shadow_locked_by_me(d)); - ASSERT(shadow_mode_log_dirty(d)); - - if ( !mfn_valid(gmfn) ) + + if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) return; ASSERT(d->arch.shadow.dirty_bitmap != NULL); @@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, } } +void shadow_mark_dirty(struct domain *d, mfn_t gmfn) +{ + shadow_lock(d); + sh_mark_dirty(d, gmfn); + shadow_unlock(d); +} /**************************************************************************/ /* Shadow-control XEN_DOMCTL dispatcher */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { int rc, preempted = 0; @@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d, return 0; case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: - rc = shadow_set_allocation(d, sc->mb, &preempted); + shadow_lock(d); + rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); + shadow_unlock(d); if ( preempted ) /* Not finished. Set up to re-run the call. */ rc = hypercall_create_continuation( diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Dec 20 12:03:07 2006 +0000 @@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsign gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); if ( !mfn_valid(gw->l3mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. 
*/ - if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) + guest_l3_table_offset(va); @@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsign gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e)); if ( !mfn_valid(gw->l2mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn)) + guest_l2_table_offset(va); @@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsign if ( !mfn_valid(gw->l1mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ if ( guest_op - && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) + && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn)) + guest_l1_table_offset(va); @@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask; /* More than one type bit set in shadow-flags? */ if ( shflags & ~(1UL << find_first_set_bit(shflags)) ) - res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); + res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep)); } /* We should never need to flush the TLB or recopy PAE entries */ @@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v, /* If this is actually a page table, then we have a bug, and need * to support more operations in the emulator. More likely, * though, this is a hint that this page should not be shadowed. */ - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */); } /* Emulator has changed the user registers: write back */ @@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v) sh_unmap_domain_page(ml4e); } - /* Shadow l3 tables are made up by update_cr3 */ + /* Shadow l3 tables are made up by sh_update_cr3 */ sl3e = v->arch.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) @@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v) int unmap_l2e = 0; #if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables were built by update_cr3 */ + /* Shadow l3 tables were built by sh_update_cr3 */ if ( shadow_mode_external(d) ) shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; else @@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v, static void -sh_update_cr3(struct vcpu *v) +sh_update_cr3(struct vcpu *v, int do_locking) /* Updates vcpu->arch.cr3 after the guest has changed CR3. * Paravirtual guests should set v->arch.guest_table (and guest_table_user, * if appropriate). * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works, * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards. + * If do_locking != 0, assume we are being called from outside the + * shadow code, and must take and release the shadow lock; otherwise + * that is the caller's respnsibility. 
*/ { struct domain *d = v->domain; @@ -3354,6 +3357,15 @@ sh_update_cr3(struct vcpu *v) #if GUEST_PAGING_LEVELS == 3 u32 guest_idx=0; #endif + + /* Don't do anything on an uninitialised vcpu */ + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { + ASSERT(v->arch.cr3 == 0); + return; + } + + if ( do_locking ) shadow_lock(v->domain); ASSERT(shadow_locked_by_me(v->domain)); ASSERT(v->arch.shadow.mode); @@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v) #endif gmfn = pagetable_get_mfn(v->arch.guest_table); - if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) - { - ASSERT(v->arch.cr3 == 0); - return; - } //// //// vcpu->arch.guest_vtable @@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v) * replace the old shadow pagetable(s), so that we can safely use the * (old) shadow linear maps in the writeable mapping heuristics. */ #if GUEST_PAGING_LEVELS == 2 - if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 @@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn); - flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); + flush |= sh_remove_write_access(v, gl2mfn, 2, 0); } } if ( flush ) @@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v) } } #elif GUEST_PAGING_LEVELS == 4 - if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else @@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v) /* Fix up the linear pagetable mappings */ sh_update_linear_entries(v); + + /* Release the lock, if we took it (otherwise it's the caller's problem) */ + if ( do_locking ) shadow_unlock(v->domain); } @@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v } #endif -int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) +int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn, + mfn_t readonly_mfn) /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */ { shadow_l1e_t *sl1e; @@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu * } -int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) +int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) /* Excises all mappings to guest frame from this shadow l1 table */ { shadow_l1e_t *sl1e; @@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, uns skip = safe_not_to_verify_write(mfn, addr, src, bytes); memcpy(addr, src, bytes); - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) @@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u if ( prev == old ) { - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); } else rv = X86EMUL_CMPXCHG_FAILED; @@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, if ( prev == old ) { - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8); } else rv = X86EMUL_CMPXCHG_FAILED; diff -r b258c7587d8d -r c75d6f2aad7a 
xen/arch/x86/mm/shadow/multi.h --- a/xen/arch/x86/mm/shadow/multi.h Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm/shadow/multi.h Wed Dec 20 12:03:07 2006 +0000 @@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappi (struct vcpu *v, mfn_t sl4mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn); extern void diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm/shadow/private.h Wed Dec 20 12:03:07 2006 +0000 @@ -33,8 +33,43 @@ /****************************************************************************** + * Levels of self-test and paranoia + */ + +#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ +#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ +#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ +#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ +#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ +#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ + +#ifdef NDEBUG +#define SHADOW_AUDIT 0 +#define SHADOW_AUDIT_ENABLE 0 +#else +#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ +#define SHADOW_AUDIT_ENABLE shadow_audit_enable +extern int shadow_audit_enable; +#endif + +/****************************************************************************** + * Levels of optimization + */ + +#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ +#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ +#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ +#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ +#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ +#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ + +#define SHADOW_OPTIMIZATIONS 0x3f + + +/****************************************************************************** * Debug and error-message output */ + #define SHADOW_PRINTK(_f, _a...) \ debugtrace_printk("sh: %s(): " _f, __func__, ##_a) #define SHADOW_ERROR(_f, _a...) \ @@ -53,6 +88,58 @@ #define SHADOW_DEBUG_A_AND_D 1 #define SHADOW_DEBUG_EMULATE 1 #define SHADOW_DEBUG_LOGDIRTY 0 + +/****************************************************************************** + * The shadow lock. + * + * This lock is per-domain. It is intended to allow us to make atomic + * updates to the software TLB that the shadow tables provide. 
+ * + * Specifically, it protects: + * - all changes to shadow page table pages + * - the shadow hash table + * - the shadow page allocator + * - all changes to guest page table pages + * - all changes to the page_info->tlbflush_timestamp + * - the page_info->count fields on shadow pages + * - the shadow dirty bit array and count + */ +#ifndef CONFIG_SMP +#error shadow.h currently requires CONFIG_SMP +#endif + +#define shadow_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.shadow.lock); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + } while (0) + +#define shadow_locked_by_me(_d) \ + (current->processor == (_d)->arch.shadow.locker) + +#define shadow_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ + { \ + printk("Error: shadow lock held by %s\n", \ + (_d)->arch.shadow.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.shadow.lock); \ + ASSERT((_d)->arch.shadow.locker == -1); \ + (_d)->arch.shadow.locker = current->processor; \ + (_d)->arch.shadow.locker_function = __func__; \ + } while (0) + +#define shadow_unlock(_d) \ + do { \ + ASSERT((_d)->arch.shadow.locker == current->processor); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.shadow.lock); \ + } while (0) + /****************************************************************************** @@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l2h(struc void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); +/* Update the shadows in response to a pagetable write from Xen */ +extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); + +/* Update the shadows in response to a pagetable write from a HVM guest */ +extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); + +/* Remove all writeable mappings of a guest frame from the shadows. + * Returns non-zero if we need to flush TLBs. + * level and fault_addr desribe how we found this to be a pagetable; + * level==0 means we have some other reason for revoking write access. */ +extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, + unsigned int level, + unsigned long fault_addr); /****************************************************************************** * Flags used in the return value of the shadow_set_lXe() functions... 
@@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct #undef mfn_valid #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) + +static inline int +sh_mfn_is_a_page_table(mfn_t gmfn) +{ + struct page_info *page = mfn_to_page(gmfn); + struct domain *owner; + unsigned long type_info; + + if ( !mfn_valid(gmfn) ) + return 0; + + owner = page_get_owner(page); + if ( owner && shadow_mode_refcounts(owner) + && (page->count_info & PGC_page_table) ) + return 1; + + type_info = page->u.inuse.type_info & PGT_type_mask; + return type_info && (type_info <= PGT_l4_page_table); +} + // Provide mfn_t-aware versions of common xen functions static inline void * sh_map_domain_page(mfn_t mfn) @@ -349,6 +471,25 @@ sh_unmap_domain_page_global(void *p) { unmap_domain_page_global(p); } + +static inline mfn_t +pagetable_get_mfn(pagetable_t pt) +{ + return _mfn(pagetable_get_pfn(pt)); +} + +static inline pagetable_t +pagetable_from_mfn(mfn_t mfn) +{ + return pagetable_from_pfn(mfn_x(mfn)); +} + + +/****************************************************************************** + * Log-dirty mode bitmap handling + */ + +extern void sh_mark_dirty(struct domain *d, mfn_t gmfn); static inline int sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) @@ -366,25 +507,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t return 1; return 0; -} - -static inline int -sh_mfn_is_a_page_table(mfn_t gmfn) -{ - struct page_info *page = mfn_to_page(gmfn); - struct domain *owner; - unsigned long type_info; - - if ( !mfn_valid(gmfn) ) - return 0; - - owner = page_get_owner(page); - if ( owner && shadow_mode_refcounts(owner) - && (page->count_info & PGC_page_table) ) - return 1; - - type_info = page->u.inuse.type_info & PGT_type_mask; - return type_info && (type_info <= PGT_l4_page_table); } diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/arch/x86/mm/shadow/types.h Wed Dec 20 12:03:07 2006 +0000 @@ -477,8 +477,8 @@ struct shadow_walk_t #define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa) #define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn) #define sh_update_cr3 INTERNAL_NAME(sh_update_cr3) -#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access) -#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings) +#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1) +#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1) #define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow) #define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow) #define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow) diff -r b258c7587d8d -r c75d6f2aad7a xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/include/asm-x86/mm.h Wed Dec 20 12:03:07 2006 +0000 @@ -307,7 +307,7 @@ void audit_domains(void); int new_guest_cr3(unsigned long pfn); void make_cr3(struct vcpu *v, unsigned long mfn); - +void update_cr3(struct vcpu *v); void propagate_page_fault(unsigned long addr, u16 error_code); int __sync_lazy_execstate(void); diff -r b258c7587d8d -r c75d6f2aad7a xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Wed Dec 20 11:59:54 2006 +0000 +++ b/xen/include/asm-x86/shadow.h Wed Dec 20 12:03:07 2006 +0000 @@ -29,20 +29,8 @@ #include <xen/domain_page.h> #include <asm/flushtlb.h> -/* How to make sure a page is not referred to in a shadow PT */ -/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ -#define shadow_drop_references(_d, _p) \ - 
shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) -#define shadow_sync_and_drop_references(_d, _p) \ - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) - -/* How to add and remove entries in the p2m mapping. */ -#define guest_physmap_add_page(_d, _p, _m) \ - shadow_guest_physmap_add_page((_d), (_p), (_m)) -#define guest_physmap_remove_page(_d, _p, _m ) \ - shadow_guest_physmap_remove_page((_d), (_p), (_m)) - -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ +/***************************************************************************** + * Macros to tell which shadow paging mode a domain is in */ #define SHM2_shift 10 /* We're in one of the shadow modes */ @@ -64,107 +52,24 @@ #define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) /* Xen traps & emulates all reads of all page table pages: - * not yet supported - */ + * not yet supported */ #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) -// How do we tell that we have a 32-bit PV guest in a 64-bit Xen? -#ifdef __x86_64__ -#define pv_32bit_guest(_v) 0 // not yet supported -#else -#define pv_32bit_guest(_v) !is_hvm_vcpu(v) -#endif - -/* The shadow lock. - * - * This lock is per-domain. It is intended to allow us to make atomic - * updates to the software TLB that the shadow tables provide. - * - * Specifically, it protects: - * - all changes to shadow page table pages - * - the shadow hash table - * - the shadow page allocator - * - all changes to guest page table pages; if/when the notion of - * out-of-sync pages is added to this code, then the shadow lock is - * protecting all guest page table pages which are not listed as - * currently as both guest-writable and out-of-sync... - * XXX -- need to think about this relative to writable page tables. - * - all changes to the page_info->tlbflush_timestamp - * - the page_info->count fields on shadow pages - * - the shadow dirty bit array and count - * - XXX - */ -#ifndef CONFIG_SMP -#error shadow.h currently requires CONFIG_SMP -#endif - -#define shadow_lock_init(_d) \ - do { \ - spin_lock_init(&(_d)->arch.shadow.lock); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - } while (0) - -#define shadow_locked_by_me(_d) \ - (current->processor == (_d)->arch.shadow.locker) - -#define shadow_lock(_d) \ - do { \ - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ - { \ - printk("Error: shadow lock held by %s\n", \ - (_d)->arch.shadow.locker_function); \ - BUG(); \ - } \ - spin_lock(&(_d)->arch.shadow.lock); \ - ASSERT((_d)->arch.shadow.locker == -1); \ - (_d)->arch.shadow.locker = current->processor; \ - (_d)->arch.shadow.locker_function = __func__; \ - } while (0) - -#define shadow_unlock(_d) \ - do { \ - ASSERT((_d)->arch.shadow.locker == current->processor); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - spin_unlock(&(_d)->arch.shadow.lock); \ - } while (0) - -/* - * Levels of self-test and paranoia - * XXX should go in config files somewhere? 
- */ -#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ -#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ -#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ -#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ -#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ - -#ifdef NDEBUG -#define SHADOW_AUDIT 0 -#define SHADOW_AUDIT_ENABLE 0 -#else -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ -#define SHADOW_AUDIT_ENABLE shadow_audit_enable -extern int shadow_audit_enable; -#endif - -/* - * Levels of optimization - * XXX should go in config files somewhere? - */ -#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ -#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ -#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ -#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ -#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ -#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ - -#define SHADOW_OPTIMIZATIONS 0x3f - - -/* With shadow pagetables, the different kinds of address start + +/****************************************************************************** + * The equivalent for a particular vcpu of a shadowed domain. */ + +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? + * + * This is true of translated HVM domains on a vcpu which has paging + * enabled. (HVM vcpus with paging disabled are using the p2m table as + * its paging table, so no translation occurs in this case.) + * It is also true for all vcpus of translated PV domains. */ +#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) + + +/****************************************************************************** + * With shadow pagetables, the different kinds of address start * to get get confusing. * * Virtual addresses are what they usually are: the addresses that are used @@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t #endif TYPE_SAFE(unsigned long,mfn) + +/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ #define SH_PRI_mfn "05lx" -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} - -static inline int -shadow_vcpu_mode_translate(struct vcpu *v) -{ - // Returns true if this VCPU needs to be using the P2M table to translate - // between GFNs and MFNs. - // - // This is true of translated HVM domains on a vcpu which has paging - // enabled. (HVM vcpu's with paging disabled are using the p2m table as - // its paging table, so no translation occurs in this case.) - // - // It is also true for translated PV domains. - // - return v->arch.shadow.translate_enabled; -} - - -/**************************************************************************/ -/* Mode-specific entry points into the shadow code */ + +/***************************************************************************** + * Mode-specific entry points into the shadow code. + * + * These shouldn't be used directly by callers; rather use the functions + * below which will indirect through this table as appropriate. 
*/ struct sh_emulate_ctxt; struct shadow_paging_mode { @@ -254,7 +137,7 @@ struct shadow_paging_mode { int (*invlpg )(struct vcpu *v, unsigned long va); paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); - void (*update_cr3 )(struct vcpu *v); + void (*update_cr3 )(struct vcpu *v, int do_locking); int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, void *new_guest_entry, u32 size); int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, @@ -286,35 +169,30 @@ struct shadow_paging_mode { unsigned long *gl1mfn); void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, void *eff_l1e); -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC int (*guess_wrmap )(struct vcpu *v, unsigned long vaddr, mfn_t gmfn); -#endif /* For outsiders to tell what mode we're in */ unsigned int shadow_levels; unsigned int guest_levels; }; -static inline int shadow_guest_paging_levels(struct vcpu *v) -{ - ASSERT(v->arch.shadow.mode != NULL); - return v->arch.shadow.mode->guest_levels; -} - -/**************************************************************************/ -/* Entry points into the shadow code */ - -/* Enable arbitrary shadow mode. */ + +/***************************************************************************** + * Entry points into the shadow code */ + +/* Set up the shadow-specific parts of a domain struct at start of day. + * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d); + +/* Enable an arbitrary shadow mode. Call once at domain creation. */ int shadow_enable(struct domain *d, u32 mode); -/* Turning on shadow test mode */ -int shadow_test_enable(struct domain *d); - -/* Handler for shadow control ops: enabling and disabling shadow modes, - * and log-dirty bitmap ops all happen through here. */ +/* Handler for shadow control ops: operations from user-space to enable + * and disable ephemeral shadow modes (test mode and log-dirty mode) and + * manipulate the log-dirty bitmap. */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); /* Call when destroying a domain */ void shadow_teardown(struct domain *d); @@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d); /* Call once all of the references to the domain have gone away */ void shadow_final_teardown(struct domain *d); - -/* Mark a page as dirty in the bitmap */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn); +/* Mark a page as dirty in the log-dirty bitmap: called when Xen + * makes changes to guest memory on its behalf. */ +void shadow_mark_dirty(struct domain *d, mfn_t gmfn); +/* Cleaner version so we don't pepper shadow_mode tests all over the place */ static inline void mark_dirty(struct domain *d, unsigned long gmfn) { - if ( likely(!shadow_mode_log_dirty(d)) ) - return; - - shadow_lock(d); - sh_do_mark_dirty(d, _mfn(gmfn)); - shadow_unlock(d); -} - -/* Internal version, for when the shadow lock is already held */ -static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn) -{ - ASSERT(shadow_locked_by_me(d)); if ( unlikely(shadow_mode_log_dirty(d)) ) - sh_do_mark_dirty(d, gmfn); -} - -static inline int -shadow_fault(unsigned long va, struct cpu_user_regs *regs) -/* Called from pagefault handler in Xen, and from the HVM trap handlers + shadow_mark_dirty(d, _mfn(gmfn)); +} + +/* Handle page-faults caused by the shadow pagetable mechanisms. 
+ * Called from pagefault handler in Xen, and from the HVM trap handlers * for pagefaults. Returns 1 if this fault was an artefact of the * shadow code (and the guest should retry) or 0 if it is not (and the * fault should be handled elsewhere or passed to the guest). */ +static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) { struct vcpu *v = current; perfc_incrc(shadow_fault); return v->arch.shadow.mode->page_fault(v, va, regs); } -static inline int -shadow_invlpg(struct vcpu *v, unsigned long va) -/* Called when the guest requests an invlpg. Returns 1 if the invlpg - * instruction should be issued on the hardware, or 0 if it's safe not - * to do so. */ +/* Handle invlpg requests on shadowed vcpus. + * Returns 1 if the invlpg instruction should be issued on the hardware, + * or 0 if it's safe not to do so. */ +static inline int shadow_invlpg(struct vcpu *v, unsigned long va) { return v->arch.shadow.mode->invlpg(v, va); } -static inline paddr_t -shadow_gva_to_gpa(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the physical address that the + * *guest* pagetables would map it to. */ +static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return (paddr_t) va; return v->arch.shadow.mode->gva_to_gpa(v, va); } -static inline unsigned long -shadow_gva_to_gfn(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the frame number that the + * *guest* pagetables would map it to. */ +static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return va >> PAGE_SHIFT; return v->arch.shadow.mode->gva_to_gfn(v, va); } -static inline void -shadow_update_cr3(struct vcpu *v) -/* Updates all the things that are derived from the guest's CR3. - * Called when the guest changes CR3. */ -{ - shadow_lock(v->domain); - v->arch.shadow.mode->update_cr3(v); - shadow_unlock(v->domain); -} - - -/* Should be called after CR3 is updated. - * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. - * - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, - * shadow_vtable, etc). - * - * Uses values found in vcpu->arch.(guest_table and guest_table_user), and - * for HVM guests, arch.monitor_table and hvm's guest CR3. - * - * Update ref counts to shadow tables appropriately. - */ -static inline void update_cr3(struct vcpu *v) -{ - unsigned long cr3_mfn=0; - - if ( shadow_mode_enabled(v->domain) ) - { - shadow_update_cr3(v); - return; - } - -#if CONFIG_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) - cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); - else -#endif - cr3_mfn = pagetable_get_pfn(v->arch.guest_table); - - make_cr3(v, cr3_mfn); -} - -extern void sh_update_paging_modes(struct vcpu *v); - -/* Should be called to initialise paging structures if the paging mode +/* Update all the things that are derived from the guest's CR3. + * Called when the guest changes CR3; the caller can then use + * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu + * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when + * entering the HVM guest. 
*/ +static inline void shadow_update_cr3(struct vcpu *v) +{ + v->arch.shadow.mode->update_cr3(v, 1); +} + +/* Update all the things that are derived from the guest's CR0/CR3/CR4. + * Called to initialize paging structures if the paging mode * has changed, and when bringing up a VCPU for the first time. */ -static inline void shadow_update_paging_modes(struct vcpu *v) -{ - ASSERT(shadow_mode_enabled(v->domain)); - shadow_lock(v->domain); - sh_update_paging_modes(v); - shadow_unlock(v->domain); -} - -static inline void -shadow_detach_old_tables(struct vcpu *v) -{ - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); -} - -static inline mfn_t -shadow_make_monitor_table(struct vcpu *v) -{ - return v->arch.shadow.mode->make_monitor_table(v); -} - -static inline void -shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) -{ - v->arch.shadow.mode->destroy_monitor_table(v, mmfn); -} - +void shadow_update_paging_modes(struct vcpu *v); + + +/***************************************************************************** + * Access to the guest pagetables */ + +/* Get a mapping of a PV guest's l1e for this virtual address. */ static inline void * guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) { - if ( likely(!shadow_mode_translate(v->domain)) ) - { - l2_pgentry_t l2e; - ASSERT(!shadow_mode_external(v->domain)); - /* Find this l1e and its enclosing l1mfn in the linear map */ - if ( __copy_from_user(&l2e, - &__linear_l2_table[l2_linear_offset(addr)], - sizeof(l2_pgentry_t)) != 0 ) - return NULL; - /* Check flags that it will be safe to read the l1e */ - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) - != _PAGE_PRESENT ) - return NULL; - *gl1mfn = l2e_get_pfn(l2e); - return &__linear_l1_table[l1_linear_offset(addr)]; - } - - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); -} - + l2_pgentry_t l2e; + + if ( unlikely(shadow_mode_translate(v->domain)) ) + return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + + /* Find this l1e and its enclosing l1mfn in the linear map */ + if ( __copy_from_user(&l2e, + &__linear_l2_table[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)) != 0 ) + return NULL; + /* Check flags that it will be safe to read the l1e */ + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) + != _PAGE_PRESENT ) + return NULL; + *gl1mfn = l2e_get_pfn(l2e); + return &__linear_l1_table[l1_linear_offset(addr)]; +} + +/* Pull down the mapping we got from guest_map_l1e() */ static inline void guest_unmap_l1e(struct vcpu *v, void *p) { @@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p) unmap_domain_page(p); } +/* Read the guest's l1e that maps this address. */ static inline void guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); } +/* Read the guest's l1e that maps this address, from the kernel-mode + * pagetables. */ static inline void guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -518,81 +331,35 @@ guest_get_eff_kern_l1e(struct vcpu *v, u TOGGLE_MODE(); } - -/* Validate a pagetable change from the guest and update the shadows. 
*/ -extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry); -extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Update the shadows in response to a pagetable write from a HVM guest */ -extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Remove all writeable mappings of a guest frame from the shadows. - * Returns non-zero if we need to flush TLBs. - * level and fault_addr desribe how we found this to be a pagetable; - * level==0 means we have some other reason for revoking write access. */ -extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, - unsigned int level, - unsigned long fault_addr); - -/* Remove all mappings of the guest mfn from the shadows. - * Returns non-zero if we need to flush TLBs. */ -extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); - -/* Remove all mappings from the shadows. */ -extern void shadow_blow_tables(struct domain *d); - -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn); -/* This is a HVM page that we thing is no longer a pagetable. - * Unshadow it, and recursively unshadow pages that reference it. */ +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. */ +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); + +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn); + +/* Remove all mappings of the guest page from the shadows. + * This is called from common code. It does not flush TLBs. */ +int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); +static inline void +shadow_drop_references(struct domain *d, struct page_info *p) +{ + /* See the comment about locking in sh_remove_all_mappings */ + sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p))); +} /* Remove all shadows of the guest mfn. */ -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) { - int was_locked = shadow_locked_by_me(v->domain); - if ( !was_locked ) - shadow_lock(v->domain); - sh_remove_shadows(v, gmfn, 0, 1); - if ( !was_locked ) - shadow_unlock(v->domain); -} - -/* Add a page to a domain */ -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Remove a page from a domain */ -void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* - * Allocation of shadow pages - */ - -/* Return the minumum acceptable number of shadow pages a domain needs */ -unsigned int shadow_min_acceptable_pages(struct domain *d); - -/* Set the pool of shadow pages to the required number of MB. - * Input will be rounded up to at least min_acceptable_shadow_pages(). - * Returns 0 for success, 1 for failure. 
*/ -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted); - -/* Return the size of the shadow pool, rounded up to the nearest MB */ -static inline unsigned int shadow_get_allocation(struct domain *d) -{ - unsigned int pg = d->arch.shadow.total_pages; - return ((pg >> (20 - PAGE_SHIFT)) - + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); -} - + /* See the comment about locking in sh_remove_shadows */ + sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); +} /**************************************************************************/ /* Guest physmap (p2m) support @@ -602,8 +369,19 @@ static inline unsigned int shadow_get_al * guests, so we steal the address space that would have normally * been used by the read-only MPT map. */ - #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) + +/* Add a page to a domain's p2m table */ +void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Remove a page from a domain's p2m table */ +void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Aliases, called from common code. */ +#define guest_physmap_add_page shadow_guest_physmap_add_page +#define guest_physmap_remove_page shadow_guest_physmap_remove_page /* Read the current domain's P2M table. */ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) @@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_curren return _mfn(INVALID_MFN); } -/* Walk another domain's P2M table, mapping pages as we go */ -extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); +/* Read another domain's P2M table, mapping pages as we go */ +mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); /* General conversion function from gfn to mfn */ static inline mfn_t @@ -666,6 +444,7 @@ mmio_space(paddr_t gpa) return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); } +/* Translate the frame number held in an l1e from guest to machine */ static inline l1_pgentry_t gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) { @@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr * indent-tabs-mode: nil * End: */ - _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
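A minimal sketch of how a caller is expected to use the guest-pagetable accessors that survive this cleanup: the helper name example_guest_va_is_writable() and the simple present+read/write test are invented for illustration and are not part of the changeset; only guest_get_eff_l1e() and the standard l1e helpers come from the interface above.

/* Illustrative sketch only -- assumes the usual Xen headers, e.g.
 * <asm/shadow.h> for guest_get_eff_l1e() and <asm/page.h> for the
 * l1e accessors and _PAGE_* flags. */
static int example_guest_va_is_writable(struct vcpu *v, unsigned long va)
{
    l1_pgentry_t l1e;

    /* Copy the guest's effective l1e for 'va' into a local variable.
     * In translate mode this goes through the mode-specific hook;
     * otherwise it reads the linear pagetable mapping directly. */
    guest_get_eff_l1e(v, va, &l1e);

    /* Treat "writable" as present + read/write; a real caller might
     * also need to look at the user/supervisor or NX bits. */
    return (l1e_get_flags(l1e) & (_PAGE_PRESENT | _PAGE_RW))
           == (_PAGE_PRESENT | _PAGE_RW);
}

Writes go the other way, through shadow_write_guest_entry() or shadow_cmpxchg_guest_entry(), so the shadow code can revalidate the entry and take the shadow lock internally; callers no longer touch the lock at all.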
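The p2m helpers at the end of shadow.h are similarly the only translation interface left for common code. A short sketch of their use, assuming a translated (shadow_mode_translate) domain; the helper name example_dump_gfn() is invented for illustration:

/* Illustrative sketch only -- assumes <asm/shadow.h> and <xen/lib.h>,
 * and only makes sense for a translated domain, where the p2m table
 * mapped at RO_MPT_VIRT_START is populated. */
static void example_dump_gfn(unsigned long gfn)
{
    mfn_t mfn = sh_gfn_to_mfn_current(gfn);

    if ( !mfn_valid(mfn_x(mfn)) )
        /* No RAM behind this gfn: mmio_space() makes the same test. */
        printk("gfn %#lx: no valid machine frame (MMIO?)\n", gfn);
    else
        /* SH_PRI_mfn is the printk format defined above for mfns. */
        printk("gfn %#lx -> mfn %"SH_PRI_mfn"\n", gfn, mfn_x(mfn));
}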