[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [SHADOW] Fix error paths in guest-pagetable walker.
# HG changeset patch # User Keir Fraser <keir@xxxxxxxxxxxxx> # Date 1194280727 0 # Node ID bfb1cb95863206a865fcd65a62a9b839a484c305 # Parent d945240821e7e8f0a047468fa814f86470f1a884 [SHADOW] Fix error paths in guest-pagetable walker. Real hardware sets PFEC_page_present regardless of the access bits, and doesn't write back _PAGE_ACCESSED except after a successful walk. Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx> --- xen/arch/x86/mm/shadow/multi.c | 206 +++++++++++++++++++++++------------------ 1 files changed, 116 insertions(+), 90 deletions(-) diff -r d945240821e7 -r bfb1cb958632 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Nov 05 16:37:48 2007 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Mon Nov 05 16:38:47 2007 +0000 @@ -226,51 +226,24 @@ static uint32_t mandatory_flags(struct v return f; } -/* Read, check and modify a guest pagetable entry. Returns 0 if the - * flags are OK. Although we use l1e types here, the logic and the bits - * are the same for all types except PAE l3es. */ -static int guest_walk_entry(struct vcpu *v, mfn_t gmfn, - void *gp, void *wp, - uint32_t flags, int level) -{ - guest_l1e_t e, old_e; - uint32_t gflags; - int rc; - - /* Read the guest entry */ - e = *(guest_l1e_t *)gp; - - /* Check that all the mandatory flag bits are there. Invert NX, to - * calculate as if there were an "X" bit that allowed access. */ - gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT; - rc = ((gflags & flags) != flags); - - /* Set the accessed/dirty bits */ - if ( rc == 0 ) - { - uint32_t bits = _PAGE_ACCESSED; - if ( (flags & _PAGE_RW) // Implies that the action is a write - && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) ) - bits |= _PAGE_DIRTY; - old_e = e; - e.l1 |= bits; - SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n", - (unsigned long) flags, - (unsigned long) bits, - (unsigned long long) old_e.l1, - (unsigned long long) e.l1); - /* Try to write the entry back. 
If it's changed under out feet - * then leave it alone */ - if ( e.l1 != old_e.l1 ) - { - (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1); - paging_mark_dirty(v->domain, mfn_x(gmfn)); - } - } - - /* Record the entry in the walk */ - *(guest_l1e_t *)wp = e; - return rc; +/* Modify a guest pagetable entry to set the Accessed and Dirty bits. + * Returns non-zero if it actually writes to guest memory. */ +static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty) +{ + guest_intpte_t old, new; + + old = *(guest_intpte_t *)walk_p; + new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0); + if ( old != new ) + { + /* Write the new entry into the walk, and try to write it back + * into the guest table as well. If the guest table has changed + * under out feet then leave it alone. */ + *(guest_intpte_t *)walk_p = new; + if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) + return 1; + } + return 0; } /* Walk the guest pagetables, after the manner of a hardware walker. @@ -293,21 +266,20 @@ static int guest_walk_entry(struct vcpu * N.B. This is different from the old return code but almost no callers * checked the old return code anyway. */ -static int +static uint32_t guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec, int shadow_op) { struct domain *d = v->domain; p2m_type_t p2mt; - guest_l1e_t *l1p; -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ - guest_l1e_t *l2p; + guest_l1e_t *l1p = NULL; + guest_l2e_t *l2p = NULL; #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ - guest_l1e_t *l3p; -#endif -#endif - uint32_t flags = mandatory_flags(v, pfec); - int rc; + guest_l3e_t *l3p = NULL; + guest_l4e_t *l4p; +#endif + uint32_t gflags, mflags, rc = 0; + int pse; ASSERT(!shadow_op || shadow_locked_by_me(d)); @@ -315,66 +287,86 @@ guest_walk_tables(struct vcpu *v, unsign memset(gw, 0, sizeof(*gw)); gw->va = va; + /* Mandatory bits that must be set in every entry. 
We invert NX, to + * calculate as if there were an "X" bit that allowed access. + * We will accumulate, in rc, the set of flags that are missing. */ + mflags = mandatory_flags(v, pfec); + #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ + /* Get the l4e from the top level table and check its flags*/ gw->l4mfn = pagetable_get_mfn(v->arch.guest_table); - rc = guest_walk_entry(v, gw->l4mfn, - (guest_l4e_t *)v->arch.paging.shadow.guest_vtable - + guest_l4_table_offset(va), - &gw->l4e, flags, 4); - if ( rc != 0 ) return rc; + l4p = ((guest_l4e_t *)v->arch.paging.shadow.guest_vtable); + gw->l4e = l4p[guest_l4_table_offset(va)]; + gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT; + rc |= ((gflags & mflags) ^ mflags); + if ( rc & _PAGE_PRESENT ) goto out; /* Map the l3 table */ gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt); - if ( !p2m_is_ram(p2mt) ) return 1; + if ( !p2m_is_ram(p2mt) ) + { + rc |= _PAGE_PRESENT; + goto out; + } ASSERT(mfn_valid(gw->l3mfn)); /* This mfn is a pagetable: make sure the guest can't write to it. */ if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) flush_tlb_mask(d->domain_dirty_cpumask); /* Get the l3e and check its flags*/ l3p = sh_map_domain_page(gw->l3mfn); - rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va), - &gw->l3e, flags, 3); - sh_unmap_domain_page(l3p); - if ( rc != 0 ) return rc; + gw->l3e = l3p[guest_l3_table_offset(va)]; + gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT; + rc |= ((gflags & mflags) ^ mflags); + if ( rc & _PAGE_PRESENT ) + goto out; #else /* PAE only... */ /* Get l3e from the cache of the top level table and check its flag */ gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)]; - if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1; + if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) + { + rc |= _PAGE_PRESENT; + goto out; + } #endif /* PAE or 64... 
*/ /* Map the l2 table */ gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt); - if ( !p2m_is_ram(p2mt) ) return 1; + if ( !p2m_is_ram(p2mt) ) + { + rc |= _PAGE_PRESENT; + goto out; + } ASSERT(mfn_valid(gw->l2mfn)); /* This mfn is a pagetable: make sure the guest can't write to it. */ if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) flush_tlb_mask(d->domain_dirty_cpumask); /* Get the l2e */ l2p = sh_map_domain_page(gw->l2mfn); - rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va), - &gw->l2e, flags, 2); - sh_unmap_domain_page(l2p); - if ( rc != 0 ) return rc; + gw->l2e = l2p[guest_l2_table_offset(va)]; #else /* 32-bit only... */ - /* Get l2e from the top level table and check its flags */ + /* Get l2e from the top level table */ gw->l2mfn = pagetable_get_mfn(v->arch.guest_table); - rc = guest_walk_entry(v, gw->l2mfn, - (guest_l2e_t *)v->arch.paging.shadow.guest_vtable - + guest_l2_table_offset(va), - &gw->l2e, flags, 2); - if ( rc != 0 ) return rc; + l2p = ((guest_l2e_t *)v->arch.paging.shadow.guest_vtable); + gw->l2e = l2p[guest_l2_table_offset(va)]; #endif /* All levels... */ - if ( guest_supports_superpages(v) && - (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) ) + gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT; + rc |= ((gflags & mflags) ^ mflags); + if ( rc & _PAGE_PRESENT ) + goto out; + + pse = (guest_supports_superpages(v) && + (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)); + + if ( pse ) { /* Special case: this guest VA is in a PSE superpage, so there's * no guest l1e. We make one up so that the propagation code @@ -404,20 +396,55 @@ guest_walk_tables(struct vcpu *v, unsign { /* Not a superpage: carry on and find the l1e. */ gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt); - if ( !p2m_is_ram(p2mt) ) return 1; + if ( !p2m_is_ram(p2mt) ) + { + rc |= _PAGE_PRESENT; + goto out; + } ASSERT(mfn_valid(gw->l1mfn)); /* This mfn is a pagetable: make sure the guest can't write to it. 
*/ if ( shadow_op && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) flush_tlb_mask(d->domain_dirty_cpumask); l1p = sh_map_domain_page(gw->l1mfn); - rc = guest_walk_entry(v, gw->l2mfn, l1p + guest_l1_table_offset(va), - &gw->l1e, flags, 1); - sh_unmap_domain_page(l1p); - if ( rc != 0 ) return rc; - } - - return 0; + gw->l1e = l1p[guest_l1_table_offset(va)]; + gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT; + rc |= ((gflags & mflags) ^ mflags); + } + + /* Go back and set accessed and dirty bits only if the walk was a + * success. Although the PRMs say higher-level _PAGE_ACCESSED bits + * get set whenever a lower-level PT is used, at least some hardware + * walkers behave this way. */ + if ( rc == 0 ) + { +#if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */ + if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) ) + paging_mark_dirty(d, mfn_x(gw->l4mfn)); + if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) ) + paging_mark_dirty(d, mfn_x(gw->l3mfn)); +#endif + if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e, + (pse && (pfec & PFEC_write_access))) ) + paging_mark_dirty(d, mfn_x(gw->l2mfn)); + if ( !pse ) + { + if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, + (pfec & PFEC_write_access)) ) + paging_mark_dirty(d, mfn_x(gw->l1mfn)); + } + } + + out: +#if GUEST_PAGING_LEVELS == 4 + if ( l3p ) sh_unmap_domain_page(l3p); +#endif +#if GUEST_PAGING_LEVELS >= 3 + if ( l2p ) sh_unmap_domain_page(l2p); +#endif + if ( l1p ) sh_unmap_domain_page(l1p); + + return rc; } /* Given a walk_t, translate the gw->va into the guest's notion of the @@ -521,9 +548,8 @@ sh_guest_map_l1e(struct vcpu *v, unsigne // FIXME! shadow_lock(v->domain); - guest_walk_tables(v, addr, &gw, 0, 1); - - if ( mfn_valid(gw.l1mfn) ) + if ( guest_walk_tables(v, addr, &gw, PFEC_page_present, 1) == 0 + && mfn_valid(gw.l1mfn) ) { if ( gl1mfn ) *gl1mfn = mfn_x(gw.l1mfn); @@ -547,7 +573,7 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns // FIXME! 
shadow_lock(v->domain); - guest_walk_tables(v, addr, &gw, 0, 1); + (void) guest_walk_tables(v, addr, &gw, PFEC_page_present, 1); *(guest_l1e_t *)eff_l1e = gw.l1e; shadow_unlock(v->domain); } _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
Lists.xenproject.org is hosted with RackSpace.