[Xen-changelog] [xen-unstable] Merge.
# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1194021491 0
# Node ID 650cadd1b28303dae4927ca65e64eb2507e3a3f3
# Parent  838e77a41a3c53a54428e642cb0440a8a6f8912b
# Parent  db9f62d8f7f4d2d8f8ccf7c512623977132bcffa
Merge.
---
 xen/arch/x86/hvm/hvm.c            |   36 +
 xen/arch/x86/hvm/platform.c       |   25 -
 xen/arch/x86/hvm/svm/svm.c        |   13
 xen/arch/x86/hvm/vmx/vmx.c        |   14
 xen/arch/x86/mm/hap/guest_walk.c  |    4
 xen/arch/x86/mm/hap/hap.c         |    2
 xen/arch/x86/mm/hap/private.h     |    9
 xen/arch/x86/mm/p2m.c             |    6
 xen/arch/x86/mm/shadow/common.c   |   16
 xen/arch/x86/mm/shadow/multi.c    |  689 ++++++++++++++++----------------
 xen/arch/x86/mm/shadow/private.h  |   13
 xen/arch/x86/mm/shadow/types.h    |   49 +-
 xen/include/asm-x86/hvm/support.h |    1
 xen/include/asm-x86/paging.h      |   18
 xen/include/asm-x86/perfc_defn.h  |    8
 15 files changed, 420 insertions(+), 483 deletions(-)

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Fri Nov 02 16:38:11 2007 +0000
@@ -931,6 +931,7 @@ static void *hvm_map(unsigned long va, i
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
+    uint32_t pfec;
 
     if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
     {
@@ -939,11 +940,15 @@ static void *hvm_map(unsigned long va, i
         return NULL;
     }
 
-    gfn = paging_gva_to_gfn(current, va);
+    /* We're mapping on behalf of the segment-load logic, which might
+     * write the accessed flags in the descriptors (in 32-bit mode), but
+     * we still treat it as a kernel-mode read (i.e. no access checks). */
+    pfec = PFEC_page_present;
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
     {
-        hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
+        hvm_inject_exception(TRAP_page_fault, pfec, va);
         return NULL;
     }
 
@@ -1263,14 +1268,24 @@ void hvm_task_switch(
  * @size = number of bytes to copy
  * @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
  * @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
+ * @fetch = copy is an instruction fetch?
  * Returns number of bytes failed to copy (0 == complete success).
 */
-static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
+static int __hvm_copy(void *buf, paddr_t addr, int size, int dir,
+                      int virt, int fetch)
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
     char *p;
     int count, todo;
+    uint32_t pfec = PFEC_page_present;
+
+    if ( dir )
+        pfec |= PFEC_write_access;
+    if ( ring_3(guest_cpu_user_regs()) )
+        pfec |= PFEC_user_mode;
+    if ( fetch )
+        pfec |= PFEC_insn_fetch;
 
     todo = size;
     while ( todo > 0 )
@@ -1278,7 +1293,7 @@ static int __hvm_copy(void *buf, paddr_t
         count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
 
         if ( virt )
-            gfn = paging_gva_to_gfn(current, addr);
+            gfn = paging_gva_to_gfn(current, addr, &pfec);
         else
             gfn = addr >> PAGE_SHIFT;
 
@@ -1310,22 +1325,27 @@ static int __hvm_copy(void *buf, paddr_t
 
 int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
 {
-    return __hvm_copy(buf, paddr, size, 1, 0);
+    return __hvm_copy(buf, paddr, size, 1, 0, 0);
 }
 
 int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
 {
-    return __hvm_copy(buf, paddr, size, 0, 0);
+    return __hvm_copy(buf, paddr, size, 0, 0, 0);
 }
 
 int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
 {
-    return __hvm_copy(buf, vaddr, size, 1, 1);
+    return __hvm_copy(buf, vaddr, size, 1, 1, 0);
 }
 
 int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
 {
-    return __hvm_copy(buf, vaddr, size, 0, 1);
+    return __hvm_copy(buf, vaddr, size, 0, 1, 0);
+}
+
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size)
+{
+    return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
 }
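
The new pfec plumbing starts here: __hvm_copy builds a pagefault error code up
front and hands it to the walker, which refines it on failure.  A standalone
sketch (not part of this changeset; the helper name is made up) of how those
bits compose, using the architectural PFEC bit positions:

#include <assert.h>
#include <stdint.h>

#define PFEC_page_present (1U << 0)
#define PFEC_write_access (1U << 1)
#define PFEC_user_mode    (1U << 2)
#define PFEC_insn_fetch   (1U << 4)

static uint32_t copy_pfec(int is_write, int ring3, int fetch, int nx_enabled)
{
    uint32_t pfec = PFEC_page_present;  /* we need a present translation */
    if ( is_write )
        pfec |= PFEC_write_access;
    if ( ring3 )
        pfec |= PFEC_user_mode;
    /* hvm_fetch_from_guest_virt() only flags a fetch when the guest has
     * NX enabled; otherwise a fetch is indistinguishable from a read. */
    if ( fetch && nx_enabled )
        pfec |= PFEC_insn_fetch;
    return pfec;
}

int main(void)
{
    assert(copy_pfec(0, 1, 1, 0) == (PFEC_page_present|PFEC_user_mode));
    assert(copy_pfec(1, 0, 0, 1) == (PFEC_page_present|PFEC_write_access));
    return 0;
}
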
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/platform.c       Fri Nov 02 16:38:11 2007 +0000
@@ -833,7 +833,7 @@ int inst_copy_from_guest(unsigned char *
 {
     if ( inst_len > MAX_INST_LEN || inst_len <= 0 )
         return 0;
-    if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) )
+    if ( hvm_fetch_from_guest_virt(buf, guest_eip, inst_len) )
         return 0;
     return inst_len;
 }
@@ -1075,6 +1075,7 @@ void handle_mmio(unsigned long gpa)
     unsigned long addr, gfn;
     paddr_t paddr;
     int dir, size = op_size;
+    uint32_t pfec;
 
     ASSERT(count);
 
@@ -1082,8 +1083,11 @@ void handle_mmio(unsigned long gpa)
         addr = regs->edi;
         if ( ad_size == WORD )
             addr &= 0xFFFF;
-        addr += hvm_get_segment_base(v, x86_seg_es);
-        gfn = paging_gva_to_gfn(v, addr);
+        addr += hvm_get_segment_base(v, x86_seg_es);
+        pfec = PFEC_page_present | PFEC_write_access;
+        if ( ring_3(regs) )
+            pfec |= PFEC_user_mode;
+        gfn = paging_gva_to_gfn(v, addr, &pfec);
         paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
         if ( paddr == gpa )
         {
@@ -1105,7 +1109,8 @@ void handle_mmio(unsigned long gpa)
             default: domain_crash_synchronous();
             }
             addr += hvm_get_segment_base(v, seg);
-            gfn = paging_gva_to_gfn(v, addr);
+            pfec &= ~PFEC_write_access;
+            gfn = paging_gva_to_gfn(v, addr, &pfec);
             paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
         }
         else
@@ -1115,12 +1120,9 @@ void handle_mmio(unsigned long gpa)
         {
             /* The guest does not have the non-mmio address mapped.
              * Need to send in a page fault */
-            int errcode = 0;
-            /* IO read --> memory write */
-            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
             regs->eip -= inst_len; /* do not advance %eip */
             regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
-            hvm_inject_exception(TRAP_page_fault, errcode, addr);
+            hvm_inject_exception(TRAP_page_fault, pfec, addr);
             return;
         }
 
@@ -1308,10 +1310,9 @@ void handle_mmio(unsigned long gpa)
 
 DEFINE_PER_CPU(int, guest_handles_in_xen_space);
 
-/* Note that copy_{to,from}_user_hvm don't set the A and D bits on
-   PTEs, and require the PTE to be writable even when they're only
-   trying to read from it.  The guest is expected to deal with
-   this. */
+/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
+   when they're only trying to read from it.  The guest is expected to
+   deal with this. */
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
 {
     if ( this_cpu(guest_handles_in_xen_space) )

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Nov 02 16:38:11 2007 +0000
@@ -1441,6 +1441,7 @@ static void svm_io_instruction(struct vc
         unsigned long addr, count;
         paddr_t paddr;
         unsigned long gfn;
+        uint32_t pfec;
         int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
 
         if (!svm_get_io_address(v, regs, size, info, &count, &addr))
@@ -1459,15 +1460,17 @@ static void svm_io_instruction(struct vc
         }
 
         /* Translate the address to a physical address */
-        gfn = paging_gva_to_gfn(v, addr);
+        pfec = PFEC_page_present;
+        if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+            pfec |= PFEC_write_access;
+        if ( ring_3(regs) )
+            pfec |= PFEC_user_mode;
+        gfn = paging_gva_to_gfn(v, addr, &pfec);
         if ( gfn == INVALID_GFN )
         {
             /* The guest does not have the RAM address mapped.
              * Need to send in a page fault */
-            int errcode = 0;
-            /* IO read --> memory write */
-            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
-            svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
+            svm_hvm_inject_exception(TRAP_page_fault, pfec, addr);
             return;
         }
         paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Nov 02 16:38:11 2007 +0000
@@ -1642,7 +1642,7 @@ static void vmx_do_str_pio(unsigned long
     unsigned long addr, count = 1, base;
     paddr_t paddr;
     unsigned long gfn;
-    u32 ar_bytes, limit;
+    u32 ar_bytes, limit, pfec;
     int sign;
     int long_mode = 0;
 
@@ -1714,15 +1714,17 @@ static void vmx_do_str_pio(unsigned long
 #endif
 
     /* Translate the address to a physical address */
-    gfn = paging_gva_to_gfn(current, addr);
+    pfec = PFEC_page_present;
+    if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+        pfec |= PFEC_write_access;
+    if ( ring_3(regs) )
+        pfec |= PFEC_user_mode;
+    gfn = paging_gva_to_gfn(current, addr, &pfec);
     if ( gfn == INVALID_GFN )
    {
         /* The guest does not have the RAM address mapped.
          * Need to send in a page fault */
-        int errcode = 0;
-        /* IO read --> memory write */
-        if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
-        vmx_inject_exception(TRAP_page_fault, errcode, addr);
+        vmx_inject_exception(TRAP_page_fault, pfec, addr);
         return;
     }
     paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
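
Note the direction inversion that all three I/O paths above share: a port read
(IOREQ_READ) pulls data from the device and *writes* it to guest RAM, so the
walk must demand a writable mapping, while the source operand of a MOVS drops
that demand again.  A standalone sketch of just that rule (local stand-in
names, not the Xen API):

#include <assert.h>
#include <stdint.h>

#define PFEC_page_present (1U << 0)
#define PFEC_write_access (1U << 1)

enum ioreq_dir { IOREQ_WRITE_, IOREQ_READ_ };   /* local stand-ins */

static uint32_t pio_pfec(enum ioreq_dir dir)
{
    uint32_t pfec = PFEC_page_present;
    if ( dir == IOREQ_READ_ )       /* port read --> memory write */
        pfec |= PFEC_write_access;
    return pfec;
}

int main(void)
{
    uint32_t pfec = pio_pfec(IOREQ_READ_);        /* destination side */
    assert(pfec & PFEC_write_access);
    pfec &= ~PFEC_write_access;                   /* source side of a MOVS */
    assert(pfec == PFEC_page_present);
    return 0;
}
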
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Fri Nov 02 16:38:11 2007 +0000
@@ -40,7 +40,7 @@
 
 #if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     gdprintk(XENLOG_ERR,
              "Guest paging level is greater than host paging level!\n");
@@ -61,7 +61,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
 #endif
 
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
     int mode = GUEST_PAGING_LEVELS;

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:38:11 2007 +0000
@@ -695,7 +695,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
 }
 
 static unsigned long hap_gva_to_gfn_real_mode(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     return ((paddr_t)gva >> PAGE_SHIFT);
 }

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h     Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/private.h     Fri Nov 02 16:38:11 2007 +0000
@@ -26,9 +26,12 @@
 /********************************************/
 /*          GUEST TRANSLATION FUNCS         */
 /********************************************/
-unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva,
+                                    uint32_t *pfec);
+unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva,
+                                    uint32_t *pfec);
+unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
+                                    uint32_t *pfec);
 
 /********************************************/
 /*            MISC DEFINITIONS              */

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/p2m.c     Fri Nov 02 16:38:11 2007 +0000
@@ -31,7 +31,7 @@
 
 /* Debugging and auditing of the P2M code? */
 #define P2M_AUDIT     0
-#define P2M_DEBUGGING 1
+#define P2M_DEBUGGING 0
 
 /*
  * The P2M lock.  This protects all updates to the p2m table.
@@ -290,11 +290,11 @@ int p2m_alloc_table(struct domain *d,
                     void (*free_page)(struct domain *d, struct page_info *pg))
 
 {
-    mfn_t mfn;
+    mfn_t mfn = _mfn(INVALID_MFN);
     struct list_head *entry;
     struct page_info *page, *p2m_top;
     unsigned int page_count = 0;
-    unsigned long gfn;
+    unsigned long gfn = -1UL;
 
     p2m_lock(d);

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Nov 02 16:38:11 2007 +0000
@@ -150,11 +150,13 @@ hvm_read(enum x86_segment seg,
         return rc;
 
     *val = 0;
-    // XXX -- this is WRONG.
-    //        It entirely ignores the permissions in the page tables.
-    //        In this case, that is only a user vs supervisor access check.
-    //
-    if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
+
+    if ( access_type == hvm_access_insn_fetch )
+        rc = hvm_fetch_from_guest_virt(val, addr, bytes);
+    else
+        rc = hvm_copy_from_guest_virt(val, addr, bytes);
+
+    if ( rc == 0 )
         return X86EMUL_OKAY;
 
     /* If we got here, there was nothing mapped here, or a bad GFN
@@ -395,7 +397,7 @@ struct x86_emulate_ops *shadow_init_emul
         (!hvm_translate_linear_addr(
             x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
             hvm_access_insn_fetch, sh_ctxt, &addr) &&
-         !hvm_copy_from_guest_virt(
+         !hvm_fetch_from_guest_virt(
              sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
         ? sizeof(sh_ctxt->insn_buf) : 0;
 
@@ -423,7 +425,7 @@ void shadow_continue_emulation(struct sh
             (!hvm_translate_linear_addr(
                 x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
                 hvm_access_insn_fetch, sh_ctxt, &addr) &&
-             !hvm_copy_from_guest_virt(
+             !hvm_fetch_from_guest_virt(
                  sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
             ? sizeof(sh_ctxt->insn_buf) : 0;
         sh_ctxt->insn_buf_eip = regs->eip;
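
The hvm_read() fix above is the consumer side of the new fetch path: the
emulator now says whether a read is an instruction fetch, so NX can be
enforced instead of being silently ignored.  A minimal standalone sketch of
the dispatch shape (stand-in names only, not the Xen API):

#include <stdio.h>

enum access_type { access_read, access_insn_fetch };

static int fetch_bytes(void *buf, unsigned long va, int n)
{ (void)buf; (void)va; printf("fetch %d bytes (NX checked)\n", n); return 0; }

static int read_bytes(void *buf, unsigned long va, int n)
{ (void)buf; (void)va; printf("read %d bytes (NX ignored)\n", n); return 0; }

static int emul_read(void *buf, unsigned long va, int n, enum access_type t)
{
    /* Fetches must honour NX; plain data reads must not fault on NX pages. */
    return (t == access_insn_fetch) ? fetch_bytes(buf, va, n)
                                    : read_bytes(buf, va, n);
}

int main(void)
{
    char b[8];
    emul_read(b, 0x1000, 8, access_insn_fetch);
    emul_read(b, 0x2000, 8, access_read);
    return 0;
}
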
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Nov 02 16:38:11 2007 +0000
@@ -189,7 +189,7 @@ guest_supports_nx(struct vcpu *v)
     if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
         return 0;
     if ( !is_hvm_vcpu(v) )
-        return 1;
+        return cpu_has_nx;
     return hvm_nx_enabled(v);
 }
 
@@ -197,22 +197,119 @@ guest_supports_nx(struct vcpu *v)
 
 /**************************************************************************/
 /* Functions for walking the guest page tables */
-
-/* Walk the guest pagetables, filling the walk_t with what we see.
- * Takes an uninitialised walk_t.  The caller must call unmap_walk()
- * on the walk_t before discarding it or calling guest_walk_tables again.
- * If "guest_op" is non-zero, we are serving a genuine guest memory access,
+/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
+static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
+{
+    static uint32_t flags[] = {
+        /* I/F -  Usr Wr */
+        /* 0   0   0   0 */ _PAGE_PRESENT,
+        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
+        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
+        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+        /* 0   1   0   0 */ _PAGE_PRESENT,
+        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
+        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
+        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+    };
+    uint32_t f = flags[(pfec & 0x1f) >> 1];
+    /* Don't demand not-NX if the CPU wouldn't enforce it. */
+    if ( !guest_supports_nx(v) )
+        f &= ~_PAGE_NX_BIT;
+    return f;
+}
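
The table above is indexed by the fault code with the low (present) bit
stripped: (pfec & 0x1f) >> 1 packs Write, User, Reserved and Insn-fetch into a
four-bit row number.  A standalone worked example of that decode (not from the
changeset):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* present | write | user = 0b00111; dropping the present bit gives
     * row 0b0011 = 3, which demands PRESENT|RW|USER at every level. */
    uint32_t pfec = 1 | 2 | 4;
    assert(((pfec & 0x1f) >> 1) == 3);

    /* present | insn-fetch = 0b10001 decodes to row 8, the first row
     * that also demands the (inverted) NX bit, i.e. executability. */
    uint32_t fetch = 1 | 16;
    assert(((fetch & 0x1f) >> 1) == 8);
    return 0;
}
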
+/* Read, check and modify a guest pagetable entry.  Returns 0 if the
+ * flags are OK.  Although we use l1e types here, the logic and the bits
+ * are the same for all types except PAE l3es.
+ */
+static int guest_walk_entry(struct vcpu *v, mfn_t gmfn,
+                            void *gp, void *wp,
+                            uint32_t flags, int level)
+{
+    guest_l1e_t e, old_e;
+    uint32_t gflags;
+    int rc;
+
+    /* Read the guest entry */
+    e = *(guest_l1e_t *)gp;
+
+    /* Check that all the mandatory flag bits are there.  Invert NX, to
+     * calculate as if there were an "X" bit that allowed access. */
+    gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT;
+    rc = ((gflags & flags) != flags);
+
+    /* Set the accessed/dirty bits */
+    if ( rc == 0 )
+    {
+        uint32_t bits = _PAGE_ACCESSED;
+        if ( (flags & _PAGE_RW) // Implies that the action is a write
+             && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) )
+            bits |= _PAGE_DIRTY;
+        old_e = e;
+        e.l1 |= bits;
+        SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n",
+                      (unsigned long) flags,
+                      (unsigned long) bits,
+                      (unsigned long long) old_e.l1,
+                      (unsigned long long) e.l1);
+        /* Try to write the entry back.  If it's changed under our feet
+         * then leave it alone */
+        if ( e.l1 != old_e.l1 )
+        {
+            (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1);
+            paging_mark_dirty(v->domain, mfn_x(gmfn));
+        }
+    }
+
+    /* Record the entry in the walk */
+    *(guest_l1e_t *)wp = e;
+    return rc;
+}
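
guest_walk_entry() also takes over A/D maintenance from the old
guest_set_ad_bits(): Accessed is set at every level of a successful walk,
Dirty only at the leaf of a write mapping (the l1e, or a PSE l2e).  A
standalone restatement of that rule (not from the changeset), using the
architectural flag values:

#include <assert.h>
#include <stdint.h>

#define PAGE_RW       0x02u
#define PAGE_ACCESSED 0x20u
#define PAGE_DIRTY    0x40u
#define PAGE_PSE      0x80u

static uint32_t ad_bits(uint32_t demanded, uint32_t gflags, int level)
{
    uint32_t bits = PAGE_ACCESSED;           /* every level of every walk */
    if ( (demanded & PAGE_RW) &&             /* the access is a write...  */
         (level == 1 ||                      /* ...and this entry is the  */
          (level == 2 && (gflags & PAGE_PSE))) ) /* leaf of the mapping   */
        bits |= PAGE_DIRTY;
    return bits;
}

int main(void)
{
    assert(ad_bits(PAGE_RW, 0, 1) == (PAGE_ACCESSED|PAGE_DIRTY));
    assert(ad_bits(PAGE_RW, 0, 2) == PAGE_ACCESSED);        /* non-PSE l2 */
    assert(ad_bits(PAGE_RW, PAGE_PSE, 2) == (PAGE_ACCESSED|PAGE_DIRTY));
    assert(ad_bits(0, 0, 1) == PAGE_ACCESSED);              /* plain read */
    return 0;
}
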
+/* Walk the guest pagetables, after the manner of a hardware walker.
+ *
+ * Inputs: a vcpu, a virtual address, a walk_t to fill, a
+ *         pointer to a pagefault code, and a flag "shadow_op".
+ *
+ * We walk the vcpu's guest pagetables, filling the walk_t with what we
+ * see and adding any Accessed and Dirty bits that are needed in the
+ * guest entries.  Using the pagefault code, we check the permissions as
+ * we go.  For the purposes of reading pagetables we treat all non-RAM
+ * memory as containing zeroes.
+ *
+ * If "shadow_op" is non-zero, we are serving a genuine guest memory access,
  * and must (a) be under the shadow lock, and (b) remove write access
- * from any gueat PT pages we see, as we will be using their contents to
- * perform shadow updates.
- * Returns 0 for success or non-zero if the guest pagetables are malformed.
- * N.B. Finding a not-present entry does not cause a non-zero return code. */
-static inline int
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+ * from any guest PT pages we see, as we will be shadowing them soon
+ * and will rely on the contents' not having changed.
+ *
+ * Returns 0 for success or non-zero if the walk did not complete.
+ * N.B. This is different from the old return code but almost no callers
+ * checked the old return code anyway.
+ */
+static int
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
+                  uint32_t pfec, int shadow_op)
 {
     struct domain *d = v->domain;
     p2m_type_t p2mt;
-    ASSERT(!guest_op || shadow_locked_by_me(d));
+    guest_l1e_t *l1p;
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+    guest_l1e_t *l2p;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    guest_l1e_t *l3p;
+#endif
+#endif
+    uint32_t flags = mandatory_flags(v, pfec);
+    int rc;
+
+    ASSERT(!shadow_op || shadow_locked_by_me(d));
 
     perfc_incr(shadow_guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -220,84 +317,104 @@ guest_walk_tables(struct vcpu *v, unsign
 
 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
-    /* Get l4e from the top level table */
+    /* Get the l4e from the top level table and check its flags */
     gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
-    gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
-        + guest_l4_table_offset(va);
-    /* Walk down to the l3e */
-    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    rc = guest_walk_entry(v, gw->l4mfn,
+                          (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
+                          + guest_l4_table_offset(va),
+                          &gw->l4e, flags, 4);
+    if ( rc != 0 ) return rc;
+
+    /* Map the l3 table */
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
     if ( !p2m_is_ram(p2mt) ) return 1;
     ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
-    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
-        + guest_l3_table_offset(va);
+    /* Get the l3e and check its flags */
+    l3p = sh_map_domain_page(gw->l3mfn);
+    rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va),
+                          &gw->l3e, flags, 3);
+    sh_unmap_domain_page(l3p);
+    if ( rc != 0 ) return rc;
+
 #else /* PAE only... */
-    /* Get l3e from the cache of the guest's top level table */
-    gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+
+    /* Get l3e from the cache of the top level table and check its flag */
+    gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1;
+
 #endif /* PAE or 64... */
-    /* Walk down to the l2e */
-    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+
+    /* Map the l2 table */
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
     if ( !p2m_is_ram(p2mt) ) return 1;
     ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+    if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
-    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
-        + guest_l2_table_offset(va);
+    /* Get the l2e */
+    l2p = sh_map_domain_page(gw->l2mfn);
+    rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va),
+                          &gw->l2e, flags, 2);
+    sh_unmap_domain_page(l2p);
+    if ( rc != 0 ) return rc;
+
 #else /* 32-bit only... */
-    /* Get l2e from the top level table */
+
+    /* Get l2e from the top level table and check its flags */
     gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
-    gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
-        + guest_l2_table_offset(va);
+    rc = guest_walk_entry(v, gw->l2mfn,
+                          (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
+                          + guest_l2_table_offset(va),
+                          &gw->l2e, flags, 2);
+    if ( rc != 0 ) return rc;
+
 #endif /* All levels... */
-
-    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+
     if ( guest_supports_superpages(v) &&
-         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
+         (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) )
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
          * can generate a shadow l1 table.  Start with the gfn of the
          * first 4k-page of the superpage. */
-        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+        gfn_t start = guest_l2e_get_gfn(gw->l2e);
         /* Grant full access in the l1e, since all the guest entry's
-         * access controls are enforced in the shadow l2e.  This lets
-         * us reflect l2 changes later without touching the l1s. */
+         * access controls are enforced in the shadow l2e. */
         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
                      _PAGE_ACCESSED|_PAGE_DIRTY);
-        /* propagate PWT PCD to level 1 for PSE */
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
-            flags |= _PAGE_PWT;
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
-            flags |= _PAGE_PCD;
         /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
-         * of the level 1 */
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
-            flags |= _PAGE_PAT;
+         * of the level 1. */
+        if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) )
+            flags |= _PAGE_PAT;
+        /* Copy the cache-control bits to the l1 as well, because we
+         * can't represent PAT in the (non-PSE) shadow l2e. :(
+         * This could cause problems if a guest ever maps an area of
+         * memory with superpages using more than one caching mode. */
+        flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
         /* Increment the pfn by the right number of 4k pages.
         * The ~0x1 is to mask out the PAT bit mentioned above. */
         start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
-        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
-        gw->l1e = NULL;
+        gw->l1e = guest_l1e_from_gfn(start, flags);
         gw->l1mfn = _mfn(INVALID_MFN);
     }
     else
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
         if ( !p2m_is_ram(p2mt) ) return 1;
         ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
-        if ( guest_op
+        if ( shadow_op
             && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
             flush_tlb_mask(d->domain_dirty_cpumask);
-        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
-            + guest_l1_table_offset(va);
-        gw->eff_l1e = *gw->l1e;
+        l1p = sh_map_domain_page(gw->l1mfn);
+        rc = guest_walk_entry(v, gw->l2mfn, l1p + guest_l1_table_offset(va),
+                              &gw->l1e, flags, 1);
+        sh_unmap_domain_page(l1p);
+        if ( rc != 0 ) return rc;
     }
 
     return 0;
@@ -308,9 +425,9 @@ static inline gfn_t
 static inline gfn_t
 guest_walk_to_gfn(walk_t *gw)
 {
-    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
         return _gfn(INVALID_GFN);
-    return guest_l1e_get_gfn(gw->eff_l1e);
+    return guest_l1e_get_gfn(gw->l1e);
 }
 
 /* Given a walk_t, translate the gw->va into the guest's notion of the
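
When the walk ends at a PSE superpage, the code above fabricates an l1e whose
frame number is offset into the superpage, masking out bit 0 of the gfn
because a PSE l2e keeps its PAT flag in bit 12 of the address field.  A
standalone worked example of that arithmetic (values are made up):

#include <assert.h>

int main(void)
{
    unsigned long l2e_gfn = 0x400;   /* superpage's first 4k frame; bit 0
                                      * doubles as PSE-PAT, hence ~0x1 */
    unsigned long va = 0x12345678;
    unsigned long l1_offset = (va >> 12) & 0x1ff; /* 512 l1 slots (PAE/64) */
    unsigned long l1e_gfn = (l2e_gfn & ~0x1UL) + l1_offset;
    assert(l1_offset == 0x145);
    assert(l1e_gfn == 0x545);
    return 0;
}
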
@@ -318,29 +435,12 @@ static inline paddr_t
 static inline paddr_t
 guest_walk_to_gpa(walk_t *gw)
 {
-    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
         return 0;
-    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
-}
-
-
-/* Unmap (and reinitialise) a guest walk.
- * Call this to dispose of any walk filled in by guest_walk_tables() */
-static void unmap_walk(struct vcpu *v, walk_t *gw)
-{
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
-    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
-#endif
-    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
-#endif
-    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
-#ifdef DEBUG
-    memset(gw, 0, sizeof(*gw));
-#endif
-}
-
-
+    return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
+}
+
+#if 0 /* Keep for debugging */
 /* Pretty-print the contents of a guest-walk */
 static inline void print_gw(walk_t *gw)
 {
@@ -348,26 +448,17 @@ static inline void print_gw(walk_t *gw)
 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
     SHADOW_PRINTK("   l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
-    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
-    if ( gw->l4e )
-        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+    SHADOW_PRINTK("   l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
     SHADOW_PRINTK("   l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
 #endif /* PAE or 64... */
-    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
-    if ( gw->l3e )
-        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+    SHADOW_PRINTK("   l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
 #endif /* All levels... */
     SHADOW_PRINTK("   l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
-    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
-    if ( gw->l2e )
-        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+    SHADOW_PRINTK("   l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
     SHADOW_PRINTK("   l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
-    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
-    if ( gw->l1e )
-        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
-    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
-}
-
+    SHADOW_PRINTK("   l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
+}
+#endif /* 0 */
 
 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
 /* Lightweight audit: pass all the shadows associated with this guest walk
@@ -404,10 +495,10 @@ static void sh_audit_gw(struct vcpu *v,
          && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn,
                                                 SH_type_l1_shadow))) )
         (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
-    else if ( gw->l2e
-              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+    else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
+              && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
              && mfn_valid(
-              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
         (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
 }
 
@@ -415,85 +506,6 @@ static void sh_audit_gw(struct vcpu *v,
 #define sh_audit_gw(_v, _gw) do {} while(0)
 #endif /* audit code */
 
-
-
-/**************************************************************************/
-/* Function to write to the guest tables, for propagating accessed and
- * dirty bits from the shadow to the guest.
- * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
- * and an operation type.  The guest entry is always passed as an l1e:
- * since we only ever write flags, that's OK.
- * Returns the new flag bits of the guest entry. */
-
-static u32 guest_set_ad_bits(struct vcpu *v,
-                             mfn_t gmfn,
-                             guest_l1e_t *ep,
-                             unsigned int level,
-                             fetch_type_t ft)
-{
-    u32 flags;
-    int res = 0;
-
-    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
-    ASSERT(level <= GUEST_PAGING_LEVELS);
-    ASSERT(shadow_locked_by_me(v->domain));
-
-    flags = guest_l1e_get_flags(*ep);
-
-    /* Only set A and D bits for guest-initiated accesses */
-    if ( !(ft & FETCH_TYPE_DEMAND) )
-        return flags;
-
-    ASSERT(mfn_valid(gmfn)
-           && (sh_mfn_is_a_page_table(gmfn)
-               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
-                   == 0)));
-
-    /* PAE l3s do not have A and D bits */
-    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
-
-    /* Need the D bit as well for writes, in L1es and PSE L2es. */
-    if ( ft == ft_demand_write
-         && (level == 1 ||
-             (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
-    {
-        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
-             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
-            return flags;  /* Guest already has A and D bits set */
-        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
-        perfc_incr(shadow_ad_update);
-    }
-    else
-    {
-        if ( flags & _PAGE_ACCESSED )
-            return flags;  /* Guest already has A bit set */
-        flags |= _PAGE_ACCESSED;
-        perfc_incr(shadow_a_update);
-    }
-
-    /* Set the bit(s) */
-    paging_mark_dirty(v->domain, mfn_x(gmfn));
-    SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
-                 "old flags = %#x, new flags = %#x\n",
-                 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
-                 flags);
-    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
-
-    /* Propagate this change to any other shadows of the page
-     * (only necessary if there is more than one shadow) */
-    if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
-    {
-        u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
-        /* More than one type bit set in shadow-flags? */
-        if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
-            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
-    }
-
-    /* We should never need to flush the TLB or recopy PAE entries */
-    ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
-
-    return flags;
-}
 
 #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
 void *
@@ -509,11 +521,9 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
     // FIXME!
 
     shadow_lock(v->domain);
-    guest_walk_tables(v, addr, &gw, 1);
-
-    if ( gw.l2e &&
-         (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
-         !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
+    guest_walk_tables(v, addr, &gw, 0, 1);
+
+    if ( mfn_valid(gw.l1mfn) )
     {
         if ( gl1mfn )
             *gl1mfn = mfn_x(gw.l1mfn);
@@ -521,7 +531,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
             (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
     }
 
-    unmap_walk(v, &gw);
     shadow_unlock(v->domain);
 
     return pl1e;
@@ -538,9 +547,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
     // FIXME!
 
     shadow_lock(v->domain);
-    guest_walk_tables(v, addr, &gw, 1);
-    *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
-    unmap_walk(v, &gw);
+    guest_walk_tables(v, addr, &gw, 0, 1);
+    *(guest_l1e_t *)eff_l1e = gw.l1e;
     shadow_unlock(v->domain);
 }
 #endif /* CONFIG==SHADOW==GUEST */
@@ -636,17 +644,17 @@ unsigned char pat_type_2_pte_flags(unsig
 static always_inline void
 _sh_propagate(struct vcpu *v,
-              void *guest_entry_ptr,
-              mfn_t guest_table_mfn,
+              guest_intpte_t guest_intpte,
               mfn_t target_mfn,
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft,
               p2m_type_t p2mt)
 {
-    guest_l1e_t *gp = guest_entry_ptr;
+    guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
+    gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
 
@@ -660,15 +668,7 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( mfn_valid(guest_table_mfn) )
-        /* Handle A and D bit propagation into the guest */
-        gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
-    else
-    {
-        /* Must be an fl1e or a prefetch */
-        ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
-        gflags = guest_l1e_get_flags(*gp);
-    }
+    gflags = guest_l1e_get_flags(guest_entry);
 
     if ( unlikely(!(gflags & _PAGE_PRESENT)) )
     {
@@ -684,7 +684,7 @@ _sh_propagate(struct vcpu *v,
     if ( level == 1 && p2mt == p2m_mmio_dm )
     {
         /* Guest l1e maps emulated MMIO space */
-        *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
+        *sp = sh_l1e_mmio(target_gfn, gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
@@ -694,9 +694,6 @@ _sh_propagate(struct vcpu *v,
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
-    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
-     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
-     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( shadow_mode_refcounts(d) &&
         !mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
     {
@@ -718,20 +715,22 @@ _sh_propagate(struct vcpu *v,
         pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
     sflags = gflags & pass_thru_flags;
 
-    /* Only change memory caching type for pass-through domain */
+    /*
+     * For HVM domains with direct access to MMIO areas, set the correct
+     * caching attributes in the shadows to match what was asked for
+     */
     if ( (level == 1) && is_hvm_domain(d) &&
          !list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
     {
         unsigned int type;
-        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
-                                          &type) )
+        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
             sflags |= pat_type_2_pte_flags(type);
-        else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
+        else if ( d->arch.hvm_domain.is_in_uc_mode )
             sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
         else
             sflags |= get_pat_flags(v,
                                     gflags,
-                                    guest_l1e_get_paddr(*gp),
+                                    gfn_to_paddr(target_gfn),
                                     mfn_x(target_mfn) << PAGE_SHIFT);
     }
 
@@ -813,59 +812,55 @@ _sh_propagate(struct vcpu *v,
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
-                 fetch_type_names[ft], level, gp->l1, sp->l1);
-}
-
-
-/* These four wrappers give us a little bit of type-safety back around the
- * use of void-* pointers in _sh_propagate(), and allow the compiler to
- * optimize out some level checks. */
+                 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
+}
+
+
+/* These four wrappers give us a little bit of type-safety back around
+ * the use of void-* pointers and intpte types in _sh_propagate(), and
+ * allow the compiler to optimize out some level checks. */
 
 #if GUEST_PAGING_LEVELS >= 4
 static void
 l4e_propagate_from_guest(struct vcpu *v,
-                         guest_l4e_t *gl4e,
-                         mfn_t gl4mfn,
+                         guest_l4e_t gl4e,
                          mfn_t sl3mfn,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
+    _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
 l3e_propagate_from_guest(struct vcpu *v,
-                         guest_l3e_t *gl3e,
-                         mfn_t gl3mfn,
+                         guest_l3e_t gl3e,
                          mfn_t sl2mfn,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
+    _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
 static void
 l2e_propagate_from_guest(struct vcpu *v,
-                         guest_l2e_t *gl2e,
-                         mfn_t gl2mfn,
+                         guest_l2e_t gl2e,
                          mfn_t sl1mfn,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
+    _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
 l1e_propagate_from_guest(struct vcpu *v,
-                         guest_l1e_t *gl1e,
-                         mfn_t gl1mfn,
+                         guest_l1e_t gl1e,
                          mfn_t gmfn,
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft,
                          p2m_type_t p2mt)
 {
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
+    _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -1859,8 +1854,7 @@ static shadow_l3e_t * shadow_get_and_cre
         *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
     }
     /* Install the new sl3 table in the sl4e */
-    l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
-                             *sl3mfn, &new_sl4e, ft);
+    l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
     r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
     ASSERT((r & SHADOW_SET_FLUSH) == 0);
     if ( r & SHADOW_SET_ERROR )
@@ -1909,8 +1903,7 @@ static shadow_l2e_t * shadow_get_and_cre
         *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
     }
     /* Install the new sl2 table in the sl3e */
-    l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
-                             *sl2mfn, &new_sl3e, ft);
+    l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
     r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
     ASSERT((r & SHADOW_SET_FLUSH) == 0);
     if ( r & SHADOW_SET_ERROR )
@@ -1934,7 +1927,7 @@ static shadow_l2e_t * shadow_get_and_cre
         /* This next line is important: the guest l2 has a 16k
         * shadow, we need to return the right mfn of the four. This
         * call will set it for us as a side-effect. */
-        (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+        (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
         /* Reading the top level table is always valid. */
         return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
 #endif
 
@@ -1956,8 +1949,8 @@ static shadow_l1e_t * shadow_get_and_cre
      * re-do it to fix a PSE dirty bit. */
     if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
         && likely(ft != ft_demand_write
-                   || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY)
-                   || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
+                   || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW)
+                   || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
     {
         *sl1mfn = shadow_l2e_get_mfn(*sl2e);
         ASSERT(mfn_valid(*sl1mfn));
     }
     else
     {
         shadow_l2e_t new_sl2e;
-        int r, flags = guest_l2e_get_flags(*gw->l2e);
+        int r, flags = guest_l2e_get_flags(gw->l2e);
         /* No l1 shadow installed: find and install it. */
         if ( !(flags & _PAGE_PRESENT) )
             return NULL; /* No guest page. */
         if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
         {
             /* Splintering a superpage */
-            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+            gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
             *sl1mfn = get_fl1_shadow_status(v, l2gfn);
             if ( !mfn_valid(*sl1mfn) )
             {
@@ -1992,8 +1985,7 @@ static shadow_l1e_t * shadow_get_and_cre
             }
         }
         /* Install the new sl1 table in the sl2e */
-        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
-                                 *sl1mfn, &new_sl2e, ft);
+        l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
         r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);
         if ( r & SHADOW_SET_ERROR )
@@ -2247,7 +2239,7 @@ static int validate_gl4e(struct vcpu *v,
 static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
 {
     shadow_l4e_t new_sl4e;
-    guest_l4e_t *new_gl4e = new_ge;
+    guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
@@ -2256,17 +2248,16 @@ static int validate_gl4e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl4e_calls);
 
-    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
-    {
-        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+    if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
+    {
+        gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
         mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
     }
-    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
-                             sl3mfn, &new_sl4e, ft_prefetch);
+    l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
 
     // check for updates to xen reserved slots
     if ( !shadow_mode_external(d) )
@@ -2301,7 +2292,7 @@ static int validate_gl3e(struct vcpu *v,
 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
 {
     shadow_l3e_t new_sl3e;
-    guest_l3e_t *new_gl3e = new_ge;
+    guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt;
@@ -2309,17 +2300,16 @@ static int validate_gl3e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl3e_calls);
 
-    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
-    {
-        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+    if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
+    {
+        gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
         mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
     }
-    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
-                             sl2mfn, &new_sl3e, ft_prefetch);
+    l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
     result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
 
     return result;
@@ -2329,7 +2319,7 @@ static int validate_gl2e(struct vcpu *v,
 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
 {
     shadow_l2e_t new_sl2e;
-    guest_l2e_t *new_gl2e = new_ge;
+    guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt;
@@ -2337,11 +2327,11 @@ static int validate_gl2e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl2e_calls);
 
-    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
-    {
-        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+    if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
+    {
+        gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
         if ( guest_supports_superpages(v) &&
-             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+             (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
         {
             // superpage -- need to look up the shadow L1 which holds the
             // splitters...
@@ -2364,8 +2354,7 @@ static int validate_gl2e(struct vcpu *v,
                 result |= SHADOW_SET_ERROR;
         }
     }
-    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
-                             sl1mfn, &new_sl2e, ft_prefetch);
+    l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
 
     // check for updates to xen reserved slots in PV guests...
     // XXX -- need to revisit this for PV 3-on-4 guests.
@@ -2415,7 +2404,7 @@ static int validate_gl1e(struct vcpu *v,
 static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
 {
     shadow_l1e_t new_sl1e;
-    guest_l1e_t *new_gl1e = new_ge;
+    guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
@@ -2424,11 +2413,10 @@ static int validate_gl1e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl1e_calls);
 
-    gfn = guest_l1e_get_gfn(*new_gl1e);
+    gfn = guest_l1e_get_gfn(new_gl1e);
     gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
-    l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
-                             ft_prefetch, p2mt);
+    l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
 
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2615,7 +2603,7 @@ static void sh_prefetch(struct vcpu *v,
     int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
-    guest_l1e_t gl1e;
+    guest_l1e_t *gl1p = NULL, gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
     p2m_type_t p2mt;
@@ -2626,16 +2614,23 @@ static void sh_prefetch(struct vcpu *v,
     if ( dist > PREFETCH_DISTANCE )
         dist = PREFETCH_DISTANCE;
 
+    if ( mfn_valid(gw->l1mfn) )
+    {
+        /* Normal guest page; grab the next guest entry */
+        gl1p = sh_map_domain_page(gw->l1mfn);
+        gl1p += guest_l1_table_offset(gw->va);
+    }
+
     for ( i = 1; i < dist ; i++ )
     {
         /* No point in prefetching if there's already a shadow */
         if ( ptr_sl1e[i].l1 != 0 )
             break;
 
-        if ( gw->l1e )
+        if ( mfn_valid(gw->l1mfn) )
         {
             /* Normal guest page; grab the next guest entry */
-            gl1e = gw->l1e[i];
+            gl1e = gl1p[i];
             /* Not worth continuing if we hit an entry that will need another
              * fault for A/D-bit propagation anyway */
             gflags = guest_l1e_get_flags(gl1e);
@@ -2647,24 +2642,23 @@ static void sh_prefetch(struct vcpu *v,
         else
         {
             /* Fragmented superpage, unless we've been called wrongly */
-            ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
+            ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
             /* Increment the l1e's GFN by the right number of guest pages */
             gl1e = guest_l1e_from_gfn(
-                _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i),
-                guest_l1e_get_flags(gw->eff_l1e));
+                _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i),
+                guest_l1e_get_flags(gw->l1e));
         }
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
         gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
-        /* Propagate the entry.  Safe to use a pointer to our local
-         * gl1e, since this is not a demand-fetch so there will be no
-         * write-back to the guest. */
-        l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, p2mt);
+        /* Propagate the entry. */
+        l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
+
+    if ( gl1p != NULL )
+        sh_unmap_domain_page(gl1p);
 }
 
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
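
sh_prefetch() above switches from reading through the walk_t's (now removed)
live l1e pointer to mapping the guest l1 page once around the loop.  A
standalone sketch of that map-once shape (map_page/unmap_page stand in for
sh_map_domain_page/sh_unmap_domain_page; the table is faked):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t guest_l1e_t;

static guest_l1e_t fake_l1_table[512];               /* pretend l1 page */

static guest_l1e_t *map_page(void)  { return fake_l1_table; }
static void unmap_page(guest_l1e_t *p) { (void)p; }

int main(void)
{
    unsigned int first = 7, dist = 4;
    /* Map once, outside the loop, as the new sh_prefetch() does... */
    guest_l1e_t *gl1p = map_page() + first;
    for ( unsigned int i = 1; i < dist; i++ )
        printf("would propagate entry %u: %#llx\n", first + i,
               (unsigned long long)gl1p[i]);          /* copied by value */
    /* ...and unmap once at the end, instead of carrying a live mapping
     * around inside the walk_t. */
    unmap_page(gl1p);
    return 0;
}
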
@@ -2684,7 +2678,6 @@ static int sh_page_fault(struct vcpu *v,
 {
     struct domain *d = v->domain;
     walk_t gw;
-    u32 accumulated_gflags;
     gfn_t gfn;
     mfn_t gmfn, sl1mfn=_mfn(0);
     shadow_l1e_t sl1e, *ptr_sl1e;
@@ -2769,10 +2762,10 @@ static int sh_page_fault(struct vcpu *v,
 
     shadow_audit_tables(v);
 
-    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
-    {
-        SHADOW_PRINTK("malformed guest pagetable\n");
-        print_gw(&gw);
+    if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
+    {
+        perfc_incr(shadow_fault_bail_real_fault);
+        goto not_a_shadow_fault;
     }
 
     /* It's possible that the guest has put pagetables in memory that it has
@@ -2788,64 +2781,12 @@ static int sh_page_fault(struct vcpu *v,
 
     sh_audit_gw(v, &gw);
 
-    // We do not look at the gw->l1e, as that will not exist for superpages.
-    // Instead, we use the gw->eff_l1e...
-    //
-    // We need not check all the levels of the guest page table entries for
-    // present vs not-present, as the eff_l1e will always be not present if
-    // one of the higher level entries is not present.
-    //
-    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
-    {
-        perfc_incr(shadow_fault_bail_not_present);
-        goto not_a_shadow_fault;
-    }
-
-    // All levels of the guest page table are now known to be present.
-    accumulated_gflags = accumulate_guest_flags(v, &gw);
-
-    // Check for attempts to access supervisor-only pages from user mode,
-    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
-    // code.
-    //
-    if ( (regs->error_code & PFEC_user_mode) &&
-         !(accumulated_gflags & _PAGE_USER) )
-    {
-        /* illegal user-mode access to supervisor-only page */
-        perfc_incr(shadow_fault_bail_user_supervisor);
-        goto not_a_shadow_fault;
-    }
-
-    // Was it a write fault?
+    /* What kind of access are we dealing with? */
     ft = ((regs->error_code & PFEC_write_access)
           ? ft_demand_write : ft_demand_read);
-    if ( ft == ft_demand_write )
-    {
-        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
-        {
-            perfc_incr(shadow_fault_bail_ro_mapping);
-            goto not_a_shadow_fault;
-        }
-    }
-    else // must have been either an insn fetch or read fault
-    {
-        // Check for NX bit violations: attempts to execute code that is
-        // marked "do not execute".  Such errors are not caused or dealt with
-        // by the shadow code.
-        //
-        if ( regs->error_code & PFEC_insn_fetch )
-        {
-            if ( accumulated_gflags & _PAGE_NX_BIT )
-            {
-                /* NX prevented this code fetch */
-                perfc_incr(shadow_fault_bail_nx);
-                goto not_a_shadow_fault;
-            }
-        }
-    }
 
     /* What mfn is the guest trying to access? */
-    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    gfn = guest_l1e_get_gfn(gw.l1e);
     gmfn = gfn_to_mfn(d, gfn, &p2mt);
 
     if ( shadow_mode_refcounts(d) &&
@@ -2876,14 +2817,12 @@ static int sh_page_fault(struct vcpu *v,
          * shadow_set_l*e(), which will have crashed the guest.
          * Get out of the fault handler immediately. */
         ASSERT(d->is_shutting_down);
-        unmap_walk(v, &gw);
         shadow_unlock(d);
         return 0;
     }
 
     /* Calculate the shadow entry and write it */
-    l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
-                             gmfn, &sl1e, ft, p2mt);
+    l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2921,7 +2860,6 @@ static int sh_page_fault(struct vcpu *v,
  done:
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("fixed\n");
     shadow_audit_tables(v);
     shadow_unlock(d);
@@ -2972,7 +2910,6 @@ static int sh_page_fault(struct vcpu *v,
      * take it again when we write to the pagetables.
      */
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     shadow_audit_tables(v);
     shadow_unlock(d);
 
@@ -3033,7 +2970,6 @@ static int sh_page_fault(struct vcpu *v,
         goto not_a_shadow_fault;
     perfc_incr(shadow_fault_mmio);
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
     shadow_audit_tables(v);
     reset_early_unshadow(v);
@@ -3043,7 +2979,6 @@ static int sh_page_fault(struct vcpu *v,
 
  not_a_shadow_fault:
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("not a shadow fault\n");
     shadow_audit_tables(v);
     reset_early_unshadow(v);
@@ -3129,30 +3064,36 @@ sh_invlpg(struct vcpu *v, unsigned long
 
 static unsigned long
-sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
 /* Called to translate a guest virtual address to what the *guest*
  * pagetables would map it to. */
 {
     walk_t gw;
     gfn_t gfn;
-
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     struct shadow_vtlb t = {0};
-    if ( vtlb_lookup(v, va, &t) )
+    /* Check the vTLB cache first */
+    if ( vtlb_lookup(v, va, pfec[0], &t) )
         return t.frame_number;
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    guest_walk_tables(v, va, &gw, 0);
+    if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
+    {
+        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+            pfec[0] &= ~PFEC_page_present;
+        return INVALID_GFN;
+    }
     gfn = guest_walk_to_gfn(&gw);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     t.page_number = va >> PAGE_SHIFT;
     t.frame_number = gfn_x(gfn);
     t.flags = accumulate_guest_flags(v, &gw);
+    t.pfec = pfec[0];
     vtlb_insert(v, t);
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    unmap_walk(v, &gw);
     return gfn_x(gfn);
 }
 
@@ -4006,9 +3947,8 @@ static inline void * emulate_map_dest(st
                                       struct sh_emulate_ctxt *sh_ctxt,
                                       mfn_t *mfnp)
 {
-    walk_t gw;
-    u32 flags, errcode;
-    gfn_t gfn;
+    uint32_t pfec;
+    unsigned long gfn;
     mfn_t mfn;
     p2m_type_t p2mt;
 
@@ -4016,50 +3956,20 @@ static inline void * emulate_map_dest(st
     if ( ring_3(sh_ctxt->ctxt.regs) )
         return NULL;
 
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
-    /* Try the virtual TLB first */
-    {
-        struct shadow_vtlb t = {0};
-        if ( vtlb_lookup(v, vaddr, &t)
-             && ((t.flags & (_PAGE_PRESENT|_PAGE_RW))
-                 == (_PAGE_PRESENT|_PAGE_RW)) )
-        {
-            flags = t.flags;
-            gfn = _gfn(t.frame_number);
-        }
+    /* Translate the VA, and exit with a page-fault if we fail */
+    pfec = PFEC_page_present | PFEC_write_access;
+    gfn = sh_gva_to_gfn(v, vaddr, &pfec);
+    if ( gfn == INVALID_GFN )
+    {
+        if ( is_hvm_vcpu(v) )
+            hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
         else
-        {
-            /* Need to do the full lookup, just in case permissions
-             * have increased since we cached this entry */
-
-#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
-
-            /* Walk the guest pagetables */
-            guest_walk_tables(v, vaddr, &gw, 1);
-            flags = accumulate_guest_flags(v, &gw);
-            gfn = guest_l1e_get_gfn(gw.eff_l1e);
-            sh_audit_gw(v, &gw);
-            unmap_walk(v, &gw);
-
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
-            /* Remember this translation for next time */
-            t.page_number = vaddr >> PAGE_SHIFT;
-            t.frame_number = gfn_x(gfn);
-            t.flags = flags;
-            vtlb_insert(v, t);
-        }
-    }
-#endif
-
-    errcode = PFEC_write_access;
-    if ( !(flags & _PAGE_PRESENT) )
-        goto page_fault;
-
-    errcode |= PFEC_page_present;
-    if ( !(flags & _PAGE_RW) )
-        goto page_fault;
-
-    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+            propagate_page_fault(vaddr, pfec);
+        return NULL;
+    }
+
+    /* Translate the GFN */
+    mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
     if ( p2m_is_ram(p2mt) )
     {
         ASSERT(mfn_valid(mfn));
@@ -4069,13 +3979,6 @@ static inline void * emulate_map_dest(st
     }
     else
         return NULL;
-
- page_fault:
-    if ( is_hvm_vcpu(v) )
-        hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
-    else
-        propagate_page_fault(vaddr, errcode);
-    return NULL;
 }
 
 static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src,
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Nov 02 16:38:11 2007 +0000
@@ -665,9 +665,10 @@ void shadow_continue_emulation(
 #define VTLB_ENTRIES 13
 
 struct shadow_vtlb {
-    unsigned long page_number;  /* Guest virtual address >> PAGE_SHIFT  */
-    unsigned long frame_number; /* Guest physical address >> PAGE_SHIFT */
-    u32 flags;    /* Accumulated guest pte flags, or 0 for an empty slot. */
+    unsigned long page_number;    /* Guest virtual address >> PAGE_SHIFT  */
+    unsigned long frame_number;   /* Guest physical address >> PAGE_SHIFT */
+    uint32_t pfec;  /* Pagefault code for the lookup that filled this entry */
+    uint32_t flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
 };
 
 /* Call whenever the guest flushes hit actual TLB */
@@ -692,7 +693,7 @@ static inline void vtlb_insert(struct vc
 }
 
 /* Look a translation up in the vTLB.  Returns 0 if not found. */
-static inline int vtlb_lookup(struct vcpu *v, unsigned long va,
+static inline int vtlb_lookup(struct vcpu *v, unsigned long va, uint32_t pfec,
                               struct shadow_vtlb *result)
 {
     unsigned long page_number = va >> PAGE_SHIFT;
@@ -701,7 +702,9 @@ static inline int vtlb_lookup(struct vcp
 
     spin_lock(&v->arch.paging.vtlb_lock);
     if ( v->arch.paging.vtlb[i].flags != 0
-         && v->arch.paging.vtlb[i].page_number == page_number )
+         && v->arch.paging.vtlb[i].page_number == page_number
+         /* Any successful walk that had at least these pfec bits is OK */
+         && (v->arch.paging.vtlb[i].pfec & pfec) == pfec )
     {
         rv = 1;
         result[0] = v->arch.paging.vtlb[i];

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/types.h    Fri Nov 02 16:38:11 2007 +0000
@@ -251,6 +251,7 @@ TYPE_SAFE(u32,gfn)
 /* Types of the guest's page tables */
 typedef l1_pgentry_32_t guest_l1e_t;
 typedef l2_pgentry_32_t guest_l2e_t;
+typedef intpte_32_t guest_intpte_t;
 
 /* Access functions for them */
 static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -319,6 +320,7 @@ typedef l3_pgentry_t guest_l3e_t;
 #if GUEST_PAGING_LEVELS >= 4
 typedef l4_pgentry_t guest_l4e_t;
 #endif
+typedef intpte_t guest_intpte_t;
 
 /* Access functions for them */
 static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -419,32 +421,27 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Type used for recording a walk through guest pagetables.  It is
  * filled in by the pagetable walk function, and also used as a cache
- * for later walks.
- * Any non-null pointer in this structure represents a mapping of guest
- * memory.  We must always call walk_init() before using a walk_t, and
- * call walk_unmap() when we're done.
- * The "Effective l1e" field is used when there isn't an l1e to point to,
- * but we have fabricated an l1e for propagation to the shadow (e.g.,
- * for splintering guest superpages into many shadow l1 entries). */
+ * for later walks.  When we encounter a superpage l2e, we fabricate an
+ * l1e for propagation to the shadow (for splintering guest superpages
+ * into many shadow l1 entries). */
 typedef struct shadow_walk_t walk_t;
 struct shadow_walk_t
 {
     unsigned long va;           /* Address we were looking for */
 #if GUEST_PAGING_LEVELS >= 3
 #if GUEST_PAGING_LEVELS >= 4
-    guest_l4e_t *l4e;           /* Pointer to guest's level 4 entry */
-#endif
-    guest_l3e_t *l3e;           /* Pointer to guest's level 3 entry */
-#endif
-    guest_l2e_t *l2e;           /* Pointer to guest's level 2 entry */
-    guest_l1e_t *l1e;           /* Pointer to guest's level 1 entry */
-    guest_l1e_t eff_l1e;        /* Effective level 1 entry */
-#if GUEST_PAGING_LEVELS >= 4
-    mfn_t l4mfn;                /* MFN that the level 4 entry is in */
-    mfn_t l3mfn;                /* MFN that the level 3 entry is in */
-#endif
-    mfn_t l2mfn;                /* MFN that the level 2 entry is in */
-    mfn_t l1mfn;                /* MFN that the level 1 entry is in */
+    guest_l4e_t l4e;            /* Guest's level 4 entry */
+#endif
+    guest_l3e_t l3e;            /* Guest's level 3 entry */
+#endif
+    guest_l2e_t l2e;            /* Guest's level 2 entry */
+    guest_l1e_t l1e;            /* Guest's level 1 entry (or fabrication) */
+#if GUEST_PAGING_LEVELS >= 4
+    mfn_t l4mfn;                /* MFN that the level 4 entry was in */
+    mfn_t l3mfn;                /* MFN that the level 3 entry was in */
+#endif
+    mfn_t l2mfn;                /* MFN that the level 2 entry was in */
+    mfn_t l1mfn;                /* MFN that the level 1 entry was in */
 };
 
 /* macros for dealing with the naming of the internal function names of the
@@ -542,7 +539,7 @@ accumulate_guest_flags(struct vcpu *v, w
 {
     u32 accumulated_flags;
 
-    if ( unlikely(!(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT)) )
+    if ( unlikely(!(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT)) )
         return 0;
 
     // We accumulate the permission flags with bitwise ANDing.
@@ -550,17 +547,17 @@ accumulate_guest_flags(struct vcpu *v, w
     // For the NX bit, however, the polarity is wrong, so we accumulate the
     // inverse of the NX bit.
     //
-    accumulated_flags =  guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
-    accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+    accumulated_flags =  guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
 
     // Note that PAE guests do not have USER or RW or NX bits in their L3s.
     //
 #if GUEST_PAGING_LEVELS == 3
     accumulated_flags &=
-        ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+        ~_PAGE_PRESENT | (guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT);
 #elif GUEST_PAGING_LEVELS >= 4
-    accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
-    accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
 #endif
 
     // Revert the NX bit back to its original polarity
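
The vtlb_lookup() change above makes the cache permission-aware: a cached
entry satisfies a lookup only if the walk that filled it carried at least the
requested pfec bits, so a translation cached by a write can serve a later
read, but not the other way round.  A standalone example of the subset test:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t cached     = 1 | 2 | 4;  /* filled by a user-mode write walk */
    uint32_t want_read  = 1 | 4;      /* user-mode read lookup            */
    uint32_t want_fetch = 1 | 16;     /* instruction-fetch lookup         */

    assert((cached & want_read)  == want_read);   /* hit: read <= write  */
    assert((cached & want_fetch) != want_fetch);  /* miss: no fetch bit  */
    return 0;
}
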
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:38:11 2007 +0000
@@ -86,6 +86,7 @@ int hvm_copy_from_guest_phys(void *buf,
 int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size);
 int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size);
 int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size);
 
 void hvm_print_line(struct vcpu *v, const char c);
 void hlt_timer_fn(void *data);

diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/paging.h      Fri Nov 02 16:38:11 2007 +0000
@@ -105,7 +105,8 @@ struct paging_mode {
     int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                             struct cpu_user_regs *regs);
     int           (*invlpg                )(struct vcpu *v, unsigned long va);
-    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va,
+                                            uint32_t *pfec);
     void          (*update_cr3            )(struct vcpu *v, int do_locking);
     void          (*update_paging_modes   )(struct vcpu *v);
     void          (*write_p2m_entry       )(struct vcpu *v, unsigned long gfn,
@@ -204,12 +205,17 @@ static inline int paging_invlpg(struct v
 }
 
 /* Translate a guest virtual address to the frame number that the
- * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
- * tables don't map this address. */
+ * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
+ * tables don't map this address for this kind of access.
+ * pfec[0] is used to determine which kind of access this is when
+ * walking the tables.  The caller should set the PFEC_page_present bit
+ * in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
 #define INVALID_GFN (-1UL)
-static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
-{
-    return v->arch.paging.mode->gva_to_gfn(v, va);
+static inline unsigned long paging_gva_to_gfn(struct vcpu *v,
+                                              unsigned long va,
+                                              uint32_t *pfec)
+{
+    return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
 }
 
 /* Update all the things that are derived from the guest's CR3.
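
The header comment above is the whole contract of the reworked translation
API: the caller seeds pfec[0] with the access it wants (including
PFEC_page_present), and on failure gets back a ready-to-inject error code.  A
standalone sketch, with a stub standing in for the per-mode gva_to_gfn hook:

#include <stdint.h>
#include <stdio.h>

#define INVALID_GFN (-1UL)
#define PFEC_page_present (1U << 0)
#define PFEC_write_access (1U << 1)

static unsigned long translate(unsigned long va, uint32_t *pfec)
{
    (void)va;
    *pfec &= ~PFEC_page_present;  /* walk hit a not-present entry */
    return INVALID_GFN;
}

int main(void)
{
    unsigned long va = 0xdeadb000;
    /* In: the access we want.  Out (on failure): a ready-to-inject code. */
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
    if ( translate(va, &pfec) == INVALID_GFN )
        printf("inject #PF: error code %#x, cr2 %#lx\n", pfec, va);
    return 0;
}
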
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/perfc_defn.h  Fri Nov 02 16:38:11 2007 +0000
@@ -50,12 +50,8 @@ PERFCOUNTER(shadow_fault_fast_mmio, "sha
 PERFCOUNTER(shadow_fault_fast_mmio,    "shadow_fault fast path mmio")
 PERFCOUNTER(shadow_fault_fast_fail,    "shadow_fault fast path error")
 PERFCOUNTER(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
-PERFCOUNTER(shadow_fault_bail_not_present,
-                                       "shadow_fault guest not-present")
-PERFCOUNTER(shadow_fault_bail_nx,      "shadow_fault guest NX fault")
-PERFCOUNTER(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
-PERFCOUNTER(shadow_fault_bail_user_supervisor,
-                                       "shadow_fault guest U/S fault")
+PERFCOUNTER(shadow_fault_bail_real_fault,
+                                       "shadow_fault really guest fault")
 PERFCOUNTER(shadow_fault_emulate_read, "shadow_fault emulates a read")
 PERFCOUNTER(shadow_fault_emulate_write, "shadow_fault emulates a write")
 PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog