[Xen-changelog] [xen-unstable] [XEN] Don't keep shadows of PAE guest l3 tables.
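The commit message below describes the new scheme: rather than shadowing the guest's PAE l3 page, the shadow code treats the guest as if it had four CR3 registers and reloads all four top-level shadow entries whenever the guest writes CR3. As a rough orientation before the patch itself, here is a minimal standalone C sketch of that idea. Everything in it (l3e_t, shadow_of_guest_l2, model_update_cr3, the bit layout) is invented for illustration and heavily simplified from the real sh_update_cr3() / sh_set_toplevel_shadow() changes in the diff; it is not the patch's code.

#include <stdint.h>
#include <stdbool.h>

/* Simplified model only: these stand in for Xen's guest_l3e_t, mfn_t,
 * sh_set_toplevel_shadow() etc., and do not reflect the real API. */
typedef uint64_t l3e_t;                 /* one guest PAE l3 entry */
#define L3E_PRESENT   0x1ULL
#define L3E_PFN(e)    ((e) >> 12)

struct vcpu_shadow {
    uint64_t shadow_table[4];           /* (MFN) one shadow per guest l3 slot */
    l3e_t    l3table[4];                /* per-vcpu PAE l3 fed to hardware CR3 */
};

/* Placeholder for looking up / creating the shadow of a guest l2 page.
 * In the real patch, slot 3 gets the special "l2h" shadow type. */
static uint64_t shadow_of_guest_l2(uint64_t guest_l2_mfn, bool is_high_slot)
{
    (void)is_high_slot;
    return guest_l2_mfn | (1ULL << 40); /* fake "shadow" mfn for the model */
}

/* On every guest CR3 write, reload all four top-level entries, as if the
 * guest had four CR3 registers -- instead of shadowing the l3 page itself. */
static void model_update_cr3(struct vcpu_shadow *v, const l3e_t guest_l3[4])
{
    for (int i = 0; i < 4; i++) {
        v->shadow_table[i] = 0;
        v->l3table[i] = 0;
        if (!(guest_l3[i] & L3E_PRESENT))
            continue;                   /* empty slot: no shadow, empty l3e */
        v->shadow_table[i] = shadow_of_guest_l2(L3E_PFN(guest_l3[i]), i == 3);
        /* Hardware-visible l3e points at the shadow l2, not the guest l2. */
        v->l3table[i] = (v->shadow_table[i] << 12) | L3E_PRESENT;
    }
}

int main(void)
{
    struct vcpu_shadow v = { { 0 }, { 0 } };
    const l3e_t guest_l3[4] = { 0x1000 | L3E_PRESENT, 0, 0, 0x3000 | L3E_PRESENT };
    model_update_cr3(&v, guest_l3);     /* slots 0 and 3 get shadows; 1 and 2 stay empty */
    return 0;
}

The real patch additionally rebuilds the per-vcpu v->arch.shadow.l3table[] from the four shadow_table[] entries so that hw_cr3 can point at a low-memory PAE l3, and drops the old l3-subshadow refcounting entirely; see sh_update_cr3() and sh_set_toplevel_shadow() in multi.c below.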
# HG changeset patch # User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> # Node ID 37ee88ca14403998404d727117a3a968526b5228 # Parent 22885e4c1275a540a95a64e1c30960d62be1e7c9 [XEN] Don't keep shadows of PAE guest l3 tables. Holding pages readonly that have guest PAE l3 tables in them means a performance hit and potential bug if the guest puts other datastructures on the same page as an l3 table. Instead of shadowing them, treat PAE guests as if they had four CR3 registers, and load all four top-level entries when we handle a CR3 write. This also cuts about 500 lines of special-case refcounting and re-copying code. Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> --- xen/arch/x86/hvm/svm/svm.c | 17 xen/arch/x86/hvm/svm/vmcb.c | 9 xen/arch/x86/mm/shadow/common.c | 102 +--- xen/arch/x86/mm/shadow/multi.c | 825 ++++++++++----------------------------- xen/arch/x86/mm/shadow/multi.h | 4 xen/arch/x86/mm/shadow/private.h | 4 xen/arch/x86/mm/shadow/types.h | 123 ----- xen/include/asm-x86/domain.h | 15 xen/include/asm-x86/hvm/vcpu.h | 5 xen/include/asm-x86/mm.h | 17 xen/include/asm-x86/shadow.h | 3 11 files changed, 279 insertions(+), 845 deletions(-) diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Oct 17 11:11:48 2006 +0100 @@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_svm.cpu_cr3 = value; update_cr3(v); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; @@ -1787,10 +1784,6 @@ static int mov_to_cr(int gpreg, int cr, (unsigned long) (mfn << PAGE_SHIFT)); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; - - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", @@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, str { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long pt = pagetable_get_paddr(v->arch.shadow_table); + unsigned long pt = v->arch.hvm_vcpu.hw_cr3; printf("%s: guest registers from %s:\n", __func__, from); #if defined (__x86_64__) @@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, " - "shadow_table = 0x%08x\n", + "hw_cr3 = 0x%16lx\n", __func__, (int) v->arch.guest_table.pfn, (int) v->arch.monitor_table.pfn, - (int) v->arch.shadow_table.pfn); + (long unsigned int) v->arch.hvm_vcpu.hw_cr3); svm_dump_vmcb(__func__, vmcb); svm_dump_regs(__func__, regs); @@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("vmexit_handler():- guest_table = 0x%08x, " - "monitor_table = 0x%08x, shadow_table = 0x%08x\n", + "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n", (int)v->arch.guest_table.pfn, (int)v->arch.monitor_table.pfn, - (int)v->arch.shadow_table.pfn); + (int)v->arch.hvm_vcpu.hw_cr3); printk("svm_vmexit_handler: Returning\n"); } #endif diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/hvm/svm/vmcb.c Tue Oct 17 11:11:48 2006 +0100 @@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v) if (svm_dbg_on) { unsigned long pt; - pt = pagetable_get_paddr(v->arch.shadow_table); - printk("%s: shadow_table = %lx\n", __func__, pt); + printk("%s: hw_cr3 = %llx\n", __func__, + (unsigned long long) v->arch.hvm_vcpu.hw_cr3); pt = 
pagetable_get_paddr(v->arch.guest_table); printk("%s: guest_table = %lx\n", __func__, pt); pt = pagetable_get_paddr(v->domain->arch.phys_table); @@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v) { printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3); printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x," - " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, - (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn); + " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, + (int)v->arch.monitor_table.pfn, + (unsigned long long) v->arch.hvm_vcpu.hw_cr3); } v->arch.schedule_tail = arch_svm_do_resume; diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/mm/shadow/common.c Tue Oct 17 11:11:48 2006 +0100 @@ -283,11 +283,8 @@ __shadow_validate_guest_entry(struct vcp if ( page->shadow_flags & SHF_L2H_PAE ) result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3) (v, gmfn, entry, size); - if ( page->shadow_flags & SHF_L3_PAE ) - result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3) - (v, gmfn, entry, size); #else /* 32-bit non-PAE hypervisor does not support PAE guests */ - ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0); + ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0); #endif #if CONFIG_PAGING_LEVELS >= 4 @@ -427,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc /* Allocating shadow pages * ----------------------- * - * Most shadow pages are allocated singly, but there are two cases where we - * need to allocate multiple pages together. - * - * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows. - * A 32-bit guest l1 table covers 4MB of virtuial address space, - * and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB - * of virtual address space each). Similarly, a 32-bit guest l2 table - * (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va - * each). These multi-page shadows are contiguous and aligned; - * functions for handling offsets into them are defined in shadow.c - * (shadow_l1_index() etc.) + * Most shadow pages are allocated singly, but there is one case where + * we need to allocate multiple pages together: shadowing 32-bit guest + * tables on PAE or 64-bit shadows. A 32-bit guest l1 table covers 4MB + * of virtuial address space, and needs to be shadowed by two PAE/64-bit + * l1 tables (covering 2MB of virtual address space each). Similarly, a + * 32-bit guest l2 table (4GB va) needs to be shadowed by four + * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are + * contiguous and aligned; functions for handling offsets into them are + * defined in shadow.c (shadow_l1_index() etc.) * - * 2: Shadowing PAE top-level pages. Each guest page that contains - * any PAE top-level pages requires two shadow pages to shadow it. - * They contain alternating l3 tables and pae_l3_bookkeeping structs. 
- * * This table shows the allocation behaviour of the different modes: * * Xen paging 32b pae pae 64b 64b 64b @@ -452,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc * * sl1 size 4k 8k 4k 8k 4k 4k * sl2 size 4k 16k 4k 16k 4k 4k - * sl3 size - - 8k - 8k 4k + * sl3 size - - - - - 4k * sl4 size - - - - - 4k * * We allocate memory from xen in four-page units and break them down @@ -506,7 +497,6 @@ shadow_order(u32 shadow_type) 0, /* PGC_SH_fl1_pae_shadow */ 0, /* PGC_SH_l2_pae_shadow */ 0, /* PGC_SH_l2h_pae_shadow */ - 1, /* PGC_SH_l3_pae_shadow */ 0, /* PGC_SH_l1_64_shadow */ 0, /* PGC_SH_fl1_64_shadow */ 0, /* PGC_SH_l2_64_shadow */ @@ -549,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu #endif break; #if CONFIG_PAGING_LEVELS >= 3 - case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift: + case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift: + case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift: SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn); break; #endif @@ -590,18 +581,8 @@ void shadow_prealloc(struct domain *d, u pg = list_entry(l, struct page_info, list); smfn = page_to_mfn(pg); -#if CONFIG_PAGING_LEVELS >= 3 - if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow ) - { - /* For PAE, we need to unpin each subshadow on this shadow */ - SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); - } - else -#endif /* 32-bit code always takes this branch */ - { - /* Unpin this top-level shadow */ - sh_unpin(v, smfn); - } + /* Unpin this top-level shadow */ + sh_unpin(v, smfn); /* See if that freed up a chunk of appropriate size */ if ( chunk_is_available(d, order) ) return; @@ -623,8 +604,12 @@ void shadow_prealloc(struct domain *d, u shadow_unhook_mappings(v, smfn); /* Need to flush TLB if we've altered our own tables */ - if ( !shadow_mode_external(d) - && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) ) + if ( !shadow_mode_external(d) && + (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn) + || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn) + || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn) + || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn) + ) ) local_flush_tlb(); /* See if that freed up a chunk of appropriate size */ @@ -923,9 +908,20 @@ p2m_next_level(struct domain *d, mfn_t * #if CONFIG_PAGING_LEVELS == 3 if (type == PGT_l2_page_table) { + struct vcpu *v; /* We have written to the p2m l3: need to sync the per-vcpu * copies of it in the monitor tables */ p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); + /* Also, any vcpus running on shadows of the p2m need to + * reload their CR3s so the change propagates to the shadow */ + ASSERT(shadow_lock_is_acquired(d)); + for_each_vcpu(d, v) + { + if ( pagetable_get_pfn(v->arch.guest_table) + == pagetable_get_pfn(d->arch.phys_table) + && v->arch.shadow.mode != NULL ) + v->arch.shadow.mode->update_cr3(v); + } } #endif /* The P2M can be shadowed: keep the shadows synced */ @@ -1714,9 +1710,6 @@ void sh_destroy_shadow(struct vcpu *v, m case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift: SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn); break; - case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift: - SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn); - break; #endif #if CONFIG_PAGING_LEVELS >= 4 @@ -1771,7 +1764,6 @@ int shadow_remove_write_access(struct vc #endif NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ 
SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ @@ -1935,7 +1927,6 @@ int shadow_remove_all_mappings(struct vc #endif NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ @@ -2008,7 +1999,8 @@ static int sh_remove_shadow_via_pointer( ASSERT((pg->count_info & PGC_SH_type_mask) > 0); ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow); ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow); - ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow); + ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow); + ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow); ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow); if (pg->up == 0) return 0; @@ -2037,7 +2029,6 @@ static int sh_remove_shadow_via_pointer( case PGC_SH_l1_pae_shadow: case PGC_SH_l2_pae_shadow: case PGC_SH_l2h_pae_shadow: - case PGC_SH_l3_pae_shadow: SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn); break; #if CONFIG_PAGING_LEVELS >= 4 @@ -2091,11 +2082,9 @@ void sh_remove_shadows(struct vcpu *v, m #if CONFIG_PAGING_LEVELS >= 3 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */ SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */ - SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae */ #else NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #endif NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2121,9 +2110,8 @@ void sh_remove_shadows(struct vcpu *v, m ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift)) | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae */ 0, /* fl1_pae */ - 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae */ - 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae */ - 0, /* l3_pae */ + 0, /* l2_pae */ + 0, /* l2h_pae */ 1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64 */ 0, /* fl1_64 */ 1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64 */ @@ -2166,17 +2154,14 @@ void sh_remove_shadows(struct vcpu *v, m smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \ if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned ) \ sh_unpin(v, smfn); \ - if ( (_type) == PGC_SH_l3_pae_shadow ) \ - SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \ } while (0) if ( sh_flags & SHF_L1_32 ) DO_UNSHADOW(PGC_SH_l1_32_shadow); if ( sh_flags & SHF_L2_32 ) DO_UNPIN(PGC_SH_l2_32_shadow); #if CONFIG_PAGING_LEVELS >= 3 if ( sh_flags & SHF_L1_PAE ) DO_UNSHADOW(PGC_SH_l1_pae_shadow); - if ( sh_flags & SHF_L2_PAE ) DO_UNSHADOW(PGC_SH_l2_pae_shadow); - if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow); - if ( sh_flags & SHF_L3_PAE ) DO_UNPIN(PGC_SH_l3_pae_shadow); + if ( sh_flags & SHF_L2_PAE ) DO_UNPIN(PGC_SH_l2_pae_shadow); + if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow); #if CONFIG_PAGING_LEVELS >= 4 if ( sh_flags & SHF_L1_64 ) DO_UNSHADOW(PGC_SH_l1_64_shadow); if ( sh_flags & SHF_L2_64 ) DO_UNSHADOW(PGC_SH_l2_64_shadow); @@ -2187,14 +2172,6 @@ void sh_remove_shadows(struct vcpu *v, m #undef DO_UNSHADOW #undef DO_UNPIN - - -#if CONFIG_PAGING_LEVELS > 2 - /* We may have caused some PAE l3 entries to change: need to - * fix up the copies of them in various places */ - if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) ) - sh_pae_recopy(v->domain); -#endif /* If that didn't catch the shadows, something is wrong */ if ( !fast && (pg->count_info & PGC_page_table) ) @@ -3127,7 +3104,6 @@ 
void shadow_audit_tables(struct vcpu *v) SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */ SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */ SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */ - SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3), /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */ SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */ @@ -3152,7 +3128,7 @@ void shadow_audit_tables(struct vcpu *v) { case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE - |SHF_L2H_PAE|SHF_L3_PAE); break; + |SHF_L2H_PAE); break; case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64 |SHF_L3_64|SHF_L4_64); break; default: BUG(); diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/mm/shadow/multi.c Tue Oct 17 11:11:48 2006 +0100 @@ -20,20 +20,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -// DESIGN QUESTIONS: -// Why use subshadows for PAE guests? -// - reduces pressure in the hash table -// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3) -// - would need to find space in the page_info to store 7 more bits of -// backpointer -// - independent shadows of 32 byte chunks makes it non-obvious how to quickly -// figure out when to demote the guest page from l3 status -// -// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space. -// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address -// space for both PV and HVM guests. -// #include <xen/config.h> #include <xen/types.h> @@ -118,9 +104,6 @@ static char *fetch_type_names[] = { #endif /* XXX forward declarations */ -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res); -#endif static inline void sh_update_linear_entries(struct vcpu *v); /**************************************************************************/ @@ -129,8 +112,6 @@ static inline void sh_update_linear_entr * Normal case: maps the mfn of a guest page to the mfn of its shadow page. * FL1's: maps the *gfn* of the start of a superpage to the mfn of a * shadow L1 which maps its "splinters". - * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the - * PAE L3 info page for that CR3 value. */ static inline mfn_t @@ -429,18 +410,16 @@ static void sh_audit_gw(struct vcpu *v, if ( !(SHADOW_AUDIT_ENABLE) ) return; -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ if ( valid_mfn(gw->l4mfn) && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, PGC_SH_l4_shadow))) ) (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN)); -#endif /* PAE or 64... */ if ( valid_mfn(gw->l3mfn) && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow))) ) (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN)); -#endif /* All levels... */ +#endif /* PAE or 64... */ if ( valid_mfn(gw->l2mfn) ) { if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, @@ -498,8 +477,7 @@ static u32 guest_set_ad_bits(struct vcpu flags = guest_l1e_get_flags(*ep); /* PAE l3s do not have A and D bits */ - if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) ) - return flags; + ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); /* Need the D bit as well for writes, in L1es and PSE L2es. 
*/ if ( ft == ft_demand_write @@ -646,37 +624,13 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i #endif } -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 static inline u32 shadow_l3_index(mfn_t *smfn, u32 guest_index) { -#if GUEST_PAGING_LEVELS == 3 - u32 group_id; - - // Because we use twice the space in L3 shadows as was consumed in guest - // L3s, the number of guest entries per shadow page is - // SHADOW_L2_PAGETABLE_ENTRIES/2. (Note this is *not* - // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...) - // - *smfn = _mfn(mfn_x(*smfn) + - (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2))); - - // We store PAE L3 shadows in groups of 4, alternating shadows and - // pae_l3_bookkeeping structs. So the effective shadow index is - // the the group_id * 8 + the offset within the group. - // - guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2); - group_id = guest_index / 4; - return (group_id * 8) + (guest_index % 4); -#else return guest_index; -#endif -} - -#endif // GUEST_PAGING_LEVELS >= 3 - -#if GUEST_PAGING_LEVELS >= 4 +} static inline u32 shadow_l4_index(mfn_t *smfn, u32 guest_index) @@ -722,6 +676,9 @@ do { u32 pass_thru_flags; u32 sflags; + /* We don't shadow PAE l3s */ + ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); + // XXX -- might want to think about PAT support for HVM guests... #ifndef NDEBUG @@ -757,29 +714,16 @@ do { if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) ) gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft); - // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's... - // - if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) ) - pass_thru_flags = _PAGE_PRESENT; - else - { - pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER | - _PAGE_RW | _PAGE_PRESENT); - if ( guest_supports_nx(v) ) - pass_thru_flags |= _PAGE_NX_BIT; - } - - // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their - // L3e's; they are all implied. So we emulate them here. - // - if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) ) - gflags = pass_thru_flags; // Propagate bits from the guest to the shadow. // Some of these may be overwritten, below. // Since we know the guest's PRESENT bit is set, we also set the shadow's // SHADOW_PRESENT bit. // + pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER | + _PAGE_RW | _PAGE_PRESENT); + if ( guest_supports_nx(v) ) + pass_thru_flags |= _PAGE_NX_BIT; sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT; // Copy the guest's RW bit into the SHADOW_RW bit. @@ -800,8 +744,7 @@ do { // If the A or D bit has not yet been set in the guest, then we must // prevent the corresponding kind of access. 
// - if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) && - !(gflags & _PAGE_ACCESSED)) ) + if ( unlikely(!(gflags & _PAGE_ACCESSED)) ) sflags &= ~_PAGE_PRESENT; /* D bits exist in L1es and PSE L2es */ @@ -890,9 +833,7 @@ l4e_propagate_from_guest(struct vcpu *v, fetch_type_names[ft], gl4e->l4, sl4p->l4); ASSERT(sflags != -1); } -#endif // GUEST_PAGING_LEVELS >= 4 - -#if GUEST_PAGING_LEVELS >= 3 + static void l3e_propagate_from_guest(struct vcpu *v, guest_l3e_t *gl3e, @@ -912,7 +853,7 @@ l3e_propagate_from_guest(struct vcpu *v, fetch_type_names[ft], gl3e->l3, sl3p->l3); ASSERT(sflags != -1); } -#endif // GUEST_PAGING_LEVELS >= 3 +#endif // GUEST_PAGING_LEVELS >= 4 static void l2e_propagate_from_guest(struct vcpu *v, @@ -1081,9 +1022,6 @@ shadow_write_entries(void *d, void *s, i safe_write_entry(dst++, src++); if ( map != NULL ) sh_unmap_domain_page(map); - - /* XXX TODO: - * Update min/max field in page_info struct of this mfn */ } static inline int @@ -1195,9 +1133,7 @@ static int shadow_set_l4e(struct vcpu *v } return flags; } -#endif /* GUEST_PAGING_LEVELS >= 4 */ - -#if GUEST_PAGING_LEVELS >= 3 + static int shadow_set_l3e(struct vcpu *v, shadow_l3e_t *sl3e, shadow_l3e_t new_sl3e, @@ -1224,28 +1160,6 @@ static int shadow_set_l3e(struct vcpu *v shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn); flags |= SHADOW_SET_CHANGED; -#if GUEST_PAGING_LEVELS == 3 - /* We wrote a guest l3e in a PAE pagetable. This table is copied in - * the linear pagetable entries of its l2s, and may also be copied - * to a low memory location to make it fit in CR3. Report that we - * need to resync those copies (we can't wait for the guest to flush - * the TLB because it might be an increase in rights). */ - { - struct vcpu *vcpu; - - struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e); - for_each_vcpu(v->domain, vcpu) - { - if (info->vcpus & (1 << vcpu->vcpu_id)) - { - // Remember that this flip/update needs to occur. - vcpu->arch.shadow.pae_flip_pending = 1; - flags |= SHADOW_SET_L3PAE_RECOPY; - } - } - } -#endif - if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) { /* We lost a reference to an old mfn. */ @@ -1260,7 +1174,7 @@ static int shadow_set_l3e(struct vcpu *v } return flags; } -#endif /* GUEST_PAGING_LEVELS >= 3 */ +#endif /* GUEST_PAGING_LEVELS >= 4 */ static int shadow_set_l2e(struct vcpu *v, shadow_l2e_t *sl2e, @@ -1535,51 +1449,7 @@ do { #endif /* different kinds of l2 */ -#if GUEST_PAGING_LEVELS == 3 - -/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). 
*/ -#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code) \ -do { \ - int _i; \ - for ( _i = 0; _i < 4; _i++ ) \ - { \ - if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \ - {_code} \ - if ( _done ) break; \ - _sl3e++; \ - increment_ptr_to_guest_entry(_gl3p); \ - } \ -} while (0) - -/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */ -#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \ -do { \ - int _i, _j, _k, __done = 0; \ - ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask) \ - == PGC_SH_l3_pae_shadow); \ - /* The subshadows are split, 64 on each page of the shadow */ \ - for ( _j = 0; _j < 2 && !__done; _j++ ) \ - { \ - void *_sp = sh_map_domain_page(_sl3mfn); \ - for ( _i = 0; _i < 64; _i++ ) \ - { \ - /* Every second 32-byte region is a bookkeeping entry */ \ - _sl3e = (shadow_l3e_t *)(_sp + (64 * _i)); \ - if ( (sl3p_to_info(_sl3e))->refcount > 0 ) \ - SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, \ - ({ __done = (_done); __done; }), \ - _code); \ - else \ - for ( _k = 0 ; _k < 4 ; _k++ ) \ - increment_ptr_to_guest_entry(_gl3p); \ - if ( __done ) break; \ - } \ - sh_unmap_domain_page(_sp); \ - _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1); \ - } \ -} while (0) - -#elif GUEST_PAGING_LEVELS == 4 +#if GUEST_PAGING_LEVELS == 4 /* 64-bit l3: touch all entries */ #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \ @@ -1711,8 +1581,8 @@ void sh_install_xen_entries_in_l2h(struc /* We don't set up a linear mapping here because we can't until this * l2h is installed in an l3e. sh_update_linear_entries() handles - * the linear mappings when the l3 is loaded. We zero them here, just as - * a safety measure. + * the linear mappings when CR3 (and so the fourth l3e) is loaded. + * We zero them here, just as a safety measure. */ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] = @@ -1739,37 +1609,6 @@ void sh_install_xen_entries_in_l2h(struc } sh_unmap_domain_page(sl2e); -} - -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn) -{ - shadow_l3e_t *sl3e; - guest_l3e_t *gl3e = v->arch.guest_vtable; - shadow_l3e_t new_sl3e; - gfn_t l2gfn; - mfn_t l2gmfn, l2smfn; - int r; - - ASSERT(!shadow_mode_external(v->domain)); - ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT); - l2gfn = guest_l3e_get_gfn(gl3e[3]); - l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn)); - l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow); - if ( !valid_mfn(l2smfn) ) - { - /* must remove write access to this page before shadowing it */ - // XXX -- should check to see whether this is better with level==0 or - // level==2... - if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); - - l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow); - } - l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e, - ft_prefetch); - sl3e = sh_map_domain_page(sl3mfn); - r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn); - sh_unmap_domain_page(sl3e); } #endif @@ -1827,8 +1666,6 @@ void sh_install_xen_entries_in_l2(struct - - /**************************************************************************/ /* Create a shadow of a given guest page. 
*/ @@ -1839,7 +1676,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n", mfn_x(gmfn), shadow_type, mfn_x(smfn)); - if ( shadow_type != PGC_SH_guest_root_type ) + if ( shadow_type != PGC_SH_l2_32_shadow + && shadow_type != PGC_SH_l2_pae_shadow + && shadow_type != PGC_SH_l2h_pae_shadow + && shadow_type != PGC_SH_l4_64_shadow ) /* Lower-level shadow, not yet linked form a higher level */ mfn_to_page(smfn)->up = 0; @@ -1853,8 +1693,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf sh_install_xen_entries_in_l4(v, gmfn, smfn); break; #endif #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3 - case PGC_SH_l3_shadow: - sh_install_xen_entries_in_l3(v, gmfn, smfn); break; case PGC_SH_l2h_shadow: sh_install_xen_entries_in_l2h(v, smfn); break; #endif @@ -1988,20 +1826,16 @@ static shadow_l4e_t * shadow_get_and_cre mfn_t *sl4mfn) { /* There is always a shadow of the top level table. Get it. */ - *sl4mfn = pagetable_get_mfn(v->arch.shadow_table); + *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]); /* Reading the top level table is always valid. */ return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va); } -#endif /* GUEST_PAGING_LEVELS >= 4 */ - - -#if GUEST_PAGING_LEVELS >= 3 + static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, walk_t *gw, mfn_t *sl3mfn, fetch_type_t ft) { -#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */ mfn_t sl4mfn; shadow_l4e_t *sl4e; if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */ @@ -2032,19 +1866,8 @@ static shadow_l3e_t * shadow_get_and_cre } /* Now follow it down a level. Guaranteed to succeed. */ return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va); -#else /* PAE... */ - /* There is always a shadow of the top level table. Get it. */ - *sl3mfn = pagetable_get_mfn(v->arch.shadow_table); - /* This next line is important: the shadow l3 table is in an 8k - * shadow and we need to return the right mfn of the pair. This call - * will set it for us as a side-effect. */ - (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e)); - ASSERT(v->arch.shadow_vtable); - return ((shadow_l3e_t *)v->arch.shadow_vtable) - + shadow_l3_table_offset(gw->va); +} #endif /* GUEST_PAGING_LEVELS >= 4 */ -} -#endif /* GUEST_PAGING_LEVELS >= 3 */ static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, @@ -2052,7 +1875,7 @@ static shadow_l2e_t * shadow_get_and_cre mfn_t *sl2mfn, fetch_type_t ft) { -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */ +#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */ mfn_t sl3mfn = _mfn(INVALID_MFN); shadow_l3e_t *sl3e; if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */ @@ -2080,17 +1903,22 @@ static shadow_l2e_t * shadow_get_and_cre *sl2mfn, &new_sl3e, ft); r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn); ASSERT((r & SHADOW_SET_FLUSH) == 0); -#if GUEST_PAGING_LEVELS == 3 - /* Need to sync up the linear maps, as we are about to use them */ - ASSERT( r & SHADOW_SET_L3PAE_RECOPY ); - sh_pae_recopy(v->domain); -#endif } /* Now follow it down a level. Guaranteed to succeed. */ + return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); +#elif GUEST_PAGING_LEVELS == 3 /* PAE... */ + /* We never demand-shadow PAE l3es: they are only created in + * sh_update_cr3(). Check if the relevant sl3e is present. 
*/ + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) + + shadow_l3_linear_offset(gw->va); + if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) + return NULL; + *sl2mfn = shadow_l3e_get_mfn(*sl3e); + ASSERT(valid_mfn(*sl2mfn)); return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); #else /* 32bit... */ /* There is always a shadow of the top level table. Get it. */ - *sl2mfn = pagetable_get_mfn(v->arch.shadow_table); + *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]); /* This next line is important: the guest l2 has a 16k * shadow, we need to return the right mfn of the four. This * call will set it for us as a side-effect. */ @@ -2213,9 +2041,7 @@ void sh_destroy_l4_shadow(struct vcpu *v /* Put the memory back in the pool */ shadow_free(v->domain, smfn); } -#endif - -#if GUEST_PAGING_LEVELS >= 3 + void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn) { shadow_l3e_t *sl3e; @@ -2230,10 +2056,6 @@ void sh_destroy_l3_shadow(struct vcpu *v gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); -#if GUEST_PAGING_LEVELS == 3 - /* Take this shadow off the list of root shadows */ - list_del_init(&mfn_to_page(smfn)->list); -#endif /* Decrement refcounts of all the old entries */ sl3mfn = smfn; @@ -2247,53 +2069,8 @@ void sh_destroy_l3_shadow(struct vcpu *v /* Put the memory back in the pool */ shadow_free(v->domain, smfn); } -#endif - - -#if GUEST_PAGING_LEVELS == 3 -static void sh_destroy_l3_subshadow(struct vcpu *v, - shadow_l3e_t *sl3e) -/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */ -{ - int i; - mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT); - ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); - for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) - if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) - shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn); -} -#endif - -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn) -/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */ -{ - int i, j; - struct pae_l3_bookkeeping *bk; - - ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) - == PGC_SH_l3_pae_shadow); - /* The subshadows are split, 64 on each page of the shadow */ - for ( i = 0; i < 2; i++ ) - { - void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i)); - for ( j = 0; j < 64; j++ ) - { - /* Every second 32-byte region is a bookkeeping entry */ - bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32); - if ( bk->pinned ) - sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn); - /* Check whether we've just freed the whole shadow */ - if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) - { - sh_unmap_domain_page(p); - return; - } - } - sh_unmap_domain_page(p); - } -} -#endif +#endif /* GUEST_PAGING_LEVELS >= 4 */ + void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn) { @@ -2311,7 +2088,7 @@ void sh_destroy_l2_shadow(struct vcpu *v gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); -#if GUEST_PAGING_LEVELS == 2 +#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3) /* Take this shadow off the list of root shadows */ list_del_init(&mfn_to_page(smfn)->list); #endif @@ -2421,31 +2198,14 @@ void sh_unhook_32b_mappings(struct vcpu #elif GUEST_PAGING_LEVELS == 3 -void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn) -/* Walk a full PAE l3 shadow, unhooking entries from 
all the subshadows */ -{ - shadow_l3e_t *sl3e; - SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, { - if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) { - mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e); - if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) - == PGC_SH_l2h_pae_shadow ) - { - /* High l2: need to pick particular l2es to unhook */ - shadow_l2e_t *sl2e; - SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, { - (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); - }); - } - else - { - /* Normal l2: can safely unhook the whole l3e */ - (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn); - } - } +void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn) +/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */ +{ + shadow_l2e_t *sl2e; + int xen_mappings = !shadow_mode_external(v->domain); + SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, { + (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); }); - /* We've changed PAE L3 entries: must sync up various copies of them */ - sh_pae_recopy(v->domain); } #elif GUEST_PAGING_LEVELS == 4 @@ -2523,9 +2283,8 @@ static int validate_gl4e(struct vcpu *v, result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn); return result; } -#endif // GUEST_PAGING_LEVELS >= 4 - -#if GUEST_PAGING_LEVELS >= 3 + + static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se) { shadow_l3e_t new_sl3e; @@ -2535,16 +2294,6 @@ static int validate_gl3e(struct vcpu *v, int result = 0; perfc_incrc(shadow_validate_gl3e_calls); - -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) - { - /* If we've updated a subshadow which is unreferenced then - we don't care what value is being written - bail. */ - struct pae_l3_bookkeeping *info = sl3p_to_info(se); - if(!info->refcount) - return result; - } -#endif if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT ) { @@ -2559,16 +2308,9 @@ static int validate_gl3e(struct vcpu *v, sl2mfn, &new_sl3e, ft_prefetch); result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn); -#if GUEST_PAGING_LEVELS == 3 - /* We have changed a PAE l3 entry: need to sync up the possible copies - * of it */ - if ( result & SHADOW_SET_L3PAE_RECOPY ) - sh_pae_recopy(v->domain); -#endif - return result; } -#endif // GUEST_PAGING_LEVELS >= 3 +#endif // GUEST_PAGING_LEVELS >= 4 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se) { @@ -2755,12 +2497,12 @@ sh_map_and_validate_gl3e(struct vcpu *v, sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size) { -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 return sh_map_and_validate(v, gl3mfn, new_gl3p, size, PGC_SH_l3_shadow, shadow_l3_index, validate_gl3e); -#else // ! GUEST_PAGING_LEVELS >= 3 +#else // ! 
GUEST_PAGING_LEVELS >= 4 SHADOW_PRINTK("called in wrong paging mode!\n"); BUG(); return 0; @@ -2822,7 +2564,7 @@ static inline void check_for_early_unsha { u32 flags = mfn_to_page(gmfn)->shadow_flags; mfn_t smfn; - if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) ) + if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) ) { perfc_incrc(shadow_early_unshadow); sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); @@ -2840,9 +2582,14 @@ static inline void check_for_early_unsha smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow); shadow_unhook_mappings(v, smfn); } - if ( flags & SHF_L3_PAE ) + if ( flags & SHF_L2_PAE ) { - smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow); + smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow); + shadow_unhook_mappings(v, smfn); + } + if ( flags & SHF_L2H_PAE ) + { + smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow); shadow_unhook_mappings(v, smfn); } if ( flags & SHF_L4_64 ) @@ -3183,8 +2930,7 @@ sh_invlpg(struct vcpu *v, unsigned long return 0; } #elif SHADOW_PAGING_LEVELS == 3 - if ( !(shadow_l3e_get_flags( - ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)]) + if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)]) & _PAGE_PRESENT) ) // no need to flush anything if there's no SL2... return 0; @@ -3247,34 +2993,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned l else return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK); } - - -// XXX -- should this be in this file? -// Or should it be moved to shadow-common.c? -// -/* returns a lowmem machine address of the copied HVM L3 root table - * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy, - * otherwise blank out any entries with reserved bits in them. */ -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -static unsigned long -hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res) -{ - int i, f; - int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY); - l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab; - memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t)); - for ( i = 0; i < 4; i++ ) - { - f = l3e_get_flags(l3tab[i]); - if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) ) - new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res); - else - new_l3e = l3e_empty(); - safe_write_entry(©[i], &new_l3e); - } - return __pa(copy); -} -#endif static inline void @@ -3330,7 +3048,7 @@ sh_update_linear_entries(struct vcpu *v) if ( v == current ) { __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = - l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table), + l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), __PAGE_HYPERVISOR); } else @@ -3338,7 +3056,7 @@ sh_update_linear_entries(struct vcpu *v) l4_pgentry_t *ml4e; ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table)); ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = - l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table), + l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), __PAGE_HYPERVISOR); sh_unmap_domain_page(ml4e); } @@ -3379,13 +3097,8 @@ sh_update_linear_entries(struct vcpu *v) sh_unmap_domain_page(ml4e); } -#if GUEST_PAGING_LEVELS == 2 /* Shadow l3 tables are made up by update_cr3 */ - sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; -#else - /* Always safe to use shadow_vtable, because it's globally mapped */ - sl3e = v->arch.shadow_vtable; -#endif + sl3e = v->arch.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) { @@ -3424,14 +3137,14 @@ sh_update_linear_entries(struct vcpu *v) 
#if GUEST_PAGING_LEVELS == 2 /* Shadow l3 tables were built by update_cr3 */ if ( shadow_mode_external(d) ) - shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; + shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; else BUG(); /* PV 2-on-3 is not supported yet */ #else /* GUEST_PAGING_LEVELS == 3 */ - /* Always safe to use *_vtable, because they're globally mapped */ - shadow_l3e = v->arch.shadow_vtable; + shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; + /* Always safe to use guest_vtable, because it's globally mapped */ guest_l3e = v->arch.guest_vtable; #endif /* GUEST_PAGING_LEVELS */ @@ -3510,7 +3223,7 @@ sh_update_linear_entries(struct vcpu *v) if ( v == current ) { __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = - l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table), + l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), __PAGE_HYPERVISOR); } else @@ -3518,7 +3231,7 @@ sh_update_linear_entries(struct vcpu *v) l2_pgentry_t *ml2e; ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table)); ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = - l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table), + l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), __PAGE_HYPERVISOR); sh_unmap_domain_page(ml2e); } @@ -3530,69 +3243,7 @@ sh_update_linear_entries(struct vcpu *v) } -// XXX -- should this be in this file? -// Or should it be moved to shadow-common.c? -// -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -void sh_pae_recopy(struct domain *d) -/* Called whenever we write to the l3 entries of a PAE pagetable which - * is currently in use. Each vcpu that is using the table needs to - * resync its copies of the l3s in linear maps and any low-memory - * copies it might have made for fitting into 32bit CR3. - * Since linear maps are also resynced when we change CR3, we don't - * need to worry about changes to PAE l3es that are not currently in use.*/ -{ - struct vcpu *v; - cpumask_t flush_mask = CPU_MASK_NONE; - ASSERT(shadow_lock_is_acquired(d)); - - for_each_vcpu(d, v) - { - if ( !v->arch.shadow.pae_flip_pending ) - continue; - - cpu_set(v->processor, flush_mask); - - SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id); - - /* This vcpu has a copy in its linear maps */ - sh_update_linear_entries(v); - if ( hvm_guest(v) ) - { - /* This vcpu has a copy in its HVM PAE l3 */ - v->arch.hvm_vcpu.hw_cr3 = - hvm_pae_copy_root(v, v->arch.shadow_vtable, - !shadow_vcpu_mode_translate(v)); - } -#if CONFIG_PAGING_LEVELS == 3 - else - { - /* This vcpu might have copied the l3 to below 4GB */ - if ( v->arch.cr3 >> PAGE_SHIFT - != pagetable_get_pfn(v->arch.shadow_table) ) - { - /* Recopy to where that copy is. */ - int i; - l3_pgentry_t *dst, *src; - dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */ - src = v->arch.shadow_vtable; - for ( i = 0 ; i < 4 ; i++ ) - safe_write_entry(dst + i, src + i); - } - } -#endif - v->arch.shadow.pae_flip_pending = 0; - } - - flush_tlb_mask(flush_mask); -} -#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */ - - -/* removes: - * vcpu->arch.guest_vtable - * vcpu->arch.shadow_table - * vcpu->arch.shadow_vtable +/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[]. * Does all appropriate management/bookkeeping/refcounting/etc... 
*/ static void @@ -3600,6 +3251,7 @@ sh_detach_old_tables(struct vcpu *v) { struct domain *d = v->domain; mfn_t smfn; + int i = 0; //// //// vcpu->arch.guest_vtable @@ -3620,56 +3272,80 @@ sh_detach_old_tables(struct vcpu *v) } //// - //// vcpu->arch.shadow_table + //// vcpu->arch.shadow_table[] //// - smfn = pagetable_get_mfn(v->arch.shadow_table); - if ( mfn_x(smfn) ) - { - ASSERT(v->arch.shadow_vtable); + #if GUEST_PAGING_LEVELS == 3 - // PAE guests do not (necessarily) use an entire page for their - // 4-entry L3s, so we have to deal with them specially. - // - sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn); -#else - sh_put_ref(v, smfn, 0); -#endif - -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) - { - struct pae_l3_bookkeeping *info = - sl3p_to_info(v->arch.shadow_vtable); - ASSERT(test_bit(v->vcpu_id, &info->vcpus)); - clear_bit(v->vcpu_id, &info->vcpus); - } -#endif - v->arch.shadow_table = pagetable_null(); - } - - //// - //// vcpu->arch.shadow_vtable - //// - if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) && - v->arch.shadow_vtable ) - { - // Q: why does this need to use (un)map_domain_page_*global* ? - /* A: so sh_update_linear_entries can operate on other vcpus */ - sh_unmap_domain_page_global(v->arch.shadow_vtable); - v->arch.shadow_vtable = NULL; - } -} + /* PAE guests have four shadow_table entries */ + for ( i = 0 ; i < 4 ; i++ ) +#endif + { + smfn = pagetable_get_mfn(v->arch.shadow_table[i]); + if ( mfn_x(smfn) ) + sh_put_ref(v, smfn, 0); + v->arch.shadow_table[i] = pagetable_null(); + } +} + +/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ +static void +sh_set_toplevel_shadow(struct vcpu *v, + int slot, + mfn_t gmfn, + unsigned int root_type) +{ + mfn_t smfn = get_shadow_status(v, gmfn, root_type); + struct domain *d = v->domain; + ASSERT(pagetable_is_null(v->arch.shadow_table[slot])); + if ( valid_mfn(smfn) ) + { + /* Pull this root shadow to the front of the list of roots. */ + list_del(&mfn_to_page(smfn)->list); + list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows); + } + else + { + /* This guest MFN is a pagetable. Must revoke write access. */ + if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 ) + flush_tlb_mask(v->domain->domain_dirty_cpumask); + /* Make sure there's enough free shadow memory. */ + shadow_prealloc(d, SHADOW_MAX_ORDER); + /* Shadow the page. */ + smfn = sh_make_shadow(v, gmfn, root_type); + list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows); + } + ASSERT(valid_mfn(smfn)); + +#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW + /* Once again OK to unhook entries from this table if we see fork/exit */ + ASSERT(sh_mfn_is_a_page_table(gmfn)); + mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings; +#endif + + /* Take a ref to this page: it will be released in sh_detach_old_tables. */ + sh_get_ref(smfn, 0); + sh_pin(smfn); + + /* Done. Install it */ + SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n", + GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, + mfn_x(gmfn), mfn_x(smfn)); + v->arch.shadow_table[slot] = pagetable_from_mfn(smfn); +} + static void sh_update_cr3(struct vcpu *v) -/* Updates vcpu->arch.shadow_table after the guest has changed CR3. +/* Updates vcpu->arch.cr3 after the guest has changed CR3. * Paravirtual guests should set v->arch.guest_table (and guest_table_user, * if appropriate). - * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)... 
+ * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works, + * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards. */ { struct domain *d = v->domain; - mfn_t gmfn, smfn; + mfn_t gmfn; #if GUEST_PAGING_LEVELS == 3 u32 guest_idx=0; #endif @@ -3770,159 +3446,93 @@ sh_update_cr3(struct vcpu *v) #endif //// - //// vcpu->arch.shadow_table + //// vcpu->arch.shadow_table[] //// - smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type); - if ( valid_mfn(smfn) ) - { - /* Pull this root shadow to the front of the list of roots. */ - list_del(&mfn_to_page(smfn)->list); - list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows); - } - else - { - /* This guest MFN is a pagetable. Must revoke write access. */ - if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) - != 0 ) - flush_tlb_mask(d->domain_dirty_cpumask); - /* Make sure there's enough free shadow memory. */ - shadow_prealloc(d, SHADOW_MAX_ORDER); - /* Shadow the page. */ - smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type); - list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows); - } - ASSERT(valid_mfn(smfn)); - v->arch.shadow_table = pagetable_from_mfn(smfn); - -#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - /* Once again OK to unhook entries from this table if we see fork/exit */ - ASSERT(sh_mfn_is_a_page_table(gmfn)); - mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings; -#endif - - - //// - //// vcpu->arch.shadow_vtable - //// - if ( shadow_mode_external(d) ) - { -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) - mfn_t adjusted_smfn = smfn; - u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx); - // Q: why does this need to use (un)map_domain_page_*global* ? - v->arch.shadow_vtable = - (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) + - shadow_idx; + +#if GUEST_PAGING_LEVELS == 2 + sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow); +#elif GUEST_PAGING_LEVELS == 3 + /* PAE guests have four shadow_table entries, based on the + * current values of the guest's four l3es. */ + { + int i; + guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable; + for ( i = 0; i < 4; i++ ) + { + ASSERT(pagetable_is_null(v->arch.shadow_table[i])); + if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) + { + gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]); + mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn); + if ( valid_mfn(gl2mfn) ) + sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) + ? PGC_SH_l2h_shadow + : PGC_SH_l2_shadow); + } + } + } +#elif GUEST_PAGING_LEVELS == 4 + sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow); #else - // Q: why does this need to use (un)map_domain_page_*global* ? - v->arch.shadow_vtable = sh_map_domain_page_global(smfn); -#endif - } - else - { -#if SHADOW_PAGING_LEVELS == 4 - v->arch.shadow_vtable = __sh_linear_l4_table; -#elif GUEST_PAGING_LEVELS == 3 - // XXX - why does this need a global map? 
- v->arch.shadow_vtable = sh_map_domain_page_global(smfn); +#error This should never happen +#endif + +#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) +#endif + + /// + /// v->arch.shadow.l3table + /// +#if SHADOW_PAGING_LEVELS == 3 + { + mfn_t smfn; + int i; + for ( i = 0; i < 4; i++ ) + { +#if GUEST_PAGING_LEVELS == 2 + /* 2-on-3: make a PAE l3 that points at the four-page l2 */ + smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i); #else - v->arch.shadow_vtable = __sh_linear_l2_table; -#endif - } - -#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) - // Now that shadow_vtable is in place, check that the sl3e[3] is properly - // shadowed and installed in PAE PV guests... - if ( !shadow_mode_external(d) && - !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) & - _PAGE_PRESENT) ) - { - sh_install_xen_entries_in_l3(v, gmfn, smfn); - } -#endif - - //// - //// Take a ref to the new shadow table, and pin it. - //// - // - // This ref is logically "held" by v->arch.shadow_table entry itself. - // Release the old ref. - // -#if GUEST_PAGING_LEVELS == 3 - // PAE guests do not (necessarily) use an entire page for their - // 4-entry L3s, so we have to deal with them specially. - // - // XXX - might want to revisit this if/when we do multiple compilation for - // HVM-vs-PV guests, as PAE PV guests could get away without doing - // subshadows. - // - sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn); - sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn); -#else - sh_get_ref(smfn, 0); - sh_pin(smfn); -#endif - -#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3) - // PAE 3-on-3 shadows have to keep track of which vcpu's are using - // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY - // case from validate_gl3e(). Search for SHADOW_SET_L3PAE_RECOPY - // in the code for more info. - // - { - struct pae_l3_bookkeeping *info = - sl3p_to_info(v->arch.shadow_vtable); - ASSERT(!test_bit(v->vcpu_id, &info->vcpus)); - set_bit(v->vcpu_id, &info->vcpus); - } -#endif - - debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n", - __func__, gmfn, smfn); + /* 3-on-3: make a PAE l3 that points at the four l2 pages */ + smfn = pagetable_get_mfn(v->arch.shadow_table[i]); +#endif + v->arch.shadow.l3table[i] = + (mfn_x(smfn) == 0) + ? shadow_l3e_empty() + : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); + } + } +#endif /* SHADOW_PAGING_LEVELS == 3 */ + /// - /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3 + /// v->arch.cr3 /// if ( shadow_mode_external(d) ) { - ASSERT(hvm_guest(v)); make_cr3(v, pagetable_get_pfn(v->arch.monitor_table)); - -#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2) -#if SHADOW_PAGING_LEVELS != 3 -#error unexpected combination of GUEST and SHADOW paging levels -#endif - /* 2-on-3: make a PAE l3 table that points at the four-page l2 */ - { - mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table); - int i; - - ASSERT(v->arch.hvm_vcpu.hw_cr3 == - virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab)); - for (i = 0; i < 4; i++) - { - v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] = - shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT); - } - } -#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) - /* 3-on-3: copy the shadow l3 to slots that are below 4GB. - * If paging is disabled, clear l3e reserved bits; otherwise - * remove entries that have reserved bits set. 
*/ - v->arch.hvm_vcpu.hw_cr3 = - hvm_pae_copy_root(v, v->arch.shadow_vtable, - !shadow_vcpu_mode_translate(v)); -#else - /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */ - v->arch.hvm_vcpu.hw_cr3 = - pagetable_get_paddr(v->arch.shadow_table); -#endif } else // not shadow_mode_external... { /* We don't support PV except guest == shadow == config levels */ BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS); - make_cr3(v, pagetable_get_pfn(v->arch.shadow_table)); + make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0])); + } + + + /// + /// v->arch.hvm_vcpu.hw_cr3 + /// + if ( shadow_mode_external(d) ) + { + ASSERT(hvm_guest(v)); +#if SHADOW_PAGING_LEVELS == 3 + /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ + v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table); +#else + /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ + v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]); +#endif } /* Fix up the linear pagetable mappings */ @@ -3950,7 +3560,6 @@ static int sh_guess_wrmap(struct vcpu *v /* Carefully look in the shadow linear map for the l1e we expect */ - if ( v->arch.shadow_vtable == NULL ) return 0; #if GUEST_PAGING_LEVELS >= 4 sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) @@ -3959,7 +3568,7 @@ static int sh_guess_wrmap(struct vcpu *v if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; #elif GUEST_PAGING_LEVELS == 3 - sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) + sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) + shadow_l3_linear_offset(vaddr); if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; @@ -4044,13 +3653,11 @@ void sh_clear_shadow_entry(struct vcpu * case PGC_SH_l2h_shadow: #endif shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break; -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 case PGC_SH_l3_shadow: shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break; -#if GUEST_PAGING_LEVELS >= 4 case PGC_SH_l4_shadow: shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break; -#endif #endif default: BUG(); /* Called with the wrong kind of shadow. 
*/ } @@ -4081,7 +3688,7 @@ int sh_remove_l1_shadow(struct vcpu *v, return done; } -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn) /* Remove all mappings of this l2 shadow from this l3 shadow */ { @@ -4104,7 +3711,6 @@ int sh_remove_l2_shadow(struct vcpu *v, return done; } -#if GUEST_PAGING_LEVELS >= 4 int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn) /* Remove all mappings of this l3 shadow from this l4 shadow */ { @@ -4127,7 +3733,6 @@ int sh_remove_l3_shadow(struct vcpu *v, return done; } #endif /* 64bit guest */ -#endif /* PAE guest */ /**************************************************************************/ /* Handling HVM guest writes to pagetables */ @@ -4448,7 +4053,7 @@ int sh_audit_l2_table(struct vcpu *v, mf return 0; } -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x) { guest_l3e_t *gl3e, *gp; @@ -4486,9 +4091,7 @@ int sh_audit_l3_table(struct vcpu *v, mf sh_unmap_domain_page(gp); return 0; } -#endif /* GUEST_PAGING_LEVELS >= 3 */ - -#if GUEST_PAGING_LEVELS >= 4 + int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x) { guest_l4e_t *gl4e, *gp; diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/mm/shadow/multi.h --- a/xen/arch/x86/mm/shadow/multi.h Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/mm/shadow/multi.h Tue Oct 17 11:11:48 2006 +0100 @@ -49,10 +49,6 @@ extern void extern void SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)( struct vcpu *v, mfn_t smfn); - -extern void -SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3) - (struct vcpu *v, mfn_t smfn); extern void SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS) diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/mm/shadow/private.h Tue Oct 17 11:11:48 2006 +0100 @@ -336,13 +336,9 @@ void shadow_convert_to_log_dirty(struct * non-Xen mappings in this top-level shadow mfn */ void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn); -/* Re-sync copies of PAE shadow L3 tables if they have been changed */ -void sh_pae_recopy(struct domain *d); - /* Install the xen mappings in various flavours of shadow */ void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn); void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn); void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); diff -r 22885e4c1275 -r 37ee88ca1440 xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Tue Oct 17 11:07:11 2006 +0100 +++ b/xen/arch/x86/mm/shadow/types.h Tue Oct 17 11:11:48 2006 +0100 @@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_fr shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \ }) -// shadow linear L3 and L4 tables only exist in 4 level paging... 
-#if SHADOW_PAGING_LEVELS == 4
+#if SHADOW_PAGING_LEVELS >= 4
 #define sh_linear_l3_table(v) ({ \
     ASSERT(current == (v)); \
     ((shadow_l3e_t *) \
@@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from
 #define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
 #define PGC_SH_l2_shadow  PGC_SH_l2_pae_shadow
 #define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
-#define PGC_SH_l3_shadow  PGC_SH_l3_pae_shadow
 #else
 #define PGC_SH_l1_shadow  PGC_SH_l1_64_shadow
 #define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
@@ -404,14 +402,6 @@ valid_gfn(gfn_t m)
 {
     return VALID_GFN(gfn_x(m));
 }
-
-#if GUEST_PAGING_LEVELS == 2
-#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
-#elif GUEST_PAGING_LEVELS == 3
-#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
-#else
-#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
-#endif

 /* Translation between mfns and gfns */
 static inline mfn_t
@@ -490,8 +480,6 @@ struct shadow_walk_t
 #define sh_map_and_validate_gl1e INTERNAL_NAME(sh_map_and_validate_gl1e)
 #define sh_destroy_l4_shadow INTERNAL_NAME(sh_destroy_l4_shadow)
 #define sh_destroy_l3_shadow INTERNAL_NAME(sh_destroy_l3_shadow)
-#define sh_destroy_l3_subshadow INTERNAL_NAME(sh_destroy_l3_subshadow)
-#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
 #define sh_destroy_l2_shadow INTERNAL_NAME(sh_destroy_l2_shadow)
 #define sh_destroy_l1_shadow INTERNAL_NAME(sh_destroy_l1_shadow)
 #define sh_unhook_32b_mappings INTERNAL_NAME(sh_unhook_32b_mappings)
@@ -533,115 +521,6 @@ struct shadow_walk_t
                        SHADOW_PAGING_LEVELS)

-#if GUEST_PAGING_LEVELS == 3
-/*
- * Accounting information stored in the shadow of PAE Guest L3 pages.
- * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
- * various refcounts, etc., on the page_info of their page.  We provide extra
- * bookkeeping space in the shadow itself, and this is the structure
- * definition for that bookkeeping information.
- */
-struct pae_l3_bookkeeping {
-    u32 vcpus;                  /* bitmap of which vcpus are currently storing
-                                 * copies of this 32-byte page */
-    u32 refcount;               /* refcount for this 32-byte page */
-    u8  pinned;                 /* is this 32-byte page pinned or not? */
-};
-
-// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
-#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *)         \
-                            (((unsigned long)(_ptr) & ~31) + 32))
-
-static void sh_destroy_l3_subshadow(struct vcpu *v,
-                                    shadow_l3e_t *sl3e);
-
-/* Increment a subshadow ref
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-    /* First ref to the subshadow takes a ref to the full shadow */
-    if ( bk->refcount == 0 )
-        sh_get_ref(smfn, 0);
-    if ( unlikely(++(bk->refcount) == 0) )
-    {
-        SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n",
-                      mfn_x(smfn), sl3e);
-        domain_crash_synchronous();
-    }
-}
-
-/* Decrement a subshadow ref.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh_put_ref_l3_subshadow(struct vcpu *v,
-                                           shadow_l3e_t *sl3e,
-                                           mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk;
-
-    bk = sl3p_to_info(sl3e);
-
-    ASSERT(bk->refcount > 0);
-    if ( --(bk->refcount) == 0 )
-    {
-        /* Need to destroy this subshadow */
-        sh_destroy_l3_subshadow(v, sl3e);
-        /* Last ref to the subshadow had a ref to the full shadow */
-        sh_put_ref(v, smfn, 0);
-    }
-}
-
-/* Pin a subshadow
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( !bk->pinned )
-    {
-        bk->pinned = 1;
-        sh_get_ref_l3_subshadow(sl3e, smfn);
-    }
-}
-
-/* Unpin a sub-shadow.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh_unpin_l3_subshadow(struct vcpu *v,
-                                         shadow_l3e_t *sl3e,
-                                         mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( bk->pinned )
-    {
-        bk->pinned = 0;
-        sh_put_ref_l3_subshadow(v, sl3e, smfn);
-    }
-}
-
-#endif /* GUEST_PAGING_LEVELS == 3 */
-
 #if SHADOW_PAGING_LEVELS == 3
 #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
 #endif
diff -r 22885e4c1275 -r 37ee88ca1440 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue Oct 17 11:07:11 2006 +0100
+++ b/xen/include/asm-x86/domain.h	Tue Oct 17 11:11:48 2006 +0100
@@ -134,6 +134,10 @@ struct pae_l3_cache { };
 #endif

 struct shadow_vcpu {
+#if CONFIG_PAGING_LEVELS >= 3
+    /* PAE guests: per-vcpu shadow top-level table */
+    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
+#endif
     /* Pointers to mode-specific entry points. */
     struct shadow_paging_mode *mode;
     /* Last MFN that we emulated a write to. */
@@ -142,10 +146,6 @@ struct shadow_vcpu {
     unsigned int translate_enabled:1;
     /* Emulated fault needs to be propagated to guest? */
     unsigned int propagate_fault:1;
-#if CONFIG_PAGING_LEVELS >= 3
-    /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
-    unsigned int pae_flip_pending:1;
-#endif
 };

 struct arch_vcpu
@@ -190,13 +190,12 @@ struct arch_vcpu
     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
     /* guest_table holds a ref to the page, and also a type-count unless
      * shadow refcounts are in use */
-    pagetable_t shadow_table;           /* (MFN) shadow of guest */
+    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                  /* (MA) value to install in HW CR3 */
-    void *guest_vtable;                 /* virtual address of pagetable */
-    void *shadow_vtable;                /* virtual address of shadow_table */
-    root_pgentry_t *monitor_vtable;     /* virtual address of monitor_table */
+    void *guest_vtable;                 /* virtual addr of pagetable */
+    root_pgentry_t *monitor_vtable;     /* virtual addr of monitor_table */

     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;

diff -r 22885e4c1275 -r 37ee88ca1440 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h	Tue Oct 17 11:07:11 2006 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h	Tue Oct 17 11:11:48 2006 +0100
@@ -41,11 +41,6 @@ struct hvm_vcpu {

     int                 xen_port;

-#if CONFIG_PAGING_LEVELS >= 3
-    l3_pgentry_t hvm_lowmem_l3tab[4]
-    __attribute__((__aligned__(32)));
-#endif
-
     /* Flags */
     int                 flag_dr_dirty;
diff -r 22885e4c1275 -r 37ee88ca1440 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h	Tue Oct 17 11:07:11 2006 +0100
+++ b/xen/include/asm-x86/mm.h	Tue Oct 17 11:11:48 2006 +0100
@@ -114,15 +114,14 @@ struct page_info
 #define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
 #define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
 #define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH_max_shadow    (13U<<28)
-#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
-#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_l1_64_shadow   (8U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow  (9U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow  (10U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow  (11U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow  (12U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow    (12U<<28)
+#define PGC_SH_p2m_table     (13U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
 #define PGC_SH_unused        (15U<<28)
 #define PGC_SH_type_mask     (15U<<28)

diff -r 22885e4c1275 -r 37ee88ca1440 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h	Tue Oct 17 11:07:11 2006 +0100
+++ b/xen/include/asm-x86/shadow.h	Tue Oct 17 11:11:48 2006 +0100
@@ -72,7 +72,6 @@
 #define SHADOW_SET_CHANGED            0x1
 #define SHADOW_SET_FLUSH              0x2
 #define SHADOW_SET_ERROR              0x4
-#define SHADOW_SET_L3PAE_RECOPY       0x8

 // How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
 #ifdef __x86_64__
@@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
  * for HVM guests, arch.monitor_table and hvm's guest CR3.
  *
  * Update ref counts to shadow tables appropriately.
- * For PAE, relocate L3 entries, if necessary, into low memory.
  */
 static inline void update_cr3(struct vcpu *v)
 {
@@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct
 #define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
 #define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
 #define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
-#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
 #define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
 #define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
 #define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
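
Background note on the shadow_table[] change in the patch above: after this changeset a PAE guest's top level is never shadowed as a page in its own right. Instead, the four entries of the guest's 32-byte L3 table are read back on each CR3 load, and each present entry gets its own top-level shadow, recorded in the new v->arch.shadow_table[4] array. The fragment below is a minimal, self-contained sketch of that bookkeeping only; the types and helpers (vcpu_sketch, shadow_of_guest_l2, and so on) are invented for illustration and are not the Xen functions touched by this patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins only: these are NOT the Xen types or helpers. */
typedef uint64_t guest_l3e_sketch_t;                 /* one PAE guest L3 entry */
typedef struct { unsigned long mfn; } shadow_top_t;  /* one top-level shadow   */
#define PAE_L3_ENTRIES 4
#define PRESENT_BIT    0x1ULL

struct vcpu_sketch {
    /* "Four CR3 registers": one top-level shadow per guest L3 slot. */
    shadow_top_t shadow_table[PAE_L3_ENTRIES];
};

/* Invented placeholder: find or build the shadow of one guest L2 page. */
static shadow_top_t shadow_of_guest_l2(unsigned long gl2mfn)
{
    return (shadow_top_t){ .mfn = gl2mfn ^ 0x100000UL };  /* fake value */
}

/* On a guest CR3 write, re-read all four guest L3 entries and refresh the
 * per-vcpu array of top-level shadows; absent slots get an empty shadow. */
static void cr3_write_sketch(struct vcpu_sketch *v,
                             const guest_l3e_sketch_t gl3e[PAE_L3_ENTRIES])
{
    for ( int i = 0; i < PAE_L3_ENTRIES; i++ )
    {
        if ( gl3e[i] & PRESENT_BIT )
            v->shadow_table[i] =
                shadow_of_guest_l2((unsigned long)(gl3e[i] >> 12));
        else
            v->shadow_table[i] = (shadow_top_t){ .mfn = 0 };
    }
}

int main(void)
{
    struct vcpu_sketch v = { { { 0 } } };
    guest_l3e_sketch_t gl3e[PAE_L3_ENTRIES] =
        { 0x1000 | PRESENT_BIT, 0, 0x3000 | PRESENT_BIT, 0 };

    cr3_write_sketch(&v, gl3e);
    for ( int i = 0; i < PAE_L3_ENTRIES; i++ )
        printf("shadow_table[%d].mfn = %#lx\n", i, v.shadow_table[i].mfn);
    return 0;
}

The trade-off, as the commit message notes, is that a CR3 write now costs four top-level lookups instead of one, in exchange for never having to hold read-only the guest pages that happen to contain L3 tables.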
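The other half of the change is the per-vcpu l3table[4] field added to struct shadow_vcpu, replacing hvm_lowmem_l3tab in struct hvm_vcpu. This works because in PAE mode the hardware top level is not a full page but a 4-entry, 32-byte-aligned table, so hw_cr3 can be given the machine address of this small per-vcpu array directly; the MFN_FITS_IN_HVM_CR3 macro kept above encodes the related requirement that whatever a PAE guest's CR3 points at must lie below 4GB. The stand-alone sketch below shows only the size and alignment point; the type names are invented and this is not the Xen declaration itself.

#include <assert.h>
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for l3_pgentry_t; name invented for this sketch. */
typedef uint64_t l3e_sketch_t;

/* Per-vcpu PAE top level: four 8-byte entries, 32-byte aligned, matching
 * what PAE hardware expects of the table that CR3 points at. */
struct shadow_vcpu_sketch {
    alignas(32) l3e_sketch_t l3table[4];
};

int main(void)
{
    static struct shadow_vcpu_sketch sv;

    /* The address handed to hw_cr3 must be 32-byte aligned... */
    assert(((uintptr_t)sv.l3table & 31) == 0);
    /* ...and the whole table is only 32 bytes long. */
    printf("l3table at %p, %zu bytes\n", (void *)sv.l3table, sizeof sv.l3table);
    return 0;
}

Keeping the table inside struct shadow_vcpu means there is no separately allocated, refcounted page for the guest's top level, which is what allows the PAE l3-subshadow machinery deleted above to go away.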