[Xen-changelog] Allows x86_32 PAE Xen to run VMX domains (2-level guest page tables)
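Editorial note: the changeset below runs 2-level (non-PAE) guests on PAE shadow page tables, so each 1024-entry guest L1 table is backed by two contiguous 512-entry PAE shadow L1 pages (see SH_L1_HAS_NEXT_PAGE and spl1e_next in xen/arch/x86/shadow.c). As a reading aid, here is a minimal, hypothetical C sketch of that index split; the constants, the entry type and the helper are illustrative simplifications, not Xen's actual definitions.

/* Hedged sketch (not Xen code): how a 1024-entry 2-level guest L1 maps
 * onto two contiguous 512-entry PAE shadow L1 pages, mirroring the
 * patch's SH_L1_HAS_NEXT_PAGE / spl1e_next handling. */
#include <stdint.h>

#define GUEST_L1_PAGETABLE_ENTRIES 1024  /* 2-level guest: 4-byte PTEs */
#define L1_PAGETABLE_ENTRIES        512  /* PAE shadow: 8-byte PTEs    */

typedef uint64_t shadow_l1e_t;           /* illustrative PAE entry type */

/* Write shadow entry i of a guest L1 that is shadowed by two pages. */
static void set_shadow_l1e(shadow_l1e_t *spl1e, shadow_l1e_t *spl1e_next,
                           unsigned int i, shadow_l1e_t sl1e)
{
    if (i >= L1_PAGETABLE_ENTRIES)               /* upper half of guest L1 */
        spl1e_next[i - L1_PAGETABLE_ENTRIES] = sl1e;
    else                                         /* lower half */
        spl1e[i] = sl1e;
}

The same two-page split recurs throughout the patch wherever an index can range over GUEST_L1_PAGETABLE_ENTRIES.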
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID f030f4b565a57b0ebe3461fad7ed52193ec880b6
# Parent  17b5d5cca4841177d8f08815af857e383b5b627f

Allows x86_32 PAE Xen to run VMX domains (2-level guest page tables).
To support >4GB machines, we use PAE mode for the shadow page tables;
the guests think they are using 2-level page tables.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xin B Li <xin.b.li@xxxxxxxxx>

This should not break SVM, however the SVM code will need some small
changes to enable support for non-pae guests on pae hosts.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/Makefile     Mon Feb 6 22:25:31 2006
@@ -26,7 +26,7 @@
 endif
 ifeq ($(TARGET_SUBARCH),x86_32)
 ifneq ($(pae),n)
-OBJS += shadow.o shadow_public.o       # x86_32p: new code
+OBJS += shadow.o shadow_public.o shadow_guest32.o      # x86_32p: new code
 else
 OBJS += shadow32.o     # x86_32: old code
 endif

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Feb 6 22:25:31 2006
@@ -191,17 +191,9 @@
     __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );

-#ifdef __x86_64__
     error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
-#else
-    error |= __vmwrite(GUEST_CR4, cr4);
-#endif
-
-#ifdef __x86_64__
     cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
-#else
-    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
-#endif
+
     error |= __vmwrite(CR4_READ_SHADOW, cr4);

     vmx_stts();

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Feb 6 22:25:31 2006
@@ -645,7 +645,7 @@
          !vlapic_global_enabled((VLAPIC(v))) )
         clear_bit(X86_FEATURE_APIC, &edx);

-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
     if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
 #endif
     {
@@ -995,7 +995,7 @@
     if(!get_page(mfn_to_page(mfn), v->domain))
         return 0;
     old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-    v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+    v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
     if (old_base_mfn)
         put_page(mfn_to_page(old_base_mfn));
     update_pagetables(v);
@@ -1196,8 +1196,9 @@
 #endif
     }
     else
+#endif /* __x86_64__ */
     {
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
         if(!shadow_set_guest_paging_levels(v->domain, 2)) {
             printk("Unsupported guest paging levels\n");
             domain_crash_synchronous(); /* need to take a clean path */
@@ -1217,14 +1218,13 @@
                 __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
             }
         }
-#endif
 #if CONFIG_PAGING_LEVELS == 2
         shadow_direct_map_clean(v);
 #endif
         /*
         * Now arch.guest_table points to machine physical.
         */
-        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
         update_pagetables(v);

         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
@@ -1392,7 +1392,7 @@
         domain_crash_synchronous(); /* need to take a clean path */
     }
     old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-    v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+    v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
     if (old_base_mfn)
         put_page(mfn_to_page(old_base_mfn));
     update_pagetables(v);

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/setup.c      Mon Feb 6 22:25:31 2006
@@ -575,7 +575,7 @@
         p += sprintf(p, "xen-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
         if ( hvm_enabled )
         {
-            //p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
+            p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
             //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
         }

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/shadow.c     Mon Feb 6 22:25:31 2006
@@ -35,6 +35,9 @@
 #include <xen/sched.h>
 #include <xen/trace.h>
 #include <asm/shadow_64.h>
+
+/* Use this to have the compiler remove unnecessary branches */
+#define SH_L1_HAS_NEXT_PAGE (GUEST_L1_PAGETABLE_ENTRIES - L1_PAGETABLE_ENTRIES)

 extern void free_shadow_pages(struct domain *d);

@@ -223,11 +226,16 @@
     }
     else
     {
-        if (d->arch.ops->guest_paging_levels == PAGING_L2)
+        if ( SH_L1_HAS_NEXT_PAGE &&
+             d->arch.ops->guest_paging_levels == PAGING_L2)
         {
 #if CONFIG_PAGING_LEVELS >= 3
-            /* For 32-bit HVM guest, 2 shadow L1s to simulate 1 guest L1
-             * So need allocate 2 continues shadow L1 each time.
+            /*
+             * For 32-bit HVM guest, 2 shadow L1s are required to
+             * simulate 1 guest L1 So need allocate 2 shadow L1
+             * pages each time.
+             *
+             * --> Need to avoid alloc_domheap_pages.
              */
             page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
             if (!page)
@@ -237,7 +245,7 @@
             memset(l1, 0, PAGE_SIZE);
             unmap_domain_page(l1);

-            l1 = map_domain_page(page_to_mfn(page+1));
+            l1 = map_domain_page(page_to_mfn(page + 1));
             memset(l1, 0, PAGE_SIZE);
             unmap_domain_page(l1);
 #else
@@ -265,14 +273,12 @@
     else {
 #if CONFIG_PAGING_LEVELS == 2
         page = alloc_domheap_page(NULL);
-#elif CONFIG_PAGING_LEVELS == 3
-        if ( psh_type == PGT_l3_shadow )
+#elif CONFIG_PAGING_LEVELS >= 3
+        if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
+             psh_type == PGT_l4_shadow )      /* allocated for PAE PDP page */
             page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
-        else
-            page = alloc_domheap_page(NULL);
-#elif CONFIG_PAGING_LEVELS == 4
-        if ( (psh_type == PGT_l4_shadow) &&
-             (d->arch.ops->guest_paging_levels != PAGING_L4) )
+        else if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
+                  psh_type == PGT_l3_shadow ) /* allocated for PAE PDP page */
             page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
         else
             page = alloc_domheap_page(NULL);
@@ -550,7 +556,7 @@
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    l1_pgentry_t *spl1e;
+    l1_pgentry_t *spl1e, *spl1e_next = 0;
     l2_pgentry_t sl2e;
     guest_l1_pgentry_t *gpl1e;
     guest_l2_pgentry_t gl2e = {0};
@@ -599,8 +605,9 @@
     }
 #endif

-#if CONFIG_PAGING_LEVELS >=3
-    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( SH_L1_HAS_NEXT_PAGE &&
+         d->arch.ops->guest_paging_levels == PAGING_L2 )
     {
         /* for 32-bit HVM guest on 64-bit or PAE host,
          * need update two L2 entries each time
@@ -639,14 +646,20 @@
         tmp_gmfn = gmfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
         gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);

-        /* If the PGT_l1_shadow has two continual pages */
-#if CONFIG_PAGING_LEVELS >=3
-        if (d->arch.ops->guest_paging_levels == PAGING_L2)
-            __shadow_get_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &tmp_sl2e);
+        /* If the PGT_l1_shadow has two contiguous pages */
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( SH_L1_HAS_NEXT_PAGE &&
+             d->arch.ops->guest_paging_levels == PAGING_L2 )
+            __shadow_get_l2e(v, va & ~((1UL << L2_PAGETABLE_SHIFT_32) - 1), &tmp_sl2e);
         else
 #endif
             __shadow_get_l2e(v, va, &tmp_sl2e);
+
+        spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
+
+        if ( SH_L1_HAS_NEXT_PAGE )
+            spl1e_next = (l1_pgentry_t *) map_domain_page(
+                (l2e_get_pfn(tmp_sl2e) + 1UL));

         for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
@@ -665,7 +678,12 @@
                 }
                 break;
             }
-            spl1e[i] = sl1e;
+
+            if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
+                spl1e_next[i - L1_PAGETABLE_ENTRIES] = sl1e;
+            else
+                spl1e[i] = sl1e;
+
             if ( unlikely(i < min) )
                 min = i;
             if ( likely(i > max) )
@@ -678,6 +696,9 @@

         unmap_domain_page(gpl1e);
         unmap_domain_page(spl1e);
+
+        if ( SH_L1_HAS_NEXT_PAGE )
+            unmap_domain_page(spl1e_next);
     }
 }
@@ -1032,7 +1053,7 @@
     l2_pgentry_t sl2e;
     struct domain *d = v->domain;

-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
     {
         l4_pgentry_t sl4e;
         l3_pgentry_t sl3e;
@@ -1322,6 +1343,7 @@
     u32 max_refs_to_find, unsigned long prediction)
 {
     l1_pgentry_t *pt = map_domain_page(pt_mfn);
+    l1_pgentry_t *pt_next = 0, *sl1e_p;
     l1_pgentry_t match;
     unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
     int i;
@@ -1335,28 +1357,46 @@
                   PGT_fl1_shadow);
 #endif

+    if ( SH_L1_HAS_NEXT_PAGE )
+        pt_next = map_domain_page(pt_mfn + 1);
+
     match = l1e_from_pfn(readonly_gmfn, flags);

-    if ( shadow_mode_external(d) ) {
+    if ( shadow_mode_external(d) )
+    {
         i = (mfn_to_page(readonly_gmfn)->u.inuse.type_info & PGT_va_mask)
             >> PGT_va_shift;

-        if ( (i >= 0 && i < L1_PAGETABLE_ENTRIES) &&
-             !l1e_has_changed(pt[i], match, flags) &&
-             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+        if ( SH_L1_HAS_NEXT_PAGE &&
+             i >= L1_PAGETABLE_ENTRIES )
+            sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
+        else
+            sl1e_p = &pt[i];
+
+        if ( (i >= 0 && i < GUEST_L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(*sl1e_p, match, flags) &&
+             fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) &&
              !prediction )
             goto out;
     }

-    for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
-    {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
-             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
+    for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( SH_L1_HAS_NEXT_PAGE &&
+             i >= L1_PAGETABLE_ENTRIES )
+            sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
+        else
+            sl1e_p = &pt[i];
+
+        if ( unlikely(!l1e_has_changed(*sl1e_p, match, flags)) &&
+             fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) )
             break;
     }

 out:
     unmap_domain_page(pt);
+    if ( SH_L1_HAS_NEXT_PAGE )
+        unmap_domain_page(pt_next);
     return found;
 }
@@ -1512,6 +1552,7 @@
     {
         guest_l1_pgentry_t *guest1 = guest;
         l1_pgentry_t *shadow1 = shadow;
+        l1_pgentry_t *shadow1_next = 0, *sl1e_p;
         guest_l1_pgentry_t *snapshot1 = snapshot;
         int unshadow_l1 = 0;
@@ -1525,19 +1566,28 @@

         changed = 0;

+        if ( SH_L1_HAS_NEXT_PAGE && shadow1 )
+            shadow1_next = map_domain_page(smfn + 1);
+
         for ( i = min_shadow; i <= max_shadow; i++ )
         {
+
+            if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
+                sl1e_p = &shadow1_next[i - L1_PAGETABLE_ENTRIES];
+            else
+                sl1e_p = &shadow1[i];
+
             if ( (i < min_snapshot) || (i > max_snapshot) ||
                  guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
             {
                 int error;

-                error = validate_pte_change(d, guest1[i], &shadow1[i]);
+                error = validate_pte_change(d, guest1[i], sl1e_p);
                 if ( error == -1 )
                     unshadow_l1 = 1;
                 else {
                     need_flush |= error;
-                    set_guest_back_ptr(d, shadow1[i], smfn, i);
+                    set_guest_back_ptr(d, *sl1e_p, smfn, i);
                 }
                 // can't update snapshots of linear page tables -- they
                 // are used multiple times...
@@ -1547,6 +1597,10 @@
                 changed++;
             }
         }
+
+        if ( shadow1_next )
+            unmap_domain_page(shadow1_next);
+
         perfc_incrc(resync_l1);
         perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
         perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
@@ -1690,7 +1744,6 @@
     case PGT_l4_shadow:
     {
         guest_root_pgentry_t *guest_root = guest;
-        l4_pgentry_t *shadow4 = shadow;
         guest_root_pgentry_t *snapshot_root = snapshot;

         changed = 0;
@@ -1702,12 +1755,18 @@
             if ( root_entry_has_changed(
                     new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
             {
+#ifndef GUEST_PGENTRY_32
+                l4_pgentry_t *shadow4 = shadow;
+
                 if ( d->arch.ops->guest_paging_levels == PAGING_L4 )
                 {
                     need_flush |= validate_entry_change(
                         d, (pgentry_64_t *)&new_root_e,
                         (pgentry_64_t *)&shadow4[i],
                         shadow_type_to_level(stype));
-                } else {
+                }
+                else
+#endif
+                {
                     validate_bl2e_change(d, &new_root_e, shadow, i);
                 }
                 changed++;
@@ -1822,12 +1881,12 @@
 #endif

 #if CONFIG_PAGING_LEVELS >= 3
-    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
        need_flush |= resync_all(d, PGT_l4_shadow);
     else
        need_flush |= resync_all(d, PGT_l2_shadow);

-    if (d->arch.ops->guest_paging_levels >= PAGING_L3)
+    if ( d->arch.ops->guest_paging_levels >= PAGING_L3 )
     {
         need_flush |= resync_all(d, PGT_l3_shadow);
         need_flush |= resync_all(d, PGT_l4_shadow);
@@ -2184,7 +2243,7 @@
     if ( !get_shadow_ref(smfn) )
         BUG();
     old_smfn = pagetable_get_pfn(v->arch.shadow_table);
-    v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
+    v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT);
     if ( old_smfn )
         put_shadow_ref(old_smfn);
@@ -2251,12 +2310,36 @@
     }
 #endif /* CONFIG_PAGING_LEVELS == 2 */

+#if CONFIG_PAGING_LEVELS == 3
+    /*
+     * fixup pointers in monitor table, as necessary
+     */
+    if ( max_mode == SHM_external )
+    {
+        l3_pgentry_t *mpl3e = (l3_pgentry_t *) v->arch.monitor_vtable;
+        l2_pgentry_t *spl2e;
+        unsigned long s2mfn;
+        int i;
+
+        ASSERT( shadow_mode_translate(d) );
+        s2mfn = l3e_get_pfn(mpl3e[L3_PAGETABLE_ENTRIES - 1]);
+
+        ASSERT( s2mfn);
+        spl2e = map_domain_page(s2mfn);
+
+        for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+            spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
+                l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
+                l2e_empty();
+
+        unmap_domain_page(spl2e);
+        local_flush_tlb();
+    }
+#endif
+
     if(likely(need_sync))
         shadow_sync_all(d);
-
-#if CONFIG_PAGING_LEVELS == 3
-    /* FIXME: PAE code to be written */
-#endif
 }
@@ -2733,6 +2816,55 @@
 #endif // SHADOW_DEBUG
 #endif // this code has not been updated for 32pae & 64 bit modes

+#if CONFIG_PAGING_LEVELS >= 3
+/****************************************************************************/
+/* 64-bit shadow-mode code testing */
+/****************************************************************************/
+/*
+ * init_bl2() is for 32-bit VMX guest on 64-bit host
+ * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
+ */
+static inline unsigned long init_bl2(
+    struct domain *d, unsigned long gpfn, unsigned long gmfn)
+{
+    unsigned int count;
+    unsigned long sl2mfn;
+    unsigned long smfn;
+    struct page_info *page;
+    l4_pgentry_t *spl4e;
+    void *l2;
+
+    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
+    {
+        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
+        BUG(); /* XXX Deal gracefully with failure. */
+    }
+
+    spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+    /* Map the self entry, L4&L3 share the same page */
+    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+
+    /* Allocate 4 shadow L2s */
+    page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
+    if ( !page )
+        domain_crash_synchronous();
+
+    for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
+    {
+        sl2mfn = page_to_mfn(page+count);
+        l2 = map_domain_page(sl2mfn);
+        memset(l2, 0, PAGE_SIZE);
+        unmap_domain_page(l2);
+        spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
+    }
+
+    unmap_domain_page(spl4e);
+
+    return smfn;
+}
+#endif
+
 #if CONFIG_PAGING_LEVELS == 3
 static unsigned long shadow_l3_table(
     struct domain *d, unsigned long gpfn, unsigned long gmfn)
@@ -2742,10 +2874,18 @@

     perfc_incrc(shadow_l3_table_count);

+    SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
+
+    if ( SH_L1_HAS_NEXT_PAGE &&
+         d->arch.ops->guest_paging_levels == PAGING_L2 )
+    {
+        return init_bl2(d, gpfn, gmfn);
+    }
+
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
     {
-        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
-        BUG(); /* XXX Deal gracefully with failure. */
+        printk("Couldn't alloc an L3 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
+        BUG(); /* XXX Deal gracefully with failure. */
     }

     spl3e = (l3_pgentry_t *)map_domain_page(smfn);
@@ -2825,53 +2965,17 @@

     return smfn;
 }
-
+#endif /* CONFIG_PAGING_LEVELS == 3 */
+
+#ifndef GUEST_PGENTRY_32
 static unsigned long gva_to_gpa_pae(unsigned long gva)
 {
     BUG();
     return 43;
 }
-#endif /* CONFIG_PAGING_LEVELS == 3 */
+#endif

 #if CONFIG_PAGING_LEVELS == 4
-/****************************************************************************/
-/* 64-bit shadow-mode code testing */
-/****************************************************************************/
-/*
- * init_bl2() is for 32-bit HVM guest on 64-bit host
- * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
- */
-static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn)
-{
-    unsigned int count;
-    unsigned long sl2mfn;
-    struct page_info *page;
-    void *l2;
-
-    memset(spl4e, 0, PAGE_SIZE);
-
-    /* Map the self entry, L4&L3 share the same page */
-    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
-
-    /* Allocate 4 shadow L2s */
-    page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
-    if (!page)
-        domain_crash_synchronous();
-
-    for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
-    {
-        sl2mfn = page_to_mfn(page+count);
-        l2 = map_domain_page(sl2mfn);
-        memset(l2, 0, PAGE_SIZE);
-        unmap_domain_page(l2);
-        spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
-    }
-
-    unmap_domain_page(spl4e);
-
-    return smfn;
-}
-
 static unsigned long shadow_l4_table(
     struct domain *d, unsigned long gpfn, unsigned long gmfn)
 {
@@ -2882,6 +2986,11 @@

     perfc_incrc(shadow_l4_table_count);

+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
+    {
+        return init_bl2(d, gpfn, gmfn);
+    }
+
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
         printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
@@ -2889,10 +2998,6 @@
     }

     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
-    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
-        return init_bl2(spl4e, smfn);
-    }

     /* Install hypervisor and 4x linear p.t. mapings. */
     if ( (PGT_base_page_table == PGT_l4_page_table) &&
@@ -3568,8 +3673,6 @@
     shadow_unlock(d);
 }

-
-#if CONFIG_PAGING_LEVELS == 4
 static unsigned long gva_to_gpa_64(unsigned long gva)
 {
     struct vcpu *v = current;
@@ -3588,8 +3691,25 @@
     return gpa;
 }

+/*
+ * The naming convention of the shadow_ops:
+ * MODE_<pgentry size>_<guest paging levels>_HANDLER
+ */
 #ifndef GUEST_PGENTRY_32
-struct shadow_ops MODE_F_HANDLER = {
+struct shadow_ops MODE_64_3_HANDLER = {
+    .guest_paging_levels        = 3,
+    .invlpg                     = shadow_invlpg_64,
+    .fault                      = shadow_fault_64,
+    .update_pagetables          = shadow_update_pagetables,
+    .sync_all                   = sync_all,
+    .remove_all_write_access    = remove_all_write_access,
+    .do_update_va_mapping       = do_update_va_mapping,
+    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
+    .is_out_of_sync             = is_out_of_sync,
+    .gva_to_gpa                 = gva_to_gpa_pae,
+};
+
+struct shadow_ops MODE_64_4_HANDLER = {
     .guest_paging_levels        = 4,
     .invlpg                     = shadow_invlpg_64,
     .fault                      = shadow_fault_64,
@@ -3602,13 +3722,11 @@
     .gva_to_gpa                 = gva_to_gpa_64,
 };
 #endif /* GUEST_PGENTRY_32 */
-#endif /* CONFIG_PAGING_LEVELS == 4 */
-
 #endif /* CONFIG_PAGING_LEVELS >= 3 */

 #if CONFIG_PAGING_LEVELS == 2
-struct shadow_ops MODE_A_HANDLER = {
+struct shadow_ops MODE_32_2_HANDLER = {
     .guest_paging_levels        = 2,
     .invlpg                     = shadow_invlpg_32,
     .fault                      = shadow_fault_32,
@@ -3620,25 +3738,9 @@
     .is_out_of_sync             = is_out_of_sync,
     .gva_to_gpa                 = gva_to_gpa_64,
 };
-
-#elif CONFIG_PAGING_LEVELS == 3
-
-struct shadow_ops MODE_B_HANDLER = {
-    .guest_paging_levels        = 3,
-    .invlpg                     = shadow_invlpg_64,
-    .fault                      = shadow_fault_64,
-    .update_pagetables          = shadow_update_pagetables,
-    .sync_all                   = sync_all,
-    .remove_all_write_access    = remove_all_write_access,
-    .do_update_va_mapping       = do_update_va_mapping,
-    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
-    .is_out_of_sync             = is_out_of_sync,
-    .gva_to_gpa                 = gva_to_gpa_pae,
-};
-
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3 || \
+#endif
+
+#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) || \
     ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )

 /*
@@ -3697,7 +3799,7 @@
     }

     __shadow_get_l1e(v, vpa, &sl1e);
-    
+
     if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) )
     {
         sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/shadow_guest32.c
--- a/xen/arch/x86/shadow_guest32.c     Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/shadow_guest32.c     Mon Feb 6 22:25:31 2006
@@ -1,9 +1,8 @@
 #define GUEST_PGENTRY_32
-#if defined (__x86_64__)
 #include "shadow.c"
-struct shadow_ops MODE_D_HANDLER = {
-    .guest_paging_levels        = 2,
+struct shadow_ops MODE_64_2_HANDLER = {
+    .guest_paging_levels        = 2,
     .invlpg                     = shadow_invlpg_64,
     .fault                      = shadow_fault_64,
     .update_pagetables          = shadow_update_pagetables,
@@ -15,4 +14,3 @@
     .gva_to_gpa                 = gva_to_gpa_64,
 };

-#endif

diff -r 17b5d5cca484 -r f030f4b565a5 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Mon Feb 6 18:02:36 2006
+++ b/xen/arch/x86/shadow_public.c      Mon Feb 6 22:25:31 2006
@@ -29,19 +29,9 @@
 #include <xen/event.h>
 #include <xen/sched.h>
 #include <xen/trace.h>
-
-#if CONFIG_PAGING_LEVELS >= 3
 #include <asm/shadow_64.h>
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
-extern struct shadow_ops MODE_F_HANDLER;
-extern struct shadow_ops MODE_D_HANDLER;
-
 static void free_p2m_table(struct vcpu *v);
-#endif
-
-extern struct shadow_ops MODE_A_HANDLER;

 #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
@@ -120,24 +110,27 @@
     shadow_direct_map_clean(v);

     switch(levels) {
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS == 4
     case 4:
-        if ( d->arch.ops != &MODE_F_HANDLER )
-            d->arch.ops = &MODE_F_HANDLER;
+        if ( d->arch.ops != &MODE_64_4_HANDLER )
+            d->arch.ops = &MODE_64_4_HANDLER;
         shadow_unlock(d);
         return 1;
 #endif
+
+#if CONFIG_PAGING_LEVELS >= 3
     case 3:
+        if ( d->arch.ops != &MODE_64_3_HANDLER )
+            d->arch.ops = &MODE_64_3_HANDLER;
+        shadow_unlock(d);
+        return 1;
+#endif
+
     case 2:
 #if CONFIG_PAGING_LEVELS == 2
-        if ( d->arch.ops != &MODE_A_HANDLER )
-            d->arch.ops = &MODE_A_HANDLER;
-#elif CONFIG_PAGING_LEVELS == 3
-        if ( d->arch.ops != &MODE_B_HANDLER )
-            d->arch.ops = &MODE_B_HANDLER;
-#elif CONFIG_PAGING_LEVELS == 4
-        if ( d->arch.ops != &MODE_D_HANDLER )
-            d->arch.ops = &MODE_D_HANDLER;
+        if ( d->arch.ops != &MODE_32_2_HANDLER )
+            d->arch.ops = &MODE_32_2_HANDLER;
+#elif CONFIG_PAGING_LEVELS >= 3
+        if ( d->arch.ops != &MODE_64_2_HANDLER )
+            d->arch.ops = &MODE_64_2_HANDLER;
 #endif
         shadow_unlock(d);
         return 1;
@@ -235,14 +228,14 @@
     pgentry_64_t *ple = map_domain_page(smfn);
     int i, external = shadow_mode_external(d);

-#if CONFIG_PAGING_LEVELS >=3
+#if CONFIG_PAGING_LEVELS >= 3
     if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
     {
         struct page_info *page = mfn_to_page(smfn);
         for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
         {
             if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
-                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
+                free_fake_shadow_l2(d, entry_get_pfn(ple[i]));
         }

         page = mfn_to_page(entry_get_pfn(ple[0]));
@@ -346,15 +339,79 @@
     v->arch.monitor_vtable = 0;
 }
 #elif CONFIG_PAGING_LEVELS == 3
-
 static void alloc_monitor_pagetable(struct vcpu *v)
 {
-    BUG(); /* PAE not implemented yet */
+    unsigned long m2mfn, m3mfn;
+    l2_pgentry_t *mpl2e;
+    l3_pgentry_t *mpl3e;
+    struct page_info *m2mfn_info, *m3mfn_info, *page;
+    struct domain *d = v->domain;
+    int i;
+
+    ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
+
+    m3mfn_info = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
+    ASSERT( m3mfn_info );
+
+    m3mfn = page_to_mfn(m3mfn_info);
+    mpl3e = (l3_pgentry_t *) map_domain_page_global(m3mfn);
+    memset(mpl3e, 0, L3_PAGETABLE_ENTRIES * sizeof(l3_pgentry_t));
+
+    m2mfn_info = alloc_domheap_page(NULL);
+    ASSERT( m2mfn_info );
+
+    m2mfn = page_to_mfn(m2mfn_info);
+    mpl2e = (l2_pgentry_t *) map_domain_page(m2mfn);
+    memset(mpl2e, 0, L2_PAGETABLE_ENTRIES * sizeof(l2_pgentry_t));
+
+    memcpy(&mpl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+    /*
+     * Map L2 page into L3
+     */
+    mpl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(m2mfn, _PAGE_PRESENT);
+    page = l3e_get_page(mpl3e[L3_PAGETABLE_ENTRIES - 1]);
+
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            l2e_from_page(
+                virt_to_page(d->arch.mm_perdomain_pt) + i,
+                __PAGE_HYPERVISOR);
+    for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+        mpl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+            (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
+            l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
+            l2e_empty();
+    mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
+
+    unmap_domain_page(mpl2e);
+
+    v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */
+    v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
+
+    if ( v->vcpu_id == 0 )
+        alloc_p2m_table(d);
 }

 void free_monitor_pagetable(struct vcpu *v)
 {
-    BUG(); /* PAE not implemented yet */
+    unsigned long m2mfn, m3mfn;
+    /*
+     * free monitor_table.
+     */
+    if ( v->vcpu_id == 0 )
+        free_p2m_table(v);
+
+    m3mfn = pagetable_get_pfn(v->arch.monitor_table);
+    m2mfn = l2e_get_pfn(v->arch.monitor_vtable[L3_PAGETABLE_ENTRIES - 1]);
+
+    free_domheap_page(mfn_to_page(m2mfn));
+    unmap_domain_page_global(v->arch.monitor_vtable);
+    free_domheap_page(mfn_to_page(m3mfn));
+
+    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_vtable = 0;
 }
 #endif
@@ -475,24 +532,35 @@
 free_shadow_l1_table(struct domain *d, unsigned long smfn)
 {
     l1_pgentry_t *pl1e = map_domain_page(smfn);
+    l1_pgentry_t *pl1e_next = 0, *sl1e_p;
     int i;
     struct page_info *spage = mfn_to_page(smfn);
     u32 min_max = spage->tlbflush_timestamp;
     int min = SHADOW_MIN(min_max);
     int max;

-    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
+    {
         max = SHADOW_MAX_GUEST32(min_max);
+        pl1e_next = map_domain_page(smfn + 1);
+    }
     else
         max = SHADOW_MAX(min_max);

     for ( i = min; i <= max; i++ )
     {
-        shadow_put_page_from_l1e(pl1e[i], d);
-        pl1e[i] = l1e_empty();
+        if ( pl1e_next && i >= L1_PAGETABLE_ENTRIES )
+            sl1e_p = &pl1e_next[i - L1_PAGETABLE_ENTRIES];
+        else
+            sl1e_p = &pl1e[i];
+
+        shadow_put_page_from_l1e(*sl1e_p, d);
+        *sl1e_p = l1e_empty();
     }

     unmap_domain_page(pl1e);
+
+    if ( pl1e_next )
+        unmap_domain_page(pl1e_next);
 }
@@ -547,10 +615,8 @@
     int i;

     for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
-    {
         if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
             put_shadow_ref(entry_get_pfn(ple[i]));
-    }

     unmap_domain_page(ple);
 }
@@ -844,7 +910,7 @@

     if (d->arch.ops->guest_paging_levels == PAGING_L2)
     {
-#if CONFIG_PAGING_LEVELS >=4
+#if CONFIG_PAGING_LEVELS >=3
         free_domheap_pages(page, SL1_ORDER);
 #else
         free_domheap_page(page);
@@ -1011,13 +1077,6 @@
             printk("alloc_p2m_table failed (out-of-memory?)\n");
             goto nomem;
         }
-    }
-    else
-    {
-        // external guests provide their own memory for their P2M maps.
-        //
-        ASSERT(d == page_get_owner(mfn_to_page(pagetable_get_pfn(
-            d->arch.phys_table))));
     }
 }
@@ -1316,7 +1375,6 @@
 {
     struct list_head *list_ent;
     unsigned long va = RO_MPT_VIRT_START; /* phys_to_machine_mapping */
-//    unsigned long va = PML4_ADDR(264);

 #if CONFIG_PAGING_LEVELS >= 4
     l4_pgentry_t *l4tab = NULL;
@@ -1360,10 +1418,6 @@
         if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
         {
             page = alloc_domheap_page(NULL);
-
-            if ( !l3tab )
-                unmap_domain_page(l3tab);
-
             l3tab = map_domain_page(page_to_mfn(page));
             memset(l3tab, 0, PAGE_SIZE);
             l4e = l4tab[l4_table_offset(va)] =
@@ -1376,9 +1430,6 @@
         if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
         {
             page = alloc_domheap_page(NULL);
-            if ( !l2tab )
-                unmap_domain_page(l2tab);
-
             l2tab = map_domain_page(page_to_mfn(page));
             memset(l2tab, 0, PAGE_SIZE);
             l3e = l3tab[l3_table_offset(va)] =
@@ -1391,10 +1442,6 @@
         if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
         {
             page = alloc_domheap_page(NULL);
-
-            if ( !l1tab )
-                unmap_domain_page(l1tab);
-
             l1tab = map_domain_page(page_to_mfn(page));
             memset(l1tab, 0, PAGE_SIZE);
             l2e = l2tab[l2_table_offset(va)] =
@@ -1407,9 +1454,6 @@
         if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
         {
             page = alloc_domheap_page(NULL);
-            if ( !l0tab )
-                unmap_domain_page(l0tab);
-
             l0tab = map_domain_page(page_to_mfn(page));
             memset(l0tab, 0, PAGE_SIZE);
             l1e = l1tab[l1_table_offset(va)] =
@@ -1418,9 +1462,25 @@
         else if ( l0tab == NULL)
             l0tab = map_domain_page(l1e_get_pfn(l1e));

-        l0tab[i & ((1 << PAGETABLE_ORDER) - 1) ] = pfn;
+        l0tab[i & ((PAGE_SIZE / sizeof (pfn)) - 1) ] = pfn;
         list_ent = frame_table[pfn].list.next;
         va += sizeof (pfn);
+
+        if ( l2tab )
+        {
+            unmap_domain_page(l2tab);
+            l2tab = NULL;
+        }
+        if ( l1tab )
+        {
+            unmap_domain_page(l1tab);
+            l1tab = NULL;
+        }
+        if ( l0tab )
+        {
+            unmap_domain_page(l0tab);
+            l0tab = NULL;
+        }
     }
 #if CONFIG_PAGING_LEVELS >= 4
     unmap_domain_page(l4tab);
@@ -1428,14 +1488,10 @@
 #if CONFIG_PAGING_LEVELS >= 3
     unmap_domain_page(l3tab);
 #endif
-    unmap_domain_page(l2tab);
-    unmap_domain_page(l1tab);
-    unmap_domain_page(l0tab);
-
     return 1;
 }

-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
 static void
 free_p2m_table(struct vcpu *v)
 {
@@ -1447,9 +1503,9 @@
 #if CONFIG_PAGING_LEVELS >= 3
     l3_pgentry_t *l3tab;
     l3_pgentry_t l3e;
+#endif
+#if CONFIG_PAGING_LEVELS == 4
     int i3;
-#endif
-#if CONFIG_PAGING_LEVELS == 4
     l4_pgentry_t *l4tab;
     l4_pgentry_t l4e;
 #endif
@@ -1463,6 +1519,10 @@
 #if CONFIG_PAGING_LEVELS == 3
     l3tab = map_domain_page(
         pagetable_get_pfn(v->arch.monitor_table));
+
+    va = RO_MPT_VIRT_START;
+    l3e = l3tab[l3_table_offset(va)];
+    l2tab = map_domain_page(l3e_get_pfn(l3e));
 #endif

     for ( va = RO_MPT_VIRT_START; va < RO_MPT_VIRT_END; )
@@ -1473,9 +1533,10 @@
         if ( l4e_get_flags(l4e) & _PAGE_PRESENT )
         {
             l3tab = map_domain_page(l4e_get_pfn(l4e));
-#endif
-            for ( i3 = 0; i3 < L1_PAGETABLE_ENTRIES; i3++ )
+
+            for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; i3++ )
             {
+                l3e = l3tab[l3_table_offset(va)];
                 if ( l3e_get_flags(l3e) & _PAGE_PRESENT )
                 {
@@ -1483,15 +1544,19 @@

                     l2tab = map_domain_page(l3e_get_pfn(l3e));

-                    for ( i2 = 0; i2 < L1_PAGETABLE_ENTRIES; i2++ )
+                    for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                     {
+#endif
                         l2e = l2tab[l2_table_offset(va)];

                         if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
                         {
                             int i1;

                             l1tab = map_domain_page(l2e_get_pfn(l2e));
-
+
+                            /*
+                             * unsigned long phys_to_machine_mapping[]
+                             */
                             for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
                             {
                                 l1e = l1tab[l1_table_offset(va)];
@@ -1499,26 +1564,28 @@
                                 if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
                                     free_domheap_page(mfn_to_page(l1e_get_pfn(l1e)));

-                                va += 1UL << L1_PAGETABLE_SHIFT;
+                                va += PAGE_SIZE;
                             }
                             unmap_domain_page(l1tab);
                             free_domheap_page(mfn_to_page(l2e_get_pfn(l2e)));
                         }
                         else
-                            va += 1UL << L2_PAGETABLE_SHIFT;
+                            va += PAGE_SIZE * L1_PAGETABLE_ENTRIES;
+
+#if CONFIG_PAGING_LEVELS == 4
                     }
                     unmap_domain_page(l2tab);
                     free_domheap_page(mfn_to_page(l3e_get_pfn(l3e)));
                 }
                 else
-                    va += 1UL << L3_PAGETABLE_SHIFT;
+                    va += PAGE_SIZE * L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES;
             }
-#if CONFIG_PAGING_LEVELS == 4
             unmap_domain_page(l3tab);
             free_domheap_page(mfn_to_page(l4e_get_pfn(l4e)));
         }
         else
-            va += 1UL << L4_PAGETABLE_SHIFT;
+            va += PAGE_SIZE *
+                L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES * L3_PAGETABLE_ENTRIES;
 #endif
     }

diff -r 17b5d5cca484 -r f030f4b565a5 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Feb 6 18:02:36 2006
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Feb 6 22:25:31 2006
@@ -174,10 +174,10 @@
 #define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */

 /* These bits in the CR4 are owned by the host */
-#ifdef __i386__
+#if CONFIG_PAGING_LEVELS >= 3
+#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
+#else
 #define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
 #endif

 #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"

diff -r 17b5d5cca484 -r f030f4b565a5 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Mon Feb 6 18:02:36 2006
+++ b/xen/include/asm-x86/shadow_64.h   Mon Feb 6 22:25:31 2006
@@ -29,7 +29,15 @@
 #include <asm/shadow.h>
 #include <asm/shadow_ops.h>

-extern struct shadow_ops MODE_B_HANDLER;
+/*
+ * The naming convention of the shadow_ops:
+ * MODE_<pgentry size>_<guest paging levels>_HANDLER
+ */
+extern struct shadow_ops MODE_64_2_HANDLER;
+extern struct shadow_ops MODE_64_3_HANDLER;
+#if CONFIG_PAGING_LEVELS == 4
+extern struct shadow_ops MODE_64_4_HANDLER;
+#endif

 #if CONFIG_PAGING_LEVELS == 3
 #define L4_PAGETABLE_SHIFT      39
@@ -118,7 +126,6 @@
 #endif
 #endif
         default:
-            //printk("<table_offset_64> level %d is too big\n", level);
             return -1;
     }
 }
@@ -142,7 +149,7 @@
 }

 static inline int __entry(
-    struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag)
+    struct vcpu *v, unsigned long va, pgentry_64_t *e_p, u32 flag)
 {
     int i;
     pgentry_64_t *le_e;
@@ -197,7 +204,7 @@
 }

 static inline int __rw_entry(
-    struct vcpu *v, u64 va, void *e_p, u32 flag)
+    struct vcpu *v, unsigned long va, void *e_p, u32 flag)
 {
     pgentry_64_t *e = (pgentry_64_t *)e_p;
@@ -235,7 +242,7 @@
     __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)

 static inline int __guest_set_l2e(
-    struct vcpu *v, u64 va, void *value, int size)
+    struct vcpu *v, unsigned long va, void *value, int size)
 {
     switch(size) {
         case 4:
@@ -258,10 +265,10 @@
 }

 #define __guest_set_l2e(v, va, value) \
-    __guest_set_l2e(v, (u64)va, value, sizeof(*value))
+    __guest_set_l2e(v, (unsigned long)va, value, sizeof(*value))

 static inline int __guest_get_l2e(
-    struct vcpu *v, u64 va, void *gl2e, int size)
+    struct vcpu *v, unsigned long va, void *gl2e, int size)
 {
     switch(size) {
         case 4:
@@ -283,10 +290,10 @@
 }

 #define __guest_get_l2e(v, va, gl2e) \
-    __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
+    __guest_get_l2e(v, (unsigned long)va, gl2e, sizeof(*gl2e))

 static inline int __guest_set_l1e(
-    struct vcpu *v, u64 va, void *value, int size)
+    struct vcpu *v, unsigned long va, void *value, int size)
 {
     switch(size) {
         case 4:
@@ -322,10 +329,10 @@
 }

 #define __guest_set_l1e(v, va, value) \
-    __guest_set_l1e(v, (u64)va, value, sizeof(*value))
+    __guest_set_l1e(v, (unsigned long)va, value, sizeof(*value))

 static inline int __guest_get_l1e(
-    struct vcpu *v, u64 va, void *gl1e, int size)
+    struct vcpu *v, unsigned long va, void *gl1e, int size)
 {
     switch(size) {
         case 4:
@@ -362,7 +369,7 @@
 }

 #define __guest_get_l1e(v, va, gl1e) \
-    __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
+    __guest_get_l1e(v, (unsigned long)va, gl1e, sizeof(*gl1e))

 static inline void entry_general(
     struct domain *d,

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog