diff -r 79920258510a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c	Tue Feb 27 05:43:42 2007 -0600
@@ -155,7 +155,8 @@ int hvm_domain_initialise(struct domain
     spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
 
-    rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
+    /* paging support will be determined inside paging.c */
+    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
     if ( rc != 0 )
         return rc;
 
diff -r 79920258510a xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c	Tue Feb 27 18:06:37 2007 -0600
@@ -49,6 +49,7 @@
 #include
 #include
 #include
+#include
 
 #define SVM_EXTRA_DEBUG
 
@@ -75,6 +76,10 @@ static void *root_vmcb[NR_CPUS] __read_m
 /* physical address of above for host VMSAVE/VMLOAD */
 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
+
+/* hardware assisted paging bits */
+extern int opt_hap_enabled;
+extern int hap_capable_system;
 
 static inline void svm_inject_exception(struct vcpu *v, int trap,
                                         int ev, int error_code)
@@ -256,27 +261,46 @@ static inline int long_mode_do_msr_write
     }
 
 #ifdef __x86_64__
-    /* LME: 0 -> 1 */
-    if ( msr_content & EFER_LME &&
-         !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
-    {
-        if ( svm_paging_enabled(v) ||
-             !test_bit(SVM_CPU_STATE_PAE_ENABLED,
-                       &v->arch.hvm_svm.cpu_state) )
-        {
-            gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
-                     "in paging mode or PAE bit is not set\n");
-            goto gp_fault;
-        }
-        set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
-    }
-
-    /* We have already recorded that we want LME, so it will be set
-     * next time CR0 gets updated. So we clear that bit and continue.
-     */
-    if ((msr_content ^ vmcb->efer) & EFER_LME)
-        msr_content &= ~EFER_LME;
-    /* No update for LME/LMA since it have no effect */
+    if ( !paging_mode_hap(v->domain) )
+    {
+        /* LME: 0 -> 1 */
+        if ( msr_content & EFER_LME &&
+             !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+        {
+            if ( svm_paging_enabled(v) ||
+                 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
+                           &v->arch.hvm_svm.cpu_state) )
+            {
+                gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
+                         "in paging mode or PAE bit is not set\n");
+                goto gp_fault;
+            }
+            set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
+        }
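+
+        /* With HAP (the "else" arm below), hardware walks the guest's own
+         * page tables, so the shadow-specific LME bookkeeping above is
+         * skipped and the guest's EFER value is used directly (only
+         * EFER_SVME is forced on at the bottom of this case). */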
+
+        /* We have already recorded that we want LME, so it will be set
+         * next time CR0 gets updated. So we clear that bit and continue.
+         */
+        if ((msr_content ^ vmcb->efer) & EFER_LME)
+            msr_content &= ~EFER_LME;
+        /* No update for LME/LMA since it has no effect */
+    }
+    else
+    {
+        if ( msr_content & EFER_LME )
+            set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
+        else
+            clear_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
+
+        /* check whether guest is about to turn off long mode */
+        if ( !(msr_content & EFER_LME) && !(vmcb->cr0 & X86_CR0_PG) &&
+             (vmcb->efer & EFER_LME) )
+        {
+            clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+        }
+
+        paging_update_paging_modes(v);
+    }
 #endif
     vmcb->efer = msr_content | EFER_SVME;
     break;
 
@@ -905,6 +929,10 @@ static void arch_svm_do_launch(struct vc
 {
     svm_do_launch(v);
 
+    if ( paging_mode_hap(v->domain) ) {
+        v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    }
+
     if ( v->vcpu_id != 0 )
     {
         cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
 
@@ -1011,6 +1039,21 @@ static struct hvm_function_table svm_fun
     .event_injection_faulted = svm_event_injection_faulted
 };
 
+void svm_npt_detect(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    /* check CPUID for nested paging support */
+    cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+    if ( edx & 0x01 ) { /* nested paging */
+        hap_capable_system = 1;
+    }
+    else if ( opt_hap_enabled ) {
+        printk(" nested paging is not supported by this CPU.\n");
+        hap_capable_system = 0; /* no nested paging, so clear the flag */
+    }
+}
+
 int start_svm(void)
 {
     u32 eax, ecx, edx;
 
@@ -1041,6 +1084,8 @@ int start_svm(void)
     wrmsr(MSR_EFER, eax, edx);
     printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
 
+    svm_npt_detect();
+
     /* Initialize the HSA for this core */
     phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
     phys_hsa_lo = (u32) phys_hsa;
 
@@ -1076,6 +1121,18 @@ void arch_svm_do_resume(struct vcpu *v)
         reset_stack_and_jump( svm_asm_do_resume );
     }
 }
+
+static int svm_do_nested_pgfault(unsigned long gpa, struct cpu_user_regs *regs)
+{
+    if (mmio_space(gpa)) {
+        handle_mmio(gpa);
+        return 1;
+    }
+
+    /* We should not reach here. Otherwise, the P2M table is not correct. */
+    return 0;
+}
+
 
 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
 {
 
@@ -1702,6 +1759,38 @@ static void svm_io_instruction(struct vc
     }
 }
 
+static int npt_set_cr0(unsigned long value)
+{
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+    /* ET is reserved and should always be 1 */
+    value |= X86_CR0_ET;
+
+    /* check whether guest is about to turn on long mode */
+    if ( (value & X86_CR0_PG) && (vmcb->efer & EFER_LME) &&
+         (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) &&
+         !(vmcb->cr0 & X86_CR0_PG) )
+    {
+        set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+    }
+
+    vmcb->cr0 = value;
+    v->arch.hvm_svm.cpu_shadow_cr0 = value;
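+
+    /* Unlike svm_set_cr0(), there are no shadow pagetables to rebuild here;
+     * paging_update_paging_modes() below only refreshes the vcpu's paging
+     * mode. */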
+    /* TS cleared? Then initialise FPU now. */
+    if ( !(value & X86_CR0_TS) ) {
+        setup_fpu(v);
+        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+    }
+
+    paging_update_paging_modes(v);
+
+    return 1;
+}
+
 static int svm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
 
@@ -1798,6 +1887,89 @@ static int svm_set_cr0(unsigned long val
     }
 
     return 1;
+}
+
+/*
+ * nested paging functions
+ */
+
+static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    ASSERT(vmcb);
+
+    value = get_reg(gpreg, regs, vmcb);
+
+    switch (cr) {
+    case 0:
+        return npt_set_cr0(value);
+
+    case 3:
+        vmcb->cr3 = value;
+        v->arch.hvm_svm.cpu_cr3 = value;
+        break;
+
+    case 4: /* CR4 */
+        vmcb->cr4 = value;
+        v->arch.hvm_svm.cpu_shadow_cr4 = value;
+        if ( value & X86_CR4_PAE )
+            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+        else
+            clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+
+        paging_update_paging_modes(v);
+        break;
+
+    case 8:
+        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+        break;
+
+    default:
+        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+        domain_crash(v->domain);
+        return 0;
+    }
+
+    return 1;
+}
+
+static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    vmcb = v->arch.hvm_svm.vmcb;
+    ASSERT(vmcb);
+
+    switch(cr) {
+    case 0:
+        value = vmcb->cr0;
+        break;
+    case 2:
+        value = vmcb->cr2;
+        break;
+    case 3:
+        value = vmcb->cr3;
+        break;
+    case 4:
+        value = vmcb->cr4;
+        break;
+    case 8:
+        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+        value = (value & 0xF0) >> 4;
+        break;
+    default:
+        domain_crash(v->domain);
+        return;
+    }
+
+    set_reg(gp, value, regs, vmcb);
 }
 
 /*
@@ -2055,12 +2227,18 @@ static int svm_cr_access(struct vcpu *v,
     {
     case INSTR_MOV2CR:
         gpreg = decode_src_reg(prefix, buffer[index+2]);
-        result = mov_to_cr(gpreg, cr, regs);
+        if ( paging_mode_hap(v->domain) )
+            result = npt_mov_to_cr(gpreg, cr, regs);
+        else
+            result = mov_to_cr(gpreg, cr, regs);
         break;
 
     case INSTR_MOVCR2:
         gpreg = decode_src_reg(prefix, buffer[index+2]);
-        mov_from_cr(cr, gpreg, regs);
+        if ( paging_mode_hap(v->domain) )
+            npt_mov_from_cr(cr, gpreg, regs);
+        else
+            mov_from_cr(cr, gpreg, regs);
         break;
 
     case INSTR_CLTS:
 
@@ -2087,7 +2265,10 @@ static int svm_cr_access(struct vcpu *v,
         if (svm_dbg_on)
             printk("CR0-LMSW CR0 - New value=%lx\n", value);
 
-        result = svm_set_cr0(value);
+        if ( paging_mode_hap(v->domain) )
+            result = npt_set_cr0(value);
+        else
+            result = svm_set_cr0(value);
         break;
 
     case INSTR_SMSW:
 
@@ -2370,6 +2551,11 @@ static int svm_do_vmmcall_reset_to_realm
     vmcb->cr4 = SVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = 0;
     clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+
+    if ( paging_mode_hap(v->domain) ) {
+        vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+    }
 
     /* This will jump to ROMBIOS */
     vmcb->rip = 0xFFF0;
 
@@ -3054,6 +3240,17 @@ asmlinkage void svm_vmexit_handler(struc
         hvm_triple_fault();
         break;
 
+    case VMEXIT_NPF:
+    {
+        unsigned long gpa;
+        gpa = vmcb->exitinfo2;
+        regs->error_code = vmcb->exitinfo1;
+        if ( !svm_do_nested_pgfault(gpa, regs) ) {
+            domain_crash(v->domain);
+        }
+        break;
+    }
+
     default:
     exit_and_crash:
         gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
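For reference, the CPUID check performed by svm_npt_detect() above can be reproduced by a standalone user-space program. A minimal sketch (not part of the patch; it assumes an AMD x86-64 machine and GCC inline asm):

    #include <stdio.h>
    #include <stdint.h>

    static void cpuid(uint32_t leaf,
                      uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
        __asm__ volatile("cpuid"
                         : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
                         : "a"(leaf), "c"(0));
    }

    int main(void)
    {
        uint32_t eax, ebx, ecx, edx;

        /* SVM feature leaf; EDX bit 0 advertises nested paging. */
        cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
        printf("nested paging %ssupported\n", (edx & 0x01) ? "" : "not ");
        return 0;
    }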
diff -r 79920258510a xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c	Tue Feb 27 05:43:42 2007 -0600
@@ -200,6 +200,13 @@ static int construct_vmcb(struct vcpu *v
     vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
 
     arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+
+    if ( paging_mode_hap(v->domain) ) {
+        vmcb->cr0 = arch_svm->cpu_shadow_cr0;
+        vmcb->np_enable = 1; /* enable nested paging */
+        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
+        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+    }
 
     return 0;
 }
 
@@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str
     printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
            (unsigned long long) vmcb->kerngsbase,
            (unsigned long long) vmcb->g_pat);
-
+    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
     /* print out all the selectors */
     svm_dump_sel("CS", &vmcb->cs);
     svm_dump_sel("DS", &vmcb->ds);
diff -r 79920258510a xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/mm/Makefile	Tue Feb 27 05:43:42 2007 -0600
@@ -1,4 +1,5 @@
 subdir-y += shadow
+subdir-y += hap
 
 obj-y += paging.o
 obj-y += p2m.o
diff -r 79920258510a xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/mm/paging.c	Tue Feb 27 06:02:14 2007 -0600
@@ -24,10 +24,12 @@
 #include
 #include
 #include
+#include
 
 /* Xen command-line option to enable hardware-assisted paging */
 int opt_hap_enabled = 0;
 boolean_param("hap", opt_hap_enabled);
+int hap_capable_system = 0;
 
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
 
@@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d
 {
     p2m_init(d);
     shadow_domain_init(d);
+
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_domain_init(d);
 }
 
 /* vcpu paging struct initialization goes here */
 void paging_vcpu_init(struct vcpu *v)
 {
-    shadow_vcpu_init(v);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
+        hap_vcpu_init(v);
+    else
+        shadow_vcpu_init(v);
 }
 
 
@@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_
                   XEN_GUEST_HANDLE(void) u_domctl)
 {
     /* Here, dispatch domctl to the appropriate paging code */
-    return shadow_domctl(d, sc, u_domctl);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        return hap_domctl(d, sc, u_domctl);
+    else
+        return shadow_domctl(d, sc, u_domctl);
 }
 
 /* Call when destroying a domain */
 void paging_teardown(struct domain *d)
 {
-    shadow_teardown(d);
-    /* Call other modes' teardown code here */
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_teardown(d);
+    else
+        shadow_teardown(d);
 }
 
 /* Call once all of the references to the domain have gone away */
 void paging_final_teardown(struct domain *d)
 {
-    shadow_teardown(d);
-    /* Call other modes' final teardown code here */
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_final_teardown(d);
+    else
+        shadow_final_teardown(d);
 }
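+
+/* Note: the HAP-vs-shadow predicate used above repeats at every dispatch
+ * point in this file; a helper such as the following (hypothetical, not
+ * part of this patch) would keep it in one place:
+ *
+ *   #define hap_active(d) \
+ *       (opt_hap_enabled && hap_capable_system && is_hvm_domain(d))
+ */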
 
 /* Enable an arbitrary paging-assistance mode.  Call once at domain
  * creation. */
 int paging_enable(struct domain *d, u32 mode)
 {
-    if ( mode & PG_SH_enable )
-        return shadow_enable(d, mode);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        return hap_enable(d, mode | PG_HAP_enable);
     else
-        /* No other modes supported yet */
-        return -EINVAL;
+        return shadow_enable(d, mode | PG_SH_enable);
 }
 
 /* Print paging-assistance info to the console */
diff -r 79920258510a xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h	Tue Feb 27 05:48:56 2007 -0600
@@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr
 
 #if GUEST_PAGING_LEVELS == 2
 
-#include "page-guest32.h"
+#include "../page-guest32.h"
 
 #define GUEST_L1_PAGETABLE_ENTRIES  1024
 #define GUEST_L2_PAGETABLE_ENTRIES  1024
diff -r 79920258510a xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue Feb 27 05:00:26 2007 -0600
+++ b/xen/include/asm-x86/domain.h	Tue Feb 27 05:43:42 2007 -0600
@@ -104,6 +104,21 @@ struct shadow_vcpu {
 };
 
 /************************************************/
+/*          hardware assisted paging            */
+/************************************************/
+struct hap_domain {
+    spinlock_t        lock;
+    int               locker;
+    const char       *locker_function;
+
+    struct list_head  freelists;
+    struct list_head  p2m_freelist;
+    unsigned int      total_pages;  /* number of pages allocated */
+    unsigned int      free_pages;   /* number of pages on freelists */
+    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
+};
+
+/************************************************/
 /*            p2m handling                      */
 /************************************************/
 
@@ -135,6 +150,7 @@ struct paging_domain {
     struct shadow_domain shadow;
 
     /* Other paging assistance code will have structs here */
+    struct hap_domain    hap;
 };
 
 struct paging_vcpu {
diff -r 79920258510a xen/arch/x86/mm/hap/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/Makefile	Tue Feb 27 05:43:42 2007 -0600
@@ -0,0 +1,2 @@
+obj-y += hap.o
+obj-y += support.o
diff -r 79920258510a xen/arch/x86/mm/hap/hap.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/hap.c	Tue Feb 27 09:17:14 2007 -0600
@@ -0,0 +1,696 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 by XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "private.h" + +/************************************************/ +/* HAP SUPPORT FUNCTIONS */ +/************************************************/ +mfn_t hap_alloc(struct domain *d, unsigned long backpointer) +{ + struct page_info *sp = NULL; + void *p; + + ASSERT(hap_locked_by_me(d)); + + sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list); + list_del(&sp->list); + d->arch.paging.hap.free_pages -= 1; + + /* Now safe to clear the page for reuse */ + p = hap_map_domain_page(page_to_mfn(sp)); + ASSERT(p != NULL); + clear_page(p); + hap_unmap_domain_page(p); + + return page_to_mfn(sp); +} + +void hap_free(struct domain *d, mfn_t smfn) +{ + struct page_info *sp = mfn_to_page(smfn); + + ASSERT(hap_locked_by_me(d)); + + d->arch.paging.hap.free_pages += 1; + list_add_tail(&sp->list, &d->arch.paging.hap.freelists); +} + +static int hap_alloc_p2m_pages(struct domain *d) +{ + struct page_info *pg; + + ASSERT(hap_locked_by_me(d)); + + pg = mfn_to_page(hap_alloc(d, 0)); + d->arch.paging.hap.p2m_pages += 1; + d->arch.paging.hap.total_pages -= 1; + + page_set_owner(pg, d); + pg->count_info = 1; + list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist); + + return 1; +} + +struct page_info * hap_alloc_p2m_page(struct domain *d) +{ + struct list_head *entry; + struct page_info *pg; + mfn_t mfn; + void *p; + + hap_lock(d); + + if ( list_empty(&d->arch.paging.hap.p2m_freelist) && + !hap_alloc_p2m_pages(d) ) { + hap_unlock(d); + return NULL; + } + entry = d->arch.paging.hap.p2m_freelist.next; + list_del(entry); + + hap_unlock(d); + + pg = list_entry(entry, struct page_info, list); + mfn = page_to_mfn(pg); + p = hap_map_domain_page(mfn); + clear_page(p); + hap_unmap_domain_page(p); + + return pg; +} + +void hap_free_p2m_page(struct domain *d, struct page_info *pg) +{ + ASSERT(page_get_owner(pg) == d); + /* Should have just the one ref we gave it in alloc_p2m_page() */ + if ( (pg->count_info & PGC_count_mask) != 1 ) { + HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", + pg->count_info, pg->u.inuse.type_info); + } + /* Free should not decrement domain's total allocation, since + * these pages were allocated without an owner. */ + page_set_owner(pg, NULL); + free_domheap_pages(pg, 0); + d->arch.paging.hap.p2m_pages--; +} + +/* Return the size of the pool, rounded up to the nearest MB */ +static unsigned int +hap_get_allocation(struct domain *d) +{ + unsigned int pg = d->arch.paging.hap.total_pages; + + HERE_I_AM; + return ((pg >> (20 - PAGE_SHIFT)) + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); +} + +/* Set the pool of pages to the required number of pages. + * Returns 0 for success, non-zero for failure. 
+ */
+static int
+hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
+{
+    struct page_info *sp;
+
+    ASSERT(hap_locked_by_me(d));
+
+    while ( d->arch.paging.hap.total_pages != pages ) {
+        if ( d->arch.paging.hap.total_pages < pages ) {
+            /* Need to allocate more memory from domheap */
+            sp = alloc_domheap_pages(NULL, 0, 0);
+            if ( sp == NULL ) {
+                HAP_PRINTK("failed to allocate hap pages.\n");
+                return -ENOMEM;
+            }
+            d->arch.paging.hap.free_pages += 1;
+            d->arch.paging.hap.total_pages += 1;
+            list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+        }
+        else if ( d->arch.paging.hap.total_pages > pages ) {
+            /* Need to return memory to domheap */
+            ASSERT(!list_empty(&d->arch.paging.hap.freelists));
+            sp = list_entry(d->arch.paging.hap.freelists.next,
+                            struct page_info, list);
+            list_del(&sp->list);
+            d->arch.paging.hap.free_pages -= 1;
+            d->arch.paging.hap.total_pages -= 1;
+            free_domheap_pages(sp, 0);
+        }
+
+        /* Check to see if we need to yield and try again */
+        if ( preempted && hypercall_preempt_check() ) {
+            *preempted = 1;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+#if CONFIG_PAGING_LEVELS == 4
+void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+    struct domain *d = v->domain;
+    l4_pgentry_t *sl4e;
+
+    sl4e = hap_map_domain_page(sl4mfn);
+    ASSERT(sl4e != NULL);
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+        l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+                     __PAGE_HYPERVISOR);
+
+    sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+        l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
+
+    /* install domain-specific P2M table */
+    sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
+        l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+                     __PAGE_HYPERVISOR);
+
+    hap_unmap_domain_page(sl4e);
+}
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS == 3
+void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
+{
+    struct domain *d = v->domain;
+    l2_pgentry_t *sl2e;
+
+    int i;
+
+    sl2e = hap_map_domain_page(sl2hmfn);
+    ASSERT(sl2e != NULL);
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            l2e_from_pfn(
+                mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+                __PAGE_HYPERVISOR);
+
+    for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
+        sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+            l2e_empty();
+
+    if ( paging_mode_translate(d) )
+    {
+        /* Install the domain-specific p2m table */
+        l3_pgentry_t *p2m;
+        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+        p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+        {
+            sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
+                (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
+                ?
l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))), + __PAGE_HYPERVISOR) + : l2e_empty(); + } + hap_unmap_domain_page(p2m); + } + + hap_unmap_domain_page(sl2e); +} +#endif + +#if CONFIG_PAGING_LEVELS == 2 +void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn) +{ + struct domain *d = v->domain; + l2_pgentry_t *sl2e; + int i; + + sl2e = hap_map_domain_page(sl2mfn); + ASSERT(sl2e != NULL); + + /* Copy the common Xen mappings from the idle domain */ + memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT], + &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT], + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); + + /* Install the per-domain mappings for this domain */ + for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) + sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = + l2e_from_pfn( + mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)), + __PAGE_HYPERVISOR); + + + sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = + l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR); + + /* install domain-specific P2M table */ + sl2e[l2_table_offset(RO_MPT_VIRT_START)] = + l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)), + __PAGE_HYPERVISOR); + + hap_unmap_domain_page(sl2e); +} +#endif + +mfn_t hap_make_monitor_table(struct vcpu *v) +{ + struct domain *d = v->domain; + + ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); + +#if CONFIG_PAGING_LEVELS == 4 + { + mfn_t m4mfn; + m4mfn = hap_alloc(d, 0); + hap_install_xen_entries_in_l4(v, m4mfn, m4mfn); + return m4mfn; + } +#elif CONFIG_PAGING_LEVELS == 3 + { + mfn_t m3mfn, m2mfn; + l3_pgentry_t *l3e; + l2_pgentry_t *l2e; + int i; + + m3mfn = hap_alloc(d, 0); + + /* Install a monitor l2 table in slot 3 of the l3 table. + * This is used for all Xen entries, including linear maps + */ + m2mfn = hap_alloc(d, 0); + l3e = hap_map_domain_page(m3mfn); + l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT); + hap_install_xen_entries_in_l2h(v, m2mfn); + /* Install the monitor's own linear map */ + l2e = hap_map_domain_page(m2mfn); + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) + l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = + (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) + ? 
l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
+                : l2e_empty();
+        hap_unmap_domain_page(l2e);
+        hap_unmap_domain_page(l3e);
+
+        HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+        return m3mfn;
+    }
+#else
+    {
+        mfn_t m2mfn;
+
+        m2mfn = hap_alloc(d, 0);
+        hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+
+        return m2mfn;
+    }
+#endif
+}
+
+void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
+{
+    struct domain *d = v->domain;
+
+#if CONFIG_PAGING_LEVELS == 4
+    /* Need to destroy the l3 monitor page in slot 0 too */
+    {
+        mfn_t m3mfn;
+        l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
+        ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+        m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+        hap_free(d, m3mfn);
+        hap_unmap_domain_page(l4e);
+    }
+#elif CONFIG_PAGING_LEVELS == 3
+    /* Need to destroy the l2 monitor page in slot 4 too */
+    {
+        l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
+        ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+        hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
+        hap_unmap_domain_page(l3e);
+    }
+#endif
+
+    /* Put the memory back in the pool */
+    hap_free(d, mmfn);
+}
+
+/************************************************/
+/*          HAP DOMAIN LEVEL FUNCTIONS          */
+/************************************************/
+void hap_domain_init(struct domain *d)
+{
+    hap_lock_init(d);
+    INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
+    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
+}
+
+/* return 0 for success, -errno for failure */
+int hap_enable(struct domain *d, u32 mode)
+{
+    unsigned int old_pages;
+    int rv = 0;
+
+    HERE_I_AM;
+
+    domain_pause(d);
+    /* error check */
+    if ( d == current->domain ) {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    old_pages = d->arch.paging.hap.total_pages;
+    if ( old_pages == 0 ) {
+        if ( hap_set_allocation(d, 256, NULL) != 0 ) {
+            hap_set_allocation(d, 0, NULL);
+            rv = -ENOMEM;
+            goto out;
+        }
+    }
+
+    /* allocate P2M table */
+    if ( mode & PG_translate ) {
+        rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
+        if ( rv != 0 )
+            goto out;
+    }
+
+    d->arch.paging.mode = mode | PG_HAP_enable;
+
+ out:
+    domain_unpause(d);
+    return rv;
+}
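+
+/* Note: the 256-page (1MB) initial pool set up in hap_enable() above is only
+ * a starting value; the toolstack can resize it at run time via
+ * XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION, which hap_domctl() below forwards to
+ * hap_set_allocation(). */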
+ " pages total = %u, free = %u, p2m=%u\n", + d->arch.paging.hap.total_pages, + d->arch.paging.hap.free_pages, + d->arch.paging.hap.p2m_pages); + ASSERT(d->arch.paging.hap.total_pages == 0); + } + + d->arch.paging.mode &= ~PG_log_dirty; + + hap_unlock(d); +} + +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl) +{ + int rc, preempted = 0; + + HERE_I_AM; + + if ( unlikely(d == current->domain) ) { + gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n"); + return -EINVAL; + } + + switch ( sc->op ) { + case XEN_DOMCTL_SHADOW_OP_OFF: + case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST: + case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: + case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: + case XEN_DOMCTL_SHADOW_OP_CLEAN: + case XEN_DOMCTL_SHADOW_OP_PEEK: + case XEN_DOMCTL_SHADOW_OP_ENABLE: + HAP_ERROR("Bad hap domctl op %u\n", sc->op); + domain_crash(d); + return -EINVAL; + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: + hap_lock(d); + rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); + hap_unlock(d); + if ( preempted ) + /* Not finished. Set up to re-run the call. */ + rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", + u_domctl); + else + /* Finished. Return the new allocation */ + sc->mb = hap_get_allocation(d); + return rc; + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: + sc->mb = hap_get_allocation(d); + return 0; + default: + HAP_ERROR("Bad hap domctl op %u\n", sc->op); + return -EINVAL; + } +} + +void hap_vcpu_init(struct vcpu *v) +{ + v->arch.paging.mode = &hap_paging_real_mode; +} +/************************************************/ +/* HAP PAGING MODE FUNCTIONS */ +/************************************************/ +/* In theory, hap should not intercept guest page fault. This function can + * be recycled to handle host/nested page fault, if needed. + */ +int hap_page_fault(struct vcpu *v, unsigned long va, + struct cpu_user_regs *regs) +{ + HERE_I_AM; + domain_crash(v->domain); + return 0; +} + +/* called when guest issues a invlpg request. + * Return 1 if need to issue page invalidation on CPU; Return 0 if does not + * need to do so. + */ +int hap_invlpg(struct vcpu *v, unsigned long va) +{ + HERE_I_AM; + return 0; +} + +void hap_update_cr3(struct vcpu *v, int do_locking) +{ + struct domain *d = v->domain; + mfn_t gmfn; + + HERE_I_AM; + /* Don't do anything on an uninitialised vcpu */ + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { + ASSERT(v->arch.cr3 == 0); + return; + } + + if ( do_locking ) + hap_lock(v->domain); + + ASSERT(hap_locked_by_me(v->domain)); + ASSERT(v->arch.paging.mode); + + gmfn = pagetable_get_mfn(v->arch.guest_table); + + make_cr3(v, pagetable_get_pfn(v->arch.monitor_table)); + + hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table)); + + HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n", + d->domain_id, v->vcpu_id, + (unsigned long)pagetable_get_pfn(v->arch.guest_table), + (unsigned long)pagetable_get_pfn(v->arch.monitor_table)); + + flush_tlb_mask(d->domain_dirty_cpumask); + + if ( do_locking ) + hap_unlock(v->domain); +} + +void hap_update_paging_modes(struct vcpu *v) +{ + struct domain *d; + + HERE_I_AM; + + d = v->domain; + hap_lock(d); + + /* update guest paging mode. Note that we rely on hvm functions to detect + * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER) + * reflect guest's status correctly. 
+    /* Update guest paging mode. Note that we rely on hvm functions to detect
+     * the guest's paging mode; make sure the shadow registers (CR0, CR4,
+     * EFER) reflect the guest's status correctly.
+     */
+    if ( hvm_paging_enabled(v) ) {
+        if ( hvm_long_mode_enabled(v) )
+            v->arch.paging.mode = &hap_paging_long_mode;
+        else if ( hvm_pae_enabled(v) )
+            v->arch.paging.mode = &hap_paging_pae_mode;
+        else
+            v->arch.paging.mode = &hap_paging_protected_mode;
+    }
+    else {
+        v->arch.paging.mode = &hap_paging_real_mode;
+    }
+
+    v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+
+    /* use p2m map */
+    v->arch.guest_table =
+        pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+
+    if ( pagetable_is_null(v->arch.monitor_table) ) {
+        mfn_t mmfn = hap_make_monitor_table(v);
+        v->arch.monitor_table = pagetable_from_mfn(mmfn);
+        make_cr3(v, mfn_x(mmfn));
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+    hap_unlock(d);
+}
+
+void
+hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
+                    l1_pgentry_t new, unsigned int level)
+{
+    hap_lock(v->domain);
+    safe_write_pte(p, new);
+    hap_unlock(v->domain);
+}
+
+/* Entry points into this mode of the hap code. */
+struct paging_mode hap_paging_real_mode = {
+    .page_fault             = hap_page_fault,
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_real_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 1
+};
+
+struct paging_mode hap_paging_protected_mode = {
+    .page_fault             = hap_page_fault,
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_protected_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 2
+};
+
+struct paging_mode hap_paging_pae_mode = {
+    .page_fault             = hap_page_fault,
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_pae_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 3
+};
+
+struct paging_mode hap_paging_long_mode = {
+    .page_fault             = hap_page_fault,
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_long_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 4
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 79920258510a xen/arch/x86/mm/hap/private.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/private.h	Tue Feb 27 06:43:47 2007 -0600
@@ -0,0 +1,106 @@
+/*
+ * arch/x86/mm/hap/private.h
+ *
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __HAP_PRIVATE_H__
+#define __HAP_PRIVATE_H__
+
+#include
+#include
+
+/********************************************/
+/*          GUEST TRANSLATION FUNCS         */
+/********************************************/
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
+
+/********************************************/
+/*            MISC DEFINITIONS              */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right
+ * to right-justify the portion to be used for indexing into a page
+ * table, given the guest memory model (i.e. number of levels) and the level
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+    {   /* ------ level ------        nr_levels   */
+        /*    1    2    3    4                    */
+        {  0,   0,   0,   0,   0},   /* 0 not used */
+        {  0,   0,   0,   0,   0},   /* 1 not used */
+        {  0,  12,  22,   0,   0},   /* 2          */
+        {  0,  12,  21,  30,   0},   /* 3          */
+        {  0,  12,  21,  30,  39}    /* 4          */
+    };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the
+ * memory model (i.e. number of levels) and the level of the page table
+ * being considered. This idea is also from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+    {   /* ------ level ------        nr_levels   */
+        /*    1     2     3     4                 */
+        {  0,    0,    0,    0,    0},   /* 0 not used */
+        {  0,    0,    0,    0,    0},   /* 1 not used */
+        {  0, 1024, 1024,    0,    0},   /* 2          */
+        {  0,  512,  512,    4,    0},   /* 3          */
+        {  0,  512,  512,  512,  512}    /* 4          */
+    };
+
+/********************************************/
+/*       PAGING DEFINITION FOR GUEST        */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+
+#define PAGE_NX_BIT (1ULL << 63)
+
+/************************************************/
+/*        PAGETABLE RELATED VARIABLES           */
+/************************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define HAP_L1_PAGETABLE_ENTRIES    1024
+#define HAP_L2_PAGETABLE_ENTRIES    1024
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        22
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+#define HAP_L1_PAGETABLE_ENTRIES     512
+#define HAP_L2_PAGETABLE_ENTRIES     512
+#define HAP_L3_PAGETABLE_ENTRIES       4
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        21
+#define HAP_L3_PAGETABLE_SHIFT        30
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+#define HAP_L1_PAGETABLE_ENTRIES     512
+#define HAP_L2_PAGETABLE_ENTRIES     512
+#define HAP_L3_PAGETABLE_ENTRIES     512
+#define HAP_L4_PAGETABLE_ENTRIES     512
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        21
+#define HAP_L3_PAGETABLE_SHIFT        30
+#define HAP_L4_PAGETABLE_SHIFT        39
+#endif
+
+#endif /* __HAP_PRIVATE_H__ */
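A standalone illustration of how the PT_SHIFT/PT_ENTRIES tables above drive index extraction in the walkers that follow (sketch only, not part of the patch; the VA and expected indices are hypothetical):

    #include <assert.h>

    /* 2-level (non-PAE) rows from the tables in private.h above. */
    static const int PT_SHIFT[][5]   = { {0}, {0}, {0, 12, 22, 0, 0} };
    static const int PT_ENTRIES[][5] = { {0}, {0}, {0, 1024, 1024, 0, 0} };

    int main(void)
    {
        unsigned long gva = 0xC0101234UL;   /* arbitrary example VA */
        int mode = 2;                       /* two-level guest */
        int l2 = (gva >> PT_SHIFT[mode][2]) & (PT_ENTRIES[mode][2] - 1);
        int l1 = (gva >> PT_SHIFT[mode][1]) & (PT_ENTRIES[mode][1] - 1);
        assert(l2 == 0x300 && l1 == 0x101); /* top 10 bits, next 10 bits */
        return 0;
    }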
diff -r 79920258510a xen/arch/x86/mm/hap/support.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/support.c	Tue Feb 27 09:17:58 2007 -0600
@@ -0,0 +1,325 @@
+/*
+ * arch/x86/mm/hap/support.c
+ *
+ * guest page table walker
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "private.h"
+#include "../page-guest32.h"
+
+/*******************************************/
+/*      Platform Specific Functions        */
+/*******************************************/
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for real-mode guests.
+ */
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+    HERE_I_AM;
+    return ((paddr_t)gva >> PAGE_SHIFT);
+}
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for protected-mode guests.
+ */
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+    int mode = 2; /* two-level guest */
+    int lev, index;
+    paddr_t gpa = 0;
+    unsigned long gpfn, mfn;
+    int result = 1;
+    l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+    l1_pgentry_32_t *l1e;
+
+    HERE_I_AM;
+
+    gpfn = (gcr3 >> PAGE_SHIFT);
+    for ( lev = mode; lev >= 1; lev-- ) {
+        mfn = get_mfn_from_gpfn( gpfn );
+        if ( mfn == INVALID_MFN ) {
+            HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+                       lev);
+            result = 0;
+            break;
+        }
+        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+        if ( lev == 2 ) {
+            l2e = map_domain_page( mfn );
+            HAP_PRINTK("l2 page table entry is %x at index = %d\n",
+                       l2e[index].l2, index);
+            if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+                result = 0;
+            }
+
+            if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+                HAP_PRINTK("guest page table is PSE\n");
+                if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /* bits [20:13] */
+                    printk("guest physical memory size is too large!\n");
+                    domain_crash(v->domain);
+                }
+                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+                      (gva & ~PHYSICAL_PAGE_4M_MASK);
+                unmap_domain_page(l2e);
+                break; /* last level page table, return from here */
+            }
+            else {
+                gpfn = l2e_get_pfn( l2e[index] );
+            }
+            unmap_domain_page(l2e);
+        }
+
+        if ( lev == 1 ) {
+            l1e = map_domain_page( mfn );
+            HAP_PRINTK("l1 page table entry is %x at index = %d\n",
+                       l1e[index].l1, index);
+            if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+                result = 0;
+            }
+            gpfn = l1e_get_pfn( l1e[index] );
+            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+                  (gva & ~PHYSICAL_PAGE_4K_MASK);
+            unmap_domain_page(l1e);
+        }
+
+        if ( result != 1 ) /* error happened, jump out */
+            break;
+    }
+
+    HAP_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa);
+
+    return ((paddr_t)gpa >> PAGE_SHIFT);
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for PAE mode guest.
+ */ +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva) +{ +#if CONFIG_PAGING_LEVELS >= 3 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); + int mode = 3; /* three-level guest */ + int lev, index; + paddr_t gpa = 0; + unsigned long gpfn, mfn; + int result = 1; + l1_pgentry_t *l1e; + l2_pgentry_t *l2e; + l3_pgentry_t *l3e; + + HERE_I_AM; + + gpfn = (gcr3 >> PAGE_SHIFT); + for ( lev = mode; lev >= 1; lev-- ) { + mfn = get_mfn_from_gpfn( gpfn ); + if ( mfn == INVALID_MFN ) { + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, + lev); + result = 0; + break; + } + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); + + if ( lev == 3 ) { + l3e = map_domain_page( mfn ); + index += ( ((gcr3 >> 5 ) & 127 ) * 4 ); + if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) { + HAP_PRINTK("Level 3 entry not present at index = %d\n", index); + result = 0; + } + gpfn = l3e_get_pfn( l3e[index] ); + unmap_domain_page(l3e); + } + + if ( lev == 2 ) { + l2e = map_domain_page( mfn ); + if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) { + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); + result = 0; + } + + if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ + HAP_PRINTK("guest page table is PSE\n"); + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) + + (gva & ~PHYSICAL_PAGE_2M_MASK); + unmap_domain_page(l2e); + break; /* last level page table, jump out from here */ + } + else { + gpfn = l2e_get_pfn(l2e[index]); + } + unmap_domain_page(l2e); + } + + if ( lev == 1 ) { + l1e = map_domain_page( mfn ); + if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) { + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); + result = 0; + } + gpfn = l1e_get_pfn( l1e[index] ); + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + + (gva & ~PHYSICAL_PAGE_4K_MASK); + unmap_domain_page(l1e); + } + + if ( result != 1 ) /* error happened, jump out */ + break; + } + + gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */ + HAP_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa); + + return ((paddr_t)gpa >> PAGE_SHIFT); +#else + HERE_I_AM; + printk("guest paging level (3) is greater than host paging level!\n"); + domain_crash(v->domain); + return 0UL; +#endif +} + + + +/* Translate guest virtual address to guest physical address. Specifically + * for long mode guest. 
+ */
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS == 4
+    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+    int mode = 4; /* four-level guest */
+    int lev, index;
+    paddr_t gpa = 0;
+    unsigned long gpfn, mfn;
+    int result = 1;
+    l4_pgentry_t *l4e;
+    l3_pgentry_t *l3e;
+    l2_pgentry_t *l2e;
+    l1_pgentry_t *l1e;
+
+    HERE_I_AM;
+
+    gpfn = (gcr3 >> PAGE_SHIFT);
+    for ( lev = mode; lev >= 1; lev-- ) {
+        mfn = get_mfn_from_gpfn( gpfn );
+        if ( mfn == INVALID_MFN ) {
+            HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+                       lev);
+            result = 0;
+            break;
+        }
+        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+        if ( lev == 4 ) {
+            l4e = map_domain_page( mfn );
+            if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
+                result = 0;
+            }
+            gpfn = l4e_get_pfn( l4e[index] );
+            unmap_domain_page(l4e);
+        }
+
+        if ( lev == 3 ) {
+            l3e = map_domain_page( mfn );
+            if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+                result = 0;
+            }
+            gpfn = l3e_get_pfn( l3e[index] );
+            unmap_domain_page(l3e);
+        }
+
+        if ( lev == 2 ) {
+            l2e = map_domain_page( mfn );
+            if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+                result = 0;
+            }
+
+            if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+                HAP_PRINTK("guest page table is PSE\n");
+                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+                      (gva & ~PHYSICAL_PAGE_2M_MASK);
+                unmap_domain_page(l2e);
+                break; /* last level page table, jump out from here */
+            }
+            else {
+                gpfn = l2e_get_pfn(l2e[index]);
+            }
+            unmap_domain_page(l2e);
+        }
+
+        if ( lev == 1 ) {
+            l1e = map_domain_page( mfn );
+            if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+                result = 0;
+            }
+            gpfn = l1e_get_pfn( l1e[index] );
+            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+                  (gva & ~PHYSICAL_PAGE_4K_MASK);
+            unmap_domain_page(l1e);
+        }
+
+        if ( result != 1 ) /* error happened, jump out */
+            break;
+    }
+
+    gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+    HAP_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa);
+
+    return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+    HERE_I_AM;
+    printk("guest paging level (4) is greater than host paging level!\n");
+    domain_crash(v->domain);
+    return 0UL;
+#endif
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 79920258510a xen/arch/x86/mm/page-guest32.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/page-guest32.h	Tue Feb 27 05:44:58 2007 -0600
@@ -0,0 +1,100 @@
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include
+#endif
+
+#define PAGETABLE_ORDER_32       10
+#define L1_PAGETABLE_ENTRIES_32  (1<> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
+#define l2_table_offset_32(a) \
+  (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
+
+#endif /* __X86_PAGE_GUEST_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 79920258510a xen/include/asm-x86/hap.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hap.h	Tue Feb 27 08:52:52 2007 -0600
@@ -0,0 +1,122 @@
+/****************************************************************************** + * include/asm-x86/hap.h + * + * hardware-assisted paging + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) + * + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XEN_HAP_H +#define _XEN_HAP_H + +#define HERE_I_AM \ + debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__) +#define HAP_PRINTK(_f, _a...) \ + debugtrace_printk("hap: %s(): " _f, __func__, ##_a) +#define HAP_ERROR(_f, _a...) \ + printk("hap error: %s(): " _f, __func__, ##_a) + +/************************************************/ +/* hap domain page mapping */ +/************************************************/ +static inline void * +hap_map_domain_page(mfn_t mfn) +{ + return map_domain_page(mfn_x(mfn)); +} + +static inline void +hap_unmap_domain_page(void *p) +{ + unmap_domain_page(p); +} + +static inline void * +hap_map_domain_page_global(mfn_t mfn) +{ + return map_domain_page_global(mfn_x(mfn)); +} + +static inline void +hap_unmap_domain_page_global(void *p) +{ + unmap_domain_page_global(p); +} + +/************************************************/ +/* locking for hap code */ +/************************************************/ +#define hap_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.paging.hap.lock); \ + (_d)->arch.paging.hap.locker = -1; \ + (_d)->arch.paging.hap.locker_function = "nobody"; \ + } while (0) + +#define hap_locked_by_me(_d) \ + (current->processor == (_d)->arch.paging.hap.locker) + +#define hap_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\ + { \ + printk("Error: hap lock held by %s\n", \ + (_d)->arch.paging.hap.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.paging.hap.lock); \ + ASSERT((_d)->arch.paging.hap.locker == -1); \ + (_d)->arch.paging.hap.locker = current->processor; \ + (_d)->arch.paging.hap.locker_function = __func__; \ + } while (0) + +#define hap_unlock(_d) \ + do { \ + ASSERT((_d)->arch.paging.hap.locker == current->processor); \ + (_d)->arch.paging.hap.locker = -1; \ + (_d)->arch.paging.hap.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.paging.hap.lock); \ + } while (0) + +/************************************************/ +/* hap domain level functions */ +/************************************************/ +void hap_domain_init(struct domain *d); +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl); +int hap_enable(struct domain *d, u32 mode); +void hap_final_teardown(struct domain *d); +void hap_teardown(struct domain *d); +void hap_vcpu_init(struct vcpu *v); + +extern struct paging_mode 
hap_paging_real_mode;
+extern struct paging_mode hap_paging_protected_mode;
+extern struct paging_mode hap_paging_pae_mode;
+extern struct paging_mode hap_paging_long_mode;
+
+#endif /* _XEN_HAP_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 79920258510a xen/arch/x86/mm/shadow/page-guest32.h
--- a/xen/arch/x86/mm/shadow/page-guest32.h	Tue Feb 27 05:00:26 2007 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-
-#ifndef __X86_PAGE_GUEST_H__
-#define __X86_PAGE_GUEST_H__
-
-#ifndef __ASSEMBLY__
-# include
-#endif
-
-#define PAGETABLE_ORDER_32       10
-#define L1_PAGETABLE_ENTRIES_32  (1<> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
-#define l2_table_offset_32(a) \
-  (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
-
-#endif /* __X86_PAGE_GUEST_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
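Finally, an illustration of the locking discipline established by the hap_lock()/hap_unlock() macros in hap.h above (sketch only, not part of the patch; it would live inside Xen next to the code above, and hap_pool_stats is a hypothetical name):

    /* Query the HAP pool sizes under the lock, as hap_domctl() does. */
    static void hap_pool_stats(struct domain *d,
                               unsigned int *total, unsigned int *free)
    {
        hap_lock(d);    /* records locker and locker_function for debugging */
        *total = d->arch.paging.hap.total_pages;
        *free  = d->arch.paging.hap.free_pages;
        hap_unlock(d);
    }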