[Xen-changelog] [xen-unstable] [XEN] New paging-assistance interface.
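Before the patch body, a brief orientation (not part of the changeset): the patch below replaces direct shadow_* calls in arch/x86 with generic paging_* entry points that dispatch through a per-vcpu mode structure (v->arch.paging.mode), so that hardware paging assistance can later be hooked in at the same level. The following is a minimal, standalone C sketch of that dispatch pattern only; all type and function names here are simplified stand-ins, not the real Xen definitions.

/* Illustrative sketch of the paging_* -> shadow_* dispatch pattern.
 * Not Xen code: structures and names are simplified for clarity. */
#include <stdio.h>

struct vcpu;

struct paging_mode {
    int  (*page_fault)(struct vcpu *v, unsigned long va);
    void (*update_cr3)(struct vcpu *v);
    unsigned int guest_levels;
};

struct vcpu {
    const struct paging_mode *paging_mode;  /* stands in for v->arch.paging.mode */
};

/* Shadow implementation sitting behind the generic interface. */
static int shadow_page_fault(struct vcpu *v, unsigned long va)
{
    (void)v;
    printf("shadow: handling fault at %#lx\n", va);
    return 1;
}

static void shadow_update_cr3(struct vcpu *v)
{
    (void)v;
    printf("shadow: update_cr3\n");
}

static const struct paging_mode shadow_mode_3_on_3 = {
    .page_fault   = shadow_page_fault,
    .update_cr3   = shadow_update_cr3,
    .guest_levels = 3,
};

/* Generic wrappers: callers use these instead of shadow_* directly,
 * so another mode (e.g. hardware-assisted paging) can be slotted in
 * by pointing paging_mode at a different vtable. */
static int paging_fault(struct vcpu *v, unsigned long va)
{
    return v->paging_mode->page_fault(v, va);
}

static void paging_update_cr3(struct vcpu *v)
{
    v->paging_mode->update_cr3(v);
}

int main(void)
{
    struct vcpu v = { .paging_mode = &shadow_mode_3_on_3 };
    paging_update_cr3(&v);
    return !paging_fault(&v, 0xdeadb000UL);
}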
# HG changeset patch # User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> # Date 1171454540 0 # Node ID 6daa91dc924700c6b9821f54788ca27ea9a22e01 # Parent df25547d76382e14411ea36b72c37b8131596f8d [XEN] New paging-assistance interface. Adds paging_* functions, wrapping the existing shadow_* ones, so that later hardware paging assistance can be hooked in at this level. Separates p2m maintenance code into its own files. Adjusts shadow code to comply with the new interfaces. Changes all callers in arch/x86 to use the new interfaces. Signed-off-by: Wei Huang <Wei.Huang2@xxxxxxx> Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> --- xen/arch/x86/domain.c | 52 - xen/arch/x86/domain_build.c | 10 xen/arch/x86/domctl.c | 4 xen/arch/x86/hvm/hvm.c | 9 xen/arch/x86/hvm/io.c | 2 xen/arch/x86/hvm/platform.c | 8 xen/arch/x86/hvm/svm/intr.c | 2 xen/arch/x86/hvm/svm/svm.c | 30 xen/arch/x86/hvm/svm/vmcb.c | 4 xen/arch/x86/hvm/vlapic.c | 1 xen/arch/x86/hvm/vmx/vmcs.c | 2 xen/arch/x86/hvm/vmx/vmx.c | 34 - xen/arch/x86/mm.c | 91 +- xen/arch/x86/mm/Makefile | 3 xen/arch/x86/mm/p2m.c | 699 ++++++++++++++++++++++ xen/arch/x86/mm/paging.c | 143 ++++ xen/arch/x86/mm/shadow/common.c | 1197 ++++++++++---------------------------- xen/arch/x86/mm/shadow/multi.c | 134 ++-- xen/arch/x86/mm/shadow/multi.h | 2 xen/arch/x86/mm/shadow/private.h | 107 +-- xen/arch/x86/mm/shadow/types.h | 10 xen/arch/x86/setup.c | 2 xen/arch/x86/sysctl.c | 1 xen/arch/x86/traps.c | 10 xen/arch/x86/x86_32/domain_page.c | 1 xen/arch/x86/x86_64/traps.c | 1 xen/include/asm-x86/domain.h | 90 ++ xen/include/asm-x86/mm.h | 63 +- xen/include/asm-x86/p2m.h | 142 ++++ xen/include/asm-x86/page.h | 2 xen/include/asm-x86/paging.h | 376 +++++++++++ xen/include/asm-x86/shadow.h | 375 ----------- 32 files changed, 2062 insertions(+), 1545 deletions(-) diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/domain.c Wed Feb 14 12:02:20 2007 +0000 @@ -37,7 +37,7 @@ #include <asm/i387.h> #include <asm/mpspec.h> #include <asm/ldt.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/msr.h> @@ -331,6 +331,7 @@ int vcpu_initialise(struct vcpu *v) pae_l3_cache_init(&v->arch.pae_l3_cache); + paging_vcpu_init(v); if ( is_hvm_domain(d) ) { @@ -424,7 +425,7 @@ int arch_domain_create(struct domain *d) HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; #endif - shadow_domain_init(d); + paging_domain_init(d); if ( !is_idle_domain(d) ) { @@ -464,7 +465,7 @@ void arch_domain_destroy(struct domain * hvm_domain_destroy(d); } - shadow_final_teardown(d); + paging_final_teardown(d); free_xenheap_pages( d->arch.mm_perdomain_pt, @@ -613,7 +614,7 @@ int arch_set_info_guest( { cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); - if ( shadow_mode_refcounts(d) + if ( paging_mode_refcounts(d) ? !get_page(mfn_to_page(cr3_pfn), d) : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_base_page_table) ) @@ -631,7 +632,7 @@ int arch_set_info_guest( cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); - if ( shadow_mode_refcounts(d) + if ( paging_mode_refcounts(d) ? 
!get_page(mfn_to_page(cr3_pfn), d) : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_l3_page_table) ) @@ -652,8 +653,8 @@ int arch_set_info_guest( /* Don't redo final setup */ set_bit(_VCPUF_initialised, &v->vcpu_flags); - if ( shadow_mode_enabled(d) ) - shadow_update_paging_modes(v); + if ( paging_mode_enabled(d) ) + paging_update_paging_modes(v); update_cr3(v); @@ -1406,7 +1407,7 @@ static void vcpu_destroy_pagetables(stru if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1427,7 +1428,7 @@ static void vcpu_destroy_pagetables(stru pfn = pagetable_get_pfn(v->arch.guest_table); if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1443,7 +1444,7 @@ static void vcpu_destroy_pagetables(stru pfn = pagetable_get_pfn(v->arch.guest_table_user); if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1464,8 +1465,8 @@ void domain_relinquish_resources(struct for_each_vcpu ( d, v ) vcpu_destroy_pagetables(v); - /* Tear down shadow mode stuff. */ - shadow_teardown(d); + /* Tear down paging-assistance stuff. */ + paging_teardown(d); /* * Relinquish GDT mappings. No need for explicit unmapping of the LDT as @@ -1484,35 +1485,12 @@ void domain_relinquish_resources(struct void arch_dump_domain_info(struct domain *d) { - if ( shadow_mode_enabled(d) ) - { - printk(" shadow mode: "); - if ( d->arch.shadow.mode & SHM2_enable ) - printk("enabled "); - if ( shadow_mode_refcounts(d) ) - printk("refcounts "); - if ( shadow_mode_log_dirty(d) ) - printk("log_dirty "); - if ( shadow_mode_translate(d) ) - printk("translate "); - if ( shadow_mode_external(d) ) - printk("external "); - printk("\n"); - } + paging_dump_domain_info(d); } void arch_dump_vcpu_info(struct vcpu *v) { - if ( shadow_mode_enabled(v->domain) ) - { - if ( v->arch.shadow.mode ) - printk(" shadowed %u-on-%u, %stranslated\n", - v->arch.shadow.mode->guest_levels, - v->arch.shadow.mode->shadow_levels, - shadow_vcpu_mode_translate(v) ? 
"" : "not "); - else - printk(" not shadowed\n"); - } + paging_dump_vcpu_info(v); } /* diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/domain_build.c Wed Feb 14 12:02:20 2007 +0000 @@ -25,7 +25,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/i387.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <public/version.h> #include <public/libelf.h> @@ -777,8 +777,8 @@ int construct_dom0(struct domain *d, (void)alloc_vcpu(d, i, i); /* Set up CR3 value for write_ptbase */ - if ( shadow_mode_enabled(v->domain) ) - shadow_update_paging_modes(v); + if ( paging_mode_enabled(v->domain) ) + paging_update_paging_modes(v); else update_cr3(v); @@ -918,8 +918,8 @@ int construct_dom0(struct domain *d, regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) - if ( shadow_enable(d, SHM2_enable) == 0 ) - shadow_update_paging_modes(v); + if ( paging_enable(d, PG_SH_enable) == 0 ) + paging_update_paging_modes(v); if ( supervisor_mode_kernel ) { diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/domctl.c Wed Feb 14 12:02:20 2007 +0000 @@ -19,7 +19,7 @@ #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/irq.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> @@ -42,7 +42,7 @@ long arch_do_domctl( d = get_domain_by_id(domctl->domain); if ( d != NULL ) { - ret = shadow_domctl(d, + ret = paging_domctl(d, &domctl->u.shadow_op, guest_handle_cast(u_domctl, void)); put_domain(d); diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/hvm.c Wed Feb 14 12:02:20 2007 +0000 @@ -30,11 +30,10 @@ #include <xen/hypercall.h> #include <xen/guest_access.h> #include <xen/event.h> -#include <xen/shadow.h> #include <asm/current.h> #include <asm/e820.h> #include <asm/io.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -155,7 +154,7 @@ int hvm_domain_initialise(struct domain spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); spin_lock_init(&d->arch.hvm_domain.irq_lock); - rc = shadow_enable(d, SHM2_refcounts|SHM2_translate|SHM2_external); + rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); if ( rc != 0 ) return rc; @@ -383,7 +382,7 @@ static int __hvm_copy(void *buf, paddr_t count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo); if ( virt ) - mfn = get_mfn_from_gpfn(shadow_gva_to_gfn(current, addr)); + mfn = get_mfn_from_gpfn(paging_gva_to_gfn(current, addr)); else mfn = get_mfn_from_gpfn(addr >> PAGE_SHIFT); @@ -600,7 +599,7 @@ void hvm_do_hypercall(struct cpu_user_re return; } - if ( current->arch.shadow.mode->guest_levels == 4 ) + if ( current->arch.paging.mode->guest_levels == 4 ) { pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi, pregs->rsi, diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/io.c Wed Feb 14 12:02:20 2007 +0000 @@ -32,7 +32,7 @@ #include <asm/processor.h> #include <asm/msr.h> #include <asm/apic.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vpt.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c 
Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/platform.c Wed Feb 14 12:02:20 2007 +0000 @@ -21,7 +21,6 @@ #include <xen/config.h> #include <xen/types.h> #include <xen/mm.h> -#include <xen/shadow.h> #include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> @@ -29,6 +28,7 @@ #include <xen/sched.h> #include <asm/regs.h> #include <asm/x86_emulate.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/io.h> @@ -809,7 +809,7 @@ void send_pio_req(unsigned long port, un if ( value_is_ptr ) /* get physical address of data */ { if ( hvm_paging_enabled(current) ) - p->data = shadow_gva_to_gpa(current, value); + p->data = paging_gva_to_gpa(current, value); else p->data = value; /* guest VA == guest PA */ } @@ -865,7 +865,7 @@ static void send_mmio_req(unsigned char if ( value_is_ptr ) { if ( hvm_paging_enabled(v) ) - p->data = shadow_gva_to_gpa(v, value); + p->data = paging_gva_to_gpa(v, value); else p->data = value; /* guest VA == guest PA */ } @@ -981,7 +981,7 @@ void handle_mmio(unsigned long gpa) if ( ad_size == WORD ) addr &= 0xFFFF; addr += hvm_get_segment_base(v, x86_seg_es); - if ( shadow_gva_to_gpa(v, addr) == gpa ) + if ( paging_gva_to_gpa(v, addr) == gpa ) { enum x86_segment seg; diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/svm/intr.c Wed Feb 14 12:02:20 2007 +0000 @@ -24,10 +24,10 @@ #include <xen/lib.h> #include <xen/trace.h> #include <xen/errno.h> -#include <xen/shadow.h> #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Feb 14 12:02:20 2007 +0000 @@ -29,7 +29,8 @@ #include <xen/domain_page.h> #include <asm/current.h> #include <asm/io.h> -#include <asm/shadow.h> +#include <asm/paging.h> +#include <asm/p2m.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -491,9 +492,6 @@ int svm_vmcb_restore(struct vcpu *v, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_svm.cpu_cr3 = c->cr3; } @@ -560,7 +558,7 @@ int svm_vmcb_restore(struct vcpu *v, str vmcb->sysenter_esp = c->sysenter_esp; vmcb->sysenter_eip = c->sysenter_eip; - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1095,7 +1093,7 @@ static int svm_do_page_fault(unsigned lo "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", va, (unsigned long)current->arch.hvm_svm.vmcb->rip, (unsigned long)regs->error_code); - return shadow_fault(va, regs); + return paging_fault(va, regs); } @@ -1730,7 +1728,7 @@ static int svm_set_cr0(unsigned long val v->arch.guest_table = pagetable_from_pfn(mfn); if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); @@ -1753,7 +1751,7 @@ static int svm_set_cr0(unsigned long val svm_inject_exception(v, TRAP_gp_fault, 1, 0); return 0; } - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) { @@ -1763,7 +1761,7 
@@ static int svm_set_cr0(unsigned long val clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); } /* we should take care of this kind of situation */ - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } return 1; @@ -1866,7 +1864,7 @@ static int mov_to_cr(int gpreg, int cr, mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) goto bad_cr3; - shadow_update_cr3(v); + paging_update_cr3(v); } else { @@ -1917,7 +1915,7 @@ static int mov_to_cr(int gpreg, int cr, v->arch.guest_table = pagetable_from_pfn(mfn); if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); @@ -1946,7 +1944,7 @@ static int mov_to_cr(int gpreg, int cr, * all TLB entries except global entries. */ if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); break; case 8: @@ -2289,7 +2287,7 @@ void svm_handle_invlpg(const short invlp __update_guest_eip (vmcb, inst_len); } - shadow_invlpg(v, g_vaddr); + paging_invlpg(v, g_vaddr); } @@ -2660,7 +2658,7 @@ void walk_shadow_and_guest_pt(unsigned l struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; paddr_t gpa; - gpa = shadow_gva_to_gpa(current, gva); + gpa = paging_gva_to_gpa(current, gva); printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3); if( !svm_paging_enabled(v) || mmio_space(gpa) ) return; @@ -2726,7 +2724,7 @@ asmlinkage void svm_vmexit_handler(struc if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) { if (svm_paging_enabled(v) && - !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2))) + !mmio_space(paging_gva_to_gpa(current, vmcb->exitinfo2))) { printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64"," "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", " @@ -2736,7 +2734,7 @@ asmlinkage void svm_vmexit_handler(struc (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2, (u64)vmcb->exitintinfo.bytes, - (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2)); + (u64)paging_gva_to_gpa(current, vmcb->exitinfo2)); } else { diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Feb 14 12:02:20 2007 +0000 @@ -23,10 +23,10 @@ #include <xen/mm.h> #include <xen/lib.h> #include <xen/errno.h> -#include <xen/shadow.h> #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> @@ -196,7 +196,7 @@ static int construct_vmcb(struct vcpu *v read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK; - shadow_update_paging_modes(v); + paging_update_paging_modes(v); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP; diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/vlapic.c Wed Feb 14 12:02:20 2007 +0000 @@ -22,7 +22,6 @@ #include <xen/types.h> #include <xen/mm.h> #include <xen/xmalloc.h> -#include <xen/shadow.h> #include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Feb 14 
12:02:20 2007 +0000 @@ -448,7 +448,7 @@ static void construct_vmcs(struct vcpu * vmx_vmcs_exit(v); - shadow_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ } int vmx_create_vmcs(struct vcpu *v) diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Feb 14 12:02:20 2007 +0000 @@ -35,12 +35,13 @@ #include <asm/types.h> #include <asm/msr.h> #include <asm/spinlock.h> +#include <asm/paging.h> +#include <asm/p2m.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vmx/vmx.h> #include <asm/hvm/vmx/vmcs.h> #include <asm/hvm/vmx/cpu.h> -#include <asm/shadow.h> #include <public/sched.h> #include <public/hvm/ioreq.h> #include <asm/hvm/vpic.h> @@ -484,9 +485,6 @@ int vmx_vmcs_restore(struct vcpu *v, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = c->cr3; } @@ -556,7 +554,7 @@ int vmx_vmcs_restore(struct vcpu *v, str vmx_vmcs_exit(v); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1126,7 +1124,7 @@ static int vmx_do_page_fault(unsigned lo } #endif - result = shadow_fault(va, regs); + result = paging_fault(va, regs); TRACE_VMEXIT(2, result); #if 0 @@ -1277,7 +1275,7 @@ static void vmx_do_invlpg(unsigned long * We do the safest things first, then try to update the shadow * copying from guest */ - shadow_invlpg(v, va); + paging_invlpg(v, va); } @@ -1691,9 +1689,6 @@ static int vmx_world_restore(struct vcpu v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = c->cr3; } @@ -1753,7 +1748,7 @@ static int vmx_world_restore(struct vcpu __vmwrite(GUEST_LDTR_BASE, c->ldtr_base); __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1906,14 +1901,11 @@ static int vmx_set_cr0(unsigned long val v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); } @@ -1981,7 +1973,7 @@ static int vmx_set_cr0(unsigned long val vm_entry_value &= ~VM_ENTRY_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); } - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } return 1; @@ -2070,7 +2062,7 @@ static int mov_to_cr(int gp, int cr, str mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) goto bad_cr3; - shadow_update_cr3(v); + paging_update_cr3(v); } else { /* * If different, make a shadow. 
Check if the PDBR is valid @@ -2084,9 +2076,6 @@ static int mov_to_cr(int gp, int cr, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = value; update_cr3(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); @@ -2120,9 +2109,6 @@ static int mov_to_cr(int gp, int cr, str HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); @@ -2148,7 +2134,7 @@ static int mov_to_cr(int gp, int cr, str * all TLB entries except global entries. */ if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); break; case 8: diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm.c Wed Feb 14 12:02:20 2007 +0000 @@ -99,6 +99,7 @@ #include <xen/event.h> #include <xen/iocap.h> #include <xen/guest_access.h> +#include <asm/paging.h> #include <asm/shadow.h> #include <asm/page.h> #include <asm/flushtlb.h> @@ -385,9 +386,9 @@ void update_cr3(struct vcpu *v) { unsigned long cr3_mfn=0; - if ( shadow_mode_enabled(v->domain) ) - { - shadow_update_cr3(v); + if ( paging_mode_enabled(v->domain) ) + { + paging_update_cr3(v); return; } @@ -615,7 +616,7 @@ get_page_from_l1e( * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ okay = (((l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely(shadow_mode_external(d) && (d != current->domain)))) + !(unlikely(paging_mode_external(d) && (d != current->domain)))) ? 
get_page_and_type(page, d, PGT_writable_page) : get_page(page, d)); if ( !okay ) @@ -804,9 +805,9 @@ void put_page_from_l1e(l1_pgentry_t l1e, } /* Remember we didn't take a type-count of foreign writable mappings - * to shadow external domains */ + * to paging-external domains */ if ( (l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely((e != d) && shadow_mode_external(e))) ) + !(unlikely((e != d) && paging_mode_external(e))) ) { put_page_and_type(page); } @@ -1259,20 +1260,13 @@ static inline int update_intpte(intpte_t { int rv = 1; #ifndef PTE_UPDATE_WITH_CMPXCHG - if ( unlikely(shadow_mode_enabled(v->domain)) ) - rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); - else - rv = (!__copy_to_user(p, &new, sizeof(new))); + rv = paging_write_guest_entry(v, p, new, _mfn(mfn)); #else { intpte_t t = old; for ( ; ; ) { - if ( unlikely(shadow_mode_enabled(v->domain)) ) - rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); - else - rv = (!cmpxchg_user(p, t, new)); - + rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); if ( unlikely(rv == 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte @@ -1310,7 +1304,7 @@ static int mod_l1_entry(l1_pgentry_t *pl if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) return 0; - if ( unlikely(shadow_mode_refcounts(d)) ) + if ( unlikely(paging_mode_refcounts(d)) ) return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) @@ -1572,7 +1566,7 @@ void free_page_type(struct page_info *pa */ queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); - if ( unlikely(shadow_mode_enabled(owner)) ) + if ( unlikely(paging_mode_enabled(owner)) ) { /* A page table is dirtied when its type count becomes zero. */ mark_dirty(owner, page_to_mfn(page)); @@ -1771,7 +1765,7 @@ int new_guest_cr3(unsigned long mfn) #ifdef CONFIG_COMPAT if ( IS_COMPAT(d) ) { - okay = shadow_mode_refcounts(d) + okay = paging_mode_refcounts(d) ? 0 /* Old code was broken, but what should it be? */ : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| @@ -1788,7 +1782,7 @@ int new_guest_cr3(unsigned long mfn) return 1; } #endif - okay = shadow_mode_refcounts(d) + okay = paging_mode_refcounts(d) ? 
get_page_from_pagenr(mfn, d) : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); if ( unlikely(!okay) ) @@ -1808,7 +1802,7 @@ int new_guest_cr3(unsigned long mfn) if ( likely(old_base_mfn != 0) ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(old_base_mfn)); else put_page_and_type(mfn_to_page(old_base_mfn)); @@ -1861,7 +1855,7 @@ static int set_foreigndom(domid_t domid) d->domain_id); okay = 0; } - else if ( unlikely(shadow_mode_translate(d)) ) + else if ( unlikely(paging_mode_translate(d)) ) { MEM_LOG("Cannot mix foreign mappings with translated domains"); okay = 0; @@ -2007,7 +2001,7 @@ int do_mmuext_op( if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) break; - if ( shadow_mode_refcounts(FOREIGNDOM) ) + if ( paging_mode_refcounts(FOREIGNDOM) ) break; okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); @@ -2032,7 +2026,7 @@ int do_mmuext_op( break; case MMUEXT_UNPIN_TABLE: - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) break; if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) @@ -2070,7 +2064,7 @@ int do_mmuext_op( } if (likely(mfn != 0)) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) okay = get_page_from_pagenr(mfn, d); else okay = get_page_and_type_from_pagenr( @@ -2087,7 +2081,7 @@ int do_mmuext_op( v->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(old_mfn)); else put_page_and_type(mfn_to_page(old_mfn)); @@ -2101,8 +2095,8 @@ int do_mmuext_op( break; case MMUEXT_INVLPG_LOCAL: - if ( !shadow_mode_enabled(d) - || shadow_invlpg(v, op.arg1.linear_addr) != 0 ) + if ( !paging_mode_enabled(d) + || paging_invlpg(v, op.arg1.linear_addr) != 0 ) local_flush_tlb_one(op.arg1.linear_addr); break; @@ -2149,7 +2143,7 @@ int do_mmuext_op( unsigned long ptr = op.arg1.linear_addr; unsigned long ents = op.arg2.nr_ents; - if ( shadow_mode_external(d) ) + if ( paging_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " "domain %u", d->domain_id); @@ -2298,9 +2292,9 @@ int do_mmu_update( case PGT_l3_page_table: case PGT_l4_page_table: { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) { - MEM_LOG("mmu update on shadow-refcounted domain!"); + MEM_LOG("mmu update on auto-refcounted domain!"); break; } @@ -2351,13 +2345,7 @@ int do_mmu_update( if ( unlikely(!get_page_type(page, PGT_writable_page)) ) break; - if ( unlikely(shadow_mode_enabled(d)) ) - okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); - else - { - *(intpte_t *)va = req.val; - okay = 1; - } + okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); put_page_type(page); } @@ -2380,9 +2368,9 @@ int do_mmu_update( break; } - if ( unlikely(shadow_mode_translate(FOREIGNDOM)) ) + if ( unlikely(paging_mode_translate(FOREIGNDOM)) ) { - MEM_LOG("Mach-phys update on shadow-translate guest"); + MEM_LOG("Mach-phys update on auto-translate guest"); break; } @@ -2472,7 +2460,7 @@ static int create_grant_pte_mapping( goto failed; } - if ( !shadow_mode_refcounts(d) ) + if ( !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); put_page_type(page); @@ -2578,7 +2566,7 @@ static int create_grant_va_mapping( if ( !okay ) return GNTST_general_error; - if ( !shadow_mode_refcounts(d) ) + if ( !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); return GNTST_okay; @@ -2704,7 +2692,7 @@ int do_update_va_mapping(unsigned long v perfc_incrc(calls_to_update_va); - if ( 
unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) + if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) ) return -EINVAL; LOCK_BIGLOCK(d); @@ -2744,8 +2732,8 @@ int do_update_va_mapping(unsigned long v switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: - if ( !shadow_mode_enabled(d) - || (shadow_invlpg(current, va) != 0) ) + if ( !paging_mode_enabled(d) + || (paging_invlpg(current, va) != 0) ) local_flush_tlb_one(va); break; case UVMF_ALL: @@ -2980,7 +2968,7 @@ long arch_memory_op(int op, XEN_GUEST_HA break; } - if ( !shadow_mode_translate(d) || (mfn == 0) ) + if ( !paging_mode_translate(d) || (mfn == 0) ) { put_domain(d); return -EINVAL; @@ -3235,17 +3223,12 @@ static int ptwr_emulated_update( if ( do_cmpxchg ) { int okay; + intpte_t t = old; ol1e = l1e_from_intpte(old); - if ( shadow_mode_enabled(d) ) - { - intpte_t t = old; - okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, - &t, val, _mfn(mfn)); - okay = (okay && t == old); - } - else - okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); + okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e, + &t, val, _mfn(mfn)); + okay = (okay && t == old); if ( !okay ) { diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/Makefile --- a/xen/arch/x86/mm/Makefile Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/Makefile Wed Feb 14 12:02:20 2007 +0000 @@ -1,1 +1,4 @@ subdir-y += shadow subdir-y += shadow + +obj-y += paging.o +obj-y += p2m.o diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/p2m.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/mm/p2m.c Wed Feb 14 12:02:20 2007 +0000 @@ -0,0 +1,699 @@ +/****************************************************************************** + * arch/x86/mm/p2m.c + * + * physical-to-machine mappings for automatically-translated domains. + * + * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices. + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/domain.h> +#include <asm/page.h> +#include <asm/paging.h> +#include <asm/p2m.h> + +/* Debugging and auditing of the P2M code? */ +#define P2M_AUDIT 0 +#define P2M_DEBUGGING 1 + +/* The P2M lock. This protects all updates to the p2m table. 
+ * Updates are expected to be safe against concurrent reads, + * which do *not* require the lock */ + +#define p2m_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.p2m.lock); \ + (_d)->arch.p2m.locker = -1; \ + (_d)->arch.p2m.locker_function = "nobody"; \ + } while (0) + +#define p2m_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.p2m.locker == current->processor) )\ + { \ + printk("Error: p2m lock held by %s\n", \ + (_d)->arch.p2m.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.p2m.lock); \ + ASSERT((_d)->arch.p2m.locker == -1); \ + (_d)->arch.p2m.locker = current->processor; \ + (_d)->arch.p2m.locker_function = __func__; \ + } while (0) + +#define p2m_unlock(_d) \ + do { \ + ASSERT((_d)->arch.p2m.locker == current->processor); \ + (_d)->arch.p2m.locker = -1; \ + (_d)->arch.p2m.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.p2m.lock); \ + } while (0) + + + +/* Printouts */ +#define P2M_PRINTK(_f, _a...) \ + debugtrace_printk("p2m: %s(): " _f, __func__, ##_a) +#define P2M_ERROR(_f, _a...) \ + printk("pg error: %s(): " _f, __func__, ##_a) +#if P2M_DEBUGGING +#define P2M_DEBUG(_f, _a...) \ + debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a) +#else +#define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0) +#endif + + +/* Override macros from asm/page.h to make them work with mfn_t */ +#undef mfn_to_page +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) +#undef mfn_valid +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#undef page_to_mfn +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) + + + +// Find the next level's P2M entry, checking for out-of-range gfn's... +// Returns NULL on error. +// +static l1_pgentry_t * +p2m_find_entry(void *table, unsigned long *gfn_remainder, + unsigned long gfn, u32 shift, u32 max) +{ + u32 index; + + index = *gfn_remainder >> shift; + if ( index >= max ) + { + P2M_DEBUG("gfn=0x%lx out of range " + "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", + gfn, *gfn_remainder, shift, index, max); + return NULL; + } + *gfn_remainder &= (1 << shift) - 1; + return (l1_pgentry_t *)table + index; +} + +// Walk one level of the P2M table, allocating a new table if required. +// Returns 0 on error. 
+// +static int +p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, + unsigned long *gfn_remainder, unsigned long gfn, u32 shift, + u32 max, unsigned long type) +{ + l1_pgentry_t *p2m_entry; + l1_pgentry_t new_entry; + void *next; + ASSERT(d->arch.p2m.alloc_page); + + if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, + shift, max)) ) + return 0; + + if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) + { + struct page_info *pg = d->arch.p2m.alloc_page(d); + if ( pg == NULL ) + return 0; + list_add_tail(&pg->list, &d->arch.p2m.pages); + pg->u.inuse.type_info = type | 1 | PGT_validated; + pg->count_info = 1; + + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), + __PAGE_HYPERVISOR|_PAGE_USER); + + switch ( type ) { + case PGT_l3_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 4); + break; + case PGT_l2_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 3); + break; + case PGT_l1_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 2); + break; + default: + BUG(); + break; + } + } + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); + next = map_domain_page(mfn_x(*table_mfn)); + unmap_domain_page(*table); + *table = next; + + return 1; +} + +// Returns 0 on error (out of memory) +static int +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +{ + // XXX -- this might be able to be faster iff current->domain == d + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); + void *table =map_domain_page(mfn_x(table_mfn)); + unsigned long gfn_remainder = gfn; + l1_pgentry_t *p2m_entry; + l1_pgentry_t entry_content; + int rv=0; + +#if CONFIG_PAGING_LEVELS >= 4 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L4_PAGETABLE_SHIFT - PAGE_SHIFT, + L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) + goto out; +#endif +#if CONFIG_PAGING_LEVELS >= 3 + // When using PAE Xen, we only allow 33 bits of pseudo-physical + // address in translated guests (i.e. 8 GBytes). This restriction + // comes from wanting to map the P2M table into the 16MB RO_MPT hole + // in Xen's address space for translated PV guests. + // + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L3_PAGETABLE_SHIFT - PAGE_SHIFT, + (CONFIG_PAGING_LEVELS == 3 + ? 8 + : L3_PAGETABLE_ENTRIES), + PGT_l2_page_table) ) + goto out; +#endif + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) + goto out; + + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + 0, L1_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + /* Track the highest gfn for which we have ever had a valid mapping */ + if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) + d->arch.p2m.max_mapped_pfn = gfn; + + if ( mfn_valid(mfn) ) + entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); + else + entry_content = l1e_empty(); + + /* level 1 entry */ + paging_write_p2m_entry(d, gfn, p2m_entry, entry_content, 1); + + /* Success */ + rv = 1; + + out: + unmap_domain_page(table); + return rv; +} + + +/* Init the datastructures for later use by the p2m code */ +void p2m_init(struct domain *d) +{ + p2m_lock_init(d); + INIT_LIST_HEAD(&d->arch.p2m.pages); +} + + +// Allocate a new p2m table for a domain. +// +// The structure of the p2m table is that of a pagetable for xen (i.e. it is +// controlled by CONFIG_PAGING_LEVELS). +// +// The alloc_page and free_page functions will be used to get memory to +// build the p2m, and to release it again at the end of day. 
+// +// Returns 0 for success or -errno. +// +int p2m_alloc_table(struct domain *d, + struct page_info * (*alloc_page)(struct domain *d), + void (*free_page)(struct domain *d, struct page_info *pg)) + +{ + mfn_t mfn; + struct list_head *entry; + struct page_info *page, *p2m_top; + unsigned int page_count = 0; + unsigned long gfn; + + p2m_lock(d); + + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) + { + P2M_ERROR("p2m already allocated for this domain\n"); + p2m_unlock(d); + return -EINVAL; + } + + P2M_PRINTK("allocating p2m table\n"); + + d->arch.p2m.alloc_page = alloc_page; + d->arch.p2m.free_page = free_page; + + p2m_top = d->arch.p2m.alloc_page(d); + if ( p2m_top == NULL ) + { + p2m_unlock(d); + return -ENOMEM; + } +list_add_tail(&p2m_top->list, &d->arch.p2m.pages); + + p2m_top->count_info = 1; + p2m_top->u.inuse.type_info = +#if CONFIG_PAGING_LEVELS == 4 + PGT_l4_page_table +#elif CONFIG_PAGING_LEVELS == 3 + PGT_l3_page_table +#elif CONFIG_PAGING_LEVELS == 2 + PGT_l2_page_table +#endif + | 1 | PGT_validated; + + d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top)); + + P2M_PRINTK("populating p2m table\n"); + + /* Initialise physmap tables for slot zero. Other code assumes this. */ + gfn = 0; +mfn = _mfn(INVALID_MFN); + if ( !set_p2m_entry(d, gfn, mfn) ) + goto error; + + for ( entry = d->page_list.next; + entry != &d->page_list; + entry = entry->next ) + { + page = list_entry(entry, struct page_info, list); + mfn = page_to_mfn(page); + gfn = get_gpfn_from_mfn(mfn_x(mfn)); + page_count++; + if ( +#ifdef __x86_64__ + (gfn != 0x5555555555555555L) +#else + (gfn != 0x55555555L) +#endif + && gfn != INVALID_M2P_ENTRY + && !set_p2m_entry(d, gfn, mfn) ) + goto error; + } + + P2M_PRINTK("p2m table initialised (%u pages)\n", page_count); + p2m_unlock(d); + return 0; + + error: + P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" + PRI_mfn "\n", gfn, mfn_x(mfn)); + p2m_unlock(d); + return -ENOMEM; +} + +void p2m_teardown(struct domain *d) +/* Return all the p2m pages to Xen. + * We know we don't have any extra mappings to these pages */ +{ + struct list_head *entry, *n; + struct page_info *pg; + + p2m_lock(d); + d->arch.phys_table = pagetable_null(); + + list_for_each_safe(entry, n, &d->arch.p2m.pages) + { + pg = list_entry(entry, struct page_info, list); + list_del(entry); + d->arch.p2m.free_page(d, pg); + } + p2m_unlock(d); +} + +mfn_t +gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) +/* Read another domain's p2m entries */ +{ + mfn_t mfn; + paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + + ASSERT(paging_mode_translate(d)); + mfn = pagetable_get_mfn(d->arch.phys_table); + + + if ( gpfn > d->arch.p2m.max_mapped_pfn ) + /* This pfn is higher than the highest the p2m map currently holds */ + return _mfn(INVALID_MFN); + +#if CONFIG_PAGING_LEVELS >= 4 + { + l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); + l4e += l4_table_offset(addr); + if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l4e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l4e_get_pfn(*l4e)); + unmap_domain_page(l4e); + } +#endif +#if CONFIG_PAGING_LEVELS >= 3 + { + l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); +#if CONFIG_PAGING_LEVELS == 3 + /* On PAE hosts the p2m has eight l3 entries, not four (see + * shadow_set_p2m_entry()) so we can't use l3_table_offset. + * Instead, just count the number of l3es from zero. It's safe + * to do this because we already checked that the gfn is within + * the bounds of the p2m. 
*/ + l3e += (addr >> L3_PAGETABLE_SHIFT); +#else + l3e += l3_table_offset(addr); +#endif + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l3e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l3e_get_pfn(*l3e)); + unmap_domain_page(l3e); + } +#endif + + l2e = map_domain_page(mfn_x(mfn)); + l2e += l2_table_offset(addr); + if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l2e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l2e_get_pfn(*l2e)); + unmap_domain_page(l2e); + + l1e = map_domain_page(mfn_x(mfn)); + l1e += l1_table_offset(addr); + if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l1e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l1e_get_pfn(*l1e)); + unmap_domain_page(l1e); + + return mfn; +} + +#if P2M_AUDIT +static void audit_p2m(struct domain *d) +{ + struct list_head *entry; + struct page_info *page; + struct domain *od; + unsigned long mfn, gfn, m2pfn, lp2mfn = 0; + mfn_t p2mfn; + unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; + int test_linear; + + if ( !paging_mode_translate(d) ) + return; + + //P2M_PRINTK("p2m audit starts\n"); + + test_linear = ( (d == current->domain) + && !pagetable_is_null(current->arch.monitor_table) ); + if ( test_linear ) + local_flush_tlb(); + + /* Audit part one: walk the domain's page allocation list, checking + * the m2p entries. */ + for ( entry = d->page_list.next; + entry != &d->page_list; + entry = entry->next ) + { + page = list_entry(entry, struct page_info, list); + mfn = mfn_x(page_to_mfn(page)); + + // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn); + + od = page_get_owner(page); + + if ( od != d ) + { + P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", + mfn, od, (od?od->domain_id:-1), d, d->domain_id); + continue; + } + + gfn = get_gpfn_from_mfn(mfn); + if ( gfn == INVALID_M2P_ENTRY ) + { + orphans_i++; + //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", + // mfn); + continue; + } + + if ( gfn == 0x55555555 ) + { + orphans_d++; + //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", + // mfn); + continue; + } + + p2mfn = gfn_to_mfn_foreign(d, gfn); + if ( mfn_x(p2mfn) != mfn ) + { + mpbad++; + P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" + " (-> gfn %#lx)\n", + mfn, gfn, mfn_x(p2mfn), + (mfn_valid(p2mfn) + ? get_gpfn_from_mfn(mfn_x(p2mfn)) + : -1u)); + /* This m2p entry is stale: the domain has another frame in + * this physical slot. No great disaster, but for neatness, + * blow away the m2p entry. */ + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + } + + if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) ) + { + lp2mfn = mfn_x(gfn_to_mfn_current(gfn)); + if ( lp2mfn != mfn_x(p2mfn) ) + { + P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " + "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn)); + } + } + + // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", + // mfn, gfn, p2mfn, lp2mfn); + } + + /* Audit part two: walk the domain's p2m table, checking the entries. 
*/ + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) + { + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + int i1, i2; + +#if CONFIG_PAGING_LEVELS == 4 + l4_pgentry_t *l4e; + l3_pgentry_t *l3e; + int i3, i4; + l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#elif CONFIG_PAGING_LEVELS == 3 + l3_pgentry_t *l3e; + int i3; + l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#else /* CONFIG_PAGING_LEVELS == 2 */ + l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#endif + + gfn = 0; +#if CONFIG_PAGING_LEVELS >= 3 +#if CONFIG_PAGING_LEVELS >= 4 + for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) + { + if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); +#endif /* now at levels 3 or 4... */ + for ( i3 = 0; + i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); + i3++ ) + { + if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); +#endif /* all levels... */ + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) + { + if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); + + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) + { + if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) + continue; + mfn = l1e_get_pfn(l1e[i1]); + ASSERT(mfn_valid(_mfn(mfn))); + m2pfn = get_gpfn_from_mfn(mfn); + if ( m2pfn != gfn ) + { + pmbad++; + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" + " -> gfn %#lx\n", gfn, mfn, m2pfn); + BUG(); + } + } + unmap_domain_page(l1e); + } +#if CONFIG_PAGING_LEVELS >= 3 + unmap_domain_page(l2e); + } +#if CONFIG_PAGING_LEVELS >= 4 + unmap_domain_page(l3e); + } +#endif +#endif + +#if CONFIG_PAGING_LEVELS == 4 + unmap_domain_page(l4e); +#elif CONFIG_PAGING_LEVELS == 3 + unmap_domain_page(l3e); +#else /* CONFIG_PAGING_LEVELS == 2 */ + unmap_domain_page(l2e); +#endif + + } + + //P2M_PRINTK("p2m audit complete\n"); + //if ( orphans_i | orphans_d | mpbad | pmbad ) + // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", + // orphans_i + orphans_d, orphans_i, orphans_d, + if ( mpbad | pmbad ) + P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", + pmbad, mpbad); +} +#else +#define audit_p2m(_d) do { (void)(_d); } while(0) +#endif /* P2M_AUDIT */ + + + +static void +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) +{ + if ( !paging_mode_translate(d) ) + return; + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); + + ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn); + //ASSERT(mfn_to_gfn(d, mfn) == gfn); + + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); +} + +void +guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn) +{ + p2m_lock(d); + audit_p2m(d); + p2m_remove_page(d, gfn, mfn); + audit_p2m(d); + p2m_unlock(d); +} + +void +guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn) +{ + unsigned long ogfn; + mfn_t omfn; + + if ( !paging_mode_translate(d) ) + return; + + p2m_lock(d); + audit_p2m(d); + + P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); + + omfn = gfn_to_mfn(d, gfn); + if ( mfn_valid(omfn) ) + { + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + } + + ogfn = 
mfn_to_gfn(d, _mfn(mfn)); + if ( +#ifdef __x86_64__ + (ogfn != 0x5555555555555555L) +#else + (ogfn != 0x55555555L) +#endif + && (ogfn != INVALID_M2P_ENTRY) + && (ogfn != gfn) ) + { + /* This machine frame is already mapped at another physical address */ + P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", + mfn, ogfn, gfn); + if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) ) + { + P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", + ogfn , mfn_x(omfn)); + if ( mfn_x(omfn) == mfn ) + p2m_remove_page(d, ogfn, mfn); + } + } + + set_p2m_entry(d, gfn, _mfn(mfn)); + set_gpfn_from_mfn(mfn, gfn); + + audit_p2m(d); + p2m_unlock(d); +} + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/paging.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/mm/paging.c Wed Feb 14 12:02:20 2007 +0000 @@ -0,0 +1,143 @@ +/****************************************************************************** + * arch/x86/paging.c + * + * x86 specific paging support + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) + * Copyright (c) 2007 XenSource Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/init.h> +#include <asm/paging.h> +#include <asm/shadow.h> +#include <asm/p2m.h> + +/* Xen command-line option to enable hardware-assisted paging */ +int opt_hap_enabled = 0; +boolean_param("hap", opt_hap_enabled); + +/* Printouts */ +#define PAGING_PRINTK(_f, _a...) \ + debugtrace_printk("pg: %s(): " _f, __func__, ##_a) +#define PAGING_ERROR(_f, _a...) \ + printk("pg error: %s(): " _f, __func__, ##_a) +#define PAGING_DEBUG(flag, _f, _a...) \ + do { \ + if (PAGING_DEBUG_ ## flag) \ + debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ + } while (0) + + +/* Domain paging struct initialization. */ +void paging_domain_init(struct domain *d) +{ + p2m_init(d); + shadow_domain_init(d); +} + +/* vcpu paging struct initialization goes here */ +void paging_vcpu_init(struct vcpu *v) +{ + shadow_vcpu_init(v); +} + + +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl) +{ + /* Here, dispatch domctl to the appropriate paging code */ + return shadow_domctl(d, sc, u_domctl); +} + +/* Call when destroying a domain */ +void paging_teardown(struct domain *d) +{ + shadow_teardown(d); + /* Call other modes' teardown code here */ +} + +/* Call once all of the references to the domain have gone away */ +void paging_final_teardown(struct domain *d) +{ + shadow_teardown(d); + /* Call other modes' final teardown code here */ +} + +/* Enable an arbitrary paging-assistance mode. Call once at domain + * creation. 
*/ +int paging_enable(struct domain *d, u32 mode) +{ + if ( mode & PG_SH_enable ) + return shadow_enable(d, mode); + else + /* No other modes supported yet */ + return -EINVAL; +} + +/* Print paging-assistance info to the console */ +void paging_dump_domain_info(struct domain *d) +{ + if ( paging_mode_enabled(d) ) + { + printk(" paging assistance: "); + if ( paging_mode_shadow(d) ) + printk("shadow "); + if ( paging_mode_hap(d) ) + printk("hap "); + if ( paging_mode_refcounts(d) ) + printk("refcounts "); + if ( paging_mode_log_dirty(d) ) + printk("log_dirty "); + if ( paging_mode_translate(d) ) + printk("translate "); + if ( paging_mode_external(d) ) + printk("external "); + printk("\n"); + } +} + +void paging_dump_vcpu_info(struct vcpu *v) +{ + if ( paging_mode_enabled(v->domain) ) + { + printk(" paging assistance: "); + if ( paging_mode_shadow(v->domain) ) + { + if ( v->arch.paging.mode ) + printk("shadowed %u-on-%u, %stranslated\n", + v->arch.paging.mode->guest_levels, + v->arch.paging.mode->shadow.shadow_levels, + paging_vcpu_mode_translate(v) ? "" : "not "); + else + printk("not shadowed\n"); + } + else if ( paging_mode_hap(v->domain) && v->arch.paging.mode ) + printk("hap, %u levels\n", + v->arch.paging.mode->guest_levels); + else + printk("none\n"); + } +} + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/shadow/common.c Wed Feb 14 12:02:20 2007 +0000 @@ -47,12 +47,27 @@ void shadow_domain_init(struct domain *d int i; shadow_lock_init(d); for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); -} - + INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); + INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); + INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); +} + +/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important + * job is to initialize the update_paging_modes() function pointer, which is + * used to initialized the rest of resources. Therefore, it really does not + * matter to have v->arch.paging.mode pointing to any mode, as long as it can + * be compiled. 
+ */ +void shadow_vcpu_init(struct vcpu *v) +{ +#if CONFIG_PAGING_LEVELS == 4 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); +#elif CONFIG_PAGING_LEVELS == 3 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); +#elif CONFIG_PAGING_LEVELS == 2 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); +#endif +} #if SHADOW_AUDIT int shadow_audit_enable = 0; @@ -265,7 +280,7 @@ hvm_emulate_write(enum x86_segment seg, if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_write( + return v->arch.paging.mode->shadow.x86_emulate_write( v, addr, &val, bytes, sh_ctxt); } @@ -288,7 +303,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_cmpxchg( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( v, addr, old, new, bytes, sh_ctxt); } @@ -312,7 +327,7 @@ hvm_emulate_cmpxchg8b(enum x86_segment s if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt); } @@ -353,7 +368,7 @@ pv_emulate_write(enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_write( + return v->arch.paging.mode->shadow.x86_emulate_write( v, offset, &val, bytes, sh_ctxt); } @@ -368,7 +383,7 @@ pv_emulate_cmpxchg(enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_cmpxchg( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( v, offset, old, new, bytes, sh_ctxt); } @@ -384,7 +399,7 @@ pv_emulate_cmpxchg8b(enum x86_segment se struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt); } @@ -721,7 +736,7 @@ static inline int chunk_is_available(str int i; for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) - if ( !list_empty(&d->arch.shadow.freelists[i]) ) + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) return 1; return 0; } @@ -783,7 +798,7 @@ void shadow_prealloc(struct domain *d, u /* Stage one: walk the list of pinned pages, unpinning them */ perfc_incrc(shadow_prealloc_1); - list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows) + list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); smfn = shadow_page_to_mfn(sp); @@ -823,9 +838,9 @@ void shadow_prealloc(struct domain *d, u SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n" " shadow pages total = %u, free = %u, p2m=%u\n", 1 << order, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); BUG(); } @@ -840,7 +855,7 @@ static void shadow_blow_tables(struct do int i; /* Pass one: unpin all pinned pages */ - list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows) + list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); smfn = shadow_page_to_mfn(sp); @@ -905,9 +920,9 @@ mfn_t shadow_alloc(struct domain *d, /* Find smallest order which can satisfy the request. 
*/ for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) - if ( !list_empty(&d->arch.shadow.freelists[i]) ) + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) { - sp = list_entry(d->arch.shadow.freelists[i].next, + sp = list_entry(d->arch.paging.shadow.freelists[i].next, struct shadow_page_info, list); list_del(&sp->list); @@ -916,10 +931,10 @@ mfn_t shadow_alloc(struct domain *d, { i--; sp->order = i; - list_add_tail(&sp->list, &d->arch.shadow.freelists[i]); + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); sp += 1 << i; } - d->arch.shadow.free_pages -= 1 << order; + d->arch.paging.shadow.free_pages -= 1 << order; /* Init page info fields and clear the pages */ for ( i = 0; i < 1<<order ; i++ ) @@ -976,7 +991,7 @@ void shadow_free(struct domain *d, mfn_t ASSERT(shadow_type != SH_type_p2m_table); order = shadow_order(shadow_type); - d->arch.shadow.free_pages += 1 << order; + d->arch.paging.shadow.free_pages += 1 << order; for ( i = 0; i < 1<<order; i++ ) { @@ -985,8 +1000,8 @@ void shadow_free(struct domain *d, mfn_t for_each_vcpu(d, v) { /* No longer safe to look for a writeable mapping in this shadow */ - if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) - v->arch.shadow.last_writeable_pte_smfn = 0; + if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) + v->arch.paging.shadow.last_writeable_pte_smfn = 0; } #endif /* Strip out the type: this is now a free shadow page */ @@ -1019,7 +1034,7 @@ void shadow_free(struct domain *d, mfn_t } sp->order = order; - list_add_tail(&sp->list, &d->arch.shadow.freelists[order]); + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); } /* Divert some memory from the pool to be used by the p2m mapping. @@ -1033,19 +1048,19 @@ void shadow_free(struct domain *d, mfn_t * returns non-zero on success. */ static int -shadow_alloc_p2m_pages(struct domain *d) +sh_alloc_p2m_pages(struct domain *d) { struct page_info *pg; u32 i; ASSERT(shadow_locked_by_me(d)); - if ( d->arch.shadow.total_pages + if ( d->arch.paging.shadow.total_pages < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) ) return 0; /* Not enough shadow memory: need to increase it first */ pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); - d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); - d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); + d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); + d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++) { /* Unlike shadow pages, mark p2m pages as owned by the domain. @@ -1055,34 +1070,59 @@ shadow_alloc_p2m_pages(struct domain *d) * believed to be a concern. */ page_set_owner(&pg[i], d); - list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist); + pg->count_info = 1; + list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); } return 1; } // Returns 0 if no memory is available... 
-mfn_t +struct page_info * shadow_alloc_p2m_page(struct domain *d) { struct list_head *entry; struct page_info *pg; mfn_t mfn; void *p; - - if ( list_empty(&d->arch.shadow.p2m_freelist) && - !shadow_alloc_p2m_pages(d) ) - return _mfn(0); - entry = d->arch.shadow.p2m_freelist.next; + + shadow_lock(d); + + if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && + !sh_alloc_p2m_pages(d) ) + { + shadow_unlock(d); + return NULL; + } + entry = d->arch.paging.shadow.p2m_freelist.next; list_del(entry); - list_add_tail(entry, &d->arch.shadow.p2m_inuse); + + shadow_unlock(d); + pg = list_entry(entry, struct page_info, list); - pg->count_info = 1; mfn = page_to_mfn(pg); p = sh_map_domain_page(mfn); clear_page(p); sh_unmap_domain_page(p); - return mfn; + return pg; +} + +void +shadow_free_p2m_page(struct domain *d, struct page_info *pg) +{ + ASSERT(page_get_owner(pg) == d); + /* Should have just the one ref we gave it in alloc_p2m_page() */ + if ( (pg->count_info & PGC_count_mask) != 1 ) + { + SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", + pg->count_info, pg->u.inuse.type_info); + } + /* Free should not decrement domain's total allocation, since + * these pages were allocated without an owner. */ + page_set_owner(pg, NULL); + free_domheap_pages(pg, 0); + d->arch.paging.shadow.p2m_pages--; + perfc_decr(shadow_alloc_count); } #if CONFIG_PAGING_LEVELS == 3 @@ -1130,344 +1170,6 @@ static void p2m_install_entry_in_monitor } #endif -// Find the next level's P2M entry, checking for out-of-range gfn's... -// Returns NULL on error. -// -static l1_pgentry_t * -p2m_find_entry(void *table, unsigned long *gfn_remainder, - unsigned long gfn, u32 shift, u32 max) -{ - u32 index; - - index = *gfn_remainder >> shift; - if ( index >= max ) - { - SHADOW_DEBUG(P2M, "gfn=0x%lx out of range " - "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", - gfn, *gfn_remainder, shift, index, max); - return NULL; - } - *gfn_remainder &= (1 << shift) - 1; - return (l1_pgentry_t *)table + index; -} - -// Walk one level of the P2M table, allocating a new table if required. -// Returns 0 on error. 
-// -static int -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, - unsigned long *gfn_remainder, unsigned long gfn, u32 shift, - u32 max, unsigned long type) -{ - l1_pgentry_t *p2m_entry; - void *next; - - if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, - shift, max)) ) - return 0; - - if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) - { - mfn_t mfn = shadow_alloc_p2m_page(d); - if ( mfn_x(mfn) == 0 ) - return 0; - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated; - mfn_to_page(mfn)->count_info = 1; -#if CONFIG_PAGING_LEVELS == 3 - if (type == PGT_l2_page_table) - { - struct vcpu *v; - /* We have written to the p2m l3: need to sync the per-vcpu - * copies of it in the monitor tables */ - p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); - /* Also, any vcpus running on shadows of the p2m need to - * reload their CR3s so the change propagates to the shadow */ - ASSERT(shadow_locked_by_me(d)); - for_each_vcpu(d, v) - { - if ( pagetable_get_pfn(v->arch.guest_table) - == pagetable_get_pfn(d->arch.phys_table) - && v->arch.shadow.mode != NULL ) - v->arch.shadow.mode->update_cr3(v, 0); - } - } -#endif - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); - } - *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); - next = sh_map_domain_page(*table_mfn); - sh_unmap_domain_page(*table); - *table = next; - - return 1; -} - -// Returns 0 on error (out of memory) -int -shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) -{ - // XXX -- this might be able to be faster iff current->domain == d - mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); - void *table = sh_map_domain_page(table_mfn); - unsigned long gfn_remainder = gfn; - l1_pgentry_t *p2m_entry; - int rv=0; - -#if CONFIG_PAGING_LEVELS >= 4 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L4_PAGETABLE_SHIFT - PAGE_SHIFT, - L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) - goto out; -#endif -#if CONFIG_PAGING_LEVELS >= 3 - // When using PAE Xen, we only allow 33 bits of pseudo-physical - // address in translated guests (i.e. 8 GBytes). This restriction - // comes from wanting to map the P2M table into the 16MB RO_MPT hole - // in Xen's address space for translated PV guests. - // - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L3_PAGETABLE_SHIFT - PAGE_SHIFT, - (CONFIG_PAGING_LEVELS == 3 - ? 8 - : L3_PAGETABLE_ENTRIES), - PGT_l2_page_table) ) - goto out; -#endif - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L2_PAGETABLE_SHIFT - PAGE_SHIFT, - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) - goto out; - - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, - 0, L1_PAGETABLE_ENTRIES); - ASSERT(p2m_entry); - if ( mfn_valid(mfn) ) - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - else - *p2m_entry = l1e_empty(); - - /* Track the highest gfn for which we have ever had a valid mapping */ - if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) ) - d->arch.max_mapped_pfn = gfn; - - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, - p2m_entry, sizeof(*p2m_entry)); - - /* Success */ - rv = 1; - - out: - sh_unmap_domain_page(table); - return rv; -} - -// Allocate a new p2m table for a domain. 
-// -// The structure of the p2m table is that of a pagetable for xen (i.e. it is -// controlled by CONFIG_PAGING_LEVELS). -// -// Returns 0 if p2m table could not be initialized -// -static int -shadow_alloc_p2m_table(struct domain *d) -{ - mfn_t p2m_top, mfn; - struct list_head *entry; - struct page_info *page; - unsigned int page_count = 0; - unsigned long gfn; - - SHADOW_PRINTK("allocating p2m table\n"); - ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0); - - p2m_top = shadow_alloc_p2m_page(d); - mfn_to_page(p2m_top)->count_info = 1; - mfn_to_page(p2m_top)->u.inuse.type_info = -#if CONFIG_PAGING_LEVELS == 4 - PGT_l4_page_table -#elif CONFIG_PAGING_LEVELS == 3 - PGT_l3_page_table -#elif CONFIG_PAGING_LEVELS == 2 - PGT_l2_page_table -#endif - | 1 | PGT_validated; - - if ( mfn_x(p2m_top) == 0 ) - return 0; - - d->arch.phys_table = pagetable_from_mfn(p2m_top); - - SHADOW_PRINTK("populating p2m table\n"); - - /* Initialise physmap tables for slot zero. Other code assumes this. */ - gfn = 0; - mfn = _mfn(INVALID_MFN); - if ( !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - - /* Build a p2m map that matches the m2p entries for this domain's - * allocated pages. Skip any pages that have an explicitly invalid - * or obviously bogus m2p entry. */ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = page_to_mfn(page); - gfn = get_gpfn_from_mfn(mfn_x(mfn)); - page_count++; - if ( -#ifdef __x86_64__ - (gfn != 0x5555555555555555L) -#else - (gfn != 0x55555555L) -#endif - && gfn != INVALID_M2P_ENTRY - && (gfn < - (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) - && !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - } - - SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count); - return 1; - - error: - SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" - SH_PRI_mfn "\n", gfn, mfn_x(mfn)); - return 0; -} - -mfn_t -sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) -/* Read another domain's p2m entries */ -{ - mfn_t mfn; - paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - - ASSERT(shadow_mode_translate(d)); - mfn = pagetable_get_mfn(d->arch.phys_table); - - - if ( gpfn > d->arch.max_mapped_pfn ) - /* This pfn is higher than the highest the p2m map currently holds */ - return _mfn(INVALID_MFN); - -#if CONFIG_PAGING_LEVELS >= 4 - { - l4_pgentry_t *l4e = sh_map_domain_page(mfn); - l4e += l4_table_offset(addr); - if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l4e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l4e_get_pfn(*l4e)); - sh_unmap_domain_page(l4e); - } -#endif -#if CONFIG_PAGING_LEVELS >= 3 - { - l3_pgentry_t *l3e = sh_map_domain_page(mfn); -#if CONFIG_PAGING_LEVELS == 3 - /* On PAE hosts the p2m has eight l3 entries, not four (see - * shadow_set_p2m_entry()) so we can't use l3_table_offset. - * Instead, just count the number of l3es from zero. It's safe - * to do this because we already checked that the gfn is within - * the bounds of the p2m. 
*/ - l3e += (addr >> L3_PAGETABLE_SHIFT); -#else - l3e += l3_table_offset(addr); -#endif - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l3e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l3e_get_pfn(*l3e)); - sh_unmap_domain_page(l3e); - } -#endif - - l2e = sh_map_domain_page(mfn); - l2e += l2_table_offset(addr); - if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l2e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l2e_get_pfn(*l2e)); - sh_unmap_domain_page(l2e); - - l1e = sh_map_domain_page(mfn); - l1e += l1_table_offset(addr); - if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l1e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l1e_get_pfn(*l1e)); - sh_unmap_domain_page(l1e); - - return mfn; -} - -unsigned long -shadow_gfn_to_mfn_foreign(unsigned long gpfn) -{ - return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn)); -} - - -static void shadow_p2m_teardown(struct domain *d) -/* Return all the p2m pages to Xen. - * We know we don't have any extra mappings to these pages */ -{ - struct list_head *entry, *n; - struct page_info *pg; - - d->arch.phys_table = pagetable_null(); - - list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse) - { - pg = list_entry(entry, struct page_info, list); - list_del(entry); - /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) - { - SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", - pg->count_info, pg->u.inuse.type_info); - } - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation, since - * these pages were allocated without an owner. */ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.shadow.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist) - { - list_del(entry); - pg = list_entry(entry, struct page_info, list); - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation. */ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.shadow.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - ASSERT(d->arch.shadow.p2m_pages == 0); -} - /* Set the pool of shadow pages to the required number of pages. * Input will be rounded up to at least shadow_min_acceptable_pages(), * plus space for the p2m table. 
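/* Illustrative sketch, not part of the patch: the hunks just below keep
 * sh_set_allocation()'s rounding of the request up to a whole allocation
 * chunk of (1 << SHADOW_MAX_ORDER) pages, and shadow_get_allocation()'s
 * rounding of the pool size up to whole megabytes.  The same arithmetic,
 * isolated into standalone C (PAGE_SHIFT of 12 and SHADOW_MAX_ORDER of 2
 * are assumptions for the example; the real values come from the Xen
 * headers): */
#include <stdio.h>

#define EX_PAGE_SHIFT       12   /* 4kB pages */
#define EX_SHADOW_MAX_ORDER 2    /* 4-page allocation chunks */

static unsigned int ex_round_request(unsigned int pages)
{
    /* Round up to a multiple of the chunk size, as sh_set_allocation() does. */
    return (pages + ((1u << EX_SHADOW_MAX_ORDER) - 1))
           & ~((1u << EX_SHADOW_MAX_ORDER) - 1);
}

static unsigned int ex_pages_to_mb(unsigned int pg)
{
    /* Round up to whole megabytes, as shadow_get_allocation() does. */
    return (pg >> (20 - EX_PAGE_SHIFT))
           + ((pg & ((1u << (20 - EX_PAGE_SHIFT)) - 1)) ? 1 : 0);
}

int main(void)
{
    printf("%u\n", ex_round_request(257)); /* 260: next multiple of 4      */
    printf("%u\n", ex_pages_to_mb(300));   /* 2: 300 pages is between 1-2MB */
    return 0;
}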
@@ -1491,11 +1193,11 @@ static unsigned int sh_set_allocation(st pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1); SHADOW_PRINTK("current %i target %i\n", - d->arch.shadow.total_pages, pages); - - while ( d->arch.shadow.total_pages != pages ) - { - if ( d->arch.shadow.total_pages < pages ) + d->arch.paging.shadow.total_pages, pages); + + while ( d->arch.paging.shadow.total_pages != pages ) + { + if ( d->arch.paging.shadow.total_pages < pages ) { /* Need to allocate more memory from domheap */ sp = (struct shadow_page_info *) @@ -1505,8 +1207,8 @@ static unsigned int sh_set_allocation(st SHADOW_PRINTK("failed to allocate shadow pages.\n"); return -ENOMEM; } - d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER; - d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER; for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) { sp[j].type = 0; @@ -1518,18 +1220,18 @@ static unsigned int sh_set_allocation(st } sp->order = SHADOW_MAX_ORDER; list_add_tail(&sp->list, - &d->arch.shadow.freelists[SHADOW_MAX_ORDER]); + &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]); } - else if ( d->arch.shadow.total_pages > pages ) + else if ( d->arch.paging.shadow.total_pages > pages ) { /* Need to return memory to domheap */ shadow_prealloc(d, SHADOW_MAX_ORDER); - ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER])); - sp = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, + ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER])); + sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, struct shadow_page_info, list); list_del(&sp->list); - d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; - d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER); } @@ -1547,7 +1249,7 @@ static unsigned int sh_set_allocation(st /* Return the size of the shadow pool, rounded up to the nearest MB */ static unsigned int shadow_get_allocation(struct domain *d) { - unsigned int pg = d->arch.shadow.total_pages; + unsigned int pg = d->arch.paging.shadow.total_pages; return ((pg >> (20 - PAGE_SHIFT)) + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); } @@ -1583,7 +1285,7 @@ static void sh_hash_audit_bucket(struct if ( !(SHADOW_AUDIT_ENABLE) ) return; - sp = d->arch.shadow.hash_table[bucket]; + sp = d->arch.paging.shadow.hash_table[bucket]; while ( sp ) { /* Not a shadow? 
*/ @@ -1608,7 +1310,7 @@ static void sh_hash_audit_bucket(struct if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) { - SHADOW_ERROR("MFN %#lx shadowed (by %#"SH_PRI_mfn")" + SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" " but has typecount %#lx\n", sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), gpg->u.inuse.type_info); @@ -1652,13 +1354,13 @@ static int shadow_hash_alloc(struct doma struct shadow_page_info **table; ASSERT(shadow_locked_by_me(d)); - ASSERT(!d->arch.shadow.hash_table); + ASSERT(!d->arch.paging.shadow.hash_table); table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); if ( !table ) return 1; memset(table, 0, SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); - d->arch.shadow.hash_table = table; + d->arch.paging.shadow.hash_table = table; return 0; } @@ -1667,10 +1369,10 @@ static void shadow_hash_teardown(struct static void shadow_hash_teardown(struct domain *d) { ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); - - xfree(d->arch.shadow.hash_table); - d->arch.shadow.hash_table = NULL; + ASSERT(d->arch.paging.shadow.hash_table); + + xfree(d->arch.paging.shadow.hash_table); + d->arch.paging.shadow.hash_table = NULL; } @@ -1683,7 +1385,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1692,16 +1394,16 @@ mfn_t shadow_hash_lookup(struct vcpu *v, key = sh_hash(n, t); sh_hash_audit_bucket(d, key); - sp = d->arch.shadow.hash_table[key]; + sp = d->arch.paging.shadow.hash_table[key]; prev = NULL; while(sp) { if ( sp->backpointer == n && sp->type == t ) { /* Pull-to-front if 'sp' isn't already the head item */ - if ( unlikely(sp != d->arch.shadow.hash_table[key]) ) + if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) { - if ( unlikely(d->arch.shadow.hash_walking != 0) ) + if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) /* Can't reorder: someone is walking the hash chains */ return shadow_page_to_mfn(sp); else @@ -1710,8 +1412,8 @@ mfn_t shadow_hash_lookup(struct vcpu *v, /* Delete sp from the list */ prev->next_shadow = sp->next_shadow; /* Re-insert it at the head of the list */ - sp->next_shadow = d->arch.shadow.hash_table[key]; - d->arch.shadow.hash_table[key] = sp; + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + d->arch.paging.shadow.hash_table[key] = sp; } } else @@ -1737,7 +1439,7 @@ void shadow_hash_insert(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1748,8 +1450,8 @@ void shadow_hash_insert(struct vcpu *v, /* Insert this shadow at the top of the bucket */ sp = mfn_to_shadow_page(smfn); - sp->next_shadow = d->arch.shadow.hash_table[key]; - d->arch.shadow.hash_table[key] = sp; + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + d->arch.paging.shadow.hash_table[key] = sp; sh_hash_audit_bucket(d, key); } @@ -1763,7 +1465,7 @@ void shadow_hash_delete(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1773,13 +1475,13 @@ void shadow_hash_delete(struct vcpu *v, sh_hash_audit_bucket(d, key); sp = mfn_to_shadow_page(smfn); - if ( d->arch.shadow.hash_table[key] == sp ) + if ( d->arch.paging.shadow.hash_table[key] == sp ) /* Easy case: 
we're deleting the head item. */ - d->arch.shadow.hash_table[key] = sp->next_shadow; + d->arch.paging.shadow.hash_table[key] = sp->next_shadow; else { /* Need to search for the one we want */ - x = d->arch.shadow.hash_table[key]; + x = d->arch.paging.shadow.hash_table[key]; while ( 1 ) { ASSERT(x); /* We can't have hit the end, since our target is @@ -1818,15 +1520,15 @@ static void hash_foreach(struct vcpu *v, /* Say we're here, to stop hash-lookups reordering the chains */ ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_walking == 0); - d->arch.shadow.hash_walking = 1; + ASSERT(d->arch.paging.shadow.hash_walking == 0); + d->arch.paging.shadow.hash_walking = 1; for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) { /* WARNING: This is not safe against changes to the hash table. * The callback *must* return non-zero if it has inserted or * deleted anything from the hash (lookups are OK, though). */ - for ( x = d->arch.shadow.hash_table[i]; x; x = x->next_shadow ) + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) { if ( callback_mask & (1 << x->type) ) { @@ -1839,7 +1541,7 @@ static void hash_foreach(struct vcpu *v, } if ( done ) break; } - d->arch.shadow.hash_walking = 0; + d->arch.paging.shadow.hash_walking = 0; } @@ -2008,27 +1710,27 @@ int sh_remove_write_access(struct vcpu * * and that mapping is likely to be in the current pagetable, * in the guest's linear map (on non-HIGHPTE linux and windows)*/ -#define GUESS(_a, _h) do { \ - if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ - perfc_incrc(shadow_writeable_h_ ## _h); \ - if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ - return 1; \ +#define GUESS(_a, _h) do { \ + if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \ + perfc_incrc(shadow_writeable_h_ ## _h); \ + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ + return 1; \ } while (0) - if ( v->arch.shadow.mode->guest_levels == 2 ) + if ( v->arch.paging.mode->guest_levels == 2 ) { if ( level == 1 ) /* 32bit non-PAE w2k3: linear map at 0xC0000000 */ GUESS(0xC0000000UL + (fault_addr >> 10), 1); /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } #if CONFIG_PAGING_LEVELS >= 3 - else if ( v->arch.shadow.mode->guest_levels == 3 ) + else if ( v->arch.paging.mode->guest_levels == 3 ) { /* 32bit PAE w2k3: linear map at 0xC0000000 */ switch ( level ) @@ -2038,11 +1740,11 @@ int sh_remove_write_access(struct vcpu * } /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } #if CONFIG_PAGING_LEVELS >= 4 - else if ( v->arch.shadow.mode->guest_levels == 4 ) + else if ( v->arch.paging.mode->guest_levels == 4 ) { /* 64bit w2k3: linear map at 0x0000070000000000 */ switch ( level ) @@ -2054,7 +1756,7 @@ int sh_remove_write_access(struct vcpu * /* 64bit Linux direct map at 0xffff810000000000; older kernels * had it at 0x0000010000000000UL */ - gfn = sh_mfn_to_gfn(v->domain, gmfn); + gfn = mfn_to_gfn(v->domain, gmfn); GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); } @@ -2073,10 +1775,10 @@ int sh_remove_write_access(struct vcpu * * the writeable mapping by looking at the same MFN where the last * brute-force search succeeded. 
*/ - if ( v->arch.shadow.last_writeable_pte_smfn != 0 ) + if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 ) { unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); - mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn); + mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); int shtype = mfn_to_shadow_page(last_smfn)->type; if ( callbacks[shtype] ) @@ -2431,7 +2133,7 @@ static void sh_update_paging_modes(struc static void sh_update_paging_modes(struct vcpu *v) { struct domain *d = v->domain; - struct shadow_paging_mode *old_mode = v->arch.shadow.mode; + struct paging_mode *old_mode = v->arch.paging.mode; mfn_t old_guest_table; ASSERT(shadow_locked_by_me(d)); @@ -2446,8 +2148,8 @@ static void sh_update_paging_modes(struc // First, tear down any old shadow tables held by this vcpu. // - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); + if ( v->arch.paging.mode ) + v->arch.paging.mode->shadow.detach_old_tables(v); if ( !is_hvm_domain(d) ) { @@ -2456,17 +2158,17 @@ static void sh_update_paging_modes(struc /// #if CONFIG_PAGING_LEVELS == 4 if ( pv_32bit_guest(v) ) - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); else - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #elif CONFIG_PAGING_LEVELS == 2 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); #else #error unexpected paging mode #endif - v->arch.shadow.translate_enabled = !!shadow_mode_translate(d); + v->arch.paging.translate_enabled = !!shadow_mode_translate(d); } else { @@ -2476,8 +2178,8 @@ static void sh_update_paging_modes(struc ASSERT(shadow_mode_translate(d)); ASSERT(shadow_mode_external(d)); - v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v); - if ( !v->arch.shadow.translate_enabled ) + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); + if ( !v->arch.paging.translate_enabled ) { /* Set v->arch.guest_table to use the p2m map, and choose * the appropriate shadow mode */ @@ -2485,11 +2187,11 @@ static void sh_update_paging_modes(struc #if CONFIG_PAGING_LEVELS == 2 v->arch.guest_table = pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); #elif CONFIG_PAGING_LEVELS == 3 v->arch.guest_table = pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #else /* CONFIG_PAGING_LEVELS == 4 */ { l4_pgentry_t *l4e; @@ -2501,7 +2203,7 @@ static void sh_update_paging_modes(struc pagetable_from_pfn(l4e_get_pfn(l4e[0])); sh_unmap_domain_page(l4e); } - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #endif /* Fix up refcounts on guest_table */ get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d); @@ -2514,7 +2216,7 @@ static void sh_update_paging_modes(struc if ( hvm_long_mode_enabled(v) ) { // long mode guest... 
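/* Annotation, not part of the patch: in the assignments that follow,
 * SHADOW_INTERNAL_NAME(sh_paging_mode, <shadow levels>, <guest levels>)
 * names the per-configuration paging_mode instance built from multi.c,
 * so picking a mode here is just selecting which compiled flavour of the
 * shadow code (2-on-2, 2-on-3, 3-on-3 or 4-on-4, in this file's own
 * guest-on-shadow naming) the vcpu will run.  The exact token-pasted
 * symbol produced by the macro is defined in shadow/private.h and is
 * assumed, not shown, here. */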
- v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4); } else @@ -2523,7 +2225,7 @@ static void sh_update_paging_modes(struc { #if CONFIG_PAGING_LEVELS >= 3 // 32-bit PAE mode guest... - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3); #else SHADOW_ERROR("PAE not supported in 32-bit Xen\n"); @@ -2535,10 +2237,10 @@ static void sh_update_paging_modes(struc { // 32-bit 2 level guest... #if CONFIG_PAGING_LEVELS >= 3 - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2); #else - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2); #endif } @@ -2546,25 +2248,25 @@ static void sh_update_paging_modes(struc if ( pagetable_is_null(v->arch.monitor_table) ) { - mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); + mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); hvm_update_host_cr3(v); } - if ( v->arch.shadow.mode != old_mode ) + if ( v->arch.paging.mode != old_mode ) { SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " "(was g=%u s=%u)\n", d->domain_id, v->vcpu_id, is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1, - v->arch.shadow.mode->guest_levels, - v->arch.shadow.mode->shadow_levels, + v->arch.paging.mode->guest_levels, + v->arch.paging.mode->shadow.shadow_levels, old_mode ? old_mode->guest_levels : 0, - old_mode ? old_mode->shadow_levels : 0); + old_mode ? old_mode->shadow.shadow_levels : 0); if ( old_mode && - (v->arch.shadow.mode->shadow_levels != - old_mode->shadow_levels) ) + (v->arch.paging.mode->shadow.shadow_levels != + old_mode->shadow.shadow_levels) ) { /* Need to make a new monitor table for the new mode */ mfn_t new_mfn, old_mfn; @@ -2584,9 +2286,9 @@ static void sh_update_paging_modes(struc old_mfn = pagetable_get_mfn(v->arch.monitor_table); v->arch.monitor_table = pagetable_null(); - new_mfn = v->arch.shadow.mode->make_monitor_table(v); + new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(new_mfn); - SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", mfn_x(new_mfn)); /* Don't be running on the old monitor table when we @@ -2596,7 +2298,7 @@ static void sh_update_paging_modes(struc if ( v == current ) write_ptbase(v); hvm_update_host_cr3(v); - old_mode->destroy_monitor_table(v, old_mfn); + old_mode->shadow.destroy_monitor_table(v, old_mfn); } } @@ -2606,7 +2308,7 @@ static void sh_update_paging_modes(struc // This *does* happen, at least for CR4.PGE... 
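/* Illustrative sketch, not part of the patch: the shape of the new mode
 * structure that the code above relies on.  Generic operations such as
 * update_cr3 and guest_levels live directly in struct paging_mode, while
 * shadow-only operations (detach_old_tables, make_monitor_table,
 * guess_wrmap, the x86_emulate_* hooks, shadow_levels, ...) are reached
 * through its embedded .shadow member.  The authoritative definitions are
 * in the new xen/include/asm-x86/paging.h; the sketch lists only members
 * used in this file and guesses at the exact types. */
#if 0 /* sketch only */
struct shadow_paging_mode {
    void         (*detach_old_tables)(struct vcpu *v);
    mfn_t        (*make_monitor_table)(struct vcpu *v);
    void         (*destroy_monitor_table)(struct vcpu *v, mfn_t mmfn);
    int          (*guess_wrmap)(struct vcpu *v, unsigned long vaddr,
                                mfn_t gmfn);
    int          (*x86_emulate_write)(struct vcpu *v, unsigned long va,
                                      void *src, u32 bytes,
                                      struct sh_emulate_ctxt *sh_ctxt);
    /* ... x86_emulate_cmpxchg, x86_emulate_cmpxchg8b ... */
    unsigned int shadow_levels;
};

struct paging_mode {
    void         (*update_cr3)(struct vcpu *v, int do_locking);
    /* ... other generic hooks ... */
    unsigned int guest_levels;
    struct shadow_paging_mode shadow;
};
#endif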
} - v->arch.shadow.mode->update_cr3(v, 0); + v->arch.paging.mode->update_cr3(v, 0); } void shadow_update_paging_modes(struct vcpu *v) @@ -2626,9 +2328,7 @@ static void sh_new_mode(struct domain *d ASSERT(shadow_locked_by_me(d)); ASSERT(d != current->domain); - d->arch.shadow.mode = new_mode; - if ( new_mode & SHM2_translate ) - shadow_audit_p2m(d); + d->arch.paging.mode = new_mode; for_each_vcpu(d, v) sh_update_paging_modes(v); } @@ -2642,75 +2342,75 @@ int shadow_enable(struct domain *d, u32 unsigned int old_pages; int rv = 0; - mode |= SHM2_enable; + mode |= PG_SH_enable; domain_pause(d); - shadow_lock(d); /* Sanity check the arguments */ if ( (d == current->domain) || shadow_mode_enabled(d) || - ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) || - ((mode & SHM2_external) && !(mode & SHM2_translate)) ) + ((mode & PG_translate) && !(mode & PG_refcounts)) || + ((mode & PG_external) && !(mode & PG_translate)) ) { rv = -EINVAL; - goto out; - } - - // XXX -- eventually would like to require that all memory be allocated - // *after* shadow_enabled() is called... So here, we would test to make - // sure that d->page_list is empty. -#if 0 - spin_lock(&d->page_alloc_lock); - if ( !list_empty(&d->page_list) ) - { - spin_unlock(&d->page_alloc_lock); - rv = -EINVAL; - goto out; - } - spin_unlock(&d->page_alloc_lock); -#endif + goto out_unlocked; + } /* Init the shadow memory allocation if the user hasn't done so */ - old_pages = d->arch.shadow.total_pages; + old_pages = d->arch.paging.shadow.total_pages; if ( old_pages == 0 ) - if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ + { + unsigned int r; + shadow_lock(d); + r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */ + shadow_unlock(d); + if ( r != 0 ) { sh_set_allocation(d, 0, NULL); rv = -ENOMEM; - goto out; - } + goto out_unlocked; + } + } + + /* Init the P2M table. Must be done before we take the shadow lock + * to avoid possible deadlock. */ + if ( mode & PG_translate ) + { + rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page); + if (rv != 0) + goto out_unlocked; + } + + shadow_lock(d); + + /* Sanity check again with the lock held */ + if ( shadow_mode_enabled(d) ) + { + rv = -EINVAL; + goto out_locked; + } /* Init the hash table */ if ( shadow_hash_alloc(d) != 0 ) { - sh_set_allocation(d, old_pages, NULL); rv = -ENOMEM; - goto out; - } - - /* Init the P2M table */ - if ( mode & SHM2_translate ) - if ( !shadow_alloc_p2m_table(d) ) - { - shadow_hash_teardown(d); - sh_set_allocation(d, old_pages, NULL); - shadow_p2m_teardown(d); - rv = -ENOMEM; - goto out; - } + goto out_locked; + } #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) /* We assume we're dealing with an older 64bit linux guest until we * see the guest use more than one l4 per vcpu. 
*/ - d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; + d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; #endif /* Update the bits */ sh_new_mode(d, mode); - shadow_audit_p2m(d); - out: + + out_locked: shadow_unlock(d); + out_unlocked: + if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) ) + p2m_teardown(d); domain_unpause(d); return rv; } @@ -2721,6 +2421,8 @@ void shadow_teardown(struct domain *d) { struct vcpu *v; mfn_t mfn; + struct list_head *entry, *n; + struct page_info *pg; ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); ASSERT(d != current->domain); @@ -2733,48 +2435,55 @@ void shadow_teardown(struct domain *d) /* Release the shadow and monitor tables held by each vcpu */ for_each_vcpu(d, v) { - if ( v->arch.shadow.mode ) + if ( v->arch.paging.mode ) { - v->arch.shadow.mode->detach_old_tables(v); + v->arch.paging.mode->shadow.detach_old_tables(v); if ( shadow_mode_external(d) ) { mfn = pagetable_get_mfn(v->arch.monitor_table); if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) - v->arch.shadow.mode->destroy_monitor_table(v, mfn); + v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn); v->arch.monitor_table = pagetable_null(); } } } } - if ( d->arch.shadow.total_pages != 0 ) + list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) + { + list_del(entry); + pg = list_entry(entry, struct page_info, list); + shadow_free_p2m_page(d, pg); + } + + if ( d->arch.paging.shadow.total_pages != 0 ) { SHADOW_PRINTK("teardown of domain %u starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ sh_set_allocation(d, 0, NULL); /* Release the hash table back to xenheap */ - if (d->arch.shadow.hash_table) + if (d->arch.paging.shadow.hash_table) shadow_hash_teardown(d); /* Release the log-dirty bitmap of dirtied pages */ sh_free_log_dirty_bitmap(d); /* Should not have any more memory held */ SHADOW_PRINTK("teardown done." " Shadow pages total = %u, free = %u, p2m=%u\n", - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); - ASSERT(d->arch.shadow.total_pages == 0); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); + ASSERT(d->arch.paging.shadow.total_pages == 0); } /* We leave the "permanent" shadow modes enabled, but clear the * log-dirty mode bit. We don't want any more mark_dirty() * calls now that we've torn down the bitmap */ - d->arch.shadow.mode &= ~SHM2_log_dirty; + d->arch.paging.mode &= ~PG_log_dirty; shadow_unlock(d); } @@ -2782,30 +2491,28 @@ void shadow_final_teardown(struct domain void shadow_final_teardown(struct domain *d) /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */ { - SHADOW_PRINTK("dom %u final teardown starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); /* Double-check that the domain didn't have any shadow memory. * It is possible for a domain that never got domain_kill()ed * to get here with its shadow allocation intact. 
*/ - if ( d->arch.shadow.total_pages != 0 ) + if ( d->arch.paging.shadow.total_pages != 0 ) shadow_teardown(d); /* It is now safe to pull down the p2m map. */ - if ( d->arch.shadow.p2m_pages != 0 ) - shadow_p2m_teardown(d); + p2m_teardown(d); SHADOW_PRINTK("dom %u final teardown done." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); } static int shadow_one_bit_enable(struct domain *d, u32 mode) @@ -2814,12 +2521,14 @@ static int shadow_one_bit_enable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || (d->arch.shadow.mode & mode) ) + if ( d == current->domain || (d->arch.paging.mode & mode) ) { return -EINVAL; } - if ( d->arch.shadow.mode == 0 ) + mode |= PG_SH_enable; + + if ( d->arch.paging.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ if ( sh_set_allocation(d, 1, NULL) != 0 @@ -2831,7 +2540,7 @@ static int shadow_one_bit_enable(struct } /* Update the bits */ - sh_new_mode(d, d->arch.shadow.mode | mode); + sh_new_mode(d, d->arch.paging.mode | mode); return 0; } @@ -2843,26 +2552,26 @@ static int shadow_one_bit_disable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || !(d->arch.shadow.mode & mode) ) + if ( d == current->domain || !(d->arch.paging.mode & mode) ) { return -EINVAL; } /* Update the bits */ - sh_new_mode(d, d->arch.shadow.mode & ~mode); - if ( d->arch.shadow.mode == 0 ) + sh_new_mode(d, d->arch.paging.mode & ~mode); + if ( d->arch.paging.mode == 0 ) { /* Get this domain off shadows */ SHADOW_PRINTK("un-shadowing of domain %u starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); for_each_vcpu(d, v) { - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); + if ( v->arch.paging.mode ) + v->arch.paging.mode->shadow.detach_old_tables(v); #if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); @@ -2885,9 +2594,9 @@ static int shadow_one_bit_disable(struct SHADOW_PRINTK("un-shadowing of domain %u done." 
" Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); } return 0; @@ -2909,7 +2618,7 @@ static int shadow_test_enable(struct dom goto out; } - ret = shadow_one_bit_enable(d, SHM2_enable); + ret = shadow_one_bit_enable(d, PG_SH_enable); out: shadow_unlock(d); domain_unpause(d); @@ -2923,7 +2632,7 @@ static int shadow_test_disable(struct do domain_pause(d); shadow_lock(d); - ret = shadow_one_bit_disable(d, SHM2_enable); + ret = shadow_one_bit_disable(d, PG_SH_enable); shadow_unlock(d); domain_unpause(d); @@ -2933,19 +2642,19 @@ static int static int sh_alloc_log_dirty_bitmap(struct domain *d) { - ASSERT(d->arch.shadow.dirty_bitmap == NULL); - d->arch.shadow.dirty_bitmap_size = + ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL); + d->arch.paging.shadow.dirty_bitmap_size = (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); - d->arch.shadow.dirty_bitmap = + d->arch.paging.shadow.dirty_bitmap = xmalloc_array(unsigned long, - d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG); - if ( d->arch.shadow.dirty_bitmap == NULL ) - { - d->arch.shadow.dirty_bitmap_size = 0; + d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG); + if ( d->arch.paging.shadow.dirty_bitmap == NULL ) + { + d->arch.paging.shadow.dirty_bitmap_size = 0; return -ENOMEM; } - memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8); + memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8); return 0; } @@ -2953,11 +2662,11 @@ static void static void sh_free_log_dirty_bitmap(struct domain *d) { - d->arch.shadow.dirty_bitmap_size = 0; - if ( d->arch.shadow.dirty_bitmap ) - { - xfree(d->arch.shadow.dirty_bitmap); - d->arch.shadow.dirty_bitmap = NULL; + d->arch.paging.shadow.dirty_bitmap_size = 0; + if ( d->arch.paging.shadow.dirty_bitmap ) + { + xfree(d->arch.paging.shadow.dirty_bitmap); + d->arch.paging.shadow.dirty_bitmap = NULL; } } @@ -2989,7 +2698,7 @@ static int shadow_log_dirty_enable(struc goto out; } - ret = shadow_one_bit_enable(d, SHM2_log_dirty); + ret = shadow_one_bit_enable(d, PG_log_dirty); if ( ret != 0 ) sh_free_log_dirty_bitmap(d); @@ -3005,7 +2714,7 @@ static int shadow_log_dirty_disable(stru domain_pause(d); shadow_lock(d); - ret = shadow_one_bit_disable(d, SHM2_log_dirty); + ret = shadow_one_bit_disable(d, PG_log_dirty); if ( !shadow_mode_log_dirty(d) ) sh_free_log_dirty_bitmap(d); shadow_unlock(d); @@ -3017,100 +2726,52 @@ static int shadow_log_dirty_disable(stru /**************************************************************************/ /* P2M map manipulations */ -static void -sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ - struct vcpu *v; - - if ( !shadow_mode_translate(d) ) - return; - - v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - - SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); - - ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn); - //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn); - - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( sh_remove_all_mappings(v, _mfn(mfn)) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - - shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); -} - +/* shadow specific code which should be called when P2M table entry is updated + * with new content. 
It is responsible for update the entry, as well as other + * shadow processing jobs. + */ void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ +shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, + l1_pgentry_t new, unsigned int level) +{ + struct domain *d = v->domain; + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); + mfn_t mfn; + shadow_lock(d); - shadow_audit_p2m(d); - sh_p2m_remove_page(d, gfn, mfn); - shadow_audit_p2m(d); - shadow_unlock(d); -} - -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - unsigned long ogfn; - mfn_t omfn; - - if ( !shadow_mode_translate(d) ) - return; - - shadow_lock(d); - shadow_audit_p2m(d); - - SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); - - omfn = sh_gfn_to_mfn(d, gfn); - if ( mfn_valid(omfn) ) - { - /* Get rid of the old mapping, especially any shadows */ - struct vcpu *v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, omfn); - if ( sh_remove_all_mappings(v, omfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); - } - - ogfn = sh_mfn_to_gfn(d, _mfn(mfn)); - if ( -#ifdef __x86_64__ - (ogfn != 0x5555555555555555L) -#else - (ogfn != 0x55555555L) -#endif - && (ogfn != INVALID_M2P_ENTRY) - && (ogfn != gfn) ) - { - /* This machine frame is already mapped at another physical address */ - SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", - mfn, ogfn, gfn); - if ( mfn_valid(omfn = sh_gfn_to_mfn(d, ogfn)) ) - { - SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", - ogfn , mfn_x(omfn)); - if ( mfn_x(omfn) == mfn ) - sh_p2m_remove_page(d, ogfn, mfn); - } - } - - shadow_set_p2m_entry(d, gfn, _mfn(mfn)); - set_gpfn_from_mfn(mfn, gfn); + + /* handle physmap_add and physmap_remove */ + mfn = gfn_to_mfn(d, gfn); + if ( v != NULL && level == 1 && mfn_valid(mfn) ) { + sh_remove_all_shadows_and_parents(v, mfn); + if ( sh_remove_all_mappings(v, mfn) ) + flush_tlb_mask(d->domain_dirty_cpumask); + } + + /* update the entry with new content */ + safe_write_pte(p, new); + + /* The P2M can be shadowed: keep the shadows synced */ + if ( d->vcpu[0] != NULL ) + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p)); + + /* install P2M in monitors for PAE Xen */ +#if CONFIG_PAGING_LEVELS == 3 + if ( level == 3 ) { + struct vcpu *v; + /* We have written to the p2m l3: need to sync the per-vcpu + * copies of it in the monitor tables */ + p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p); + /* Also, any vcpus running on shadows of the p2m need to + * reload their CR3s so the change propagates to the shadow */ + for_each_vcpu(d, v) { + if ( pagetable_get_pfn(v->arch.guest_table) + == pagetable_get_pfn(d->arch.phys_table) + && v->arch.paging.mode != NULL ) + v->arch.paging.mode->update_cr3(v, 0); + } + } +#endif #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) /* If we're doing FAST_FAULT_PATH, then shadow mode may have @@ -3122,7 +2783,6 @@ shadow_guest_physmap_add_page(struct dom shadow_blow_tables(d); #endif - shadow_audit_p2m(d); shadow_unlock(d); } @@ -3151,11 +2811,11 @@ static int shadow_log_dirty_op( SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", (clean) ? 
"clean" : "peek", d->domain_id, - d->arch.shadow.fault_count, - d->arch.shadow.dirty_count); - - sc->stats.fault_count = d->arch.shadow.fault_count; - sc->stats.dirty_count = d->arch.shadow.dirty_count; + d->arch.paging.shadow.fault_count, + d->arch.paging.shadow.dirty_count); + + sc->stats.fault_count = d->arch.paging.shadow.fault_count; + sc->stats.dirty_count = d->arch.paging.shadow.dirty_count; if ( clean ) { @@ -3164,22 +2824,22 @@ static int shadow_log_dirty_op( * but for now, we just unshadow everything except Xen. */ shadow_blow_tables(d); - d->arch.shadow.fault_count = 0; - d->arch.shadow.dirty_count = 0; + d->arch.paging.shadow.fault_count = 0; + d->arch.paging.shadow.dirty_count = 0; } if ( guest_handle_is_null(sc->dirty_bitmap) ) /* caller may have wanted just to clean the state or access stats. */ peek = 0; - if ( (peek || clean) && (d->arch.shadow.dirty_bitmap == NULL) ) + if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) ) { rv = -EINVAL; /* perhaps should be ENOMEM? */ goto out; } - if ( sc->pages > d->arch.shadow.dirty_bitmap_size ) - sc->pages = d->arch.shadow.dirty_bitmap_size; + if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size ) + sc->pages = d->arch.paging.shadow.dirty_bitmap_size; #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */ for ( i = 0; i < sc->pages; i += CHUNK ) @@ -3192,7 +2852,7 @@ static int shadow_log_dirty_op( { if ( copy_to_guest_offset( sc->dirty_bitmap, i/8, - (uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), bytes) ) + (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) ) { rv = -EFAULT; goto out; @@ -3200,7 +2860,7 @@ static int shadow_log_dirty_op( } if ( clean ) - memset((uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), 0, bytes); + memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes); } #undef CHUNK @@ -3221,7 +2881,7 @@ void sh_mark_dirty(struct domain *d, mfn if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) return; - ASSERT(d->arch.shadow.dirty_bitmap != NULL); + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); /* We /really/ mean PFN here, even for non-translated guests. */ pfn = get_gpfn_from_mfn(mfn_x(gmfn)); @@ -3235,24 +2895,24 @@ void sh_mark_dirty(struct domain *d, mfn return; /* N.B. Can use non-atomic TAS because protected by shadow_lock. */ - if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) + if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) ) { - if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) ) + if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) { SHADOW_DEBUG(LOGDIRTY, - "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n", + "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n", mfn_x(gmfn), pfn, d->domain_id); - d->arch.shadow.dirty_count++; + d->arch.paging.shadow.dirty_count++; } } else { SHADOW_PRINTK("mark_dirty OOR! " - "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n" + "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n" "owner=%d c=%08x t=%" PRtype_info "\n", mfn_x(gmfn), pfn, - d->arch.shadow.dirty_bitmap_size, + d->arch.paging.shadow.dirty_bitmap_size, d->domain_id, (page_get_owner(mfn_to_page(gmfn)) ? 
page_get_owner(mfn_to_page(gmfn))->domain_id @@ -3292,7 +2952,7 @@ int shadow_domctl(struct domain *d, return rc; if ( is_hvm_domain(d) ) return -EINVAL; - if ( d->arch.shadow.mode & SHM2_enable ) + if ( d->arch.paging.mode & PG_SH_enable ) if ( (rc = shadow_test_disable(d)) != 0 ) return rc; return 0; @@ -3304,7 +2964,7 @@ int shadow_domctl(struct domain *d, return shadow_log_dirty_enable(d); case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: - return shadow_enable(d, SHM2_refcounts|SHM2_translate); + return shadow_enable(d, PG_refcounts|PG_translate); case XEN_DOMCTL_SHADOW_OP_CLEAN: case XEN_DOMCTL_SHADOW_OP_PEEK: @@ -3313,7 +2973,7 @@ int shadow_domctl(struct domain *d, case XEN_DOMCTL_SHADOW_OP_ENABLE: if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY ) return shadow_log_dirty_enable(d); - return shadow_enable(d, sc->mode << SHM2_shift); + return shadow_enable(d, sc->mode << PG_mode_shift); case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: sc->mb = shadow_get_allocation(d); @@ -3390,7 +3050,7 @@ void shadow_audit_tables(struct vcpu *v) else { /* Audit only the current mode's tables */ - switch ( v->arch.shadow.mode->guest_levels ) + switch ( v->arch.paging.mode->guest_levels ) { case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE @@ -3405,199 +3065,6 @@ void shadow_audit_tables(struct vcpu *v) } #endif /* Shadow audit */ - - -/**************************************************************************/ -/* Auditing p2m tables */ - -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M - -void shadow_audit_p2m(struct domain *d) -{ - struct list_head *entry; - struct page_info *page; - struct domain *od; - unsigned long mfn, gfn, m2pfn, lp2mfn = 0; - mfn_t p2mfn; - unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; - int test_linear; - - if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) ) - return; - - //SHADOW_PRINTK("p2m audit starts\n"); - - test_linear = ( (d == current->domain) - && !pagetable_is_null(current->arch.monitor_table) ); - if ( test_linear ) - local_flush_tlb(); - - /* Audit part one: walk the domain's page allocation list, checking - * the m2p entries. */ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = mfn_x(page_to_mfn(page)); - - // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); - - od = page_get_owner(page); - - if ( od != d ) - { - SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", - mfn, od, (od?od->domain_id:-1), d, d->domain_id); - continue; - } - - gfn = get_gpfn_from_mfn(mfn); - if ( gfn == INVALID_M2P_ENTRY ) - { - orphans_i++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", - // mfn); - continue; - } - - if ( gfn == 0x55555555 ) - { - orphans_d++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", - // mfn); - continue; - } - - p2mfn = sh_gfn_to_mfn_foreign(d, gfn); - if ( mfn_x(p2mfn) != mfn ) - { - mpbad++; - SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" - " (-> gfn %#lx)\n", - mfn, gfn, mfn_x(p2mfn), - (mfn_valid(p2mfn) - ? get_gpfn_from_mfn(mfn_x(p2mfn)) - : -1u)); - /* This m2p entry is stale: the domain has another frame in - * this physical slot. No great disaster, but for neatness, - * blow away the m2p entry. 
*/ - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); - } - - if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) - { - lp2mfn = gfn_to_mfn_current(gfn); - if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) - { - SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " - "(!= mfn %#lx)\n", gfn, - mfn_x(lp2mfn), mfn_x(p2mfn)); - } - } - - // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", - // mfn, gfn, p2mfn, lp2mfn); - } - - /* Audit part two: walk the domain's p2m table, checking the entries. */ - if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) - { - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - int i1, i2; - -#if CONFIG_PAGING_LEVELS == 4 - l4_pgentry_t *l4e; - l3_pgentry_t *l3e; - int i3, i4; - l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#elif CONFIG_PAGING_LEVELS == 3 - l3_pgentry_t *l3e; - int i3; - l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#else /* CONFIG_PAGING_LEVELS == 2 */ - l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#endif - - gfn = 0; -#if CONFIG_PAGING_LEVELS >= 3 -#if CONFIG_PAGING_LEVELS >= 4 - for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) - { - if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4]))); -#endif /* now at levels 3 or 4... */ - for ( i3 = 0; - i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); - i3++ ) - { - if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3]))); -#endif /* all levels... */ - for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) - { - if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2]))); - - for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) - { - if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) - continue; - mfn = l1e_get_pfn(l1e[i1]); - ASSERT(mfn_valid(_mfn(mfn))); - m2pfn = get_gpfn_from_mfn(mfn); - if ( m2pfn != gfn ) - { - pmbad++; - SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx" - " -> gfn %#lx\n", gfn, mfn, m2pfn); - BUG(); - } - } - sh_unmap_domain_page(l1e); - } -#if CONFIG_PAGING_LEVELS >= 3 - sh_unmap_domain_page(l2e); - } -#if CONFIG_PAGING_LEVELS >= 4 - sh_unmap_domain_page(l3e); - } -#endif -#endif - -#if CONFIG_PAGING_LEVELS == 4 - sh_unmap_domain_page(l4e); -#elif CONFIG_PAGING_LEVELS == 3 - sh_unmap_domain_page(l3e); -#else /* CONFIG_PAGING_LEVELS == 2 */ - sh_unmap_domain_page(l2e); -#endif - - } - - //SHADOW_PRINTK("p2m audit complete\n"); - //if ( orphans_i | orphans_d | mpbad | pmbad ) - // SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", - // orphans_i + orphans_d, orphans_i, orphans_d, - if ( mpbad | pmbad ) - SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", - pmbad, mpbad); -} - -#endif /* p2m audit */ /* * Local variables: diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Feb 14 12:02:20 2007 +0000 @@ -353,21 +353,21 @@ static inline void print_gw(walk_t *gw) SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va); #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... 
*/ - SHADOW_PRINTK(" l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn)); + SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn)); SHADOW_PRINTK(" l4e=%p\n", gw->l4e); if ( gw->l4e ) SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4); #endif /* PAE or 64... */ - SHADOW_PRINTK(" l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn)); + SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn)); SHADOW_PRINTK(" l3e=%p\n", gw->l3e); if ( gw->l3e ) SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3); #endif /* All levels... */ - SHADOW_PRINTK(" l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn)); + SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn)); SHADOW_PRINTK(" l2e=%p\n", gw->l2e); if ( gw->l2e ) SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2); - SHADOW_PRINTK(" l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn)); + SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn)); SHADOW_PRINTK(" l1e=%p\n", gw->l1e); if ( gw->l1e ) SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1); @@ -1572,7 +1572,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf #if GUEST_PAGING_LEVELS == 4 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) if ( shadow_type == SH_type_l4_64_shadow && - unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) + unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) { /* We're shadowing a new l4, but we've been assuming the guest uses * only one l4 per vcpu and context switches using an l4 entry. @@ -1584,7 +1584,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf struct shadow_page_info *sp; struct vcpu *v2; int l4count = 0, vcpus = 0; - list_for_each(l, &v->domain->arch.shadow.pinned_shadows) + list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); if ( sp->type == SH_type_l4_64_shadow ) @@ -1595,13 +1595,13 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf if ( l4count > 2 * vcpus ) { /* Unpin all the pinned l3 tables, and don't pin any more. */ - list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows) + list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); if ( sp->type == SH_type_l3_64_shadow ) sh_unpin(v, shadow_page_to_mfn(sp)); } - v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; + v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; } } #endif @@ -1641,7 +1641,7 @@ make_fl1_shadow(struct vcpu *v, gfn_t gf mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow, (unsigned long) gfn_x(gfn)); - SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n", + SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n", gfn_x(gfn), mfn_x(smfn)); set_fl1_shadow_status(v, gfn, smfn); @@ -1851,7 +1851,7 @@ static shadow_l2e_t * shadow_get_and_cre #elif GUEST_PAGING_LEVELS == 3 /* PAE... */ /* We never demand-shadow PAE l3es: they are only created in * sh_update_cr3(). Check if the relevant sl3e is present. 
*/ - shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table) + shadow_l3_linear_offset(gw->va); if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) return NULL; @@ -2358,7 +2358,7 @@ static int validate_gl1e(struct vcpu *v, gfn = guest_l1e_get_gfn(*new_gl1e); gmfn = vcpu_gfn_to_mfn(v, gfn); - mmio = (is_hvm_vcpu(v) && shadow_vcpu_mode_translate(v) && !mfn_valid(gmfn)); + mmio = (is_hvm_vcpu(v) && paging_vcpu_mode_translate(v) && !mfn_valid(gmfn)); l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, ft_prefetch, mmio); @@ -2506,7 +2506,7 @@ static inline void check_for_early_unsha static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) { #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) && + if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) && sh_mfn_is_a_page_table(gmfn) ) { u32 flags = mfn_to_page(gmfn)->shadow_flags; @@ -2516,7 +2516,7 @@ static inline void check_for_early_unsha sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); } } - v->arch.shadow.last_emulated_mfn = mfn_x(gmfn); + v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn); #endif } @@ -2524,7 +2524,7 @@ static inline void reset_early_unshadow( static inline void reset_early_unshadow(struct vcpu *v) { #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - v->arch.shadow.last_emulated_mfn = INVALID_MFN; + v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN; #endif } @@ -2589,7 +2589,7 @@ static void sh_prefetch(struct vcpu *v, gfn = guest_l1e_get_gfn(gl1e); gmfn = vcpu_gfn_to_mfn(v, gfn); mmio = ( is_hvm_vcpu(v) - && shadow_vcpu_mode_translate(v) + && paging_vcpu_mode_translate(v) && mmio_space(gfn_to_paddr(gfn)) ); /* Propagate the entry. Safe to use a pointer to our local @@ -2631,6 +2631,7 @@ static int sh_page_fault(struct vcpu *v, SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", v->domain->domain_id, v->vcpu_id, va, regs->error_code); + perfc_incrc(shadow_fault); // // XXX: Need to think about eventually mapping superpages directly in the // shadow (when possible), as opposed to splintering them into a @@ -2651,7 +2652,7 @@ static int sh_page_fault(struct vcpu *v, if ( sh_l1e_is_gnp(sl1e) ) { if ( likely(!is_hvm_domain(d) || - shadow_vcpu_mode_translate(v)) ) + paging_vcpu_mode_translate(v)) ) { /* Not-present in a guest PT: pass to the guest as * a not-present fault (by flipping two bits). 
*/ @@ -2701,7 +2702,7 @@ static int sh_page_fault(struct vcpu *v, if ( unlikely(shadow_locked_by_me(d)) ) { SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", - d->arch.shadow.locker_function); + d->arch.paging.shadow.locker_function); return 0; } @@ -2726,7 +2727,7 @@ static int sh_page_fault(struct vcpu *v, // if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) ) { - if ( is_hvm_domain(d) && !shadow_vcpu_mode_translate(v) ) + if ( is_hvm_domain(d) && !paging_vcpu_mode_translate(v) ) { /* Not present in p2m map, means this is mmio */ gpa = va; @@ -2784,13 +2785,13 @@ static int sh_page_fault(struct vcpu *v, gfn = guest_l1e_get_gfn(gw.eff_l1e); gmfn = vcpu_gfn_to_mfn(v, gfn); mmio = (is_hvm_domain(d) - && shadow_vcpu_mode_translate(v) + && paging_vcpu_mode_translate(v) && mmio_space(gfn_to_paddr(gfn))); if ( !mmio && !mfn_valid(gmfn) ) { perfc_incrc(shadow_fault_bail_bad_gfn); - SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", + SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", gfn_x(gfn), mfn_x(gmfn)); goto not_a_shadow_fault; } @@ -2848,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v, } perfc_incrc(shadow_fault_fixed); - d->arch.shadow.fault_count++; + d->arch.paging.shadow.fault_count++; reset_early_unshadow(v); done: @@ -2949,7 +2950,7 @@ sh_invlpg(struct vcpu *v, unsigned long return 0; } #elif SHADOW_PAGING_LEVELS == 3 - if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)]) + if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)]) & _PAGE_PRESENT) ) // no need to flush anything if there's no SL2... return 0; @@ -3120,7 +3121,7 @@ sh_update_linear_entries(struct vcpu *v) } /* Shadow l3 tables are made up by sh_update_cr3 */ - sl3e = v->arch.shadow.l3table; + sl3e = v->arch.paging.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) { @@ -3161,13 +3162,13 @@ sh_update_linear_entries(struct vcpu *v) #if GUEST_PAGING_LEVELS == 2 /* Shadow l3 tables were built by sh_update_cr3 */ if ( shadow_mode_external(d) ) - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; else BUG(); /* PV 2-on-3 is not supported yet */ #else /* GUEST_PAGING_LEVELS == 3 */ - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; /* Always safe to use guest_vtable, because it's globally mapped */ guest_l3e = v->arch.guest_vtable; @@ -3370,7 +3371,7 @@ sh_set_toplevel_shadow(struct vcpu *v, install_new_entry: /* Done. Install it */ - SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n", + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); v->arch.shadow_table[slot] = new_entry; @@ -3410,7 +3411,7 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( do_locking ) shadow_lock(v->domain); ASSERT(shadow_locked_by_me(v->domain)); - ASSERT(v->arch.shadow.mode); + ASSERT(v->arch.paging.mode); //// //// vcpu->arch.guest_table is already set @@ -3425,7 +3426,7 @@ sh_update_cr3(struct vcpu *v, int do_loc ASSERT(shadow_mode_external(d)); // Is paging enabled on this vcpu? 
- if ( shadow_vcpu_mode_translate(v) ) + if ( paging_vcpu_mode_translate(v) ) { gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3))); gmfn = vcpu_gfn_to_mfn(v, gfn); @@ -3472,7 +3473,7 @@ sh_update_cr3(struct vcpu *v, int do_loc sh_unmap_domain_page_global(v->arch.guest_vtable); if ( shadow_mode_external(d) ) { - if ( shadow_vcpu_mode_translate(v) ) + if ( paging_vcpu_mode_translate(v) ) /* Paging enabled: find where in the page the l3 table is */ guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); else @@ -3566,7 +3567,7 @@ sh_update_cr3(struct vcpu *v, int do_loc #endif /// - /// v->arch.shadow.l3table + /// v->arch.paging.shadow.l3table /// #if SHADOW_PAGING_LEVELS == 3 { @@ -3581,7 +3582,7 @@ sh_update_cr3(struct vcpu *v, int do_loc /* 3-on-3: make a PAE l3 that points at the four l2 pages */ smfn = pagetable_get_mfn(v->arch.shadow_table[i]); #endif - v->arch.shadow.l3table[i] = + v->arch.paging.shadow.l3table[i] = (mfn_x(smfn) == 0) ? shadow_l3e_empty() : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); @@ -3605,8 +3606,8 @@ sh_update_cr3(struct vcpu *v, int do_loc /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated. * Don't use make_cr3 because (a) we know it's below 4GB, and * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */ - ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL); - v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table); + ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL); + v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table); #else /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0])); @@ -3622,7 +3623,7 @@ sh_update_cr3(struct vcpu *v, int do_loc ASSERT(is_hvm_domain(d)); #if SHADOW_PAGING_LEVELS == 3 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ - hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.shadow.l3table)); + hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table)); #else /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0])); @@ -3665,7 +3666,7 @@ static int sh_guess_wrmap(struct vcpu *v if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; #elif SHADOW_PAGING_LEVELS == 3 - sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) + sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table) + shadow_l3_linear_offset(vaddr); if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; @@ -3709,7 +3710,7 @@ int sh_rm_write_access_from_l1(struct vc (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn); #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC /* Remember the last shadow that we shot a writeable mapping in */ - v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); + v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); #endif if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info & PGT_count_mask) == 0 ) @@ -4050,8 +4051,8 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, #define AUDIT_FAIL(_level, _fmt, _a...) 
do { \ printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \ - "gl" #_level "mfn = %" SH_PRI_mfn \ - " sl" #_level "mfn = %" SH_PRI_mfn \ + "gl" #_level "mfn = %" PRI_mfn \ + " sl" #_level "mfn = %" PRI_mfn \ " &gl" #_level "e = %p &sl" #_level "e = %p" \ " gl" #_level "e = %" SH_PRI_gpte \ " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \ @@ -4105,7 +4106,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g != PGT_writable_page ) return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ else - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); + return gfn_to_mfn(v->domain, gfn_x(gfn)); } @@ -4156,7 +4157,7 @@ int sh_audit_l1_table(struct vcpu *v, mf gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } } @@ -4219,8 +4220,8 @@ int sh_audit_l2_table(struct vcpu *v, mf SH_type_l1_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn - " (--> %" SH_PRI_mfn ")" - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " (--> %" PRI_mfn ")" + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), @@ -4262,7 +4263,7 @@ int sh_audit_l3_table(struct vcpu *v, mf : SH_type_l2_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } }); @@ -4297,7 +4298,7 @@ int sh_audit_l4_table(struct vcpu *v, mf SH_type_l3_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } }); @@ -4314,30 +4315,29 @@ int sh_audit_l4_table(struct vcpu *v, mf /**************************************************************************/ /* Entry points into this mode of the shadow code. * This will all be mangled by the preprocessor to uniquify everything. 
*/ -struct shadow_paging_mode sh_paging_mode = { - .page_fault = sh_page_fault, - .invlpg = sh_invlpg, - .gva_to_gpa = sh_gva_to_gpa, - .gva_to_gfn = sh_gva_to_gfn, - .update_cr3 = sh_update_cr3, - .map_and_validate_gl1e = sh_map_and_validate_gl1e, - .map_and_validate_gl2e = sh_map_and_validate_gl2e, - .map_and_validate_gl2he = sh_map_and_validate_gl2he, - .map_and_validate_gl3e = sh_map_and_validate_gl3e, - .map_and_validate_gl4e = sh_map_and_validate_gl4e, - .detach_old_tables = sh_detach_old_tables, - .x86_emulate_write = sh_x86_emulate_write, - .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, - .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, - .make_monitor_table = sh_make_monitor_table, - .destroy_monitor_table = sh_destroy_monitor_table, - .guest_map_l1e = sh_guest_map_l1e, - .guest_get_eff_l1e = sh_guest_get_eff_l1e, +struct paging_mode sh_paging_mode = { + .page_fault = sh_page_fault, + .invlpg = sh_invlpg, + .gva_to_gpa = sh_gva_to_gpa, + .gva_to_gfn = sh_gva_to_gfn, + .update_cr3 = sh_update_cr3, + .update_paging_modes = shadow_update_paging_modes, + .write_p2m_entry = shadow_write_p2m_entry, + .write_guest_entry = shadow_write_guest_entry, + .cmpxchg_guest_entry = shadow_cmpxchg_guest_entry, + .guest_map_l1e = sh_guest_map_l1e, + .guest_get_eff_l1e = sh_guest_get_eff_l1e, + .guest_levels = GUEST_PAGING_LEVELS, + .shadow.detach_old_tables = sh_detach_old_tables, + .shadow.x86_emulate_write = sh_x86_emulate_write, + .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, + .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, + .shadow.make_monitor_table = sh_make_monitor_table, + .shadow.destroy_monitor_table = sh_destroy_monitor_table, #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC - .guess_wrmap = sh_guess_wrmap, -#endif - .guest_levels = GUEST_PAGING_LEVELS, - .shadow_levels = SHADOW_PAGING_LEVELS, + .shadow.guess_wrmap = sh_guess_wrmap, +#endif + .shadow.shadow_levels = SHADOW_PAGING_LEVELS, }; /* diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/shadow/multi.h --- a/xen/arch/x86/mm/shadow/multi.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/shadow/multi.h Wed Feb 14 12:02:20 2007 +0000 @@ -115,5 +115,5 @@ SHADOW_INTERNAL_NAME(sh_destroy_monitor_ (struct vcpu *v, mfn_t mmfn); #endif -extern struct shadow_paging_mode +extern struct paging_mode SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS); diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/shadow/private.h Wed Feb 14 12:02:20 2007 +0000 @@ -41,13 +41,12 @@ #define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ #define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ #define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ #ifdef NDEBUG #define SHADOW_AUDIT 0 #define SHADOW_AUDIT_ENABLE 0 #else -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. 
*/ +#define SHADOW_AUDIT 0x15 /* Basic audit of all */ #define SHADOW_AUDIT_ENABLE shadow_audit_enable extern int shadow_audit_enable; #endif @@ -84,9 +83,9 @@ extern int shadow_audit_enable; #define SHADOW_DEBUG_PROPAGATE 1 #define SHADOW_DEBUG_MAKE_SHADOW 1 #define SHADOW_DEBUG_DESTROY_SHADOW 1 -#define SHADOW_DEBUG_P2M 0 #define SHADOW_DEBUG_A_AND_D 1 #define SHADOW_DEBUG_EMULATE 1 +#define SHADOW_DEBUG_P2M 1 #define SHADOW_DEBUG_LOGDIRTY 0 /****************************************************************************** @@ -108,36 +107,36 @@ extern int shadow_audit_enable; #error shadow.h currently requires CONFIG_SMP #endif -#define shadow_lock_init(_d) \ - do { \ - spin_lock_init(&(_d)->arch.shadow.lock); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ +#define shadow_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.paging.shadow.lock); \ + (_d)->arch.paging.shadow.locker = -1; \ + (_d)->arch.paging.shadow.locker_function = "nobody"; \ } while (0) #define shadow_locked_by_me(_d) \ - (current->processor == (_d)->arch.shadow.locker) - -#define shadow_lock(_d) \ - do { \ - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ - { \ - printk("Error: shadow lock held by %s\n", \ - (_d)->arch.shadow.locker_function); \ - BUG(); \ - } \ - spin_lock(&(_d)->arch.shadow.lock); \ - ASSERT((_d)->arch.shadow.locker == -1); \ - (_d)->arch.shadow.locker = current->processor; \ - (_d)->arch.shadow.locker_function = __func__; \ + (current->processor == (_d)->arch.paging.shadow.locker) + +#define shadow_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.paging.shadow.locker == current->processor) )\ + { \ + printk("Error: shadow lock held by %s\n", \ + (_d)->arch.paging.shadow.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.paging.shadow.lock); \ + ASSERT((_d)->arch.paging.shadow.locker == -1); \ + (_d)->arch.paging.shadow.locker = current->processor; \ + (_d)->arch.paging.shadow.locker_function = __func__; \ } while (0) -#define shadow_unlock(_d) \ - do { \ - ASSERT((_d)->arch.shadow.locker == current->processor); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - spin_unlock(&(_d)->arch.shadow.lock); \ +#define shadow_unlock(_d) \ + do { \ + ASSERT((_d)->arch.paging.shadow.locker == current->processor); \ + (_d)->arch.paging.shadow.locker = -1; \ + (_d)->arch.paging.shadow.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.paging.shadow.lock); \ } while (0) @@ -151,13 +150,6 @@ extern void shadow_audit_tables(struct v #else #define shadow_audit_tables(_v) do {} while(0) #endif - -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M -extern void shadow_audit_p2m(struct domain *d); -#else -#define shadow_audit_p2m(_d) do {} while(0) -#endif - /****************************************************************************** * Macro for dealing with the naming of the internal names of the @@ -304,7 +296,7 @@ static inline int sh_type_is_pinnable(st * page. When we're shadowing those kernels, we have to pin l3 * shadows so they don't just evaporate on every context switch. * For all other guests, we'd rather use the up-pointer field in l3s. 
*/ - if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) + if ( unlikely((v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) && CONFIG_PAGING_LEVELS >= 4 && t == SH_type_l3_64_shadow) ) return 1; @@ -379,12 +371,11 @@ void sh_install_xen_entries_in_l2(struct void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); /* Update the shadows in response to a pagetable write from Xen */ -extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); +int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); /* Update the shadows in response to a pagetable write from a HVM guest */ -extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); +void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); /* Remove all writeable mappings of a guest frame from the shadows. * Returns non-zero if we need to flush TLBs. @@ -394,6 +385,21 @@ extern int sh_remove_write_access(struct unsigned int level, unsigned long fault_addr); +/* Allocate/free functions for passing to the P2M code. */ +struct page_info *shadow_alloc_p2m_page(struct domain *d); +void shadow_free_p2m_page(struct domain *d, struct page_info *pg); + +/* Functions that atomically write PT/P2M entries and update state */ +void shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level); +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn); + + + /****************************************************************************** * Flags used in the return value of the shadow_set_lXe() functions... */ @@ -477,19 +483,6 @@ sh_unmap_domain_page_global(void *p) unmap_domain_page_global(p); } -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} - - /****************************************************************************** * Log-dirty mode bitmap handling */ @@ -502,13 +495,13 @@ sh_mfn_is_dirty(struct domain *d, mfn_t { unsigned long pfn; ASSERT(shadow_mode_log_dirty(d)); - ASSERT(d->arch.shadow.dirty_bitmap != NULL); + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); /* We /really/ mean PFN here, even for non-translated guests. 
*/ pfn = get_gpfn_from_mfn(mfn_x(gmfn)); if ( likely(VALID_M2P(pfn)) - && likely(pfn < d->arch.shadow.dirty_bitmap_size) - && test_bit(pfn, d->arch.shadow.dirty_bitmap) ) + && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) + && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) return 1; return 0; @@ -612,7 +605,7 @@ static inline int sh_pin(struct vcpu *v, sp->pinned = 1; } /* Put it at the head of the list of pinned shadows */ - list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows); + list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); return 1; } diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/mm/shadow/types.h Wed Feb 14 12:02:20 2007 +0000 @@ -414,15 +414,9 @@ static inline mfn_t static inline mfn_t vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn) { - if ( !shadow_vcpu_mode_translate(v) ) + if ( !paging_vcpu_mode_translate(v) ) return _mfn(gfn_x(gfn)); - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); -} - -static inline gfn_t -mfn_to_gfn(struct domain *d, mfn_t mfn) -{ - return _gfn(sh_mfn_to_gfn(d, mfn)); + return gfn_to_mfn(v->domain, gfn_x(gfn)); } static inline paddr_t diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/setup.c Wed Feb 14 12:02:20 2007 +0000 @@ -29,7 +29,7 @@ #include <asm/mpspec.h> #include <asm/apic.h> #include <asm/desc.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/e820.h> #include <acm/acm_hooks.h> #include <xen/kexec.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/sysctl.c Wed Feb 14 12:02:20 2007 +0000 @@ -19,7 +19,6 @@ #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> -#include <asm/shadow.h> #include <asm/irq.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/traps.c Wed Feb 14 12:02:20 2007 +0000 @@ -46,7 +46,7 @@ #include <xen/nmi.h> #include <xen/version.h> #include <xen/kexec.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/system.h> #include <asm/io.h> #include <asm/atomic.h> @@ -860,8 +860,8 @@ static int fixup_page_fault(unsigned lon if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) { - if ( shadow_mode_external(d) && guest_mode(regs) ) - return shadow_fault(addr, regs); + if ( paging_mode_external(d) && guest_mode(regs) ) + return paging_fault(addr, regs); if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); @@ -876,8 +876,8 @@ static int fixup_page_fault(unsigned lon ptwr_do_page_fault(v, addr, regs) ) return EXCRET_fault_fixed; - if ( shadow_mode_enabled(d) ) - return shadow_fault(addr, regs); + if ( paging_mode_enabled(d) ) + return paging_fault(addr, regs); return 0; } diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/x86_32/domain_page.c --- a/xen/arch/x86/x86_32/domain_page.c Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/arch/x86/x86_32/domain_page.c Wed Feb 14 12:02:20 2007 +0000 @@ -11,7 +11,6 @@ #include <xen/mm.h> #include <xen/perfc.h> #include <xen/domain_page.h> -#include <xen/shadow.h> #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/hardirq.h> diff -r df25547d7638 -r 6daa91dc9247 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Tue Feb 13 
15:32:25 2007 +0000 +++ b/xen/arch/x86/x86_64/traps.c Wed Feb 14 12:02:20 2007 +0000 @@ -16,7 +16,6 @@ #include <asm/flushtlb.h> #include <asm/msr.h> #include <asm/page.h> -#include <asm/shadow.h> #include <asm/shared.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/include/asm-x86/domain.h Wed Feb 14 12:02:20 2007 +0000 @@ -58,19 +58,22 @@ extern void toggle_guest_mode(struct vcp */ extern void hypercall_page_initialise(struct domain *d, void *); +/************************************************/ +/* shadow paging extension */ +/************************************************/ struct shadow_domain { - u32 mode; /* flags to control shadow operation */ spinlock_t lock; /* shadow domain lock */ int locker; /* processor which holds the lock */ const char *locker_function; /* Func that took it */ + unsigned int opt_flags; /* runtime tunable optimizations on/off */ + struct list_head pinned_shadows; + + /* Memory allocation */ struct list_head freelists[SHADOW_MAX_ORDER + 1]; struct list_head p2m_freelist; - struct list_head p2m_inuse; - struct list_head pinned_shadows; unsigned int total_pages; /* number of pages allocated */ unsigned int free_pages; /* number of pages on freelists */ - unsigned int p2m_pages; /* number of pages in p2m map */ - unsigned int opt_flags; /* runtime tunable optimizations on/off */ + unsigned int p2m_pages; /* number of pages allocates to p2m */ /* Shadow hashtable */ struct shadow_page_info **hash_table; @@ -83,6 +86,61 @@ struct shadow_domain { /* Shadow log-dirty mode stats */ unsigned int fault_count; unsigned int dirty_count; +}; + +struct shadow_vcpu { +#if CONFIG_PAGING_LEVELS >= 3 + /* PAE guests: per-vcpu shadow top-level table */ + l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); +#endif + /* Last MFN that we emulated a write to. */ + unsigned long last_emulated_mfn; + /* MFN of the last shadow that we shot a writeable mapping in */ + unsigned long last_writeable_pte_smfn; +}; + +/************************************************/ +/* p2m handling */ +/************************************************/ + +struct p2m_domain { + /* Lock that protects updates to the p2m */ + spinlock_t lock; + int locker; /* processor which holds the lock */ + const char *locker_function; /* Func that took it */ + + /* Pages used to construct the p2m */ + struct list_head pages; + + /* Functions to call to get or free pages for the p2m */ + struct page_info * (*alloc_page )(struct domain *d); + void (*free_page )(struct domain *d, + struct page_info *pg); + + /* Highest guest frame that's ever been mapped in the p2m */ + unsigned long max_mapped_pfn; +}; + +/************************************************/ +/* common paging data structure */ +/************************************************/ +struct paging_domain { + u32 mode; /* flags to control paging operation */ + + /* extension for shadow paging support */ + struct shadow_domain shadow; + + /* Other paging assistance code will have structs here */ +}; + +struct paging_vcpu { + /* Pointers to mode-specific entry points. */ + struct paging_mode *mode; + /* HVM guest: paging enabled (CR0.PG)? 
*/ + unsigned int translate_enabled:1; + + /* paging support extension */ + struct shadow_vcpu shadow; }; struct arch_domain @@ -108,12 +166,11 @@ struct arch_domain struct hvm_domain hvm_domain; - struct shadow_domain shadow; + struct paging_domain paging; + struct p2m_domain p2m ; /* Shadow translated domain: P2M mapping */ pagetable_t phys_table; - /* Highest guest frame that's ever been mapped in the p2m */ - unsigned long max_mapped_pfn; /* Pseudophysical e820 map (XENMEM_memory_map). */ struct e820entry e820[3]; @@ -139,21 +196,6 @@ struct pae_l3_cache { }; #define pae_l3_cache_init(c) ((void)0) #endif -struct shadow_vcpu { -#if CONFIG_PAGING_LEVELS >= 3 - /* PAE guests: per-vcpu shadow top-level table */ - l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); -#endif - /* Pointers to mode-specific entry points. */ - struct shadow_paging_mode *mode; - /* Last MFN that we emulated a write to. */ - unsigned long last_emulated_mfn; - /* MFN of the last shadow that we shot a writeable mapping in */ - unsigned long last_writeable_pte_smfn; - /* HVM guest: paging enabled (CR0.PG)? */ - unsigned int translate_enabled:1; -}; - struct arch_vcpu { /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */ @@ -205,7 +247,7 @@ struct arch_vcpu /* Current LDT details. */ unsigned long shadow_ldt_mapcnt; - struct shadow_vcpu shadow; + struct paging_vcpu paging; } __cacheline_aligned; /* shorthands to improve code legibility */ diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/include/asm-x86/mm.h Wed Feb 14 12:02:20 2007 +0000 @@ -246,6 +246,64 @@ pae_copy_root(struct vcpu *v, l3_pgentry int check_descriptor(const struct domain *, struct desc_struct *d); + +/****************************************************************************** + * With shadow pagetables, the different kinds of address start + * to get confusing. + * + * Virtual addresses are what they usually are: the addresses that are used + * to access memory while the guest is running. The MMU translates from + * virtual addresses to machine addresses. + * + * (Pseudo-)physical addresses are the abstraction of physical memory the + * guest uses for allocation and so forth. For the purposes of this code, + * we can largely ignore them. + * + * Guest frame numbers (gfns) are the entries that the guest puts in its + * pagetables. For normal paravirtual guests, they are actual frame numbers, + * with the translation done by the guest. + * + * Machine frame numbers (mfns) are the entries that the hypervisor puts + * in the shadow page tables. + * + * Elsewhere in the xen code base, the name "gmfn" is generally used to refer + * to a "machine frame number, from the guest's perspective", or in other + * words, pseudo-physical frame numbers. However, in the shadow code, the + * term "gmfn" means "the mfn of a guest page"; this combines naturally with + * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a + * guest L2 page), etc... + */ + +/* With this defined, we do some ugly things to force the compiler to + * give us type safety between mfns and gfns and other integers. + * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions + * that translate between int and foo_t. + * + * It does have some performance cost because the types now have + * a different storage attribute, so may not want it on all the time.
*/ + +#ifndef NDEBUG +#define TYPE_SAFETY 1 +#endif + +#ifdef TYPE_SAFETY +#define TYPE_SAFE(_type,_name) \ +typedef struct { _type _name; } _name##_t; \ +static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ +static inline _type _name##_x(_name##_t n) { return n._name; } +#else +#define TYPE_SAFE(_type,_name) \ +typedef _type _name##_t; \ +static inline _name##_t _##_name(_type n) { return n; } \ +static inline _type _name##_x(_name##_t n) { return n; } +#endif + +TYPE_SAFE(unsigned long,mfn); + +/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */ +#define PRI_mfn "05lx" + + /* * The MPT (machine->physical mapping table) is an array of word-sized * values, indexed on machine frame number. It is expected that guest OSes @@ -269,13 +327,12 @@ int check_descriptor(const struct domain #endif #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) - #define mfn_to_gmfn(_d, mfn) \ - ( (shadow_mode_translate(_d)) \ + ( (paging_mode_translate(_d)) \ ? get_gpfn_from_mfn(mfn) \ : (mfn) ) -#define gmfn_to_mfn(_d, gpfn) mfn_x(sh_gfn_to_mfn(_d, gpfn)) +#define gmfn_to_mfn(_d, gpfn) mfn_x(gfn_to_mfn(_d, gpfn)) #define INVALID_MFN (~0UL) diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/p2m.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/p2m.h Wed Feb 14 12:02:20 2007 +0000 @@ -0,0 +1,142 @@ +/****************************************************************************** + * include/asm-x86/paging.h + * + * physical-to-machine mappings for automatically-translated domains. + * + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XEN_P2M_H +#define _XEN_P2M_H + + +/* The phys_to_machine_mapping is the reversed mapping of MPT for full + * virtualization. It is only used by shadow_mode_translate()==true + * guests, so we steal the address space that would have normally + * been used by the read-only MPT map. + */ +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) + + +/* Read the current domain's P2M table. 
*/ +static inline mfn_t gfn_to_mfn_current(unsigned long gfn) +{ + l1_pgentry_t l1e = l1e_empty(); + int ret; + + if ( gfn > current->domain->arch.p2m.max_mapped_pfn ) + return _mfn(INVALID_MFN); + + /* Don't read off the end of the p2m table */ + ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); + + ret = __copy_from_user(&l1e, + &phys_to_machine_mapping[gfn], + sizeof(l1e)); + + if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) + return _mfn(l1e_get_pfn(l1e)); + + return _mfn(INVALID_MFN); +} + +/* Read another domain's P2M table, mapping pages as we go */ +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); + +/* General conversion function from gfn to mfn */ +static inline mfn_t gfn_to_mfn(struct domain *d, unsigned long gfn) +{ + if ( !paging_mode_translate(d) ) + return _mfn(gfn); + if ( likely(current->domain == d) ) + return gfn_to_mfn_current(gfn); + else + return gfn_to_mfn_foreign(d, gfn); +} + +/* General conversion function from mfn to gfn */ +static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn) +{ + if ( paging_mode_translate(d) ) + return get_gpfn_from_mfn(mfn_x(mfn)); + else + return mfn_x(mfn); +} + +/* Compatibility function for HVM code */ +static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) +{ + return mfn_x(gfn_to_mfn_current(pfn)); +} + +/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ +static inline int mmio_space(paddr_t gpa) +{ + unsigned long gfn = gpa >> PAGE_SHIFT; + return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn))); +} + +/* Translate the frame number held in an l1e from guest to machine */ +static inline l1_pgentry_t +gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) +{ + if ( unlikely(paging_mode_translate(d)) ) + l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), + l1e_get_flags(l1e)); + return l1e; +} + + + +/* Init the datastructures for later use by the p2m code */ +void p2m_init(struct domain *d); + +/* Allocate a new p2m table for a domain. + * + * The alloc_page and free_page functions will be used to get memory to + * build the p2m, and to release it again at the end of day. + * + * Returns 0 for success or -errno. */ +int p2m_alloc_table(struct domain *d, + struct page_info * (*alloc_page)(struct domain *d), + void (*free_page)(struct domain *d, struct page_info *pg)); + +/* Return all the p2m resources to Xen. 
*/ +void p2m_teardown(struct domain *d); + +/* Add a page to a domain's p2m table */ +void guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Remove a page from a domain's p2m table */ +void guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + + +#endif /* _XEN_P2M_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/include/asm-x86/page.h Wed Feb 14 12:02:20 2007 +0000 @@ -208,8 +208,10 @@ typedef struct { u64 pfn; } pagetable_t; #define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) #define pagetable_get_page(x) mfn_to_page((x).pfn) #define pagetable_get_pfn(x) ((x).pfn) +#define pagetable_get_mfn(x) _mfn(((x).pfn)) #define pagetable_is_null(x) ((x).pfn == 0) #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) }) +#define pagetable_from_mfn(mfn) ((pagetable_t) { mfn_x(mfn) }) #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) #define pagetable_null() pagetable_from_pfn(0) diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/paging.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/paging.h Wed Feb 14 12:02:20 2007 +0000 @@ -0,0 +1,376 @@ +/****************************************************************************** + * include/asm-x86/paging.h + * + * Common interface for paging support + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XEN_PAGING_H +#define _XEN_PAGING_H + +#include <xen/mm.h> +#include <public/domctl.h> +#include <xen/sched.h> +#include <xen/perfc.h> +#include <xen/domain_page.h> +#include <asm/flushtlb.h> +#include <asm/domain.h> + +/***************************************************************************** + * Macros to tell which paging mode a domain is in */ + +#define PG_SH_shift 20 +#define PG_HAP_shift 21 +/* We're in one of the shadow modes */ +#define PG_SH_enable (1U << PG_SH_shift) +#define PG_HAP_enable (1U << PG_HAP_shift) + +/* common paging mode bits */ +#define PG_mode_shift 10 +/* Refcounts based on shadow tables instead of guest tables */ +#define PG_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift) +/* Enable log dirty mode */ +#define PG_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift) +/* Xen does p2m translation, not guest */ +#define PG_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift) +/* Xen does not steal address space from the domain for its own booking; + * requires VT or similar mechanisms */ +#define PG_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift) + +#define paging_mode_enabled(_d) ((_d)->arch.paging.mode) +#define paging_mode_shadow(_d) ((_d)->arch.paging.mode & PG_SH_enable) +#define paging_mode_hap(_d) ((_d)->arch.paging.mode & PG_HAP_enable) + +#define paging_mode_refcounts(_d) ((_d)->arch.paging.mode & PG_refcounts) +#define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty) +#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate) +#define paging_mode_external(_d) ((_d)->arch.paging.mode & PG_external) + +/****************************************************************************** + * The equivalent for a particular vcpu of a shadowed domain. */ + +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? + * + * This is true of translated HVM domains on a vcpu which has paging + * enabled. (HVM vcpus with paging disabled are using the p2m table as + * its paging table, so no translation occurs in this case.) + * It is also true for all vcpus of translated PV domains. */ +#define paging_vcpu_mode_translate(_v) ((_v)->arch.paging.translate_enabled) + + + +/***************************************************************************** + * Mode-specific entry points into the shadow code. + * + * These shouldn't be used directly by callers; rather use the functions + * below which will indirect through this table as appropriate. 
*/ + +struct sh_emulate_ctxt; +struct shadow_paging_mode { + void (*detach_old_tables )(struct vcpu *v); + int (*x86_emulate_write )(struct vcpu *v, unsigned long va, + void *src, u32 bytes, + struct sh_emulate_ctxt *sh_ctxt); + int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va, + unsigned long old, + unsigned long new, + unsigned int bytes, + struct sh_emulate_ctxt *sh_ctxt); + int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, + unsigned long old_lo, + unsigned long old_hi, + unsigned long new_lo, + unsigned long new_hi, + struct sh_emulate_ctxt *sh_ctxt); + mfn_t (*make_monitor_table )(struct vcpu *v); + void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); + int (*guess_wrmap )(struct vcpu *v, + unsigned long vaddr, mfn_t gmfn); + /* For outsiders to tell what mode we're in */ + unsigned int shadow_levels; +}; + + +/************************************************/ +/* common paging interface */ +/************************************************/ +struct paging_mode { + int (*page_fault )(struct vcpu *v, unsigned long va, + struct cpu_user_regs *regs); + int (*invlpg )(struct vcpu *v, unsigned long va); + paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); + unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); + void (*update_cr3 )(struct vcpu *v, int do_locking); + void (*update_paging_modes )(struct vcpu *v); + void (*write_p2m_entry )(struct vcpu *v, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level); + int (*write_guest_entry )(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); + int (*cmpxchg_guest_entry )(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, + mfn_t gmfn); + void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, + unsigned long *gl1mfn); + void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, + void *eff_l1e); + unsigned int guest_levels; + + /* paging support extension */ + struct shadow_paging_mode shadow; +}; + + +/***************************************************************************** + * Entry points into the paging-assistance code */ + +/* Initialize the paging resource for vcpu struct. It is called by + * vcpu_initialise() in domain.c */ +void paging_vcpu_init(struct vcpu *v); + +/* Set up the paging-assistance-specific parts of a domain struct at + * start of day. Called for every domain from arch_domain_create() */ +void paging_domain_init(struct domain *d); + +/* Handler for paging-control ops: operations from user-space to enable + * and disable ephemeral shadow modes (test mode and log-dirty mode) and + * manipulate the log-dirty bitmap. */ +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl); + +/* Call when destroying a domain */ +void paging_teardown(struct domain *d); + +/* Call once all of the references to the domain have gone away */ +void paging_final_teardown(struct domain *d); + +/* Enable an arbitrary paging-assistance mode. Call once at domain + * creation. */ +int paging_enable(struct domain *d, u32 mode); + + +/* Page fault handler + * Called from pagefault handler in Xen, and from the HVM trap handlers + * for pagefaults. Returns 1 if this fault was an artefact of the + * paging code (and the guest should retry) or 0 if it is not (and the + * fault should be handled elsewhere or passed to the guest). + * + * Note: under shadow paging, this function handles all page faults; + * however, for hardware-assisted paging, this function handles only + * host page faults (i.e. nested page faults). 
*/ +static inline int +paging_fault(unsigned long va, struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + return v->arch.paging.mode->page_fault(v, va, regs); +} + +/* Handle invlpg requests on vcpus. + * Returns 1 if the invlpg instruction should be issued on the hardware, + * or 0 if it's safe not to do so. */ +static inline int paging_invlpg(struct vcpu *v, unsigned long va) +{ + return v->arch.paging.mode->invlpg(v, va); +} + +/* Translate a guest virtual address to the physical address that the + * *guest* pagetables would map it to. */ +static inline paddr_t paging_gva_to_gpa(struct vcpu *v, unsigned long va) +{ + if ( unlikely(!paging_vcpu_mode_translate(v)) ) + return (paddr_t) va; + + return v->arch.paging.mode->gva_to_gpa(v, va); +} + +/* Translate a guest virtual address to the frame number that the + * *guest* pagetables would map it to. */ +static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va) +{ + if ( unlikely(!paging_vcpu_mode_translate(v)) ) + return va >> PAGE_SHIFT; + + return v->arch.paging.mode->gva_to_gfn(v, va); +} + +/* Update all the things that are derived from the guest's CR3. + * Called when the guest changes CR3; the caller can then use v->arch.cr3 + * as the value to load into the host CR3 to schedule this vcpu */ +static inline void paging_update_cr3(struct vcpu *v) +{ + v->arch.paging.mode->update_cr3(v, 1); +} + +/* Update all the things that are derived from the guest's CR0/CR3/CR4. + * Called to initialize paging structures if the paging mode + * has changed, and when bringing up a VCPU for the first time. */ +static inline void paging_update_paging_modes(struct vcpu *v) +{ + v->arch.paging.mode->update_paging_modes(v); +} + + +/* Write a new value into the guest pagetable, and update the + * paging-assistance state appropriately. Returns 0 if we page-faulted, + * 1 for success. */ +static inline int paging_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn) +{ + if ( unlikely(paging_mode_enabled(v->domain) + && v->arch.paging.mode != NULL) ) + return v->arch.paging.mode->write_guest_entry(v, p, new, gmfn); + else + return (!__copy_to_user(p, &new, sizeof(new))); +} + + +/* Cmpxchg a new value into the guest pagetable, and update the + * paging-assistance state appropriately. Returns 0 if we page-faulted, + * 1 if not. N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +static inline int paging_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, + mfn_t gmfn) +{ + if ( unlikely(paging_mode_enabled(v->domain) + && v->arch.paging.mode != NULL) ) + return v->arch.paging.mode->cmpxchg_guest_entry(v, p, old, new, gmfn); + else + return (!cmpxchg_user(p, *old, new)); +} + +/* Helper function that writes a pte in such a way that a concurrent read + * never sees a half-written entry that has _PAGE_PRESENT set */ +static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new) +{ +#if CONFIG_PAGING_LEVELS == 3 + /* PAE machines write 64bit PTEs as two 32bit writes. */ + volatile unsigned long *d = (unsigned long *) p; + unsigned long *s = (unsigned long *) &new; + BUILD_BUG_ON(sizeof (l1_pgentry_t) != 2 * sizeof (unsigned long)); + d[0] = 0; + d[1] = s[1]; + d[0] = s[0]; +#else + *p = new; +#endif +} + +/* Atomically write a P2M entry and update the paging-assistance state + * appropriately. 
*/ +static inline void paging_write_p2m_entry(struct domain *d, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level) +{ + struct vcpu *v = current; + if ( v->domain != d ) + v = d->vcpu[0]; + if ( likely(paging_mode_enabled(d) && v->arch.paging.mode != NULL) ) + { + return v->arch.paging.mode->write_p2m_entry(v, gfn, p, new, level); + } + else + safe_write_pte(p, new); +} + +/* Print paging-assistance info to the console */ +void paging_dump_domain_info(struct domain *d); +void paging_dump_vcpu_info(struct vcpu *v); + + +/***************************************************************************** + * Access to the guest pagetables */ + +/* Get a mapping of a PV guest's l1e for this virtual address. */ +static inline void * +guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) +{ + l2_pgentry_t l2e; + + if ( unlikely(paging_mode_translate(v->domain)) ) + return v->arch.paging.mode->guest_map_l1e(v, addr, gl1mfn); + + /* Find this l1e and its enclosing l1mfn in the linear map */ + if ( __copy_from_user(&l2e, + &__linear_l2_table[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)) != 0 ) + return NULL; + /* Check flags that it will be safe to read the l1e */ + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) + != _PAGE_PRESENT ) + return NULL; + *gl1mfn = l2e_get_pfn(l2e); + return &__linear_l1_table[l1_linear_offset(addr)]; +} + +/* Pull down the mapping we got from guest_map_l1e() */ +static inline void +guest_unmap_l1e(struct vcpu *v, void *p) +{ + if ( unlikely(paging_mode_translate(v->domain)) ) + unmap_domain_page(p); +} + +/* Read the guest's l1e that maps this address. */ +static inline void +guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) +{ + if ( likely(!paging_mode_translate(v->domain)) ) + { + ASSERT(!paging_mode_external(v->domain)); + if ( __copy_from_user(eff_l1e, + &__linear_l1_table[l1_linear_offset(addr)], + sizeof(l1_pgentry_t)) != 0 ) + *(l1_pgentry_t *)eff_l1e = l1e_empty(); + return; + } + + v->arch.paging.mode->guest_get_eff_l1e(v, addr, eff_l1e); +} + +/* Read the guest's l1e that maps this address, from the kernel-mode + * pagetables. 
*/ +static inline void +guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) +{ +#if defined(__x86_64__) + int user_mode = !(v->arch.flags & TF_kernel_mode); +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) +#else +#define TOGGLE_MODE() ((void)0) +#endif + + TOGGLE_MODE(); + guest_get_eff_l1e(v, addr, eff_l1e); + TOGGLE_MODE(); +} + + + +#endif /* XEN_PAGING_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r df25547d7638 -r 6daa91dc9247 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Tue Feb 13 15:32:25 2007 +0000 +++ b/xen/include/asm-x86/shadow.h Wed Feb 14 12:02:20 2007 +0000 @@ -28,44 +28,25 @@ #include <xen/perfc.h> #include <xen/domain_page.h> #include <asm/flushtlb.h> +#include <asm/paging.h> +#include <asm/p2m.h> /***************************************************************************** - * Macros to tell which shadow paging mode a domain is in */ + * Macros to tell which shadow paging mode a domain is in*/ -#define SHM2_shift 10 -/* We're in one of the shadow modes */ -#define SHM2_enable (1U << SHM2_shift) -/* Refcounts based on shadow tables instead of guest tables */ -#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift) -/* Enable log dirty mode */ -#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift) -/* Xen does p2m translation, not guest */ -#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift) -/* Xen does not steal address space from the domain for its own booking; - * requires VT or similar mechanisms */ -#define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift) - -#define shadow_mode_enabled(_d) ((_d)->arch.shadow.mode) -#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts) -#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty) -#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate) -#define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) +#define shadow_mode_enabled(_d) paging_mode_shadow(_d) +#define shadow_mode_refcounts(_d) (paging_mode_shadow(_d) && \ + paging_mode_refcounts(_d)) +#define shadow_mode_log_dirty(_d) (paging_mode_shadow(_d) && \ + paging_mode_log_dirty(_d)) +#define shadow_mode_translate(_d) (paging_mode_shadow(_d) && \ + paging_mode_translate(_d)) +#define shadow_mode_external(_d) (paging_mode_shadow(_d) && \ + paging_mode_external(_d)) /* Xen traps & emulates all reads of all page table pages: * not yet supported */ #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) - - -/****************************************************************************** - * The equivalent for a particular vcpu of a shadowed domain. */ - -/* Is this vcpu using the P2M table to translate between GFNs and MFNs? - * - * This is true of translated HVM domains on a vcpu which has paging - * enabled. (HVM vcpus with paging disabled are using the p2m table as - * its paging table, so no translation occurs in this case.) - * It is also true for all vcpus of translated PV domains. */ -#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) /* * 32on64 support @@ -76,121 +57,17 @@ #define pv_32bit_guest(_v) (!is_hvm_vcpu(_v)) #endif -/****************************************************************************** - * With shadow pagetables, the different kinds of address start - * to get get confusing. 
- * - * Virtual addresses are what they usually are: the addresses that are used - * to accessing memory while the guest is running. The MMU translates from - * virtual addresses to machine addresses. - * - * (Pseudo-)physical addresses are the abstraction of physical memory the - * guest uses for allocation and so forth. For the purposes of this code, - * we can largely ignore them. - * - * Guest frame numbers (gfns) are the entries that the guest puts in its - * pagetables. For normal paravirtual guests, they are actual frame numbers, - * with the translation done by the guest. - * - * Machine frame numbers (mfns) are the entries that the hypervisor puts - * in the shadow page tables. - * - * Elsewhere in the xen code base, the name "gmfn" is generally used to refer - * to a "machine frame number, from the guest's perspective", or in other - * words, pseudo-physical frame numbers. However, in the shadow code, the - * term "gmfn" means "the mfn of a guest page"; this combines naturally with - * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a - * guest L2 page), etc... - */ - -/* With this defined, we do some ugly things to force the compiler to - * give us type safety between mfns and gfns and other integers. - * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions - * that translate beween int and foo_t. - * - * It does have some performance cost because the types now have - * a different storage attribute, so may not want it on all the time. */ -#ifndef NDEBUG -#define TYPE_SAFETY 1 -#endif - -#ifdef TYPE_SAFETY -#define TYPE_SAFE(_type,_name) \ -typedef struct { _type _name; } _name##_t; \ -static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ -static inline _type _name##_x(_name##_t n) { return n._name; } -#else -#define TYPE_SAFE(_type,_name) \ -typedef _type _name##_t; \ -static inline _name##_t _##_name(_type n) { return n; } \ -static inline _type _name##_x(_name##_t n) { return n; } -#endif - -TYPE_SAFE(unsigned long,mfn) - -/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ -#define SH_PRI_mfn "05lx" - - -/***************************************************************************** - * Mode-specific entry points into the shadow code. - * - * These shouldn't be used directly by callers; rather use the functions - * below which will indirect through this table as appropriate. 
*/ - -struct sh_emulate_ctxt; -struct shadow_paging_mode { - int (*page_fault )(struct vcpu *v, unsigned long va, - struct cpu_user_regs *regs); - int (*invlpg )(struct vcpu *v, unsigned long va); - paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); - unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); - void (*update_cr3 )(struct vcpu *v, int do_locking); - int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - void (*detach_old_tables )(struct vcpu *v); - int (*x86_emulate_write )(struct vcpu *v, unsigned long va, - void *src, u32 bytes, - struct sh_emulate_ctxt *sh_ctxt); - int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va, - unsigned long old, - unsigned long new, - unsigned int bytes, - struct sh_emulate_ctxt *sh_ctxt); - int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, - struct sh_emulate_ctxt *sh_ctxt); - mfn_t (*make_monitor_table )(struct vcpu *v); - void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); - void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, - unsigned long *gl1mfn); - void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, - void *eff_l1e); - int (*guess_wrmap )(struct vcpu *v, - unsigned long vaddr, mfn_t gmfn); - /* For outsiders to tell what mode we're in */ - unsigned int shadow_levels; - unsigned int guest_levels; -}; - /***************************************************************************** * Entry points into the shadow code */ /* Set up the shadow-specific parts of a domain struct at start of day. - * Called for every domain from arch_domain_create() */ + * Called from paging_domain_init(). */ void shadow_domain_init(struct domain *d); + +/* Setup the shadow-specific parts of a vcpu struct. It is called by + * paging_vcpu_init() in paging.c */ +void shadow_vcpu_init(struct vcpu *v); /* Enable an arbitrary shadow mode. Call once at domain creation. */ int shadow_enable(struct domain *d, u32 mode); @@ -218,136 +95,11 @@ static inline void mark_dirty(struct dom shadow_mark_dirty(d, _mfn(gmfn)); } -/* Handle page-faults caused by the shadow pagetable mechanisms. - * Called from pagefault handler in Xen, and from the HVM trap handlers - * for pagefaults. Returns 1 if this fault was an artefact of the - * shadow code (and the guest should retry) or 0 if it is not (and the - * fault should be handled elsewhere or passed to the guest). */ -static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - perfc_incrc(shadow_fault); - return v->arch.shadow.mode->page_fault(v, va, regs); -} - -/* Handle invlpg requests on shadowed vcpus. - * Returns 1 if the invlpg instruction should be issued on the hardware, - * or 0 if it's safe not to do so. */ -static inline int shadow_invlpg(struct vcpu *v, unsigned long va) -{ - return v->arch.shadow.mode->invlpg(v, va); -} - -/* Translate a guest virtual address to the physical address that the - * *guest* pagetables would map it to. 
*/ -static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) -{ - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) - return (paddr_t) va; - return v->arch.shadow.mode->gva_to_gpa(v, va); -} - -/* Translate a guest virtual address to the frame number that the - * *guest* pagetables would map it to. */ -static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) -{ - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) - return va >> PAGE_SHIFT; - return v->arch.shadow.mode->gva_to_gfn(v, va); -} - -/* Update all the things that are derived from the guest's CR3. - * Called when the guest changes CR3; the caller can then use v->arch.cr3 - * as the value to load into the host CR3 to schedule this vcpu */ -static inline void shadow_update_cr3(struct vcpu *v) -{ - v->arch.shadow.mode->update_cr3(v, 1); -} - /* Update all the things that are derived from the guest's CR0/CR3/CR4. * Called to initialize paging structures if the paging mode * has changed, and when bringing up a VCPU for the first time. */ void shadow_update_paging_modes(struct vcpu *v); - -/***************************************************************************** - * Access to the guest pagetables */ - -/* Get a mapping of a PV guest's l1e for this virtual address. */ -static inline void * -guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) -{ - l2_pgentry_t l2e; - - if ( unlikely(shadow_mode_translate(v->domain)) ) - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); - - /* Find this l1e and its enclosing l1mfn in the linear map */ - if ( __copy_from_user(&l2e, - &__linear_l2_table[l2_linear_offset(addr)], - sizeof(l2_pgentry_t)) != 0 ) - return NULL; - /* Check flags that it will be safe to read the l1e */ - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) - != _PAGE_PRESENT ) - return NULL; - *gl1mfn = l2e_get_pfn(l2e); - return &__linear_l1_table[l1_linear_offset(addr)]; -} - -/* Pull down the mapping we got from guest_map_l1e() */ -static inline void -guest_unmap_l1e(struct vcpu *v, void *p) -{ - if ( unlikely(shadow_mode_translate(v->domain)) ) - unmap_domain_page(p); -} - -/* Read the guest's l1e that maps this address. */ -static inline void -guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) -{ - if ( likely(!shadow_mode_translate(v->domain)) ) - { - ASSERT(!shadow_mode_external(v->domain)); - if ( __copy_from_user(eff_l1e, - &__linear_l1_table[l1_linear_offset(addr)], - sizeof(l1_pgentry_t)) != 0 ) - *(l1_pgentry_t *)eff_l1e = l1e_empty(); - return; - } - - v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); -} - -/* Read the guest's l1e that maps this address, from the kernel-mode - * pagetables. */ -static inline void -guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) -{ -#if defined(__x86_64__) - int user_mode = !(v->arch.flags & TF_kernel_mode); -#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) -#else -#define TOGGLE_MODE() ((void)0) -#endif - - TOGGLE_MODE(); - guest_get_eff_l1e(v, addr, eff_l1e); - TOGGLE_MODE(); -} - -/* Write a new value into the guest pagetable, and update the shadows - * appropriately. Returns 0 if we page-faulted, 1 for success. */ -int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, - intpte_t new, mfn_t gmfn); - -/* Cmpxchg a new value into the guest pagetable, and update the shadows - * appropriately. Returns 0 if we page-faulted, 1 if not. - * N.B. caller should check the value of "old" to see if the - * cmpxchg itself was successful. 
*/ -int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, - intpte_t *old, intpte_t new, mfn_t gmfn); /* Remove all mappings of the guest page from the shadows. * This is called from common code. It does not flush TLBs. */ @@ -368,99 +120,6 @@ static inline void shadow_remove_all_sha sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); } -/**************************************************************************/ -/* Guest physmap (p2m) support - * - * The phys_to_machine_mapping is the reversed mapping of MPT for full - * virtualization. It is only used by shadow_mode_translate()==true - * guests, so we steal the address space that would have normally - * been used by the read-only MPT map. - */ -#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) - -/* Add a page to a domain's p2m table */ -void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Remove a page from a domain's p2m table */ -void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Aliases, called from common code. */ -#define guest_physmap_add_page shadow_guest_physmap_add_page -#define guest_physmap_remove_page shadow_guest_physmap_remove_page - -/* Read the current domain's P2M table. */ -static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) -{ - l1_pgentry_t l1e = l1e_empty(); - int ret; - - if ( gfn > current->domain->arch.max_mapped_pfn ) - return _mfn(INVALID_MFN); - - /* Don't read off the end of the p2m table */ - ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); - - ret = __copy_from_user(&l1e, - &phys_to_machine_mapping[gfn], - sizeof(l1e)); - - if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) - return _mfn(l1e_get_pfn(l1e)); - - return _mfn(INVALID_MFN); -} - -/* Read another domain's P2M table, mapping pages as we go */ -mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); - -/* General conversion function from gfn to mfn */ -static inline mfn_t -sh_gfn_to_mfn(struct domain *d, unsigned long gfn) -{ - if ( !shadow_mode_translate(d) ) - return _mfn(gfn); - if ( likely(current->domain == d) ) - return sh_gfn_to_mfn_current(gfn); - else - return sh_gfn_to_mfn_foreign(d, gfn); -} - -/* Compatibility function for HVM code */ -static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) -{ - return mfn_x(sh_gfn_to_mfn_current(pfn)); -} - -/* General conversion function from mfn to gfn */ -static inline unsigned long -sh_mfn_to_gfn(struct domain *d, mfn_t mfn) -{ - if ( shadow_mode_translate(d) ) - return get_gpfn_from_mfn(mfn_x(mfn)); - else - return mfn_x(mfn); -} - -/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ -static inline int -mmio_space(paddr_t gpa) -{ - unsigned long gfn = gpa >> PAGE_SHIFT; - return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); -} - -/* Translate the frame number held in an l1e from guest to machine */ -static inline l1_pgentry_t -gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) -{ - if ( unlikely(shadow_mode_translate(d)) ) - l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), - l1e_get_flags(l1e)); - return l1e; -} - #endif /* _XEN_SHADOW_H */ /* _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
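A few illustrative sketches of the new interface follow; none of them are part of the changeset. First, a hypothetical caller of the new p2m lookup and paging-mode predicates. It assumes the headers introduced or modified above (asm/paging.h, asm/p2m.h, and the mfn_t / PRI_mfn additions to asm-x86/mm.h); the helper name and the printk messages are invented for illustration.

/* Hypothetical helper (illustration only): look up a guest frame through
 * the new p2m interface and report the result with the PRI_mfn format
 * macro added to asm-x86/mm.h. */
#include <xen/lib.h>
#include <asm/paging.h>
#include <asm/p2m.h>

static void dump_gfn_translation(struct domain *d, unsigned long gfn)
{
    mfn_t mfn = gfn_to_mfn(d, gfn);

    if ( !paging_mode_translate(d) )
        /* Non-translated guests: gfns are already machine frames. */
        printk("dom%u gfn %#lx: identity, mfn %"PRI_mfn"\n",
               d->domain_id, gfn, mfn_x(mfn));
    else if ( mfn_x(mfn) == INVALID_MFN )
        /* Not present in the p2m: treated as MMIO space elsewhere. */
        printk("dom%u gfn %#lx: not in p2m\n", d->domain_id, gfn);
    else
        printk("dom%u gfn %#lx: p2m maps it to mfn %"PRI_mfn"\n",
               d->domain_id, gfn, mfn_x(mfn));
}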
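The TYPE_SAFE() wrapper moved into asm-x86/mm.h is easiest to see in isolation. The stand-alone sketch below copies the debug-build expansion from the patch; the sample program around it is hypothetical. Because mfn_t and gfn_t become distinct single-member structs, mixing them (or passing a bare unsigned long) no longer compiles, which is exactly the point of the wrapper.

#include <stdio.h>

/* Debug-build expansion of TYPE_SAFE(), as added to asm-x86/mm.h.
 * (NDEBUG builds fall back to a plain typedef with no-op converters.) */
#define TYPE_SAFE(_type,_name)                                          \
typedef struct { _type _name; } _name##_t;                              \
static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
static inline _type _name##_x(_name##_t n) { return n._name; }

TYPE_SAFE(unsigned long, mfn)
TYPE_SAFE(unsigned long, gfn)

int main(void)
{
    mfn_t m = _mfn(0x1234UL);
    gfn_t g = _gfn(0x42UL);

    printf("mfn %05lx, gfn %05lx\n", mfn_x(m), gfn_x(g));
    /* m = g;  <-- rejected by the compiler: distinct struct types */
    return 0;
}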
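Finally, the ordering inside safe_write_pte() deserves a note: on PAE (CONFIG_PAGING_LEVELS == 3) a 64-bit entry is written as two 32-bit stores, and _PAGE_PRESENT sits in the low word, so that word is cleared first and written last. The sketch below restates the argument with simplified, hypothetical types; the real code operates on l1_pgentry_t.

#include <stdint.h>

/* Simplified restatement of the PAE path of safe_write_pte(): a
 * concurrent reader sees either the old entry, a not-present
 * intermediate state, or the new entry -- never a present entry
 * assembled from mismatched halves. */
static void pae_safe_write(volatile uint32_t dst[2], const uint32_t src[2])
{
    dst[0] = 0;        /* clear the low word: entry now not present */
    dst[1] = src[1];   /* install the new high word while not present */
    dst[0] = src[0];   /* publish the new low word (sets _PAGE_PRESENT) */
}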