[Xen-changelog] [PATCH] VMX world switch
ChangeSet 1.1345, 2005/04/20 21:35:17+01:00, leendert@xxxxxxxxxxxxxx

[PATCH] VMX world switch

The attached code implements a VMX world switch to vmxassist (a small
assist module residing in a VMX-enabled partition, where it is
responsible for emulating real mode) whenever CR0.PE is disabled. The
patch temporarily disables the PGE feature flag in cpuid, as it is
currently broken (try running an unmodified 2.6 kernel that sets PGE in
mm/init.c/paging_init()). The patch also adds consistency checks before
setting the ARCH_VMX_IO_WAIT state, to detect race conditions on SMP
systems.

Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
Signed-off-by: ian@xxxxxxxxxxxxx

 arch/x86/vmx.c              |  281 ++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/vmx_platform.c     |    5
 include/asm-x86/vmx_vmcs.h  |   13 --
 include/public/vmx_assist.h |  101 +++++++++++++++
 4 files changed, 382 insertions(+), 18 deletions(-)
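For orientation, the world switch is driven from the hypervisor's CR0
intercept path: when the guest clears CR0.PE, the hypervisor saves the
guest context and transfers control to vmxassist with
vmx_assist(VMX_ASSIST_INVOKE); when protected mode is re-enabled, the
saved context is presumably restored with vmx_assist(VMX_ASSIST_RESTORE).
A minimal sketch of that dispatch logic, assuming a handler name and a
cpu_cr0 bookkeeping field that are not part of this patch:

    /*
     * Illustrative sketch only: how a CR0 write intercept might drive
     * the world switch.  vmx_assist() is added by this patch; the
     * handler name and the cpu_cr0 field are assumptions.
     */
    static void
    vmx_cr0_write_sketch(struct exec_domain *d, unsigned long value)
    {
        unsigned long old_cr0 = d->arch.arch_vmx.cpu_cr0; /* assumed field */

        if ((old_cr0 & X86_CR0_PE) && !(value & X86_CR0_PE)) {
            /* guest is leaving protected mode: enter vmxassist */
            if (!vmx_assist(d, VMX_ASSIST_INVOKE))
                domain_crash_synchronous();
        } else if (!(old_cr0 & X86_CR0_PE) && (value & X86_CR0_PE)) {
            /* protected mode re-enabled: restore the saved guest state */
            if (!vmx_assist(d, VMX_ASSIST_RESTORE))
                domain_crash_synchronous();
        }

        d->arch.arch_vmx.cpu_cr0 = value;
    }

The patch itself follows.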
diff -Nru a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        2005-04-20 17:02:40 -04:00
+++ b/xen/arch/x86/vmx.c        2005-04-20 17:02:40 -04:00
@@ -195,6 +195,7 @@
     cpuid(input, &eax, &ebx, &ecx, &edx);
 
     if (input == 1) {
+        clear_bit(X86_FEATURE_PGE, &edx); /* temporarily disabled */
         clear_bit(X86_FEATURE_PSE, &edx);
         clear_bit(X86_FEATURE_PAE, &edx);
         clear_bit(X86_FEATURE_PSE36, &edx);
@@ -382,10 +383,261 @@
     do_block();
 }
 
-static int
-vm86assist(struct exec_domain *d)
+enum { COPY_IN = 0, COPY_OUT };
+
+static inline int
+vmx_copy(void *buf, unsigned long laddr, int size, int dir)
+{
+    unsigned char *addr;
+    unsigned long mfn;
+
+    if ((size + (laddr & (PAGE_SIZE - 1))) >= PAGE_SIZE) {
+        printf("vmx_copy exceeds page boundary\n");
+        return 0;
+    }
+
+    mfn = phys_to_machine_mapping(gva_to_gpte(laddr) >> PAGE_SHIFT);
+    addr = map_domain_mem((mfn << PAGE_SHIFT) | (laddr & ~PAGE_MASK));
+
+    if (dir == COPY_IN)
+        memcpy(buf, addr, size);
+    else
+        memcpy(addr, buf, size);
+
+    unmap_domain_mem(addr);
+    return 1;
+}
+
+int
+vmx_world_save(struct exec_domain *d, struct vmx_assist_context *c)
+{
+    unsigned long inst_len;
+    int error = 0;
+
+    error |= __vmread(INSTRUCTION_LEN, &inst_len);
+    error |= __vmread(GUEST_EIP, &c->eip);
+    c->eip += inst_len; /* skip transition instruction */
+    error |= __vmread(GUEST_ESP, &c->esp);
+    error |= __vmread(GUEST_EFLAGS, &c->eflags);
+
+    error |= __vmread(CR0_READ_SHADOW, &c->cr0);
+    c->cr3 = d->arch.arch_vmx.cpu_cr3;
+    error |= __vmread(CR4_READ_SHADOW, &c->cr4);
+
+    error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
+    error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
+
+    error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
+    error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
+
+    error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
+    error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
+    error |= __vmread(GUEST_CS_BASE, &c->cs_base);
+    error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
+
+    error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
+    error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
+    error |= __vmread(GUEST_DS_BASE, &c->ds_base);
+    error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
+
+    error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
+    error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
+    error |= __vmread(GUEST_ES_BASE, &c->es_base);
+    error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
+
+    error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
+    error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
+    error |= __vmread(GUEST_SS_BASE, &c->ss_base);
+    error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
+
+    error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
+    error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
+    error |= __vmread(GUEST_FS_BASE, &c->fs_base);
+    error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
+
+    error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
+    error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
+    error |= __vmread(GUEST_GS_BASE, &c->gs_base);
+    error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
+
+    error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
+    error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
+    error |= __vmread(GUEST_TR_BASE, &c->tr_base);
+    error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
+
+    error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
+    error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
+    error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
+    error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
+
+    return !error;
+}
+
+int
+vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c)
+{
+    unsigned long mfn, old_cr4;
+    int error = 0;
+
+    error |= __vmwrite(GUEST_EIP, c->eip);
+    error |= __vmwrite(GUEST_ESP, c->esp);
+    error |= __vmwrite(GUEST_EFLAGS, c->eflags);
+
+    error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
+
+    if (c->cr3 == d->arch.arch_vmx.cpu_cr3) {
+        /*
+         * This is simple TLB flush, implying the guest has
+         * removed some translation or changed page attributes.
+         * We simply invalidate the shadow.
+         */
+        mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+        if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) {
+            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value=%lx", c->cr3);
+            domain_crash_synchronous();
+            return 0;
+        }
+        shadow_sync_all(d->domain);
+    } else {
+        /*
+         * If different, make a shadow. Check if the PDBR is valid
+         * first.
+         */
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %lx", c->cr3);
+        if ((c->cr3 >> PAGE_SHIFT) > d->domain->max_pages) {
+            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value=%lx", c->cr3);
+            domain_crash_synchronous();
+            return 0;
+        }
+        mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+        d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(d);
+        /*
+         * arch.shadow_table should now hold the next CR3 for shadow
+         */
+        d->arch.arch_vmx.cpu_cr3 = c->cr3;
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", c->cr3);
+        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+    }
+
+    error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+    error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
+
+    error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+    error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+    error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+    error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+    error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+    error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+    error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
+    error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+    error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+    error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
+    error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+    error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+    error |= __vmwrite(GUEST_ES_BASE, c->es_base);
+    error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+    error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+    error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
+    error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+    error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+    error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
+    error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+    error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+    error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
+    error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+    error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+    error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
+    error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+    error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+    error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+    error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
+
+    return !error;
+}
+
+enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
+
+int
+vmx_assist(struct exec_domain *d, int mode)
 {
-    /* stay tuned ... */
+    struct vmx_assist_context c;
+    unsigned long magic, cp;
+
+    /* make sure vmxassist exists (this is not an error) */
+    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
+        return 0;
+    if (magic != VMXASSIST_MAGIC)
+        return 0;
+
+    switch (mode) {
+    /*
+     * Transfer control to vmxassist.
+     * Store the current context in VMXASSIST_OLD_CONTEXT and load
+     * the new VMXASSIST_NEW_CONTEXT context. This context was created
+     * by vmxassist and will transfer control to it.
+     */
+    case VMX_ASSIST_INVOKE:
+        /* save the old context */
+        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_world_save(d, &c))
+                goto error;
+            if (!vmx_copy(&c, cp, sizeof(c), COPY_OUT))
+                goto error;
+        }
+
+        /* restore the new context, this should activate vmxassist */
+        if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+                goto error;
+            if (!vmx_world_restore(d, &c))
+                goto error;
+            return 1;
+        }
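The layout of struct vmx_assist_context can be read off the
vmx_world_save()/vmx_world_restore() pairs above: execution state, the
control registers, both descriptor-table registers, and a
selector/limit/base/arbytes quadruple for each segment register. The
real definition lives in the new include/public/vmx_assist.h, whose
hunk is not shown here; the sketch below is reconstructed from the
field accesses, and its types, field ordering, and the vmx_arbytes_t
name are assumptions:

    /*
     * Reconstructed sketch of the context structure, inferred from the
     * field accesses in vmx_world_save()/vmx_world_restore().  Types,
     * ordering, and the vmx_arbytes_t name are assumptions; see
     * include/public/vmx_assist.h for the real definition.
     */
    typedef union { unsigned long bytes; } vmx_arbytes_t;

    struct vmx_assist_context {
        unsigned long eip;                  /* points past the transition insn */
        unsigned long esp;
        unsigned long eflags;
        unsigned long cr0, cr3, cr4;
        unsigned long idtr_limit, idtr_base;
        unsigned long gdtr_limit, gdtr_base;
        unsigned long cs_sel, cs_limit, cs_base;       vmx_arbytes_t cs_arbytes;
        unsigned long ds_sel, ds_limit, ds_base;       vmx_arbytes_t ds_arbytes;
        unsigned long es_sel, es_limit, es_base;       vmx_arbytes_t es_arbytes;
        unsigned long ss_sel, ss_limit, ss_base;       vmx_arbytes_t ss_arbytes;
        unsigned long fs_sel, fs_limit, fs_base;       vmx_arbytes_t fs_arbytes;
        unsigned long gs_sel, gs_limit, gs_base;       vmx_arbytes_t gs_arbytes;
        unsigned long tr_sel, tr_limit, tr_base;       vmx_arbytes_t tr_arbytes;
        unsigned long ldtr_sel, ldtr_limit, ldtr_base; vmx_arbytes_t ldtr_arbytes;
    };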
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog