|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 08/10] connect vmport up
On 12/12/2013 19:15, Don Slutz wrote:
> From: Don Slutz <dslutz@xxxxxxxxxxx>
>
> Signed-off-by: Don Slutz <dslutz@xxxxxxxxxxx>
> ---
> xen/arch/x86/hvm/io.c | 4 ++
> xen/arch/x86/hvm/svm/svm.c | 104 ++++++++++++++++++++++++++++++++++++
> xen/arch/x86/hvm/svm/vmcb.c | 1 +
> xen/arch/x86/hvm/vmx/vmcs.c | 1 +
> xen/arch/x86/hvm/vmx/vmx.c | 125
> ++++++++++++++++++++++++++++++++++++++++++++
> xen/arch/x86/hvm/vmx/vvmx.c | 13 +++++
> xen/include/public/trace.h | 1 +
> 7 files changed, 249 insertions(+)
>
> diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
> index bf6309d..4bc4716 100644
> --- a/xen/arch/x86/hvm/io.c
> +++ b/xen/arch/x86/hvm/io.c
> @@ -42,6 +42,7 @@
> #include <asm/hvm/vlapic.h>
> #include <asm/hvm/trace.h>
> #include <asm/hvm/emulate.h>
> +#include <asm/hvm/vmport.h>
> #include <public/sched.h>
> #include <xen/iocap.h>
> #include <public/hvm/ioreq.h>
> @@ -236,6 +237,9 @@ int handle_pio(uint16_t port, unsigned int size, int dir)
> if ( dir == IOREQ_WRITE )
> data = guest_cpu_user_regs()->eax;
>
> + if ( port == VMPORT_PORT )
> + return vmport_ioport(dir, size, data, guest_cpu_user_regs());
> +
Use register_portio_handler(), which is the existing infrastructure for
intercepting I/O ports, rather than special-casing VMPORT_PORT in handle_pio().
> rc = hvmemul_do_pio(port, &reps, size, 0, dir, 0, &data);
>
> switch ( rc )
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 406d394..80cf2bf 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -56,6 +56,7 @@
> #include <asm/hvm/svm/nestedsvm.h>
> #include <asm/hvm/nestedhvm.h>
> #include <asm/x86_emulate.h>
> +#include <asm/hvm/vmport.h>
> #include <public/sched.h>
> #include <asm/hvm/vpt.h>
> #include <asm/hvm/trace.h>
> @@ -1904,6 +1905,105 @@ svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
> return;
> }
>
> +static void svm_vmexit_gp_intercept(struct cpu_user_regs *regs, struct vcpu
> *v)
> +{
> + struct hvm_domain *hd = &v->domain->arch.hvm_domain;
> + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> + unsigned long inst_len, bytes_len;
> + int frc;
> + unsigned char bytes[15];
> +
> + regs->error_code = vmcb->exitinfo1;
> + if ( !cpu_has_svm_nrips || (vmcb->nextrip <= vmcb->rip) )
> + inst_len = 0;
> + else
> + inst_len = vmcb->nextrip - vmcb->rip;
> + bytes_len = 2 /* inst_len < 15 ? inst_len > 1 ? inst_len : 2 : 15 */;
> + frc = hvm_fetch_from_guest_virt_nofault(bytes, regs->eip,
> + bytes_len,
> + PFEC_page_present);
> +
> + if ( hvm_long_mode_enabled(v) )
> + HVMTRACE_LONG_4D(TRAP, TRAP_gp_fault, inst_len,
> + regs->error_code,
> + TRC_PAR_LONG(vmcb->exitinfo2) );
> + else
> + HVMTRACE_4D(TRAP, TRAP_gp_fault, inst_len,
> + regs->error_code, vmcb->exitinfo2 );
> +
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x400000 /*
> LOG_GP_FAIL_RD_INST */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: e2=%lx ec=%lx ip=%lx=>0x%x 0x%x(%ld,%ld,%d)
> nip(%d)=%lx(%d,%d(0x%x) 0x%x 0x%x)"
> + "\n",
> + current->domain->domain_id, current->vcpu_id, __func__,
> + (unsigned long)vmcb->exitinfo2,
> + (unsigned long)regs->error_code,
> + (unsigned long)regs->eip, (unsigned int)bytes[0],
> + (unsigned int)bytes[1], bytes_len, inst_len, frc,
> + cpu_has_svm_nrips, (unsigned long)vmcb->nextrip,
> + cpu_has_svm_decode, vmcb->guest_ins_len & 0xf,
> vmcb->guest_ins_len,
> + vmcb->guest_ins[0], vmcb->guest_ins[1]);
> +
> + if ( !frc && bytes[0] == 0xed && (regs->edx & 0xffff) == VMPORT_PORT &&
> + vmcb->exitinfo2 == 0 && regs->error_code == 0 )
> + {
> + /* in (%dx),%eax */
> + uint32_t magic = regs->eax;
> +
> + if ( magic == VMPORT_MAGIC ) {
> + __update_guest_eip(regs, 1);
> + vmport_ioport(IOREQ_READ, 4, 0, regs);
This appears to be intercepting an L2 guest doing vmport magic IO to the
L1 hypervisor.
Is this sane/sensible/wise?
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x800000 /*
> LOG_GP_VMWARE_AFTER */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: VMware ip=%lx ax=%lx bx=%lx cx=%lx dx=%lx si=%lx
> di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip,
> + (unsigned long)regs->eax, (unsigned long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned long)regs->edx,
> + (unsigned long)regs->esi, (unsigned long)regs->edi);
> + return;
> + } else {
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x200000 /*
> LOG_GP_NOT_VMWARE */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: ip=%lx ax=%lx bx=%lx cx=%lx dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip,
> + (unsigned long)regs->eax, (unsigned long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned long)regs->edx,
> + (unsigned long)regs->esi, (unsigned long)regs->edi);
> + hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code);
> + }
> + } else if (!frc && regs->error_code == 0
> + && bytes[0] == 0x0f && bytes[1] == 0x33 && regs->ecx ==
> 0x10000)
> + {
> + /* "rdpmc 0x10000" */
> + /* Not a very good emulation! But just not faulting is good enough
> + * to get NetApp booting. */
> + regs->edx = regs->eax = 0;
This rdpmc emulation doesn't look like it is logically part of "connecting vmport up".
~Andrew
> +
> + __update_guest_eip(regs, inst_len);
> +
> + /* Doing the log in this case was too noisy for NetApp, so I moved
> + * it to 'else' */
> + } else {
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x100000 /*
> LOG_GP_UNKNOWN */) {
> + printk("[HVM:%d.%d] <%s> "
> + "gp: e2=%lx ec=%lx ip=%lx=>0x%x 0x%x(%ld,%d) ax=%lx
> bx=%lx cx=%lx dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id, __func__,
> + (unsigned long)vmcb->exitinfo2, (unsigned
> long)regs->error_code,
> + (unsigned long)regs->eip, (unsigned int)bytes[0],
> + (unsigned int)bytes[1], inst_len, frc,
> + (unsigned long)regs->eax, (unsigned long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned long)regs->edx,
> + (unsigned long)regs->esi, (unsigned long)regs->edi);
> + }
> + hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code);
> + }
> +}
> +
> static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
> {
> struct hvm_emulate_ctxt ctxt;
> @@ -2253,6 +2353,10 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
> break;
> }
>
> + case VMEXIT_EXCEPTION_GP:
> + svm_vmexit_gp_intercept(regs, v);
> + break;
> +
> case VMEXIT_EXCEPTION_UD:
> svm_vmexit_ud_intercept(regs);
> break;
> diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
> index 21292bb..791c045 100644
> --- a/xen/arch/x86/hvm/svm/vmcb.c
> +++ b/xen/arch/x86/hvm/svm/vmcb.c
> @@ -193,6 +193,7 @@ static int construct_vmcb(struct vcpu *v)
>
> vmcb->_exception_intercepts =
> HVM_TRAP_MASK
> + | (1U << TRAP_gp_fault)
> | (1U << TRAP_no_device);
>
> if ( paging_mode_hap(v->domain) )
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 44f33cb..21cde2f 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1074,6 +1074,7 @@ static int construct_vmcs(struct vcpu *v)
>
> v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
> | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
> + | (1U << TRAP_gp_fault)
> | (1U << TRAP_no_device);
> vmx_update_exception_bitmap(v);
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index dfff628..248900d 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -44,6 +44,7 @@
> #include <asm/hvm/support.h>
> #include <asm/hvm/vmx/vmx.h>
> #include <asm/hvm/vmx/vmcs.h>
> +#include <asm/hvm/vmport.h>
> #include <public/sched.h>
> #include <public/hvm/ioreq.h>
> #include <asm/hvm/vpic.h>
> @@ -1211,6 +1212,7 @@ static void vmx_update_guest_cr(struct vcpu *v,
> unsigned int cr)
> v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
> | (paging_mode_hap(v->domain) ?
> 0 : (1U << TRAP_page_fault))
> + | (1U << TRAP_gp_fault)
> | (1U << TRAP_no_device);
> vmx_update_exception_bitmap(v);
> vmx_update_debug_state(v);
> @@ -2454,6 +2456,113 @@ static void vmx_idtv_reinject(unsigned long idtv_info)
> }
> }
>
> +void do_gp_fault(struct cpu_user_regs *regs, struct vcpu *v)
> +{
> + struct hvm_domain *hd = &v->domain->arch.hvm_domain;
> + unsigned long exit_qualification;
> + unsigned long inst_len;
> + unsigned long ecode;
> +
> + __vmread(EXIT_QUALIFICATION, &exit_qualification);
> + __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
> + __vmread(VM_EXIT_INTR_ERROR_CODE, &ecode);
> + regs->error_code = ecode;
> + if ( hvm_long_mode_enabled(v) )
> + HVMTRACE_LONG_4D(TRAP, TRAP_gp_fault, inst_len,
> + regs->error_code,
> + TRC_PAR_LONG(exit_qualification) );
> + else
> + HVMTRACE_4D(TRAP, TRAP_gp_fault, inst_len,
> + regs->error_code, exit_qualification );
> +
> + if ( inst_len == 1 && (regs->edx & 0xffff) == VMPORT_PORT &&
> + exit_qualification == 0 && regs->error_code == 0 ) {
> + uint32_t magic = regs->eax;
> +
> + if ( magic == VMPORT_MAGIC ) {
> + unsigned char bytes[1];
> + int frc = hvm_fetch_from_guest_virt_nofault(bytes, regs->eip,
> + 1,
> PFEC_page_present);
> + if (!frc && bytes[0] == 0xed) { /* in (%dx),%eax */
> + update_guest_eip();
> + vmport_ioport(IOREQ_READ, 4, 0, regs);
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x800000 /*
> LOG_GP_VMWARE_AFTER */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: VMware ip=%lx ax=%lx bx=%lx cx=%lx dx=%lx
> si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip,
> + (unsigned long)regs->eax, (unsigned
> long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned
> long)regs->edx,
> + (unsigned long)regs->esi, (unsigned
> long)regs->edi);
> + return;
> + } else {
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x400000 /*
> LOG_GP_FAIL_RD_INST */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: VMware? ip=%lx=>0x%x(%d) ax=%lx bx=%lx
> cx=%lx dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip, bytes[0], frc,
> + (unsigned long)regs->eax, (unsigned
> long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned
> long)regs->edx,
> + (unsigned long)regs->esi, (unsigned
> long)regs->edi);
> + }
> + } else {
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x200000 /*
> LOG_GP_NOT_VMWARE */)
> + printk("[HVM:%d.%d] <%s> "
> + "gp: ip=%lx ax=%lx bx=%lx cx=%lx dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip,
> + (unsigned long)regs->eax, (unsigned long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned long)regs->edx,
> + (unsigned long)regs->esi, (unsigned long)regs->edi);
> + hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code);
> + }
> + } else {
> + unsigned char bytes[15];
> + int frc;
> +
> + /*
> + * We can conditionalize this call on inst_len == 2 if we decide to
> + * remove the following printk.
> + */
> + frc = hvm_fetch_from_guest_virt_nofault(bytes, regs->eip,
> + inst_len < 15 ? inst_len :
> 15,
> + PFEC_page_present);
> +
> + /* Emulate "rdpmc 0x10000" */
> + if (!frc && inst_len == 2 && regs->error_code == 0
> + && bytes[0] == 0x0f && bytes[1] == 0x33 && regs->ecx == 0x10000)
> + {
> + /* Not a very good emulation! But just not faulting is good
> enough
> + * to get NetApp booting. */
> + regs->edx = regs->eax = 0;
> +
> + update_guest_eip();
> +
> + /* Doing the log in this case was too noisy for NetApp, so I
> moved
> + * it to 'else' */
> + } else {
> + /* We should probably turn this log off by default in production
> in
> + * case somebody decides to do a lot of #GPs. */
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x100000 /*
> LOG_GP_UNKNOWN */) {
> + printk("[HVM:%d.%d] <%s> "
> + "gp: eq=%lx ec=%lx ip=%lx=>0x%x 0x%x(%ld,%d) ax=%lx
> bx=%lx cx=%lx dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)exit_qualification, (unsigned
> long)regs->error_code,
> + (unsigned long)regs->eip, (unsigned int)bytes[0],
> + (unsigned int)bytes[1], inst_len, frc,
> + (unsigned long)regs->eax, (unsigned long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned long)regs->edx,
> + (unsigned long)regs->esi, (unsigned long)regs->edi);
> + }
> + hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code);
> + }
> + }
> +}
> +
> static int vmx_handle_apic_write(void)
> {
> unsigned long exit_qualification;
> @@ -2562,6 +2671,19 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
> && vector != TRAP_nmi
> && vector != TRAP_machine_check )
> {
> + if (vector == TRAP_gp_fault ) {
> + struct hvm_domain *hd = &v->domain->arch.hvm_domain;
> +
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x8000000 /*
> LOG_REALMODE_GP */)
> + printk("[HVM:%d.%d] <%s> "
> + "realmode gp: ip=%lx ax=%lx bx=%lx cx=%lx
> dx=%lx si=%lx di=%lx"
> + "\n",
> + current->domain->domain_id, current->vcpu_id,
> __func__,
> + (unsigned long)regs->eip,
> + (unsigned long)regs->eax, (unsigned
> long)regs->ebx,
> + (unsigned long)regs->ecx, (unsigned
> long)regs->edx,
> + (unsigned long)regs->esi, (unsigned
> long)regs->edi);
> + }
> perfc_incr(realmode_exits);
> v->arch.hvm_vmx.vmx_emulate = 1;
> HVMTRACE_0D(REALMODE_EMULATE);
> @@ -2677,6 +2799,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
> HVMTRACE_1D(TRAP, vector);
> vmx_fpu_dirty_intercept();
> break;
> + case TRAP_gp_fault:
> + do_gp_fault(regs, v);
> + break;
> case TRAP_page_fault:
> __vmread(EXIT_QUALIFICATION, &exit_qualification);
> __vmread(VM_EXIT_INTR_ERROR_CODE, &ecode);
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 0daad79..fcd03dd 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2166,6 +2166,19 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> if ( v->fpu_dirtied )
> nvcpu->nv_vmexit_pending = 1;
> }
> + else if ( vector == TRAP_gp_fault )
> + {
> + struct cpu_user_regs *ur = guest_cpu_user_regs();
> + struct hvm_domain *hd = &v->domain->arch.hvm_domain;
> +
> + if (hd->params[HVM_PARAM_VMPORT_LOGMASK] & 0x1000000 /*
> LOG_VGP_UNKNOWN */)
> + gdprintk(XENLOG_ERR, "Unexpected gp: ip=%lx ax=%lx bx=%lx
> cx=%lx dx=%lx si=%lx di=%lx\n",
> + (unsigned long)ur->eip,
> + (unsigned long)ur->eax, (unsigned long)ur->ebx,
> + (unsigned long)ur->ecx, (unsigned long)ur->edx,
> + (unsigned long)ur->esi, (unsigned long)ur->edi);
> + nvcpu->nv_vmexit_pending = 1;
> + }
> else if ( (intr_info & valid_mask) == valid_mask )
> {
> exec_bitmap =__get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
> diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
> index e2f60a6..32489f0 100644
> --- a/xen/include/public/trace.h
> +++ b/xen/include/public/trace.h
> @@ -223,6 +223,7 @@
> #define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21)
> #define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22)
> #define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23)
> +#define TRC_HVM_TRAP64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x23)
Haven't you already defined this in a previous patch?
> #define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24)
> #define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25)
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |