Xen project Mailing List

Re: [Xen-devel] [V10 PATCH 23/23] PVH xen: introduce vmexit handler for PVH

To: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>

From: Tim Deegan <tim@xxxxxxx>

Date: Thu, 25 Jul 2013 17:28:40 +0100

Cc: Xen-devel@xxxxxxxxxxxxxxxxxxx, keir.xen@xxxxxxxxx

Delivery-date: Thu, 25 Jul 2013 16:29:17 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

At 18:59 -0700 on 23 Jul (1374605971), Mukesh Rathor wrote: > +/* Just like HVM, PVH should be using "cpuid" from the kernel mode. */ > +static int vmxit_invalid_op(struct cpu_user_regs *regs) > +{ > + if ( guest_kernel_mode(current, regs) || > !emulate_forced_invalid_op(regs) ) > + hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); Was this discussed before? It seems harsh to stop kernel-mode code from using the pv cpuid operation if it wants to. In particular, what about loadable kernel modules? If you do go with this restriction, please document it in include/public/arch-x86/xen.h beside the XEN_CPUID definition. > +/* Returns: rc == 0: success. */ > +static int access_cr0(struct cpu_user_regs *regs, uint acc_typ, uint64_t > *regp) > +{ > + struct vcpu *vp = current; > + > + if ( acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR ) > + { > + unsigned long new_cr0 = *regp; > + unsigned long old_cr0 = __vmread(GUEST_CR0); > + > + dbgp1("PVH:writing to CR0. RIP:%lx val:0x%lx\n", regs->rip, *regp); > + if ( (u32)new_cr0 != new_cr0 ) > + { > + printk(XENLOG_G_WARNING > + "Guest setting upper 32 bits in CR0: %lx", new_cr0); > + return -EPERM; AFAICS returning non-zero here crashes the guest. Shouldn't this inject #GP instead? > + } > + > + new_cr0 &= ~HVM_CR0_GUEST_RESERVED_BITS; > + /* ET is reserved and should always be 1. */ > + new_cr0 |= X86_CR0_ET; > + > + /* A pvh is not expected to change to real mode. */ > + if ( (new_cr0 & (X86_CR0_PE | X86_CR0_PG)) != > + (X86_CR0_PG | X86_CR0_PE) ) > + { > + printk(XENLOG_G_WARNING > + "PVH attempting to turn off PE/PG. CR0:%lx\n", new_cr0); > + return -EPERM; This I guess is more reasonable since the paging-mode restriction is part of the PVH ABI. > + } > + /* TS going from 1 to 0 */ > + if ( (old_cr0 & X86_CR0_TS) && ((new_cr0 & X86_CR0_TS) == 0) ) > + vmx_fpu_enter(vp); > + > + vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = new_cr0; > + __vmwrite(GUEST_CR0, new_cr0); > + __vmwrite(CR0_READ_SHADOW, new_cr0); > + } > + else > + *regp = __vmread(GUEST_CR0); > + > + return 0; > +} > + > +/* Returns: rc == 0: success. */ > +static int access_cr4(struct cpu_user_regs *regs, uint acc_typ, uint64_t > *regp) > +{ > + if ( acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR ) > + { > + struct vcpu *vp = current; > + u64 old_val = __vmread(GUEST_CR4); > + u64 new = *regp; > + > + if ( new & HVM_CR4_GUEST_RESERVED_BITS(vp) ) > + { > + printk(XENLOG_G_WARNING > + "PVH guest attempts to set reserved bit in CR4: %lx", > new); > + hvm_inject_hw_exception(TRAP_gp_fault, 0); > + return 0; > + } > + > + if ( !(new & X86_CR4_PAE) && hvm_long_mode_enabled(vp) ) > + { > + printk(XENLOG_G_WARNING "Guest cleared CR4.PAE while " > + "EFER.LMA is set"); > + hvm_inject_hw_exception(TRAP_gp_fault, 0); > + return 0; > + } > + > + vp->arch.hvm_vcpu.guest_cr[4] = new; > + > + if ( (old_val ^ new) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) > + vpid_sync_all(); > + > + __vmwrite(CR4_READ_SHADOW, new); > + > + new &= ~X86_CR4_PAE; /* PVH always runs with hap enabled. */ The equivalent mask in vmx_update_guest_cr() is masking out a default setting of CR4.PAE _before_ the guest's requested bits get ORred in. This is masking out the PAE bit that we just insisted on. I'm surprised that VMENTER doesn't choke on this -- I guess it uses VM_ENTRY_IA32E_MODE rather than looking at these bits at all. > + new |= X86_CR4_VMXE | X86_CR4_MCE; > + __vmwrite(GUEST_CR4, new); > + } > + else > + *regp = __vmread(CR4_READ_SHADOW); > + > + return 0; > +} > + > +/* Returns: rc == 0: success, else -errno. */ > +static int vmxit_cr_access(struct cpu_user_regs *regs) > +{ > + unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION); > + uint acc_typ = VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification); > + int cr, rc = -EINVAL; > + > + switch ( acc_typ ) > + { > + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR: > + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR: > + { > + uint gpr = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification); > + uint64_t *regp = decode_register(gpr, regs, 0); > + cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification); > + > + if ( regp == NULL ) > + break; > + > + switch ( cr ) > + { > + case 0: > + rc = access_cr0(regs, acc_typ, regp); > + break; > + > + case 3: > + printk(XENLOG_G_ERR "PVH: unexpected cr3 vmexit. rip:%lx\n", > + regs->rip); > + domain_crash(current->domain); > + break; > + > + case 4: > + rc = access_cr4(regs, acc_typ, regp); > + break; > + } > + if ( rc == 0 ) > + vmx_update_guest_eip(); > + break; > + } > + > + case VMX_CONTROL_REG_ACCESS_TYPE_CLTS: > + { > + struct vcpu *vp = current; > + unsigned long cr0 = vp->arch.hvm_vcpu.guest_cr[0] & ~X86_CR0_TS; > + vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = cr0; > + > + vmx_fpu_enter(vp); > + __vmwrite(GUEST_CR0, cr0); > + __vmwrite(CR0_READ_SHADOW, cr0); > + vmx_update_guest_eip(); > + rc = 0; > + } > + } No "case VMX_CONTROL_REG_ACCESS_TYPE_LMSW"? Tim. _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.