
Re: [Xen-devel] [V10 PATCH 23/23] PVH xen: introduce vmexit handler for PVH



At 18:59 -0700 on 23 Jul (1374605971), Mukesh Rathor wrote:
> +/* Just like HVM, PVH should be using "cpuid" from the kernel mode. */
> +static int vmxit_invalid_op(struct cpu_user_regs *regs)
> +{
> +    if ( guest_kernel_mode(current, regs) || !emulate_forced_invalid_op(regs) )
> +        hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);

Was this discussed before?  It seems harsh to stop kernel-mode code from
using the pv cpuid operation if it wants to.  In particular, what about
loadable kernel modules?
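
FWIW, if the restriction goes away, the handler collapses to something
like this (an untested sketch):

    static int vmxit_invalid_op(struct cpu_user_regs *regs)
    {
        /* Untested sketch: let kernel mode use the PV cpuid too. */
        if ( !emulate_forced_invalid_op(regs) )
            hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
        return 0;
    }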

If you do go with this restriction, please document it in
include/public/arch-x86/xen.h beside the XEN_CPUID definition.

> +/* Returns: rc == 0: success. */
> +static int access_cr0(struct cpu_user_regs *regs, uint acc_typ, uint64_t *regp)
> +{
> +    struct vcpu *vp = current;
> +
> +    if ( acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR )
> +    {
> +        unsigned long new_cr0 = *regp;
> +        unsigned long old_cr0 = __vmread(GUEST_CR0);
> +
> +        dbgp1("PVH:writing to CR0. RIP:%lx val:0x%lx\n", regs->rip, *regp);
> +        if ( (u32)new_cr0 != new_cr0 )
> +        {
> +            printk(XENLOG_G_WARNING
> +                   "Guest setting upper 32 bits in CR0: %lx", new_cr0);
> +            return -EPERM;

AFAICS returning non-zero here crashes the guest.  Shouldn't this inject
#GP instead?
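
E.g., reusing the idiom from your CR4 reserved-bits check below
(untested):

            printk(XENLOG_G_WARNING
                   "Guest setting upper 32 bits in CR0: %lx", new_cr0);
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
            return 0;   /* #GP goes to the guest; not a fatal error */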

> +        }
> +
> +        new_cr0 &= ~HVM_CR0_GUEST_RESERVED_BITS;
> +        /* ET is reserved and should always be 1. */
> +        new_cr0 |= X86_CR0_ET;
> +
> +        /* A pvh is not expected to change to real mode. */
> +        if ( (new_cr0 & (X86_CR0_PE | X86_CR0_PG)) !=
> +             (X86_CR0_PG | X86_CR0_PE) )
> +        {
> +            printk(XENLOG_G_WARNING
> +                   "PVH attempting to turn off PE/PG. CR0:%lx\n", new_cr0);
> +            return -EPERM;

This I guess is more reasonable since the paging-mode restriction is
part of the PVH ABI.

> +        }
> +        /* TS going from 1 to 0 */
> +        if ( (old_cr0 & X86_CR0_TS) && ((new_cr0 & X86_CR0_TS) == 0) )
> +            vmx_fpu_enter(vp);
> +
> +        vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = new_cr0;
> +        __vmwrite(GUEST_CR0, new_cr0);
> +        __vmwrite(CR0_READ_SHADOW, new_cr0);
> +    }
> +    else
> +        *regp = __vmread(GUEST_CR0);
> +
> +    return 0;
> +}
> +
> +/* Returns: rc == 0: success. */
> +static int access_cr4(struct cpu_user_regs *regs, uint acc_typ, uint64_t *regp)
> +{
> +    if ( acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR )
> +    {
> +        struct vcpu *vp = current;
> +        u64 old_val = __vmread(GUEST_CR4);
> +        u64 new = *regp;
> +
> +        if ( new & HVM_CR4_GUEST_RESERVED_BITS(vp) )
> +        {
> +            printk(XENLOG_G_WARNING
> +                   "PVH guest attempts to set reserved bit in CR4: %lx", 
> new);
> +            hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +            return 0;
> +        }
> +
> +        if ( !(new & X86_CR4_PAE) && hvm_long_mode_enabled(vp) )
> +        {
> +            printk(XENLOG_G_WARNING "Guest cleared CR4.PAE while "
> +                   "EFER.LMA is set");
> +            hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +            return 0;
> +        }
> +
> +        vp->arch.hvm_vcpu.guest_cr[4] = new;
> +
> +        if ( (old_val ^ new) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
> +            vpid_sync_all();
> +
> +        __vmwrite(CR4_READ_SHADOW, new);
> +
> +        new &= ~X86_CR4_PAE;         /* PVH always runs with hap enabled. */

The equivalent mask in vmx_update_guest_cr() is masking out a default
setting of CR4.PAE _before_ the guest's requested bits get ORred in.
This is masking out the PAE bit that we just insisted on.  I'm surprised
that VMENTER doesn't choke on this -- I guess it uses
VM_ENTRY_IA32E_MODE rather than looking at these bits at all.
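
For reference, vmx_update_guest_cr()'s CR4 handling is roughly this
(paraphrased from memory):

    /* Paraphrased from vmx_update_guest_cr(), case 4: */
    v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;       /* default PAE set */
    if ( paging_mode_hap(v->domain) )
        v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;       /* drop the default */
    v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; /* guest PAE survives */

so a guest-requested PAE bit survives there, whereas here the mask is
applied after the guest's value has gone in, and strips it.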

> +        new |= X86_CR4_VMXE | X86_CR4_MCE;
> +        __vmwrite(GUEST_CR4, new);
> +    }
> +    else
> +        *regp = __vmread(CR4_READ_SHADOW);
> +
> +    return 0;
> +}
> +
> +/* Returns: rc == 0: success, else -errno. */
> +static int vmxit_cr_access(struct cpu_user_regs *regs)
> +{
> +    unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
> +    uint acc_typ = VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification);
> +    int cr, rc = -EINVAL;
> +
> +    switch ( acc_typ )
> +    {
> +    case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
> +    case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
> +    {
> +        uint gpr = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
> +        uint64_t *regp = decode_register(gpr, regs, 0);
> +        cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
> +
> +        if ( regp == NULL )
> +            break;
> +
> +        switch ( cr )
> +        {
> +        case 0:
> +            rc = access_cr0(regs, acc_typ, regp);
> +            break;
> +
> +        case 3:
> +            printk(XENLOG_G_ERR "PVH: unexpected cr3 vmexit. rip:%lx\n",
> +                   regs->rip);
> +            domain_crash(current->domain);
> +            break;
> +
> +        case 4:
> +            rc = access_cr4(regs, acc_typ, regp);
> +            break;
> +        }
> +        if ( rc == 0 )
> +            vmx_update_guest_eip();
> +        break;
> +    }
> +
> +    case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
> +    {
> +        struct vcpu *vp = current;
> +        unsigned long cr0 = vp->arch.hvm_vcpu.guest_cr[0] & ~X86_CR0_TS;
> +        vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = cr0;
> +
> +        vmx_fpu_enter(vp);
> +        __vmwrite(GUEST_CR0, cr0);
> +        __vmwrite(CR0_READ_SHADOW, cr0);
> +        vmx_update_guest_eip();
> +        rc = 0;
> +    }
> +    }

No "case VMX_CONTROL_REG_ACCESS_TYPE_LMSW"?

Tim.
