x86emul: support MMX/SSE/SSE2 converts Signed-off-by: Jan Beulich --- v2: Don't pointlessly set TwoOp for cvtpi2p{s,d} and cvt{,t}p{s,d}2pi. Set Mov for all converts (with follow-on adjustments to case labels). Consistently generate #UD when VEX.l is disallowed. Don't check VEX.vvvv for vcvtsi2s{s,d}. --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -246,9 +246,10 @@ static const struct { [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM }, [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp }, [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp }, - [0x2a] = { ImplicitOps|ModRM }, + [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp }, - [0x2c ... 0x2f] = { ImplicitOps|ModRM }, + [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, + [0x2e ... 0x2f] = { ImplicitOps|ModRM }, [0x30 ... 0x35] = { ImplicitOps }, [0x37] = { ImplicitOps }, [0x38] = { DstReg|SrcMem|ModRM }, @@ -259,7 +260,7 @@ static const struct { [0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp }, [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp }, [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp }, - [0x5a ... 0x5b] = { ModRM }, + [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp }, [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other }, [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, @@ -322,7 +323,7 @@ static const struct { [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other }, [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, - [0xe6] = { ModRM }, + [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int }, [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, @@ -5391,6 +5392,99 @@ x86_emulate( goto done; break; + case X86EMUL_OPC_66(0x0f, 0x2a): /* cvtpi2pd mm/m64,xmm */ + if ( ea.type == OP_REG ) + { + case X86EMUL_OPC(0x0f, 0x2a): /* cvtpi2ps mm/m64,xmm */ + CASE_SIMD_PACKED_FP(, 0x0f, 0x2c): /* cvttp{s,d}2pi xmm/mem,mm */ + CASE_SIMD_PACKED_FP(, 0x0f, 0x2d): /* cvtp{s,d}2pi xmm/mem,mm */ + host_and_vcpu_must_have(mmx); + } + op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8; + goto simd_0f_fp; + + CASE_SIMD_SCALAR_FP(, 0x0f, 0x2a): /* cvtsi2s{s,d} r/m,xmm */ + CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */ + if ( vex.opcx == vex_none ) + { + if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) + vcpu_must_have(sse2); + else + vcpu_must_have(sse); + get_fpu(X86EMUL_FPU_xmm, &fic); + } + else + { + generate_exception_if(vex.l, EXC_UD); + host_and_vcpu_must_have(avx); + get_fpu(X86EMUL_FPU_ymm, &fic); + } + + if ( ea.type == OP_MEM ) + { + rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, + rex_prefix & REX_W ? 8 : 4, ctxt, ops); + if ( rc != X86EMUL_OKAY ) + goto done; + } + else + src.val = rex_prefix & REX_W ? *ea.reg : (uint32_t)*ea.reg; + + state->simd_size = simd_none; + goto simd_0f_rm; + + CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c): /* cvtts{s,d}2si xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d): /* cvts{s,d}2si xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ + if ( vex.opcx == vex_none ) + { + if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK ) + vcpu_must_have(sse2); + else + vcpu_must_have(sse); + get_fpu(X86EMUL_FPU_xmm, &fic); + } + else + { + generate_exception_if(vex.l, EXC_UD); + host_and_vcpu_must_have(avx); + get_fpu(X86EMUL_FPU_ymm, &fic); + } + + opc = init_prefixes(stub); + opc[0] = b; + /* Convert GPR destination to %rAX and memory operand to (%rCX). */ + rex_prefix &= ~REX_R; + vex.r = 1; + if ( ea.type == OP_MEM ) + { + rex_prefix &= ~REX_B; + vex.b = 1; + opc[1] = 0x01; + + rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, + vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt); + if ( rc != X86EMUL_OKAY ) + goto done; + } + else + opc[1] = modrm & 0xc7; + if ( !mode_64bit() ) + vex.w = 0; + fic.insn_bytes = PFX_BYTES + 2; + opc[2] = 0xc3; + + copy_REX_VEX(opc, rex_prefix, vex); + ea.reg = decode_register(modrm_reg, &_regs, 0); + invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp)); + + put_stub(stub); + put_fpu(&fic); + + state->simd_size = simd_none; + break; + case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */ generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if(ops->write_msr == NULL); @@ -5575,6 +5669,33 @@ x86_emulate( dst.bytes = 4; break; + CASE_SIMD_ALL_FP(, 0x0f, 0x5a): /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */ + CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a): /* vcvtp{s,d}2p{s,d} xmm/mem,xmm */ + /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */ + op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) + + !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK)); + simd_0f_cvt: + if ( vex.opcx == vex_none ) + { + vcpu_must_have(sse2); + get_fpu(X86EMUL_FPU_xmm, &fic); + } + else + { + host_and_vcpu_must_have(avx); + fail_if((vex.pfx & VEX_PREFIX_SCALAR_MASK) && vex.l); + get_fpu(X86EMUL_FPU_ymm, &fic); + } + goto simd_0f_common; + + CASE_SIMD_PACKED_FP(, 0x0f, 0x5b): /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */ + CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x5b): /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */ + case X86EMUL_OPC_F3(0x0f, 0x5b): /* cvttps2dq xmm/mem,xmm */ + case X86EMUL_OPC_VEX_F3(0x0f, 0x5b): /* vcvttps2dq {x,y}mm/mem,{x,y}mm */ + d |= TwoOp; + op_bytes = 16 << vex.l; + goto simd_0f_cvt; + CASE_SIMD_PACKED_INT(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */ case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */ CASE_SIMD_PACKED_INT(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */ @@ -5715,6 +5836,7 @@ x86_emulate( get_fpu(X86EMUL_FPU_mmx, &fic); } + simd_0f_rm: opc = init_prefixes(stub); opc[0] = b; /* Convert memory/GPR operand to (%rAX). */ @@ -6415,6 +6537,16 @@ x86_emulate( get_fpu(X86EMUL_FPU_mmx, &fic); goto simd_0f_common; + case X86EMUL_OPC_66(0x0f, 0xe6): /* cvttpd2dq xmm/mem,xmm */ + case X86EMUL_OPC_VEX_66(0x0f, 0xe6): /* vcvttpd2dq {x,y}mm/mem,xmm */ + case X86EMUL_OPC_F3(0x0f, 0xe6): /* cvtdq2pd xmm/mem,xmm */ + case X86EMUL_OPC_VEX_F3(0x0f, 0xe6): /* vcvtdq2pd xmm/mem,{x,y}mm */ + case X86EMUL_OPC_F2(0x0f, 0xe6): /* cvtpd2dq xmm/mem,xmm */ + case X86EMUL_OPC_VEX_F2(0x0f, 0xe6): /* vcvtpd2dq {x,y}mm/mem,xmm */ + d |= TwoOp; + op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l); + goto simd_0f_cvt; + CASE_SIMD_PACKED_INT(0x0f, 0xf7): /* maskmov{q,dqu} {,x}mm,{,x}mm */ case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* vmaskmovdqu xmm,xmm */ generate_exception_if(ea.type != OP_REG, EXC_UD);