x86emul: support F16C insns

Note that this avoids emulating the behavior of VCVTPS2PH found on at
least some Intel CPUs, which update MXCSR even when the memory write
faults.

Signed-off-by: Jan Beulich

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -3028,6 +3028,47 @@ int main(int argc, char **argv)
         printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1...");
+    if ( stack_exec && cpu_has_f16c )
+    {
+        decl_insn(vcvtph2ps);
+        decl_insn(vcvtps2ph);
+
+        asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n"
+                       put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1")
+                       :: "c" (NULL) );
+
+        set_insn(vcvtph2ps);
+        res[1] = 0x40003c00; /* (1.0, 2.0) */
+        res[2] = 0x44004200; /* (3.0, 4.0) */
+        res[3] = 0x3400b800; /* (-.5, .25) */
+        res[4] = 0xbc000000; /* (0.0, -1.) */
+        memset(res + 5, 0xff, 16);
+        regs.ecx = (unsigned long)(res + 1);
+        rc = x86_emulate(&ctxt, &emulops);
+        asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) );
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) )
+            goto fail;
+        printf("okay\n");
+
+        printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)...");
+        asm volatile ( "vmovups %0, %%ymm1\n"
+                       put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)")
+                       :: "m" (res[16]), "d" (NULL) );
+
+        set_insn(vcvtps2ph);
+        memset(res + 7, 0, 32);
+        regs.edx = (unsigned long)(res + 7);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) ||
+             memcmp(res + 1, res + 7, 16) ||
+             res[11] || res[12] || res[13] || res[14] )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 28)) != 0; \
 })
 
+#define cpu_has_f16c ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        res.c = 0; \
+    (res.c & (1U << 29)) != 0; \
+})
+
 #define cpu_has_avx2 ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -369,6 +369,7 @@ static const struct {
     [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
     [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
     [0x10] = { .simd_size = simd_packed_int },
+    [0x13] = { .simd_size = simd_other, .two_op = 1 },
     [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
     [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
@@ -411,6 +412,7 @@ static const struct {
     [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
     [0x18] = { .simd_size = simd_128 },
     [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
     [0x20] = { .simd_size = simd_none },
     [0x21] = { .simd_size = simd_other },
     [0x22] = { .simd_size = simd_none },
@@ -1601,6 +1603,7 @@ static bool vcpu_has(
 #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
 #define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
+#define vcpu_has_f16c()        vcpu_has(         1, ECX, 29, ctxt, ops)
 #define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
 #define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
                                vcpu_has_sse())
@@ -7216,6 +7219,12 @@ x86_emulate(
         host_and_vcpu_must_have(sse4_1);
         goto simd_0f38_common;
 
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */
+        generate_exception_if(vex.w, EXC_UD);
+        host_and_vcpu_must_have(f16c);
+        op_bytes = 8 << vex.l;
+        goto simd_0f_ymm;
+
     case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
@@ -7607,6 +7616,50 @@ x86_emulate(
         opc = init_prefixes(stub);
         goto pextr;
 
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */
+    {
+        uint32_t mxcsr;
+
+        generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
+        host_and_vcpu_must_have(f16c);
+        fail_if(!ops->write);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        opc[1] = modrm;
+        if ( ea.type == OP_MEM )
+        {
+            /* Convert memory operand to (%rAX). */
+            vex.b = 1;
+            opc[1] &= 0x38;
+        }
+        opc[2] = imm1;
+        fic.insn_bytes = PFX_BYTES + 3;
+        opc[3] = 0xc3;
+
+        copy_VEX(opc, vex);
+        /* Latch MXCSR - we may need to restore it below. */
+        invoke_stub("stmxcsr %[mxcsr]", "",
+                    "=m" (*mmvalp), "+m" (fic.exn_raised), [mxcsr] "=m" (mxcsr)
+                    : "a" (mmvalp));
+
+        put_stub(stub);
+        check_xmm_exn(&fic);
+
+        if ( ea.type == OP_MEM )
+        {
+            rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, 8 << vex.l, ctxt);
+            if ( rc != X86EMUL_OKAY )
+            {
+                asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
+                goto done;
+            }
+        }
+
+        state->simd_size = simd_none;
+        break;
+    }
+
     case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
         host_and_vcpu_must_have(sse4_1);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -60,6 +60,7 @@
 #define cpu_has_aesni           boot_cpu_has(X86_FEATURE_AESNI)
 #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
+#define cpu_has_f16c            boot_cpu_has(X86_FEATURE_F16C)
 #define cpu_has_rdrand          boot_cpu_has(X86_FEATURE_RDRAND)
 #define cpu_has_hypervisor      boot_cpu_has(X86_FEATURE_HYPERVISOR)
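
Aside (not part of the patch, just for reviewers unfamiliar with F16C):
the two instructions convert between IEEE binary16 and binary32. Below
is a minimal userspace sketch of the same round trip, using the
compiler's F16C intrinsics and the test vectors from the new test case
above (compile with e.g. "gcc -mf16c"); everything in it is
illustrative only and independent of the emulator code.

#include <stdint.h>
#include <stdio.h>
#include <immintrin.h>

int main(void)
{
    /* binary16 encodings of 1.0, 2.0, 3.0, 4.0, -.5, .25, 0.0, -1.0 */
    const uint16_t half[8] = { 0x3c00, 0x4000, 0x4200, 0x4400,
                               0xb800, 0x3400, 0x0000, 0xbc00 };
    uint16_t back[8];

    /* VCVTPH2PS: eight halves widen to eight singles. */
    __m256 wide = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)half));

    /* VCVTPS2PH with imm8 == 0 (round to nearest even), as in the test. */
    _mm_storeu_si128((__m128i *)back, _mm256_cvtps_ph(wide, 0));

    for ( unsigned int i = 0; i < 8; ++i )
        printf("%04x -> %04x\n", half[i], back[i]);

    return 0;
}

Since all eight inputs are exactly representable in binary16, the round
trip is lossless, which is also why the test can simply memcmp() the
vcvtps2ph output against the original vcvtph2ps input.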