[Xen-devel] [PATCH v7 03/49] x86emul: support AVX512{F, BW, DQ} extract insns
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v7: Re-base.
v4: Make use of d8s_dq64.
v3: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -212,6 +212,7 @@ static const struct test avx512f_all[] =
 };
 
 static const struct test avx512f_128[] = {
+    INSN(extractps, 66, 0f3a, 17, el, d, el),
     INSN(mov, 66, 0f, 6e, el, dq64, el),
     INSN(mov, 66, 0f, 7e, el, dq64, el),
     INSN(movq, f3, 0f, 7e, el, q, el),
@@ -221,10 +222,14 @@ static const struct test avx512f_128[] =
 static const struct test avx512f_no128[] = {
     INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl),
     INSN(broadcastsd, 66, 0f38, 19, el, q, el),
+    INSN(extractf32x4, 66, 0f3a, 19, el_4, d, vl),
+    INSN(extracti32x4, 66, 0f3a, 39, el_4, d, vl),
 };
 
 static const struct test avx512f_512[] = {
     INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
+    INSN(extractf64x4, 66, 0f3a, 1b, el_4, q, vl),
+    INSN(extracti64x4, 66, 0f3a, 3b, el_4, q, vl),
 };
 
 static const struct test avx512bw_all[] = {
@@ -280,6 +285,12 @@ static const struct test avx512bw_all[]
     INSN(ptestnm, f3, 0f38, 26, vl, bw, vl),
 };
 
+static const struct test avx512bw_128[] = {
+    INSN(pextrb, 66, 0f3a, 14, el, b, el),
+//       pextrw, 66,   0f, c5,         w
+    INSN(pextrw, 66, 0f3a, 15, el, w, el),
+};
+
 static const struct test avx512dq_all[] = {
     INSN_PFP(and, 0f, 54),
     INSN_PFP(andn, 0f, 55),
@@ -288,13 +299,21 @@ static const struct test avx512dq_all[]
     INSN_PFP(xor, 0f, 57),
 };
 
+static const struct test avx512dq_128[] = {
+    INSN(pextr, 66, 0f3a, 16, el, dq64, el),
+};
+
 static const struct test avx512dq_no128[] = {
     INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
     INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
+    INSN(extractf64x2, 66, 0f3a, 19, el_2, q, vl),
+    INSN(extracti64x2, 66, 0f3a, 39, el_2, q, vl),
 };
 
 static const struct test avx512dq_512[] = {
     INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
+    INSN(extractf32x8, 66, 0f3a, 1b, el_8, d, vl),
+    INSN(extracti32x8, 66, 0f3a, 3b, el_8, d, vl),
 };
 
 static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
@@ -632,7 +651,9 @@ void evex_disp8_test(void *instr, struct
     RUN(avx512f, no128);
     RUN(avx512f, 512);
     RUN(avx512bw, all);
+    RUN(avx512bw, 128);
     RUN(avx512dq, all);
+    RUN(avx512dq, 128);
     RUN(avx512dq, no128);
     RUN(avx512dq, 512);
 }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -512,9 +512,13 @@ static const struct ext0f3a_table {
     [0x0a ... 0x0b] = { .simd_size = simd_scalar_opc },
     [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
     [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
-    [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
+    [0x14] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 0 },
+    [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 },
+    [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 },
+    [0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 2 },
     [0x18] = { .simd_size = simd_128 },
-    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
     [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x20] = { .simd_size = simd_none },
@@ -523,7 +527,8 @@ static const struct ext0f3a_table {
     [0x25] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
     [0x38] = { .simd_size = simd_128 },
-    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x3b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
     [0x42] = { .simd_size = simd_packed_int },
@@ -2676,6 +2681,8 @@ x86_decode_0f3a(
              ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
     case X86EMUL_OPC_VEX_66(0, 0x14)
             ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
+    case X86EMUL_OPC_EVEX_66(0, 0x14)
+            ... X86EMUL_OPC_EVEX_66(0, 0x17): /* vpextr*, vextractps */
     case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
         break;
 
@@ -8866,9 +8873,9 @@ x86_emulate(
         opc[0] = b;
         /* Convert memory/GPR operand to (%rAX). */
         rex_prefix &= ~REX_B;
-        vex.b = 1;
+        evex.b = vex.b = 1;
         if ( !mode_64bit() )
-            vex.w = 0;
+            evex.w = vex.w = 0;
         opc[1] = modrm & 0x38;
         opc[2] = imm1;
         opc[3] = 0xc3;
@@ -8878,7 +8885,10 @@ x86_emulate(
             --opc;
         }
 
-        copy_REX_VEX(opc, rex_prefix, vex);
+        if ( evex_encoded() )
+            copy_EVEX(opc, evex);
+        else
+            copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
 
         put_stub(stub);
@@ -8903,6 +8913,52 @@ x86_emulate(
         opc = init_prefixes(stub);
         goto pextr;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
+        generate_exception_if(ea.type != OP_REG, EXC_UD);
+        /* Convert to alternative encoding: We want to use a memory operand. */
+        evex.opcx = ext_0f3a;
+        b = 0x15;
+        modrm <<= 3;
+        evex.r = evex.b;
+        evex.R = evex.x;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
+        generate_exception_if((evex.lr || evex.reg != 0xf || !evex.RX ||
+                               evex.opmsk || evex.brs),
+                              EXC_UD);
+        if ( !(b & 2) )
+            host_and_vcpu_must_have(avx512bw);
+        else if ( !(b & 1) )
+            host_and_vcpu_must_have(avx512dq);
+        else
+            host_and_vcpu_must_have(avx512f);
+        get_fpu(X86EMUL_FPU_zmm);
+        opc = init_evex(stub);
+        goto pextr;
+
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x19): /* vextractf32x4 $imm8,{y,z}mm,xmm/m128{k} */
+                                            /* vextractf64x2 $imm8,{y,z}mm,xmm/m128{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x39): /* vextracti32x4 $imm8,{y,z}mm,xmm/m128{k} */
+                                            /* vextracti64x2 $imm8,{y,z}mm,xmm/m128{k} */
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        generate_exception_if(!evex.lr || evex.brs, EXC_UD);
+        fault_suppression = false;
+        goto avx512f_imm8_no_sae;
+
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1b): /* vextractf32x8 $imm8,zmm,ymm/m256{k} */
+                                            /* vextractf64x4 $imm8,zmm,ymm/m256{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3b): /* vextracti32x8 $imm8,zmm,ymm/m256{k} */
+                                            /* vextracti64x4 $imm8,zmm,ymm/m256{k} */
+        if ( !evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        generate_exception_if(evex.lr != 2 || evex.brs, EXC_UD);
+        fault_suppression = false;
+        goto avx512f_imm8_no_sae;
+
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */
     {
         uint32_t mxcsr;
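For reference, the .d8s values added to the 0f3a table entries are log2 of the
EVEX disp8*N compressed displacement scale, which is what the new evex-disp8.c
entries exercise: an 8-bit displacement on a memory operand is multiplied by N
before use (N = 16 for the ...32x4/...64x2 forms, N = 32 for the
...32x8/...64x4 forms). A minimal stand-alone sketch of that scaling, using a
hypothetical helper name rather than the emulator's actual code:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only (hypothetical helper, not x86_emulate.c code):
 * the d8s table fields store log2 of the scale factor N applied to
 * an EVEX memory operand's 8-bit displacement.
 */
static int32_t scaled_disp8(int8_t disp8, unsigned int d8s)
{
    return (int32_t)disp8 * (1 << d8s);
}

int main(void)
{
    printf("%d\n", scaled_disp8(1, 4));  /* vextractf32x4 to m128: 16 */
    printf("%d\n", scaled_disp8(1, 5));  /* vextractf64x4 to m256: 32 */
    printf("%d\n", scaled_disp8(-2, 4)); /* negative disp8 scales the same way: -32 */
    return 0;
}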
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel