[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Xen-changelog] [xen staging] x86emul: support AVX512{F,DQ} FP broadcast insns
commit 88855b727bd666754fd0b8ed42aa63c480fb0179 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Tue Nov 20 15:11:50 2018 +0100 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Tue Nov 20 15:11:50 2018 +0100 x86emul: support AVX512{F,DQ} FP broadcast insns Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/tests/x86_emulator/evex-disp8.c | 25 ++++++++++++++++ xen/arch/x86/x86_emulate/x86_emulate.c | 55 ++++++++++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c index 41f0faac81..43c4a9f992 100644 --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -105,6 +105,7 @@ enum esz { static const struct test avx512f_all[] = { INSN_FP(add, 0f, 58), + INSN(broadcastss, 66, 0f38, 18, el, d, el), INSN_FP(cmp, 0f, c2), INSN_FP(div, 0f, 5e), INSN(fmadd132, 66, 0f38, 98, vl, sd, vl), @@ -176,6 +177,15 @@ static const struct test avx512f_128[] = { INSN(movq, 66, 0f, d6, el, q, el), }; +static const struct test avx512f_no128[] = { + INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl), + INSN(broadcastsd, 66, 0f38, 19, el, q, el), +}; + +static const struct test avx512f_512[] = { + INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl), +}; + static const struct test avx512bw_all[] = { INSN(movdqu8, f2, 0f, 6f, vl, b, vl), INSN(movdqu8, f2, 0f, 7f, vl, b, vl), @@ -190,8 +200,19 @@ static const struct test avx512dq_all[] = { INSN_PFP(xor, 0f, 57), }; +static const struct test avx512dq_no128[] = { + INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl), + INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl), +}; + +static const struct test avx512dq_512[] = { + INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl), +}; + static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 }; static const unsigned char vl_128[] = { VL_128 }; +static const unsigned char vl_no128[] = { VL_512, VL_256 }; 
+static const unsigned char vl_512[] = { VL_512 }; /* * This table, indicating the presence of an immediate (byte) for an opcode @@ -520,6 +541,10 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt, RUN(avx512f, all); RUN(avx512f, 128); + RUN(avx512f, no128); + RUN(avx512f, 512); RUN(avx512bw, all); RUN(avx512dq, all); + RUN(avx512dq, no128); + RUN(avx512dq, 512); } diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index 9bd5d35a44..73ce8ddd99 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -234,10 +234,16 @@ enum simd_opsize { /* * 128 bits of integer or floating point data, with no further - * formatting information. + * formatting information, or with it encoded by EVEX.W. */ simd_128, + /* + * 256 bits of integer or floating point data, with formatting + * encoded by EVEX.W. + */ + simd_256, + /* Operand size encoded in non-standard way. */ simd_other }; @@ -432,8 +438,10 @@ static const struct ext0f38_table { [0x13] = { .simd_size = simd_other, .two_op = 1 }, [0x14 ... 0x16] = { .simd_size = simd_packed_fp }, [0x17] = { .simd_size = simd_packed_int, .two_op = 1 }, - [0x18 ... 0x19] = { .simd_size = simd_scalar_opc, .two_op = 1 }, - [0x1a] = { .simd_size = simd_128, .two_op = 1 }, + [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 }, + [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 }, + [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 }, + [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 }, [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 }, [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 }, [0x28 ... 
0x29] = { .simd_size = simd_packed_int }, @@ -3339,6 +3347,10 @@ x86_decode( op_bytes = 16; break; + case simd_256: + op_bytes = 32; + break; + default: op_bytes = 0; break; @@ -7993,6 +8005,43 @@ x86_emulate( dst.type = OP_NONE; break; + case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */ + generate_exception_if(evex.w || evex.br, EXC_UD); + avx512_broadcast: + /* + * For the respective code below the main switch() to work we need to + * fold op_mask here: A source element gets read whenever any of its + * respective destination elements' mask bits is set. + */ + if ( fault_suppression ) + { + n = 1 << ((b & 3) - evex.w); + EXPECT(elem_bytes > 0); + ASSERT(op_bytes == n * elem_bytes); + for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n ) + op_mask |= (op_mask >> i) & ((1 << n) - 1); + } + goto avx512f_no_sae; + + case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */ + /* vbroadcastf64x4 m256,zmm{k} */ + generate_exception_if(ea.type != OP_MEM || evex.lr != 2, EXC_UD); + /* fall through */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */ + /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */ + generate_exception_if(!evex.lr || evex.br, EXC_UD); + if ( !evex.w ) + host_and_vcpu_must_have(avx512dq); + goto avx512_broadcast; + + case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */ + /* vbroadcastf64x2 m128,{y,z}mm{k} */ + generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.br, + EXC_UD); + if ( evex.w ) + host_and_vcpu_must_have(avx512dq); + goto avx512_broadcast; + case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */ case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */ case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */ -- generated by git-patchbot for /home/xen/git/xen.git#staging _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |