[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 29/44] x86emul: support AVX512{F, BW} integer unpack insns
There's once again one extra twobyte_table[] entry which gets its Disp8 shift value set right away without getting support implemented just yet, again to avoid needlessly splitting groups of entries. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v4: Move OVR() additions into __AVX512VL__ conditional. v3: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -218,6 +218,10 @@ static const struct test avx512f_all[] = INSN(pternlog, 66, 0f3a, 25, vl, dq, vl), INSN(ptestm, 66, 0f38, 27, vl, dq, vl), INSN(ptestnm, f3, 0f38, 27, vl, dq, vl), + INSN(punpckhdq, 66, 0f, 6a, vl, d, vl), + INSN(punpckhqdq, 66, 0f, 6d, vl, q, vl), + INSN(punpckldq, 66, 0f, 62, vl, d, vl), + INSN(punpcklqdq, 66, 0f, 6c, vl, q, vl), INSN(pxor, 66, 0f, ef, vl, dq, vl), INSN_PFP(shuf, 0f, c6), INSN_FP(sqrt, 0f, 51), @@ -316,6 +320,10 @@ static const struct test avx512bw_all[] INSN(psubw, 66, 0f, f9, vl, w, vl), INSN(ptestm, 66, 0f38, 26, vl, bw, vl), INSN(ptestnm, f3, 0f38, 26, vl, bw, vl), + INSN(punpckhbw, 66, 0f, 68, vl, b, vl), + INSN(punpckhwd, 66, 0f, 69, vl, w, vl), + INSN(punpcklbw, 66, 0f, 60, vl, b, vl), + INSN(punpcklwd, 66, 0f, 61, vl, w, vl), }; static const struct test avx512bw_128[] = { --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -294,6 +294,10 @@ static inline bool _to_bool(byte_vec_t b asm ( "vpbroadcastd %k1, %0" : "=v" (t_) : "r" (x) ); \ t_; \ }) +# if VEC_SIZE == 16 +# define interleave_hi(x, y) ((vec_t)B(punpckhdq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) +# define interleave_lo(x, y) ((vec_t)B(punpckldq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) +# endif # define mix(x, y) ((vec_t)B(movdqa32_, _mask, (vsi_t)(x), (vsi_t)(y), \ (0b0101010101010101 & ((1 << ELEM_COUNT) - 1)))) # define shrink1(x) ((half_t)B(pmovqd, _mask, (vdi_t)(x), (vsi_half_t){}, ~0)) @@ -311,6 +315,10 @@ static inline bool _to_bool(byte_vec_t b t_; \ }) # endif +# if VEC_SIZE == 16 +# define 
interleave_hi(x, y) ((vec_t)B(punpckhqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) +# define interleave_lo(x, y) ((vec_t)B(punpcklqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) +# endif # define mix(x, y) ((vec_t)B(movdqa64_, _mask, (vdi_t)(x), (vdi_t)(y), 0b01010101)) # endif # if INT_SIZE == 4 --- a/tools/tests/x86_emulator/simd.h +++ b/tools/tests/x86_emulator/simd.h @@ -252,6 +252,10 @@ OVR(pmovzxwq); OVR(pmulld); OVR(pmuldq); OVR(pmuludq); +OVR(punpckhdq); +OVR(punpckhqdq); +OVR(punpckldq); +OVR(punpcklqdq); #endif #undef OVR_VFP --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -312,10 +312,10 @@ static const struct twobyte_table { [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, - [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other }, + [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl }, [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, - [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other }, - [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl }, + [0x6b ... 
0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 }, [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl }, [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other }, @@ -6643,6 +6643,12 @@ x86_emulate( get_fpu(X86EMUL_FPU_mmx); goto simd_0f_common; + case X86EMUL_OPC_EVEX_66(0x0f, 0x60): /* vpunpcklbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x61): /* vpunpcklwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x68): /* vpunpckhbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x69): /* vpunpckhwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + op_bytes = 16 << evex.lr; + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */ @@ -6671,6 +6677,13 @@ x86_emulate( elem_bytes = 1 << (b & 1); goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_66(0x0f, 0x62): /* vpunpckldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x6a): /* vpunpckhdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + generate_exception_if(evex.w, EXC_UD); + fault_suppression = false; + op_bytes = 16 << evex.lr; + goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */ case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */ op_bytes = 16 << evex.lr; @@ -6697,6 +6710,10 @@ x86_emulate( avx512_vlen_check(false); goto simd_zmm; + case X86EMUL_OPC_EVEX_66(0x0f, 0x6c): /* vpunpcklqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x6d): /* vpunpckhqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + fault_suppression = false; + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our [...]