[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Xen-devel] [PATCH v5 15/47] x86emul: support AVX512{F, BW} packed integer arithmetic insns
Note: vpadd* / vpsub* et al are put at seemingly the wrong slot of the big switch(). This is in anticipation of adding e.g. vpunpck* to those groups (see the legacy/VEX encoded case labels nearby to support this). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v4: Move a case block further down. v3: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -160,6 +160,8 @@ static const struct test avx512f_all[] = INSN_PFP_NB(movu, 0f, 10), INSN_PFP_NB(movu, 0f, 11), INSN_FP(mul, 0f, 59), + INSN(paddd, 66, 0f, fe, vl, d, vl), + INSN(paddq, 66, 0f, d4, vl, q, vl), INSN(pand, 66, 0f, db, vl, dq, vl), INSN(pandn, 66, 0f, df, vl, dq, vl), INSN(pcmp, 66, 0f3a, 1f, vl, dq, vl), @@ -168,7 +170,16 @@ static const struct test avx512f_all[] = INSN(pcmpgtd, 66, 0f, 66, vl, d, vl), INSN(pcmpgtq, 66, 0f38, 37, vl, q, vl), INSN(pcmpu, 66, 0f3a, 1e, vl, dq, vl), + INSN(pmaxs, 66, 0f38, 3d, vl, dq, vl), + INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl), + INSN(pmins, 66, 0f38, 39, vl, dq, vl), + INSN(pminu, 66, 0f38, 3b, vl, dq, vl), + INSN(pmuldq, 66, 0f38, 28, vl, q, vl), + INSN(pmulld, 66, 0f38, 40, vl, d, vl), + INSN(pmuludq, 66, 0f, f4, vl, q, vl), INSN(por, 66, 0f, eb, vl, dq, vl), + INSN(psubd, 66, 0f, fa, vl, d, vl), + INSN(psubq, 66, 0f, fb, vl, q, vl), INSN(pternlog, 66, 0f3a, 25, vl, dq, vl), INSN(ptestm, 66, 0f38, 27, vl, dq, vl), INSN(ptestnm, f3, 0f38, 27, vl, dq, vl), @@ -203,12 +214,39 @@ static const struct test avx512bw_all[] INSN(movdqu8, f2, 0f, 7f, vl, b, vl), INSN(movdqu16, f2, 0f, 6f, vl, w, vl), INSN(movdqu16, f2, 0f, 7f, vl, w, vl), + INSN(paddb, 66, 0f, fc, vl, b, vl), + INSN(paddsb, 66, 0f, ec, vl, b, vl), + INSN(paddsw, 66, 0f, ed, vl, w, vl), + INSN(paddusb, 66, 0f, dc, vl, b, vl), + INSN(paddusw, 66, 0f, dd, vl, w, vl), + INSN(paddw, 66, 0f, fd, vl, w, vl), + INSN(pavgb, 66, 0f, e0, vl, b, vl), + INSN(pavgw, 66, 0f, e3, vl, w, vl), INSN(pcmp, 66, 0f3a, 3f, vl, bw, vl), INSN(pcmpeqb, 66, 0f, 74, vl, b, vl), INSN(pcmpeqw, 
66, 0f, 75, vl, w, vl), INSN(pcmpgtb, 66, 0f, 64, vl, b, vl), INSN(pcmpgtw, 66, 0f, 65, vl, w, vl), INSN(pcmpu, 66, 0f3a, 3e, vl, bw, vl), + INSN(pmaddwd, 66, 0f, f5, vl, w, vl), + INSN(pmaxsb, 66, 0f38, 3c, vl, b, vl), + INSN(pmaxsw, 66, 0f, ee, vl, w, vl), + INSN(pmaxub, 66, 0f, de, vl, b, vl), + INSN(pmaxuw, 66, 0f38, 3e, vl, w, vl), + INSN(pminsb, 66, 0f38, 38, vl, b, vl), + INSN(pminsw, 66, 0f, ea, vl, w, vl), + INSN(pminub, 66, 0f, da, vl, b, vl), + INSN(pminuw, 66, 0f38, 3a, vl, w, vl), + INSN(pmulhuw, 66, 0f, e4, vl, w, vl), + INSN(pmulhw, 66, 0f, e5, vl, w, vl), + INSN(pmullw, 66, 0f, d5, vl, w, vl), + INSN(psadbw, 66, 0f, f6, vl, b, vl), + INSN(psubb, 66, 0f, f8, vl, b, vl), + INSN(psubsb, 66, 0f, e8, vl, b, vl), + INSN(psubsw, 66, 0f, e9, vl, w, vl), + INSN(psubusb, 66, 0f, d8, vl, b, vl), + INSN(psubusw, 66, 0f, d9, vl, w, vl), + INSN(psubw, 66, 0f, f9, vl, w, vl), INSN(ptestm, 66, 0f38, 26, vl, bw, vl), INSN(ptestnm, f3, 0f38, 26, vl, bw, vl), }; @@ -217,6 +255,7 @@ static const struct test avx512dq_all[] INSN_PFP(and, 0f, 54), INSN_PFP(andn, 0f, 55), INSN_PFP(or, 0f, 56), + INSN(pmullq, 66, 0f38, 40, vl, q, vl), INSN_PFP(xor, 0f, 57), }; --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -367,21 +367,21 @@ static const struct twobyte_table { [0xc8 ... 0xcf] = { ImplicitOps }, [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other }, [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other }, - [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 }, [0xd7] = { DstReg|SrcImplicit|ModRM|Mov }, [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, - [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other }, - [0xe3 ... 
0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl }, [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other }, - [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int }, - [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xff] = { ModRM } }; @@ -451,7 +451,7 @@ static const struct ext0f38_table { [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 }, [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 }, [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, - [0x40] = { .simd_size = simd_packed_int }, + [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x41] = { .simd_size = simd_packed_int, .two_op = 1 }, [0x45 ... 0x47] = { .simd_size = simd_packed_int }, [0x58 ... 
0x59] = { .simd_size = simd_other, .two_op = 1 }, @@ -5978,6 +5978,10 @@ x86_emulate( case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ avx512f_no_sae: host_and_vcpu_must_have(avx512f); generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD); @@ -6578,6 +6582,31 @@ x86_emulate( get_fpu(X86EMUL_FPU_mmx); goto simd_0f_common; + case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + fault_suppression = false; + /* fall through */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case 
X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512bw); + generate_exception_if(evex.br, EXC_UD); + elem_bytes = 1 << (b & 1); + goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */ case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */ op_bytes = 16 << evex.lr; @@ -6604,6 +6633,12 @@ x86_emulate( avx512_vlen_check(false); goto simd_zmm; + case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + generate_exception_if(!evex.w, EXC_UD); + goto avx512f_no_sae; + CASE_SIMD_PACKED_INT(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */ case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */ CASE_SIMD_PACKED_INT(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */ @@ -7825,6 +7860,12 @@ x86_emulate( op_bytes = vex.pfx ? 
16 : 8; goto simd_0f_int; + case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + generate_exception_if(evex.w != (b & 1), EXC_UD); + goto avx512f_no_sae; + case X86EMUL_OPC(0x0f, 0xd4): /* paddq mm/m64,mm */ case X86EMUL_OPC(0x0f, 0xf4): /* pmuludq mm/m64,mm */ case X86EMUL_OPC(0x0f, 0xfb): /* psubq mm/m64,mm */ @@ -7853,6 +7894,16 @@ x86_emulate( vcpu_must_have(mmxext); goto simd_0f_mmx; + case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512bw); + generate_exception_if(evex.br, EXC_UD); + elem_bytes = b & 0x10 ? 
1 : 2; + goto avx512f_no_sae; + case X86EMUL_OPC_66(0x0f, 0xe6): /* cvttpd2dq xmm/mem,xmm */ case X86EMUL_OPC_VEX_66(0x0f, 0xe6): /* vcvttpd2dq {x,y}mm/mem,xmm */ case X86EMUL_OPC_F3(0x0f, 0xe6): /* cvtdq2pd xmm/mem,xmm */ @@ -8227,6 +8278,20 @@ x86_emulate( host_and_vcpu_must_have(sse4_2); goto simd_0f38_common; + case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512bw); + generate_exception_if(evex.br, EXC_UD); + elem_bytes = b & 2 ?: 1; + goto avx512f_no_sae; + + case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + if ( evex.w ) + host_and_vcpu_must_have(avx512dq); + goto avx512f_no_sae; + case X86EMUL_OPC_66(0x0f38, 0xdb): /* aesimc xmm/m128,xmm */ case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */ case X86EMUL_OPC_66(0x0f38, 0xdc): /* aesenc xmm/m128,xmm,xmm */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our