[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512{F, BW} packed integer arithmetic insns



commit 64f3090d9c49158149293af6ad5564c22bfb7344
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Nov 20 15:13:54 2018 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Nov 20 15:13:54 2018 +0100

    x86emul: support AVX512{F,BW} packed integer arithmetic insns
    
    Note: vpadd* / vpsub* et al are put at seemingly the wrong slot of the
    big switch(). This is in anticipation of adding e.g. vpunpck* to those
    groups (see the legacy/VEX encoded case labels nearby to support this).
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  | 39 +++++++++++++++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 77 +++++++++++++++++++++++++++++++---
 2 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 0061bbf6ee..1c0f6c4040 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -160,6 +160,8 @@ static const struct test avx512f_all[] = {
     INSN_PFP_NB(movu,        0f, 10),
     INSN_PFP_NB(movu,        0f, 11),
     INSN_FP(mul,             0f, 59),
+    INSN(paddd,        66,   0f, fe,    vl,      d, vl),
+    INSN(paddq,        66,   0f, d4,    vl,      q, vl),
     INSN(pand,         66,   0f, db,    vl,     dq, vl),
     INSN(pandn,        66,   0f, df,    vl,     dq, vl),
     INSN(pcmp,         66, 0f3a, 1f,    vl,     dq, vl),
@@ -168,7 +170,16 @@ static const struct test avx512f_all[] = {
     INSN(pcmpgtd,      66,   0f, 66,    vl,      d, vl),
     INSN(pcmpgtq,      66, 0f38, 37,    vl,      q, vl),
     INSN(pcmpu,        66, 0f3a, 1e,    vl,     dq, vl),
+    INSN(pmaxs,        66, 0f38, 3d,    vl,     dq, vl),
+    INSN(pmaxu,        66, 0f38, 3f,    vl,     dq, vl),
+    INSN(pmins,        66, 0f38, 39,    vl,     dq, vl),
+    INSN(pminu,        66, 0f38, 3b,    vl,     dq, vl),
+    INSN(pmuldq,       66, 0f38, 28,    vl,      q, vl),
+    INSN(pmulld,       66, 0f38, 40,    vl,      d, vl),
+    INSN(pmuludq,      66,   0f, f4,    vl,      q, vl),
     INSN(por,          66,   0f, eb,    vl,     dq, vl),
+    INSN(psubd,        66,   0f, fa,    vl,      d, vl),
+    INSN(psubq,        66,   0f, fb,    vl,      q, vl),
     INSN(pternlog,     66, 0f3a, 25,    vl,     dq, vl),
     INSN(ptestm,       66, 0f38, 27,    vl,     dq, vl),
     INSN(ptestnm,      f3, 0f38, 27,    vl,     dq, vl),
@@ -203,12 +214,39 @@ static const struct test avx512bw_all[] = {
     INSN(movdqu8,     f2,   0f, 7f,    vl,    b, vl),
     INSN(movdqu16,    f2,   0f, 6f,    vl,    w, vl),
     INSN(movdqu16,    f2,   0f, 7f,    vl,    w, vl),
+    INSN(paddb,       66,   0f, fc,    vl,    b, vl),
+    INSN(paddsb,      66,   0f, ec,    vl,    b, vl),
+    INSN(paddsw,      66,   0f, ed,    vl,    w, vl),
+    INSN(paddusb,     66,   0f, dc,    vl,    b, vl),
+    INSN(paddusw,     66,   0f, dd,    vl,    w, vl),
+    INSN(paddw,       66,   0f, fd,    vl,    w, vl),
+    INSN(pavgb,       66,   0f, e0,    vl,    b, vl),
+    INSN(pavgw,       66,   0f, e3,    vl,    w, vl),
     INSN(pcmp,        66, 0f3a, 3f,    vl,   bw, vl),
     INSN(pcmpeqb,     66,   0f, 74,    vl,    b, vl),
     INSN(pcmpeqw,     66,   0f, 75,    vl,    w, vl),
     INSN(pcmpgtb,     66,   0f, 64,    vl,    b, vl),
     INSN(pcmpgtw,     66,   0f, 65,    vl,    w, vl),
     INSN(pcmpu,       66, 0f3a, 3e,    vl,   bw, vl),
+    INSN(pmaddwd,     66,   0f, f5,    vl,    w, vl),
+    INSN(pmaxsb,      66, 0f38, 3c,    vl,    b, vl),
+    INSN(pmaxsw,      66,   0f, ee,    vl,    w, vl),
+    INSN(pmaxub,      66,   0f, de,    vl,    b, vl),
+    INSN(pmaxuw,      66, 0f38, 3e,    vl,    w, vl),
+    INSN(pminsb,      66, 0f38, 38,    vl,    b, vl),
+    INSN(pminsw,      66,   0f, ea,    vl,    w, vl),
+    INSN(pminub,      66,   0f, da,    vl,    b, vl),
+    INSN(pminuw,      66, 0f38, 3a,    vl,    w, vl),
+    INSN(pmulhuw,     66,   0f, e4,    vl,    w, vl),
+    INSN(pmulhw,      66,   0f, e5,    vl,    w, vl),
+    INSN(pmullw,      66,   0f, d5,    vl,    w, vl),
+    INSN(psadbw,      66,   0f, f6,    vl,    b, vl),
+    INSN(psubb,       66,   0f, f8,    vl,    b, vl),
+    INSN(psubsb,      66,   0f, e8,    vl,    b, vl),
+    INSN(psubsw,      66,   0f, e9,    vl,    w, vl),
+    INSN(psubusb,     66,   0f, d8,    vl,    b, vl),
+    INSN(psubusw,     66,   0f, d9,    vl,    w, vl),
+    INSN(psubw,       66,   0f, f9,    vl,    w, vl),
     INSN(ptestm,      66, 0f38, 26,    vl,   bw, vl),
     INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
 };
@@ -217,6 +255,7 @@ static const struct test avx512dq_all[] = {
     INSN_PFP(and,              0f, 54),
     INSN_PFP(andn,             0f, 55),
     INSN_PFP(or,               0f, 56),
+    INSN(pmullq,         66, 0f38, 40,   vl,  q, vl),
     INSN_PFP(xor,              0f, 57),
 };
 
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 2c559740b8..ec5892fb01 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -367,21 +367,21 @@ static const struct twobyte_table {
     [0xc8 ... 0xcf] = { ImplicitOps },
     [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
     [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
     [0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
     [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
     [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
-    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xff] = { ModRM }
 };
 
@@ -451,7 +451,7 @@ static const struct ext0f38_table {
     [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
     [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
     [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x40] = { .simd_size = simd_packed_int },
+    [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x45 ... 0x47] = { .simd_size = simd_packed_int },
     [0x58 ... 0x59] = { .simd_size = simd_other, .two_op = 1 },
@@ -5984,6 +5984,10 @@ x86_emulate(
     case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     avx512f_no_sae:
         host_and_vcpu_must_have(avx512f);
         generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD);
@@ -6585,6 +6589,31 @@ x86_emulate(
         get_fpu(X86EMUL_FPU_mmx);
         goto simd_0f_common;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        fault_suppression = false;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512bw);
+        generate_exception_if(evex.br, EXC_UD);
+        elem_bytes = 1 << (b & 1);
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
     case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
         op_bytes = 16 << evex.lr;
@@ -6611,6 +6640,12 @@ x86_emulate(
         avx512_vlen_check(false);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        generate_exception_if(!evex.w, EXC_UD);
+        goto avx512f_no_sae;
+
     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
     CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
@@ -7834,6 +7869,12 @@ x86_emulate(
         op_bytes = vex.pfx ? 16 : 8;
         goto simd_0f_int;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        generate_exception_if(evex.w != (b & 1), EXC_UD);
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC(0x0f, 0xd4):        /* paddq mm/m64,mm */
     case X86EMUL_OPC(0x0f, 0xf4):        /* pmuludq mm/m64,mm */
     case X86EMUL_OPC(0x0f, 0xfb):        /* psubq mm/m64,mm */
@@ -7862,6 +7903,16 @@ x86_emulate(
         vcpu_must_have(mmxext);
         goto simd_0f_mmx;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512bw);
+        generate_exception_if(evex.br, EXC_UD);
+        elem_bytes = b & 0x10 ? 1 : 2;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_66(0x0f, 0xe6):       /* cvttpd2dq xmm/mem,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe6):   /* vcvttpd2dq {x,y}mm/mem,xmm */
     case X86EMUL_OPC_F3(0x0f, 0xe6):       /* cvtdq2pd xmm/mem,xmm */
@@ -8236,6 +8287,20 @@ x86_emulate(
         host_and_vcpu_must_have(sse4_2);
         goto simd_0f38_common;
 
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512bw);
+        generate_exception_if(evex.br, EXC_UD);
+        elem_bytes = b & 2 ?: 1;
+        goto avx512f_no_sae;
+
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_66(0x0f38, 0xdb):     /* aesimc xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
     case X86EMUL_OPC_66(0x0f38, 0xdc):     /* aesenc xmm/m128,xmm,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.