[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] x86emul: handle AVX512-FP16 complex multiplication insns



commit d14c52cba0f544774b49f38ecceef1eed6349628
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jun 5 14:59:36 2023 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jun 5 14:59:36 2023 +0200

    x86emul: handle AVX512-FP16 complex multiplication insns
    
    Aspects to consider are that these have 32-bit element size (pairs of
    FP16) and that there are restrictions on the registers valid to use.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  |  8 ++++++++
 tools/tests/x86_emulator/predicates.c  |  8 ++++++++
 xen/arch/x86/x86_emulate/decode.c      |  8 ++++++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 28 ++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+)

diff --git a/tools/tests/x86_emulator/evex-disp8.c 
b/tools/tests/x86_emulator/evex-disp8.c
index 137b88a325..b5329fcfc3 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -614,12 +614,18 @@ static const struct test avx512_fp16_all[] = {
     INSN(comish,          , map5, 2f,    el, fp16, el),
     INSN(divph,           , map5, 5e,    vl, fp16, vl),
     INSN(divsh,         f3, map5, 5e,    el, fp16, el),
+    INSNX(fcmaddcph,    f2, map6, 56, 1, vl,    d, vl),
+    INSNX(fcmaddcsh,    f2, map6, 57, 1, el,    d, el),
+    INSNX(fcmulcph,     f2, map6, d6, 1, vl,    d, vl),
+    INSNX(fcmulcsh,     f2, map6, d7, 1, el,    d, el),
     INSN(fmadd132ph,    66, map6, 98,    vl, fp16, vl),
     INSN(fmadd132sh,    66, map6, 99,    el, fp16, el),
     INSN(fmadd213ph,    66, map6, a8,    vl, fp16, vl),
     INSN(fmadd213sh,    66, map6, a9,    el, fp16, el),
     INSN(fmadd231ph,    66, map6, b8,    vl, fp16, vl),
     INSN(fmadd231sh,    66, map6, b9,    el, fp16, el),
+    INSNX(fmaddcph,     f3, map6, 56, 1, vl,    d, vl),
+    INSNX(fmaddcsh,     f3, map6, 57, 1, el,    d, el),
     INSN(fmaddsub132ph, 66, map6, 96,    vl, fp16, vl),
     INSN(fmaddsub213ph, 66, map6, a6,    vl, fp16, vl),
     INSN(fmaddsub231ph, 66, map6, b6,    vl, fp16, vl),
@@ -632,6 +638,8 @@ static const struct test avx512_fp16_all[] = {
     INSN(fmsubadd132ph, 66, map6, 97,    vl, fp16, vl),
     INSN(fmsubadd213ph, 66, map6, a7,    vl, fp16, vl),
     INSN(fmsubadd231ph, 66, map6, b7,    vl, fp16, vl),
+    INSNX(fmulcph,      f3, map6, d6, 1, vl,    d, vl),
+    INSNX(fmulcsh,      f3, map6, d7, 1, el,    d, el),
     INSN(fnmadd132ph,   66, map6, 9c,    vl, fp16, vl),
     INSN(fnmadd132sh,   66, map6, 9d,    el, fp16, el),
     INSN(fnmadd213ph,   66, map6, ac,    vl, fp16, vl),
diff --git a/tools/tests/x86_emulator/predicates.c 
b/tools/tests/x86_emulator/predicates.c
index a9d0a2af12..acbd32a5be 100644
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2074,6 +2074,10 @@ static const struct evex {
     { { 0x4d }, 2, T, R, pfx_66, W0, LIG }, /* vrcpsh */
     { { 0x4e }, 2, T, R, pfx_66, W0, Ln }, /* vrsqrtph */
     { { 0x4f }, 2, T, R, pfx_66, W0, LIG }, /* vrsqrtsh */
+    { { 0x56 }, 2, T, R, pfx_f3, W0, Ln }, /* vfmaddcph */
+    { { 0x56 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmaddcph */
+    { { 0x57 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmaddcsh */
+    { { 0x57 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmaddcsh */
     { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
     { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
     { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
@@ -2104,6 +2108,10 @@ static const struct evex {
     { { 0xbd }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd231sh */
     { { 0xbe }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub231ph */
     { { 0xbf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub231sh */
+    { { 0xd6 }, 2, T, R, pfx_f3, W0, Ln }, /* vfmulcph */
+    { { 0xd6 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmulcph */
+    { { 0xd7 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmulcsh */
+    { { 0xd7 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmulcsh */
 };
 
 static const struct {
diff --git a/xen/arch/x86/x86_emulate/decode.c 
b/xen/arch/x86/x86_emulate/decode.c
index 9d9bc75f3b..aef64d9854 100644
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -367,6 +367,8 @@ static const struct ext0f38_table {
     [0x4f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0x50 ... 0x53] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x54 ... 0x55] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = 
d8s_vl },
+    [0x56] = { .simd_size = simd_other, .d8s = d8s_vl },
+    [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0x58] = { .simd_size = simd_other, .two_op = 1, .d8s = 2 },
     [0x59] = { .simd_size = simd_other, .two_op = 1, .d8s = 3 },
     [0x5a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
@@ -431,6 +433,8 @@ static const struct ext0f38_table {
     [0xcc] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
     [0xcd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0xd6] = { .simd_size = simd_other, .d8s = d8s_vl },
+    [0xd7] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0xf0] = { .two_op = 1 },
@@ -1504,6 +1508,10 @@ int x86emul_decode(struct x86_emulate_state *s,
                 if ( s->evex.pfx == vex_66 )
                     s->fp16 = true;
                 break;
+
+            case 0x56: case 0x57: /* vf{,c}maddc{p,s}h */
+            case 0xd6: case 0xd7: /* vf{,c}mulc{p,s}h */
+                break;
             }
 
             disp8scale = decode_disp8scale(ext0f38_table[b].d8s, s);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c 
b/xen/arch/x86/x86_emulate/x86_emulate.c
index 905a3dc432..5c1ec668de 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7841,6 +7841,34 @@ x86_emulate(
         avx512_vlen_check(true);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_F3(6, 0x56): /* vfmaddcph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F2(6, 0x56): /* vfcmaddcph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(6, 0xd6): /* vfmulcph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F2(6, 0xd6): /* vfcmulcph 
[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        op_bytes = 16 << evex.lr;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_F3(6, 0x57): /* vfmaddcsh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_F2(6, 0x57): /* vfcmaddcsh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_F3(6, 0xd7): /* vfmulcsh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_F2(6, 0xd7): /* vfcmulcsh xmm/m16,xmm,xmm{k} */
+    {
+        unsigned int src1 = ~evex.reg;
+
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w || ((b & 1) && ea.type != OP_REG && 
evex.brs),
+                              X86_EXC_UD);
+        if ( mode_64bit() )
+            src1 = (src1 & 0xf) | (!evex.RX << 4);
+        else
+            src1 &= 7;
+        generate_exception_if(modrm_reg == src1 ||
+                              (ea.type != OP_MEM && modrm_reg == modrm_rm),
+                              X86_EXC_UD);
+        if ( ea.type != OP_REG || (b & 1) || !evex.brs )
+            avx512_vlen_check(!(b & 1));
+        goto simd_zmm;
+    }
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.