[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns



commit baf4a376f55051b18bdf3d65a7d78cff8bb4fb51
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri May 24 10:22:55 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri May 24 10:22:55 2019 +0200

    x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns
    
    VCVT{,T}S{S,D}2SI use EVEX.W for their destination (register) rather
    than their (possibly memory) source operand size and hence need a
    "manual" override of disp8scale.
    
    While the SDM claims that EVEX.L'L needs to be zero for the 32-bit forms
    of VCVT{,U}SI2SD (exception type E10NF), observations on my test system
    do not confirm this (and I've got informal confirmation that this is a
    doc mistake). Nevertheless, to be on the safe side, force evex.lr to be
    zero in this case when constructing the stub.
    
    Slightly adjust the scalar to_int() in the test harness, to increase the
    chances of the operand ending up in memory.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  | 13 +++++-
 tools/tests/x86_emulator/simd.c        |  2 +-
 tools/tests/x86_emulator/simd.h        | 18 ++++++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 80 ++++++++++++++++++++++++++++++++--
 4 files changed, 107 insertions(+), 6 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index aff0aa7a27..847266ced3 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -117,8 +117,16 @@ static const struct test avx512f_all[] = {
     INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
     INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
     INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
+    INSN(cvtsd2si,     f2,   0f, 2d,    el,      q, el),
     INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
+    INSN(cvtsi2sd,     f2,   0f, 2a,    el,   dq64, el),
+    INSN(cvtsi2ss,     f3,   0f, 2a,    el,   dq64, el),
     INSN(cvtss2sd,     f3,   0f, 5a,    el,      d, el),
+    INSN(cvtss2si,     f3,   0f, 2d,    el,      d, el),
+    INSN(cvttpd2dq,    66,   0f, e6,    vl,      q, vl),
+    INSN(cvttps2dq,    f3,   0f, 5b,    vl,      d, vl),
+    INSN(cvttsd2si,    f2,   0f, 2c,    el,      q, el),
+    INSN(cvttss2si,    f3,   0f, 2c,    el,      d, el),
     INSN_FP(div,             0f, 5e),
     INSN(fmadd132,     66, 0f38, 98,    vl,     sd, vl),
     INSN(fmadd132,     66, 0f38, 99,    el,     sd, el),
@@ -746,8 +754,9 @@ static void test_group(const struct test tests[], unsigned int nr_test,
                 break;
 
             case ESZ_dq:
-                test_pair(&tests[i], vl[j], ESZ_d, "d", ESZ_q, "q",
-                          instr, ctxt);
+                test_pair(&tests[i], vl[j], ESZ_d,
+                          strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
+                          ESZ_q, "q", instr, ctxt);
                 break;
 
 #ifdef __i386__
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index ee01361b10..91e36bd8cc 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -89,7 +89,7 @@ static inline bool _to_bool(byte_vec_t bv)
 #endif
 
 #if VEC_SIZE == FLOAT_SIZE
-# define to_int(x) ((vec_t){ (int)(x)[0] })
+# define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); })
 #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
diff --git a/tools/tests/x86_emulator/simd.h b/tools/tests/x86_emulator/simd.h
index 74e4eaa7c5..3e6abe5de2 100644
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -340,10 +340,28 @@ OVR(cvtps2dq);
 OVR(cvtps2pd);
 OVR(cvtps2ph);
 OVR(cvtsd2ss);
+OVR(cvtsd2si);
+OVR(cvtsd2sil);
+OVR(cvtsd2siq);
+OVR(cvtsi2sd);
+OVR(cvtsi2sdl);
+OVR(cvtsi2sdq);
+OVR(cvtsi2ss);
+OVR(cvtsi2ssl);
+OVR(cvtsi2ssq);
 OVR(cvtss2sd);
+OVR(cvtss2si);
+OVR(cvtss2sil);
+OVR(cvtss2siq);
 OVR(cvttpd2dqx);
 OVR(cvttpd2dqy);
 OVR(cvttps2dq);
+OVR(cvttsd2si);
+OVR(cvttsd2sil);
+OVR(cvttsd2siq);
+OVR(cvttss2si);
+OVR(cvttss2sil);
+OVR(cvttss2siq);
 OVR(movddup);
 OVR(movntdq);
 OVR(movntdqa);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 23a2ab67d9..c1c9ae5be3 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -296,7 +296,7 @@ static const struct twobyte_table {
     [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
     [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
     [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
     [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp, simd_none, d8s_dq },
@@ -3040,6 +3040,12 @@ x86_decode(
                 modrm_mod = 3;
                 break;
 
+            case 0x2c: /* vcvtts{s,d}2si need special casing */
+            case 0x2d: /* vcvts{s,d}2si need special casing */
+                if ( evex_encoded() )
+                    disp8scale = 2 + (evex.pfx & VEX_PREFIX_DOUBLE_MASK);
+                break;
+
             case 0x5a: /* vcvtps2pd needs special casing */
                 if ( disp8scale && !evex.pfx && !evex.brs )
                     --disp8scale;
@@ -6173,6 +6179,48 @@ x86_emulate(
         state->simd_size = simd_none;
         goto simd_0f_rm;
 
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
+        generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
+                              EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( !evex.brs )
+            avx512_vlen_check(true);
+        get_fpu(X86EMUL_FPU_zmm);
+
+        if ( ea.type == OP_MEM )
+        {
+            rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
+                            rex_prefix & REX_W ? 8 : 4, ctxt, ops);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+        }
+        else
+            src.val = *ea.reg;
+
+        opc = init_evex(stub);
+        opc[0] = b;
+        /* Convert memory/GPR source to %rAX. */
+        evex.b = 1;
+        if ( !mode_64bit() )
+            evex.w = 0;
+        /*
+         * SDM version 067 claims that exception type E10NF implies #UD when
+         * EVEX.L'L is non-zero for 32-bit VCVT{,U}SI2SD. Experimentally this
+         * cannot be confirmed, but be on the safe side for the stub.
+         */
+        if ( !evex.w && evex.pfx == vex_f2 )
+            evex.lr = 0;
+        opc[1] = (modrm & 0x38) | 0xc0;
+        insn_bytes = EVEX_PFX_BYTES + 2;
+        opc[2] = 0xc3;
+
+        copy_EVEX(opc, evex);
+        invoke_stub("", "", "=g" (dummy) : "a" (src.val));
+
+        put_stub(stub);
+        state->simd_size = simd_none;
+        break;
+
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c):     /* cvtts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d):     /* cvts{s,d}2si xmm/mem,reg */
@@ -6196,14 +6244,17 @@ x86_emulate(
         }
 
         opc = init_prefixes(stub);
+    cvts_2si:
         opc[0] = b;
         /* Convert GPR destination to %rAX and memory operand to (%rCX). */
         rex_prefix &= ~REX_R;
         vex.r = 1;
+        evex.r = 1;
         if ( ea.type == OP_MEM )
         {
             rex_prefix &= ~REX_B;
             vex.b = 1;
+            evex.b = 1;
             opc[1] = 0x01;
 
             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
@@ -6214,11 +6265,22 @@ x86_emulate(
         else
             opc[1] = modrm & 0xc7;
         if ( !mode_64bit() )
+        {
             vex.w = 0;
-        insn_bytes = PFX_BYTES + 2;
+            evex.w = 0;
+        }
+        if ( evex_encoded() )
+        {
+            insn_bytes = EVEX_PFX_BYTES + 2;
+            copy_EVEX(opc, evex);
+        }
+        else
+        {
+            insn_bytes = PFX_BYTES + 2;
+            copy_REX_VEX(opc, rex_prefix, vex);
+        }
         opc[2] = 0xc3;
 
-        copy_REX_VEX(opc, rex_prefix, vex);
         ea.reg = decode_gpr(&_regs, modrm_reg);
         invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
 
@@ -6226,6 +6288,18 @@ x86_emulate(
         state->simd_size = simd_none;
         break;
 
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+        generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
+                               (ea.type != OP_REG && evex.brs)),
+                              EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( !evex.brs )
+            avx512_vlen_check(true);
+        get_fpu(X86EMUL_FPU_zmm);
+        opc = init_evex(stub);
+        goto cvts_2si;
+
     CASE_SIMD_PACKED_FP(, 0x0f, 0x2e):     /* ucomis{s,d} xmm/mem,xmm */
     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
     CASE_SIMD_PACKED_FP(, 0x0f, 0x2f):     /* comis{s,d} xmm/mem,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.