[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 10/11] x86emul: handle AVX512-FP16 conversion to/from (packed) int{32,64} insns


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 15 Jun 2022 12:31:09 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=72avtIJigMrJ/t9S9LNVdKFGNIXvlVkWO5c2zIOjy7c=; b=ZqTHbFdGBDATJ5lryi3tsw/74ur0CLsmgtOjdhth2+LIUNTjH3mj3VwoUBkKvKVSMxkpNecCMc6NdqqrxqTda49qGygkReGgVaDDfxUTSOtDUDIxmw2NA3w9NFYdRjo+xS4aKyOQk0bVyZzUL8R71s0Q7G1POINrQqwTs3jSLlrjXYI9fFflqdDsn/bwR4Ws+A8o6pOdvjS0Pz6qADhZ5PSs/KlGA/0OGvNePGY/0QVpYB/C4JE7mGSTUeGLZVFRYmst6jlT0DJOvS+TdLrFBgAmh7jo/5KdVA9Qv+0RrciJladYkHtV1JmPz2MRgiSMTowhjFFRW6sljyUODBZ59A==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=EvwiM6N8HVmwTxiIQoHRpvSK8v76sNx61DemjfPJXfGBkHPlwOY7r+TQFnQMjrwGA8+H+l6O15fHtM/atjvoknZs6QFC1cj9sM2SWsN+LRDsrTR+xa8AYf1whTZ6PopL2+xSpYmlL2VJLrwQkTIUkLK7wD3shomNMQV/EYlaPrScn1iMt6dFOit6ILiL5LiugkgIyTbN7IqNxEO0TKiiiGc9dl8t+0iV3xSpmVsM+0Xv8YVgINl+qUEhlcLqOE9COLX+ttiD9JJlUCkT1CObChAL/wdcnYlAKsDOv8fmaX/KmncPPDHIoWHopGLjFwJIqRpUyomw70EdE6DlET28Rw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Wed, 15 Jun 2022 10:31:29 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -612,18 +612,36 @@ static const struct test avx512_fp16_all
     INSN(cmpph,           , 0f3a, c2,    vl, fp16, vl),
     INSN(cmpsh,         f3, 0f3a, c2,    el, fp16, el),
     INSN(comish,          , map5, 2f,    el, fp16, el),
+    INSN(cvtdq2ph,        , map5, 5b,    vl,    d, vl),
     INSN(cvtpd2ph,      66, map5, 5a,    vl,    q, vl),
+    INSN(cvtph2dq,      66, map5, 5b,  vl_2, fp16, vl),
     INSN(cvtph2pd,        , map5, 5a,  vl_4, fp16, vl),
     INSN(cvtph2psx,     66, map6, 13,  vl_2, fp16, vl),
+    INSN(cvtph2qq,      66, map5, 7b,  vl_4, fp16, vl),
+    INSN(cvtph2udq,       , map5, 79,  vl_2, fp16, vl),
+    INSN(cvtph2uqq,     66, map5, 79,  vl_4, fp16, vl),
     INSN(cvtph2uw,        , map5, 7d,    vl, fp16, vl),
     INSN(cvtph2w,       66, map5, 7d,    vl, fp16, vl),
     INSN(cvtps2phx,     66, map5, 1d,    vl,    d, vl),
+    INSN(cvtqq2ph,        , map5, 5b,    vl,    q, vl),
     INSN(cvtsd2sh,      f2, map5, 5a,    el,    q, el),
     INSN(cvtsh2sd,      f3, map5, 5a,    el, fp16, el),
+    INSN(cvtsh2si,      f3, map5, 2d,    el, fp16, el),
     INSN(cvtsh2ss,        , map6, 13,    el, fp16, el),
+    INSN(cvtsh2usi,     f3, map5, 79,    el, fp16, el),
+    INSN(cvtsi2sh,      f3, map5, 2a,    el, dq64, el),
     INSN(cvtss2sh,        , map5, 1d,    el,    d, el),
+    INSN(cvttph2dq,     f3, map5, 5b,  vl_2, fp16, vl),
+    INSN(cvttph2qq,     66, map5, 7a,  vl_4, fp16, vl),
+    INSN(cvttph2udq,      , map5, 78,  vl_2, fp16, vl),
+    INSN(cvttph2uqq,    66, map5, 78,  vl_4, fp16, vl),
     INSN(cvttph2uw,       , map5, 7c,    vl, fp16, vl),
     INSN(cvttph2w,      66, map5, 7c,    vl, fp16, vl),
+    INSN(cvttsh2si,     f3, map5, 2c,    el, fp16, el),
+    INSN(cvttsh2usi,    f3, map5, 78,    el, fp16, el),
+    INSN(cvtudq2ph,     f2, map5, 7a,    vl,    d, vl),
+    INSN(cvtuqq2ph,     f2, map5, 7a,    vl,    q, vl),
+    INSN(cvtusi2sh,     f3, map5, 7b,    el, dq64, el),
     INSN(cvtuw2ph,      f2, map5, 7d,    vl, fp16, vl),
     INSN(cvtw2ph,       f3, map5, 7d,    vl, fp16, vl),
     INSN(divph,           , map5, 5e,    vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2033,6 +2033,9 @@ static const struct evex {
     { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */
     { { 0x1d }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2phx */
     { { 0x1d }, 2, T, R, pfx_no, W0, LIG }, /* vcvtss2sh */
+    { { 0x2a }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsi2sh */
+    { { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */
+    { { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */
     { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
     { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
     { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
@@ -2045,6 +2048,10 @@ static const struct evex {
     { { 0x5a }, 2, T, R, pfx_66, W1, Ln }, /* vcvtpd2ph */
     { { 0x5a }, 2, T, R, pfx_f3, W0, LIG }, /* vcvtsh2sd */
     { { 0x5a }, 2, T, R, pfx_f2, W1, LIG }, /* vcvtsd2sh */
+    { { 0x5b }, 2, T, R, pfx_no, W0, Ln }, /* vcvtdq2ph */
+    { { 0x5b }, 2, T, R, pfx_no, W1, Ln }, /* vcvtqq2ph */
+    { { 0x5b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2dq */
+    { { 0x5b }, 2, T, R, pfx_f3, W0, Ln }, /* vcvttph2dq */
     { { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
     { { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
     { { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
@@ -2054,6 +2061,17 @@ static const struct evex {
     { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
     { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
     { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
+    { { 0x78 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2udq */
+    { { 0x78 }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2uqq */
+    { { 0x78 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2usi */
+    { { 0x79 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2udq */
+    { { 0x79 }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2uqq */
+    { { 0x79 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2usi */
+    { { 0x7a }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2qq */
+    { { 0x7a }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtudq2ph */
+    { { 0x7a }, 2, T, R, pfx_f2, W1, Ln }, /* vcvtuqq2ph */
+    { { 0x7b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2qq */
+    { { 0x7b }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtusi2sh */
     { { 0x7c }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2uw */
     { { 0x7c }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2w */
     { { 0x7d }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2uw */
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1489,12 +1489,25 @@ int x86emul_decode(struct x86_emulate_st
                     s->simd_size = simd_scalar_vexw;
                 break;
 
+            case 0x2a: /* vcvtsi2sh */
+                break;
+
+            case 0x2c: case 0x2d: /* vcvt{,t}sh2si */
+                if ( s->evex.pfx == vex_f3 )
+                    s->fp16 = true;
+                break;
+
             case 0x2e: case 0x2f: /* v{,u}comish */
                 if ( !s->evex.pfx )
                     s->fp16 = true;
                 s->simd_size = simd_none;
                 break;
 
+            case 0x5b: /* vcvt{d,q}q2ph, vcvt{,t}ph2dq */
+                if ( s->evex.pfx && s->evex.pfx != vex_f2 )
+                    s->fp16 = true;
+                break;
+
             case 0x6e: /* vmovw r/m16, xmm */
                 d = (d & ~SrcMask) | SrcMem16;
                 /* fall through */
@@ -1504,6 +1517,17 @@ int x86emul_decode(struct x86_emulate_st
                 s->simd_size = simd_none;
                 break;
 
+            case 0x78: case 0x79: /* vcvt{,t}ph2u{d,q}q, vcvt{,t}sh2usi */
+                if ( s->evex.pfx != vex_f2 )
+                    s->fp16 = true;
+                break;
+
+            case 0x7a: /* vcvttph2qq, vcvtu{d,q}q2ph */
+            case 0x7b: /* vcvtph2qq, vcvtusi2sh */
+                if ( s->evex.pfx == vex_66 )
+                    s->fp16 = true;
+                break;
+
             case 0x7c: /* vcvttph2{,u}w */
             case 0x7d: /* vcvtph2{,u}w / vcvt{,u}w2ph */
                 d = DstReg | SrcMem | TwoOp;
@@ -1515,10 +1539,34 @@ int x86emul_decode(struct x86_emulate_st
 
             switch ( b )
             {
+            case 0x78:
+            case 0x79:
+                /* vcvt{,t}ph2u{d,q}q need special casing */
+                if ( s->evex.pfx <= vex_66 )
+                {
+                    if ( !s->evex.brs )
+                        disp8scale -= 1 + (s->evex.pfx == vex_66);
+                    break;
+                }
+                /* vcvt{,t}sh2usi needs special casing: fall through */
+            case 0x2c: case 0x2d: /* vcvt{,t}sh2si need special casing */
+                disp8scale = 1;
+                break;
+
             case 0x5a: /* vcvtph2pd needs special casing */
                 if ( !s->evex.pfx && !s->evex.brs )
                     disp8scale -= 2;
                 break;
+
+            case 0x5b: /* vcvt{,t}ph2dq need special casing */
+                if ( s->evex.pfx && !s->evex.brs )
+                    --disp8scale;
+                break;
+
+            case 0x7a: case 0x7b: /* vcvt{,t}ph2qq need special casing */
+                if ( s->evex.pfx == vex_66 && !s->evex.brs )
+                    disp8scale = s->evex.brs ? 1 : 2 + s->evex.lr;
+                break;
             }
 
             break;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3581,6 +3581,12 @@ x86_emulate(
         state->simd_size = simd_none;
         goto simd_0f_rm;
 
+#ifndef X86EMUL_NO_SIMD
+
+    case X86EMUL_OPC_EVEX_F3(5, 0x2a):      /* vcvtsi2sh r/m,xmm,xmm */
+    case X86EMUL_OPC_EVEX_F3(5, 0x7b):      /* vcvtusi2sh r/m,xmm,xmm */
+        host_and_vcpu_must_have(avx512_fp16);
+        /* fall through */
     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x7b): /* vcvtusi2s{s,d} r/m,xmm,xmm */
         generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
@@ -3659,7 +3665,9 @@ x86_emulate(
             opc[1] = 0x01;
 
             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
-                           vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt);
+                           vex.pfx & VEX_PREFIX_DOUBLE_MASK
+                           ? 8 : 2 << !state->fp16,
+                           ctxt);
             if ( rc != X86EMUL_OKAY )
                 goto done;
         }
@@ -3689,6 +3697,12 @@ x86_emulate(
         state->simd_size = simd_none;
         break;
 
+    case X86EMUL_OPC_EVEX_F3(5, 0x2c):      /* vcvttsh2si xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x2d):      /* vcvtsh2si xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x78):      /* vcvttsh2usi xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x79):      /* vcvtsh2usi xmm/mem,reg */
+        host_and_vcpu_must_have(avx512_fp16);
+        /* fall through */
     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
@@ -3760,8 +3774,6 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
-#ifndef X86EMUL_NO_SIMD
-
     case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
     case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
         host_and_vcpu_must_have(avx512_fp16);
@@ -7789,6 +7801,38 @@ x86_emulate(
                          2 * evex.w);
         goto avx512f_all_fp;
 
+    case X86EMUL_OPC_EVEX   (5, 0x5b): /* vcvtdq2ph [xyz]mm/mem,[xy]mm{k} */
+                                       /* vcvtqq2ph [xyz]mm/mem,xmm{k} */
+    case X86EMUL_OPC_EVEX_F2(5, 0x7a): /* vcvtudq2ph [xyz]mm/mem,[xy]mm{k} */
+                                       /* vcvtuqq2ph [xyz]mm/mem,xmm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 16 << evex.lr;
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_66(5, 0x5b): /* vcvtph2dq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(5, 0x5b): /* vcvttph2dq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX   (5, 0x78): /* vcvttph2udq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX   (5, 0x79): /* vcvtph2udq [xy]mm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 8 << evex.lr;
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x7b): /* vcvtph2qq xmm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 4 << (evex.w + evex.lr);
+        goto simd_zmm;
+
     case X86EMUL_OPC_EVEX   (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX   (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.