
[Xen-devel] [PATCH] x86: AVX instruction emulation fixes



- we used the C4/C5 byte (i.e. the first prefix byte) instead of the
  apparent ModR/M byte that follows it as the second VEX prefix byte
  (see the field-layout sketch after this list)
- early decoding normalized vex.reg, thus corrupting it for the main
  consumer (copy_REX_VEX()), resulting in #UD on the two-operand
  instructions we emulate
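
For reference, here is a minimal standalone sketch (not part of the
patch; the structure and helper names are illustrative, only the bit
layout is architectural) of how the fields of the 2-/3-byte VEX prefix
map onto the byte(s) following C4/C5, i.e. onto the byte the legacy
decode path had already fetched as the apparent ModR/M byte of LES/LDS.
Note in particular that vvvv is encoded inverted, so a raw value of 0xf
means "no register", which is what the consumers in the patch below now
test for:

    /* Illustrative only: VEX prefix field layout.
     * 2-byte form: C5 b1        3-byte form: C4 b1 b2
     */
    #include <stdbool.h>
    #include <stdint.h>

    struct vex_fields {
        uint8_t pp;    /* implied SIMD prefix: 0 = none, 1 = 66, 2 = F3, 3 = F2 */
        uint8_t L;     /* vector length: 0 = 128 bits, 1 = 256 bits */
        uint8_t vvvv;  /* second source register, kept here in its raw
                          (inverted) encoding, so 0xf means "no register" */
        uint8_t R, X, B, W;
        uint8_t map;   /* opcode map: 1 = 0F, 2 = 0F38, 3 = 0F3A */
    };

    /* Decode the byte(s) following C4/C5.  For the 2-byte (C5) form only
     * b1 exists; X and B are implicitly clear (in this un-inverted
     * representation) and the opcode map is implicitly 0F. */
    static struct vex_fields decode_vex(bool three_byte, uint8_t b1, uint8_t b2)
    {
        struct vex_fields v = { 0 };

        if ( three_byte )            /* C4 b1 b2 */
        {
            v.R    = !(b1 & 0x80);   /* R, X and B are stored inverted */
            v.X    = !(b1 & 0x40);
            v.B    = !(b1 & 0x20);
            v.map  = b1 & 0x1f;
            v.W    = !!(b2 & 0x80);
            v.vvvv = (b2 >> 3) & 0xf;
            v.L    = (b2 >> 2) & 1;
            v.pp   = b2 & 3;
        }
        else                         /* C5 b1 */
        {
            v.R    = !(b1 & 0x80);
            v.map  = 1;              /* implied 0F map */
            v.vvvv = (b1 >> 3) & 0xf;
            v.L    = (b1 >> 2) & 1;
            v.pp   = b1 & 3;
        }
        return v;
    }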

Also add corresponding test cases to the testing utility, plus:
- fix get_fpu() (the fall-through order was inverted)
- add cpu_has_avx2, even though it's currently unused (in the new test
  cases I decided to refrain from using AVX2 instructions, so that I can
  actually run all of the tests on the hardware I have); see the
  standalone detection sketch below
- slightly tweak cpu_has_avx to more consistently express the outputs
  we don't care about (sinking them all into the same variable)
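
For reference, a standalone sketch (independent of the test harness and
its cpuid()/xgetbv() helpers; it assumes GCC/clang's <cpuid.h> macros,
so it is not the patch's own code) of the feature checks the
cpu_has_avx / cpu_has_avx2 macros perform:

    /* Bit positions are architectural:
     *   CPUID.1:ECX[27] = OSXSAVE, CPUID.1:ECX[28] = AVX,
     *   XCR0[2:1] = YMM/XMM state, CPUID.7.0:EBX[5] = AVX2.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <cpuid.h>

    static uint64_t xgetbv0(void)
    {
        uint32_t lo, hi;

        /* xgetbv with ECX = 0 reads XCR0. */
        asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (0) );
        return ((uint64_t)hi << 32) | lo;
    }

    static bool os_avx_usable(void)
    {
        unsigned int eax, ebx, ecx, edx;

        __cpuid(1, eax, ebx, ecx, edx);
        /* Only execute xgetbv if OSXSAVE is set; then require both XMM
         * and YMM state to be enabled in XCR0. */
        return (ecx & (1u << 27)) && ((xgetbv0() & 6) == 6);
    }

    static bool has_avx(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !os_avx_usable() )
            return false;
        __cpuid(1, eax, ebx, ecx, edx);
        return ecx & (1u << 28);
    }

    static bool has_avx2(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !os_avx_usable() )
            return false;
        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        return ebx & (1u << 5);
    }

The OSXSAVE/XCR0 check matters because AVX instructions raise #UD unless
the OS has enabled YMM state, regardless of the CPUID feature bits.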

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x
 }
 
 #define cpu_has_avx ({ \
-    unsigned int eax = 1, ecx = 0, edx; \
-    cpuid(&eax, &edx, &ecx, &edx, NULL); \
+    unsigned int eax = 1, ecx = 0; \
+    cpuid(&eax, &eax, &ecx, &eax, NULL); \
     if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
         ecx = 0; \
     (ecx & (1U << 28)) != 0; \
 })
 
+#define cpu_has_avx2 ({ \
+    unsigned int eax = 1, ebx, ecx = 0; \
+    cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        ebx = 0; \
+    else { \
+        eax = 7, ecx = 0; \
+        cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    } \
+    (ebx & (1U << 5)) != 0; \
+})
+
 int get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
@@ -111,14 +123,14 @@ int get_fpu(
     {
     case X86EMUL_FPU_fpu:
         break;
-    case X86EMUL_FPU_ymm:
-        if ( cpu_has_avx )
+    case X86EMUL_FPU_mmx:
+        if ( cpu_has_mmx )
             break;
     case X86EMUL_FPU_xmm:
         if ( cpu_has_sse )
             break;
-    case X86EMUL_FPU_mmx:
-        if ( cpu_has_mmx )
+    case X86EMUL_FPU_ymm:
+        if ( cpu_has_avx )
             break;
     default:
         return X86EMUL_UNHANDLEABLE;
@@ -629,6 +641,73 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_to_mem[];
+
+        asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
+                       ".popsection" :: "c" (NULL) );
+
+        memcpy(instr, vmovdqu_to_mem, 15);
+        memset(res, 0x55, 128);
+        memset(res + 16, 0xff, 16);
+        memset(res + 20, 0x00, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_from_mem[];
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
+#else
+        asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
+                       "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
+#endif
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovdqu_from_mem, 15);
+        memset(res + 4, 0xff, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( rc != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movsd %xmm5,(%ecx)...");
     memset(res, 0x77, 64);
     memset(res + 10, 0x66, 8);
@@ -683,6 +762,59 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovsd %xmm5,(%ecx)...");
+    memset(res, 0x88, 64);
+    memset(res + 10, 0x77, 8);
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovsd_to_mem[];
+
+        asm volatile ( "vbroadcastsd %0, %%ymm5\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
+                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+
+        memcpy(instr, vmovsd_to_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)(res + 2);
+        regs.edx    = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+    {
+        printf("skipped\n");
+        memset(res + 2, 0x77, 8);
+    }
+
+    printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovaps_from_mem[];
+
+        asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovaps_from_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+        asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
+              "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
+        if ( rc != 0xff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     for ( j = 1; j <= 2; j++ )
     {
 #if defined(__i386__)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1454,10 +1454,10 @@ x86_emulate(
                 /* VEX */
                 generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
 
-                vex.raw[0] = b;
+                vex.raw[0] = modrm;
                 if ( b & 1 )
                 {
-                    vex.raw[1] = b;
+                    vex.raw[1] = modrm;
                     vex.opcx = vex_0f;
                     vex.x = 1;
                     vex.b = 1;
@@ -1479,10 +1479,7 @@ x86_emulate(
                         }
                     }
                 }
-                vex.reg ^= 0xf;
-                if ( !mode_64bit() )
-                    vex.reg &= 0x7;
-                else if ( !vex.r )
+                if ( mode_64bit() && !vex.r )
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
@@ -3899,8 +3896,9 @@ x86_emulate(
         else
         {
             fail_if((vex.opcx != vex_0f) ||
-                    (vex.reg && ((ea.type == OP_MEM) ||
-                                 !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
+                    ((vex.reg != 0xf) &&
+                     ((ea.type == OP_MEM) ||
+                      !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
@@ -4168,7 +4166,7 @@ x86_emulate(
         }
         else
         {
-            fail_if((vex.opcx != vex_0f) || vex.reg ||
+            fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);


Attachment: x86-emul-test-AVX.patch
Description: Text document
