[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/3] x86emul: support {,V}LDDQU



Also take the opportunity and adjust the vmovdqu test case the new one
here has been cloned from: To zero a ymm register we don't need to go
through hoops, as 128-bit AVX insns zero the upper portion of the
destination register, and in the disabled AVX2 code there was a wrong
YMM register used.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -968,12 +968,7 @@ int main(int argc, char **argv)
     {
         decl_insn(vmovdqu_from_mem);
 
-#if 0 /* Don't use AVX2 instructions for now */
-        asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
-#else
-        asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
-                       "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
-#endif
+        asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
                        put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4")
                        :: "d" (NULL) );
 
@@ -987,7 +982,7 @@ int main(int argc, char **argv)
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
               "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
-              "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
 #else
         asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
               "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
@@ -1404,6 +1399,67 @@ int main(int argc, char **argv)
         printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing lddqu 4(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse3 )
+    {
+        decl_insn(lddqu);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(lddqu, "lddqu 4(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(lddqu);
+        memset(res, 0x55, 64);
+        memset(res + 1, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(lddqu) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vlddqu (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vlddqu);
+
+        asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
+                       put_insn(vlddqu, "vlddqu (%0), %%ymm4")
+                       :: "c" (NULL) );
+
+        set_insn(vlddqu);
+        memset(res + 1, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 1);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vlddqu) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -71,6 +71,12 @@ static int cpuid(
     (edx & (1U << 26)) != 0; \
 })
 
+#define cpu_has_sse3 ({ \
+    unsigned int eax = 1, ecx = 0; \
+    emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+    (ecx & (1U << 0)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     unsigned int eax = 1, ecx = 0; \
     emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4993,6 +4993,9 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f, 0xe7):     /* movntdq xmm,m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq xmm,m128 */
                                          /* vmovntdq ymm,m256 */
+    case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu xmm,m128 */
+    case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu xmm,m128 */
+                                         /* vlddqu ymm,m256 */
         fail_if(ea.type != OP_MEM);
         /* fall through */
     case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
@@ -5040,6 +5043,11 @@ x86_emulate(
         {
             switch ( vex.pfx )
             {
+            case vex_f2:
+                /* Converting lddqu to movdqa (see also below). */
+                vcpu_must_have(sse3);
+                buf[3] = 0x6f;
+                /* fall through */
             case vex_66:
             case vex_f3:
                 host_and_vcpu_must_have(sse2);
@@ -5056,8 +5064,6 @@ x86_emulate(
                 get_fpu(X86EMUL_FPU_mmx, &fic);
                 ea.bytes = 8;
                 break;
-            default:
-                goto cannot_emulate;
             }
         }
         else
@@ -5079,6 +5085,7 @@ x86_emulate(
                 ea.bytes = 8;
                 /* fall through */
         case 0x6f:
+        case 0xf0:
                 load = true;
             }
             break;
@@ -5094,7 +5101,7 @@ x86_emulate(
         {
             uint32_t mxcsr = 0;
 
-            if ( ea.bytes < 16 || vex.pfx == vex_f3 )
+            if ( ea.bytes < 16 || vex.pfx >= vex_f3 )
                 mxcsr = MXCSR_MM;
             else if ( vcpu_has_misalignsse() )
                 asm ( "stmxcsr %0" : "=m" (mxcsr) );


Attachment: x86emul-LDDQU.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.