[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/emul: Corrections to cmpxchg{8, 16}b emulation (to fix 32bit PV guests)



c/s ff913f6 "x86/PV: restrict permitted instructions during memory write
emulation" added an x86_insn_is_mem_write() restriction to all PV instructions
which trap for emulation because of read-only mappings (pagetables, mmcfg and
msi-x intercepts).

Because of the way cmpxchg{8,16}b was decoded (being part of Grp9), it didn't
end up flagged as a DstMem instruction, and failed the x86_insn_is_mem_write()
check.  This causes the emulation to be (incorrectly) rejected, causing
particular problems for 32bit PV guests which need to use cmpxchg8b for atomic
pagetable updates.

Add an early operand adjustment for cmpxchg{8,16}b to flag it as DstMem, which
allows for the removal of final special case for lock_prefix handling of
DstNone instructions.  This does however require introducing an exclusion into
the DstMem handling, as cmpxchg16b would overflow &dst.val.  The emulation of
cmpxchg{8,16}b completes all actions, so avoid the repeated operand writeback.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>

Partially RFC: This is a bit of a rats nest.
---
 tools/tests/x86_emulator/test_x86_emulator.c | 70 ++++++++++++++++++++++++++++
 xen/arch/x86/x86_emulate/x86_emulate.c       | 19 +++++---
 2 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/tools/tests/x86_emulator/test_x86_emulator.c 
b/tools/tests/x86_emulator/test_x86_emulator.c
index 245d1c6..7c7553b 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -23,6 +23,11 @@ static const struct {
 #endif
 };
 
+struct test_local_state
+{
+    unsigned int fetches, reads, writes, cmpxchgs;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
@@ -41,6 +46,10 @@ static int read(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    struct test_local_state *state = ctxt->data;
+
+    state->reads++;
+
     if ( verbose )
         printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset, bytes);
 
@@ -96,6 +105,10 @@ static int fetch(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    struct test_local_state *state = ctxt->data;
+
+    state->fetches++;
+
     if ( verbose )
         printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset, bytes);
 
@@ -110,6 +123,10 @@ static int write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    struct test_local_state *state = ctxt->data;
+
+    state->writes++;
+
     if ( verbose )
         printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset, bytes);
 
@@ -127,6 +144,10 @@ static int cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    struct test_local_state *state = ctxt->data;
+
+    state->cmpxchgs++;
+
     if ( verbose )
         printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset, bytes);
 
@@ -182,6 +203,7 @@ static struct x86_emulate_ops emulops = {
 
 int main(int argc, char **argv)
 {
+    struct test_local_state state;
     struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char *instr;
@@ -196,6 +218,7 @@ int main(int argc, char **argv)
     setbuf(stdout, NULL);
 
     ctxt.regs = &regs;
+    ctxt.data = &state;
     ctxt.force_writeback = 0;
     ctxt.vendor    = X86_VENDOR_UNKNOWN;
     ctxt.addr_size = 8 * sizeof(void *);
@@ -461,6 +484,53 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+#ifdef __x86_64__
+    memset(&state, 0, sizeof(state));
+    printf("%-40s", "Testing lock cmpxchg16b (%rdi) [succeeding]...");
+    instr[0] = 0xf0; instr[1] = 0x48; instr[2] = 0x0f; instr[3] = 0xc7; 
instr[4] = 0x0f;
+    res[0]      = 0x9abcdef0;
+    res[1]      = 0x12345678;
+    res[2]      = 0x87654321;
+    res[3]      = 0x12345678;
+    regs.rax    = 0x123456789abcdef0ull;
+    regs.rdx    = 0x1234567887654321ull;
+    regs.rflags = 0x200;
+    regs.rcx    = 0xCCCCCCCCAAAAAAAAull;
+    regs.rbx    = 0x99999999FFFFFFFFull;
+    regs.rip    = (unsigned long)&instr[0];
+    regs.rdi    = (unsigned long)res;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (res[0] != 0xFFFFFFFF) ||
+         (res[1] != 0x99999999) ||
+         (res[2] != 0xAAAAAAAA) ||
+         (res[3] != 0xCCCCCCCC) ||
+         (state.reads != 1) ||
+         (state.writes != 0) ||
+         (state.cmpxchgs != 1) ||
+         ((regs.rflags&0x240) != 0x240) ||
+         (regs.rip != (unsigned long)&instr[5]) )
+        goto fail;
+    printf("okay\n");
+
+    memset(&state, 0, sizeof(state));
+    printf("%-40s", "Testing lock cmpxchg16b (%rdi) [misaligned]...");
+    instr[0] = 0xf0; instr[1] = 0x48; instr[2] = 0x0f; instr[3] = 0xc7; 
instr[4] = 0x0f;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.edi    = ((unsigned long)res) + 1;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_EXCEPTION) ||
+         (ctxt.event.vector != 0xd) ||
+         (ctxt.event.error_code != 0) ||
+         (state.reads != 0) ||
+         (state.writes != 0) ||
+         (state.cmpxchgs != 0) ||
+         (regs.eip != (unsigned long)&instr[0]) )
+        goto fail;
+    printf("okay\n");
+#endif
+
     printf("%-40s", "Testing movsxbd (%eax),%ecx...");
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eflags = 0x200;
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c 
b/xen/arch/x86/x86_emulate/x86_emulate.c
index 880d565..d6a5e1a 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1045,6 +1045,7 @@ static int read_ulong(
         const struct x86_emulate_ops *ops)
 {
     *val = 0;
+    ASSERT(bytes <= sizeof(*val));
     return ops->read(seg, offset, val, bytes, ctxt);
 }
 
@@ -2140,6 +2141,10 @@ x86_decode_twobyte(
     case 0xbd: bsr / lzcnt
          * They're being dealt with in the execution phase (if at all).
          */
+
+    case 0xc7: /* Grp9 */
+        if ( modrm_mod != 3 && (modrm_reg & 7) == 1 ) /* cmpxchg{8,16}b. */
+            state->desc |= DstMem;
     }
 
  done:
@@ -2775,12 +2780,7 @@ x86_emulate(
     switch ( d & DstMask )
     {
     case DstNone: /* case DstImplicit: */
-        /*
-         * The only implicit-operands instructions allowed a LOCK prefix are
-         * CMPXCHG{8,16}B (MOV CRn is being handled elsewhere).
-         */
-        generate_exception_if(lock_prefix && (ext != ext_0f || b != 0xc7),
-                              EXC_UD);
+        generate_exception_if(lock_prefix, EXC_UD);
         dst.type = OP_NONE;
         break;
 
@@ -2852,6 +2852,11 @@ x86_emulate(
         else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
         {
             fail_if(lock_prefix ? !ops->cmpxchg : !ops->write);
+
+            /* cmpxchg{8,16}b handles its own operand read. */
+            if ( ext == ext_0f && b == 0xc7 )
+                break;
+
             if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
                                   &dst.val, dst.bytes, ctxt, ops)) )
                 goto done;
@@ -5912,7 +5917,7 @@ x86_emulate(
                 goto done;
             _regs._eflags |= EFLG_ZF;
         }
-        break;
+        goto complete_insn;
     }
 
     case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.