[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 20/25] x86emul: correctly handle CMPXCHG* comparison failures



If the ->cmpxchg() hook finds a mismatch, we should deal with this the
same way as when the "manual" comparison reports a mismatch.

This involves reverting bfce0e62c3 ("x86/emul: Drop
X86EMUL_CMPXCHG_FAILED"), albeit with X86EMUL_CMPXCHG_FAILED now
becoming a value distinct from X86EMUL_RETRY.

In order to not leave mixed code also fully switch affected functions
from paddr_t to intpte_t.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.
---
The code could be further simplified if we could rely on all
->cmpxchg() hooks always using CMPXCHG, but for now we need to cope
with them using plain writes (and hence accept the double reads if
CMPXCHG is actually being used).
Note that the patch doesn't address the incorrectness of there not
being a memory write even in the comparison-failed case.

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -302,8 +302,12 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-               v, addr, old, new, bytes, sh_ctxt);
+    rc = v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+             v, addr, &old, new, bytes, sh_ctxt);
+
+    memcpy(p_old, &old, bytes);
+
+    return rc;
 }
 
 static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4741,11 +4741,11 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
 static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                        unsigned long old, unsigned long new,
-                        unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
+                       unsigned long *p_old, unsigned long new,
+                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
-    unsigned long prev;
+    unsigned long prev, old = *p_old;
     int rv = X86EMUL_OKAY;
 
     /* Unaligned writes are only acceptable on HVM */
@@ -4769,7 +4769,10 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     }
 
     if ( prev != old )
-        rv = X86EMUL_RETRY;
+    {
+        *p_old = prev;
+        rv = X86EMUL_CMPXCHG_FAILED;
+    }
 
     SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
                   " wanted %#lx now %#lx bytes %u\n",
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -65,14 +65,16 @@ static int ptwr_emulated_read(enum x86_s
     return X86EMUL_OKAY;
 }
 
-static int ptwr_emulated_update(unsigned long addr, paddr_t old, paddr_t val,
-                                unsigned int bytes, unsigned int do_cmpxchg,
+static int ptwr_emulated_update(unsigned long addr, intpte_t *p_old,
+                                intpte_t val, unsigned int bytes,
                                 struct x86_emulate_ctxt *ctxt)
 {
     unsigned long mfn;
     unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+    intpte_t old = p_old ? *p_old : 0;
+    unsigned int offset = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     struct ptwr_emulate_ctxt *ptwr_ctxt = ctxt->data;
@@ -88,28 +90,30 @@ static int ptwr_emulated_update(unsigned
     }
 
     /* Turn a sub-word access into a full-word access. */
-    if ( bytes != sizeof(paddr_t) )
+    if ( bytes != sizeof(val) )
     {
-        paddr_t      full;
-        unsigned int rc, offset = addr & (sizeof(paddr_t) - 1);
+        intpte_t full;
+        unsigned int rc;
+
+        offset = addr & (sizeof(full) - 1);
 
         /* Align address; read full word. */
-        addr &= ~(sizeof(paddr_t) - 1);
-        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
+        addr &= ~(sizeof(full) - 1);
+        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(full))) != 0 )
         {
             x86_emul_pagefault(0, /* Read fault. */
-                               addr + sizeof(paddr_t) - rc,
+                               addr + sizeof(full) - rc,
                                ctxt);
             return X86EMUL_EXCEPTION;
         }
         /* Mask out bits provided by caller. */
-        full &= ~((((paddr_t)1 << (bytes * 8)) - 1) << (offset * 8));
+        full &= ~((((intpte_t)1 << (bytes * 8)) - 1) << (offset * 8));
         /* Shift the caller value and OR in the missing bits. */
-        val  &= (((paddr_t)1 << (bytes * 8)) - 1);
+        val  &= (((intpte_t)1 << (bytes * 8)) - 1);
         val <<= (offset) * 8;
         val  |= full;
         /* Also fill in missing parts of the cmpxchg old value. */
-        old  &= (((paddr_t)1 << (bytes * 8)) - 1);
+        old  &= (((intpte_t)1 << (bytes * 8)) - 1);
         old <<= (offset) * 8;
         old  |= full;
     }
@@ -131,7 +135,7 @@ static int ptwr_emulated_update(unsigned
     {
     default:
         if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
-             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+             !p_old && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
@@ -162,21 +166,26 @@ static int ptwr_emulated_update(unsigned
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(_mfn(mfn));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
-    if ( do_cmpxchg )
+    if ( p_old )
     {
-        bool okay;
-        intpte_t t = old;
 
         ol1e = l1e_from_intpte(old);
-        okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
-                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
-        okay = (okay && t == old);
+        if ( !paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
+                                         &old, l1e_get_intpte(nl1e), 
_mfn(mfn)) )
+            ret = X86EMUL_UNHANDLEABLE;
+        else if ( l1e_get_intpte(ol1e) == old )
+            ret = X86EMUL_OKAY;
+        else
+        {
+            *p_old = old >> (offset * 8);
+            ret = X86EMUL_CMPXCHG_FAILED;
+        }
 
-        if ( !okay )
+        if ( ret != X86EMUL_OKAY )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
-            return X86EMUL_RETRY;
+            return ret;
         }
     }
     else
@@ -200,9 +209,9 @@ static int ptwr_emulated_write(enum x86_
                                void *p_data, unsigned int bytes,
                                struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t val = 0;
+    intpte_t val = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes )
+    if ( (bytes > sizeof(val)) || (bytes & (bytes - 1)) || !bytes )
     {
         gdprintk(XENLOG_WARNING, "bad write size (addr=%lx, bytes=%u)\n",
                  offset, bytes);
@@ -211,16 +220,17 @@ static int ptwr_emulated_write(enum x86_
 
     memcpy(&val, p_data, bytes);
 
-    return ptwr_emulated_update(offset, 0, val, bytes, 0, ctxt);
+    return ptwr_emulated_update(offset, NULL, val, bytes, ctxt);
 }
 
 static int ptwr_emulated_cmpxchg(enum x86_segment seg, unsigned long offset,
                                  void *p_old, void *p_new, unsigned int bytes,
                                  bool lock, struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t old = 0, new = 0;
+    intpte_t old = 0, new = 0;
+    int rc;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) )
+    if ( (bytes > sizeof(new)) || (bytes & (bytes - 1)) )
     {
         gdprintk(XENLOG_WARNING, "bad cmpxchg size (addr=%lx, bytes=%u)\n",
                  offset, bytes);
@@ -230,7 +240,11 @@ static int ptwr_emulated_cmpxchg(enum x8
     memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return ptwr_emulated_update(offset, old, new, bytes, 1, ctxt);
+    rc = ptwr_emulated_update(offset, &old, new, bytes, ctxt);
+
+    memcpy(p_old, &old, bytes);
+
+    return rc;
 }
 
 static const struct x86_emulate_ops ptwr_emulate_ops = {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1985,6 +1985,9 @@ protmode_load_seg(
 
         default:
             return rc;
+
+        case X86EMUL_CMPXCHG_FAILED:
+            return X86EMUL_RETRY;
         }
 
         /* Force the Accessed flag in our local copy. */
@@ -6644,21 +6647,45 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
-        /* Save real source value, then compare EAX against destination. */
-        src.orig_val = src.val;
-        src.val = _regs.r(ax);
-        /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
-        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
-        if ( _regs.eflags & X86_EFLAGS_ZF )
+        fail_if(!ops->cmpxchg);
+        _regs.eflags &= ~EFLAGS_MASK;
+        if ( !((dst.val ^ _regs.r(ax)) &
+               (~0UL >> (8 * (sizeof(long) - dst.bytes)))) )
         {
             /* Success: write back to memory. */
-            dst.val = src.orig_val;
+            if ( dst.type == OP_MEM )
+            {
+                dst.val = _regs.r(ax);
+                switch ( rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
+                                           &src.val, dst.bytes, lock_prefix,
+                                           ctxt) )
+                {
+                case X86EMUL_OKAY:
+                    dst.type = OP_NONE;
+                    _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
+                    break;
+                case X86EMUL_CMPXCHG_FAILED:
+                    rc = X86EMUL_OKAY;
+                    break;
+                default:
+                    goto done;
+                }
+            }
+            else
+            {
+                dst.val = src.val;
+                _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
+            }
         }
-        else
+        if ( !(_regs.eflags & X86_EFLAGS_ZF) )
         {
             /* Failure: write the value we saw to EAX. */
             dst.type = OP_REG;
             dst.reg  = (unsigned long *)&_regs.r(ax);
+            /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
+            src.val = _regs.r(ax);
+            emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
+            ASSERT(!(_regs.eflags & X86_EFLAGS_ZF));
         }
         break;
 
@@ -6959,6 +6986,7 @@ x86_emulate(
 
         if ( memcmp(old, aux, op_bytes) )
         {
+        cmpxchgNb_failed:
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
@@ -6968,7 +6996,7 @@ x86_emulate(
         {
             /*
              * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF.
+             * and set ZF if successful.
              */
             if ( !(rex_prefix & REX_W) )
             {
@@ -6981,11 +7009,20 @@ x86_emulate(
                 aux->u64[1] = _regs.r(cx);
             }
 
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, lock_prefix,
-                                    ctxt)) != X86EMUL_OKAY )
+            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                       op_bytes, lock_prefix, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                _regs.eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
                 goto done;
-            _regs.eflags |= X86_EFLAGS_ZF;
+            }
         }
         break;
     }
@@ -8436,6 +8473,8 @@ x86_emulate(
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
                 &dst.val, dst.bytes, true, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+                rc = X86EMUL_RETRY;
         }
         else
         {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -150,6 +150,8 @@ struct x86_emul_fpu_aux {
   * strictly expected for now.
  */
 #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
+ /* (cmpxchg accessor): CMPXCHG failed. */
+#define X86EMUL_CMPXCHG_FAILED 7
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
@@ -239,6 +241,8 @@ struct x86_emulate_ops
     /*
      * cmpxchg: Emulate a CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *          [OUT] Pointer to value found at @addr (may always be
+     *                updated, meaningful for X86EMUL_CMPXCHG_FAILED only).
      *  @p_new: [IN ] Pointer to value to write to @addr.
      *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      *  @lock:  [IN ] atomic (LOCKed) operation
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -86,7 +86,7 @@ struct shadow_paging_mode {
                                             void *src, u32 bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
     int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long old, 
+                                            unsigned long *old,
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.