[Xen-devel] [PATCH] hvm emul: fix cmpxchg emulation to use an atomic operation



# HG changeset patch
# User Patrick Colp <Patrick.Colp@xxxxxxxxxx>
# Date 1249555177 -3600
# Node ID 684c8fc69d658d058246eb9edfc8eba187ae6f2c
# Parent  68e8b8379244e293c55875e7dc3692fc81d3d212
hvm emul: fix cmpxchg emulation to use an atomic operation.

Currently, HVM cmpxchg emulation is performed as a normal emulated write,
which is not atomic. This patch changes it to use a real cmpxchg operation
instead, so the compare and the conditional write happen atomically.

Signed-off-by: Patrick Colp <Patrick.Colp@xxxxxxxxxx>
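
For context, the semantics that matter here are those of compare-and-exchange:
the compare and the conditional store must be one indivisible step with respect
to other vCPUs. The sketch below is not Xen code (GCC's __atomic builtin stands
in for Xen's cmpxchg() macro); it only illustrates the difference between the
old behaviour, where the guest's cmpxchg was emulated as a plain write, and a
real atomic compare-exchange.

    #include <stdint.h>
    #include <stdbool.h>

    /* Old behaviour, roughly: the compare and the store are separate steps,
     * so another vCPU can update *dst in between and have its write lost. */
    static bool cmpxchg_as_plain_write(uint64_t *dst, uint64_t old, uint64_t new)
    {
        if ( *dst != old )
            return false;
        *dst = new;            /* plain store, not atomic with the compare */
        return true;
    }

    /* New behaviour: the compare and the conditional store execute as one
     * locked operation (stand-in for what Xen's cmpxchg() expands to). */
    static bool cmpxchg_atomic(uint64_t *dst, uint64_t old, uint64_t new)
    {
        return __atomic_compare_exchange_n(dst, &old, new, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

With the patch, hvm_x86_emulate_cmpxchg() behaves like the second variant and
returns X86EMUL_CMPXCHG_FAILED when the comparison fails, instead of
unconditionally writing the new value as hvmemul_write() did.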

diff -r 68e8b8379244 -r 684c8fc69d65 xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c        Sun Aug 02 13:43:15 2009 +0100
+++ b/xen/arch/x86/hvm/emulate.c        Thu Aug 06 11:39:37 2009 +0100
@@ -520,6 +520,183 @@
     return X86EMUL_OKAY;
 }
 
+/* Translate a VA to an MFN, injecting a page-fault if we fail */
+#define BAD_GVA_TO_GFN (~0UL)
+#define BAD_GFN_TO_MFN (~1UL)
+#define READONLY_GFN   (~2UL)
+static mfn_t emulate_gva_to_mfn(
+    struct vcpu *v,
+    unsigned long vaddr)
+{
+    unsigned long gfn;
+    mfn_t mfn;
+    p2m_type_t p2mt;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+    /* Translate the VA to a GFN */
+    gfn = paging_gva_to_gfn(v, vaddr, &pfec);
+    if ( gfn == INVALID_GFN )
+    {
+        hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
+        return _mfn(BAD_GVA_TO_GFN);
+    }
+
+    /* Translate the GFN to an MFN */
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+        
+    if ( p2mt == p2m_ram_ro )
+        return _mfn(READONLY_GFN);
+    if ( !p2m_is_ram(p2mt) )
+        return _mfn(BAD_GFN_TO_MFN);
+
+    ASSERT(mfn_valid(mfn_x(mfn)));
+
+    return mfn;
+}
+
+/* Check that the user is allowed to perform this write. 
+ * Returns a mapped pointer to write to, or NULL for error. */
+#define MAPPING_UNHANDLEABLE ((void *)(unsigned long)X86EMUL_UNHANDLEABLE)
+#define MAPPING_EXCEPTION    ((void *)(unsigned long)X86EMUL_EXCEPTION)
+#define MAPPING_SILENT_FAIL  ((void *)(unsigned long)X86EMUL_OKAY)
+#define emulate_map_dest_failed(rc) ((unsigned long)(rc) <= 3)
+static void *emulate_map_dest(
+    struct vcpu *v,
+    unsigned long vaddr,
+    u32 bytes)
+{
+    unsigned long offset;
+    void *map = NULL;
+    mfn_t mfn1;
+    mfn_t mfn2;
+
+    mfn1 = emulate_gva_to_mfn(v, vaddr);
+    if ( !mfn_valid(mfn_x(mfn1)) )
+        return ((mfn_x(mfn1) == BAD_GVA_TO_GFN) ?
+                MAPPING_EXCEPTION :
+                (mfn_x(mfn1) == READONLY_GFN) ?
+                MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
+
+    if ( likely(((vaddr + bytes - 1) & PAGE_MASK) == (vaddr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page */
+        mfn2 = _mfn(INVALID_MFN);
+        map = map_domain_page(mfn_x(mfn1)) + (vaddr & ~PAGE_MASK);
+    }
+    else 
+    {
+        /* Cross-page emulated writes are only supported for HVM guests; 
+         * PV guests ought to know better */
+        mfn2 = emulate_gva_to_mfn(v, (vaddr + bytes - 1) & PAGE_MASK);
+        if ( !mfn_valid(mfn_x(mfn2)) ) 
+            return ((mfn_x(mfn2) == BAD_GVA_TO_GFN) ?
+                    MAPPING_EXCEPTION :
+                    (mfn_x(mfn2) == READONLY_GFN) ?
+                    MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
+
+        /* Hack: we map the pages into the vcpu's LDT space, since we
+         * know that we're not going to need the LDT for HVM guests, 
+         * and only HVM guests are allowed unaligned writes. */
+        map = (void *)LDT_VIRT_START(v);
+        offset = l1_linear_offset((unsigned long) map);
+        l1e_write(&__linear_l1_table[offset],
+                  l1e_from_pfn(mfn_x(mfn1), __PAGE_HYPERVISOR));
+        l1e_write(&__linear_l1_table[offset + 1],
+                  l1e_from_pfn(mfn_x(mfn2), __PAGE_HYPERVISOR));
+        flush_tlb_local();
+        map += (vaddr & ~PAGE_MASK);
+    }
+
+    return map;
+}
+
+/* Tidy up after the emulated write: undo the mapping */
+static void emulate_unmap_dest(
+    struct vcpu *v,
+    void *addr,
+    u32 bytes,
+    unsigned long vaddr)
+{
+    if ( likely(((vaddr + bytes - 1) & PAGE_MASK) == (vaddr & PAGE_MASK)) )
+        unmap_domain_page(addr);
+    else
+    {
+        unsigned long offset;
+        /* Undo the hacky two-frame contiguous map. */
+        ASSERT(((unsigned long) addr & PAGE_MASK) == LDT_VIRT_START(v));
+        offset = l1_linear_offset((unsigned long) addr);
+        l1e_write(&__linear_l1_table[offset], l1e_empty());
+        l1e_write(&__linear_l1_table[offset + 1], l1e_empty());
+        flush_tlb_all();
+    }
+}
+
+static int hvm_x86_emulate_cmpxchg(
+    struct vcpu *v,
+    unsigned long vaddr,
+    unsigned long old,
+    unsigned long new,
+    unsigned int bytes)
+{
+    void *addr;
+    unsigned long prev;
+    int rc = X86EMUL_OKAY;
+
+    addr = emulate_map_dest(v, vaddr, bytes);
+    if ( emulate_map_dest_failed(addr) )
+        return (long)addr;
+
+    switch ( bytes )
+    {
+    case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
+    case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
+    case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
+    case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
+    default:
+        gdprintk(XENLOG_ERR, "%s: cmpxchg of size %i is not supported\n",
+                 __func__, bytes);
+        prev = ~old;
+    }
+
+    if ( prev != old ) 
+        rc = X86EMUL_CMPXCHG_FAILED;
+
+    emulate_unmap_dest(v, addr, bytes, vaddr);
+
+    return rc;
+}
+
+#ifdef __i386__
+static int hvm_x86_emulate_cmpxchg8b(
+    struct vcpu *v,
+    unsigned long vaddr,
+    unsigned long old_lo,
+    unsigned long old_hi,
+    unsigned long new_lo,
+    unsigned long new_hi)
+{
+    void *addr;
+    u64 old, new, prev;
+    int rc = X86EMUL_OKAY;
+
+    addr = emulate_map_dest(v, vaddr, 8);
+    if ( emulate_map_dest_failed(addr) )
+        return (long)addr;
+
+    old = (((u64) old_hi) << 32) | (u64) old_lo;
+    new = (((u64) new_hi) << 32) | (u64) new_lo;
+
+    prev = cmpxchg(((u64 *)addr), old, new);
+
+    if ( prev != old )
+        rc = X86EMUL_CMPXCHG_FAILED;
+
+    emulate_unmap_dest(v, addr, 8, vaddr);
+
+    return rc;
+}
+#endif
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -528,8 +705,32 @@
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *v = current;
+    unsigned long addr, old[2], new[2];
+    unsigned long reps = 1;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(seg, offset, bytes, &reps, hvm_access_write,
+                                   hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    old[0] = new[0] = 0;
+    memcpy(old, p_old, bytes);
+    memcpy(new, p_new, bytes);
+
+    if ( bytes <= sizeof(long) )
+        return hvm_x86_emulate_cmpxchg(v, addr, old[0], new[0], bytes);
+
+#ifdef __i386__
+    if ( bytes == 8 )
+        return hvm_x86_emulate_cmpxchg8b(v, addr, old[0], old[1],
+                                         new[0], new[1]);
+#endif
+
+    return X86EMUL_UNHANDLEABLE;
 }
 
 static int hvmemul_rep_ins(

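A short usage note on why this matters (the comment removed above talks about a
guest "really relying on r-m-w atomicity"): a typical guest-side pattern is a
compare-exchange retry loop like the generic sketch below, which is only
correct if the CMPXCHG it compiles to is executed, or emulated, atomically.
This is an illustration, not code from the patch; with the old plain-write
emulation, two vCPUs could both appear to succeed and one update would be lost.

    #include <stdint.h>

    /* Generic compare-exchange retry loop: atomically increment a shared
     * counter.  On failure, __atomic_compare_exchange_n refreshes 'seen'
     * with the value another CPU stored, and the loop retries. */
    static void atomic_increment(uint64_t *counter)
    {
        uint64_t seen = __atomic_load_n(counter, __ATOMIC_SEQ_CST);

        while ( !__atomic_compare_exchange_n(counter, &seen, seen + 1, false,
                                             __ATOMIC_SEQ_CST,
                                             __ATOMIC_SEQ_CST) )
            ;
    }
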