
[Xen-devel] [RFC PATCH 4/6] vm_event: Use slotted channels for sync requests.



In high-throughput introspection scenarios where many monitor
vm_events are generated, the ring buffer can fill up before the monitor
application gets a chance to handle all the requests, blocking the other
vcpus, which then have to wait for a slot to become available.

This patch adds support for a different mechanism to handle synchronous
vm_event requests and responses. Since each synchronous request pauses
the vcpu until the corresponding response is handled, the request can be
stored in a slotted memory buffer (one slot per vcpu) shared between the
hypervisor and the controlling domain. Asynchronous vm_event requests are
still sent to the controlling domain using a ring buffer, but without
blocking the vcpu, as no response is required.
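
For illustration, here is a minimal monitor-side sketch of servicing one
synchronous slot. It assumes the vm_event_slot layout and slot states
introduced by this patch; handle_request() is a hypothetical policy
callback and notify_port() a hypothetical wrapper around the per-vcpu
event channel:

    /* Sketch only: consume a pending sync request and post the response. */
    static void service_sync_slot(struct vm_event_slot *slot, uint32_t port)
    {
        vm_event_response_t rsp = { .version = VM_EVENT_INTERFACE_VERSION };

        if ( slot->state != VM_EVENT_SLOT_STATE_SUBMIT )
            return;                       /* nothing pending for this vcpu */

        /* The request stays stable while the vcpu is paused. */
        rsp.vcpu_id = slot->u.req.vcpu_id;
        rsp.reason  = slot->u.req.reason;
        rsp.flags   = slot->u.req.flags & VM_EVENT_FLAG_VCPU_PAUSED;
        handle_request(&slot->u.req, &rsp);          /* hypothetical */

        slot->u.rsp = rsp;
        slot->state = VM_EVENT_SLOT_STATE_FINISH;
        notify_port(port);        /* hypothetical: kick Xen to read the rsp */
    }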

The memory for the asynchronous ring and the synchronous channels is
allocated from the domheap and mapped to the controlling domain using the
foreignmemory_map_resource interface. Unlike the current implementation,
the allocated pages are not part of the target DomU, so they will not have
to be reclaimed when vm_event is disabled for the domain.
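
From the monitor application's side, a possible setup sequence using the
new libxc helper could look like the sketch below (error handling trimmed;
MAX_VCPUS and nr_vcpus are application-defined, and binding the returned
ports is assumed to follow the existing xen-access pattern):

    void *ring, *sync;
    uint32_t ring_port, sync_ports[MAX_VCPUS];
    xenforeignmemory_resource_handle *fres;

    /* Map the async ring (2 frames here) plus the per-vcpu sync slots. */
    fres = xc_monitor_enable_ex(xch, domain_id,
                                &ring, 2, &ring_port,
                                &sync, sync_ports, nr_vcpus);
    if ( !fres )
        err(1, "xc_monitor_enable_ex");

    /*
     * Bind ring_port and each entry of sync_ports with
     * xenevtchn_bind_interdomain(), then poll the event channel fd and
     * dispatch: ring notifications to the async ring consumer, per-vcpu
     * notifications to the matching vm_event_slot.
     */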

Signed-off-by: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>
---
 tools/libxc/include/xenctrl.h |  11 +
 tools/libxc/xc_monitor.c      |  36 +++
 tools/libxc/xc_private.h      |  14 ++
 tools/libxc/xc_vm_event.c     |  74 +++++-
 xen/arch/x86/mm.c             |   7 +
 xen/common/vm_event.c         | 515 ++++++++++++++++++++++++++++++++++++++----
 xen/include/public/domctl.h   |  25 +-
 xen/include/public/memory.h   |   2 +
 xen/include/public/vm_event.h |  15 ++
 xen/include/xen/vm_event.h    |   4 +
 10 files changed, 660 insertions(+), 43 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index de0b990..fad8bc4 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2012,6 +2012,17 @@ int xc_get_mem_access(xc_interface *xch, uint32_t domain_id,
  * Caller has to unmap this page when done.
  */
 void *xc_monitor_enable(xc_interface *xch, uint32_t domain_id, uint32_t *port);
+
+struct xenforeignmemory_resource_handle *xc_monitor_enable_ex(
+    xc_interface *xch,
+    uint32_t domain_id,
+    void **_ring_buffer,
+    uint32_t ring_frames,
+    uint32_t *ring_port,
+    void **_sync_buffer,
+    uint32_t *sync_ports,
+    uint32_t nr_sync_channels);
+
 int xc_monitor_disable(xc_interface *xch, uint32_t domain_id);
 int xc_monitor_resume(xc_interface *xch, uint32_t domain_id);
 /*
diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c
index 718fe8b..4ceb528 100644
--- a/tools/libxc/xc_monitor.c
+++ b/tools/libxc/xc_monitor.c
@@ -49,6 +49,42 @@ void *xc_monitor_enable(xc_interface *xch, uint32_t domain_id, uint32_t *port)
     return buffer;
 }
 
+struct xenforeignmemory_resource_handle *xc_monitor_enable_ex(
+    xc_interface *xch,
+    uint32_t domain_id,
+    void **_ring_buffer,
+    uint32_t ring_frames,
+    uint32_t *ring_port,
+    void **_sync_buffer,
+    uint32_t *sync_ports,
+    uint32_t nr_sync_channels)
+{
+    xenforeignmemory_resource_handle *fres;
+    int saved_errno;
+
+    /* Pause the domain for ring page setup */
+    if ( xc_domain_pause(xch, domain_id) )
+    {
+        PERROR("Unable to pause domain\n");
+        return NULL;
+    }
+
+    fres = xc_vm_event_enable_ex(xch, domain_id, XEN_VM_EVENT_TYPE_MONITOR,
+                                _ring_buffer, ring_frames, ring_port,
+                                _sync_buffer, sync_ports, nr_sync_channels);
+
+    saved_errno = errno;
+    if ( xc_domain_unpause(xch, domain_id) )
+    {
+        if ( fres )
+            saved_errno = errno;
+        PERROR("Unable to unpause domain");
+    }
+
+    errno = saved_errno;
+    return fres;
+}
+
 int xc_monitor_disable(xc_interface *xch, uint32_t domain_id)
 {
     return xc_vm_event_control(xch, domain_id,
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 482451c..1f70223 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -420,6 +420,20 @@ int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned int op,
 void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
                          uint32_t *port);
 
+/*
+ * Enables vm_event using the xenforeignmemory_map_resource interface.
+ * The vm_event type can be XEN_VM_EVENT_TYPE_(PAGING/MONITOR/SHARING).
+ *
+ * The function returns:
+ *  - A ring for asynchronous vm_events.
+ *  - A slotted buffer for synchronous vm_events (one slot per vcpu)
+ *  - xenforeignmemory_resource_handle used exclusively for resource cleanup
+ */
+xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface *xch,
+    uint32_t domain_id, int type,
+    void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port,
+    void **_sync_buffer, uint32_t *sync_ports, uint32_t nr_sync_channels);
+
 int do_dm_op(xc_interface *xch, uint32_t domid, unsigned int nr_bufs, ...);
 
 #endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c
index 4fc2548..0a976b4 100644
--- a/tools/libxc/xc_vm_event.c
+++ b/tools/libxc/xc_vm_event.c
@@ -22,6 +22,12 @@
 
 #include "xc_private.h"
 
+#include <xen/vm_event.h>
+
+#ifndef PFN_UP
+#define PFN_UP(x)     (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#endif /* PFN_UP */
+
 int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned int op,
                         unsigned int type)
 {
@@ -120,7 +126,7 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
         goto out;
     }
 
-    *port = domctl.u.vm_event_op.port;
+    *port = domctl.u.vm_event_op.u.enable.port;
 
     /* Remove the ring_pfn from the guest's physmap */
     rc = xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0, &ring_pfn);
@@ -138,6 +144,72 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type,
     return ring_page;
 }
 
+xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface *xch,
+    uint32_t domain_id, int type,
+    void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port,
+    void **_sync_buffer, uint32_t *sync_ports, uint32_t nr_sync_channels)
+{
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(sync_ports, nr_sync_channels * sizeof(uint32_t),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    xenforeignmemory_resource_handle *fres;
+    unsigned long nr_frames;
+    void *buffer;
+
+    if ( !_ring_buffer || !ring_port || !_sync_buffer || !sync_ports )
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    nr_frames = ring_frames + PFN_UP(nr_sync_channels * sizeof(struct vm_event_slot));
+
+    fres = xenforeignmemory_map_resource(xch->fmem, domain_id,
+                                         XENMEM_resource_vm_event, type, 0,
+                                         nr_frames, &buffer,
+                                         PROT_READ | PROT_WRITE, 0);
+    if ( !fres )
+    {
+        PERROR("Could not map the vm_event pages\n");
+        return NULL;
+    }
+
+    domctl.cmd = XEN_DOMCTL_vm_event_op;
+    domctl.domain = domain_id;
+    domctl.u.vm_event_op.op = XEN_VM_EVENT_GET_PORTS;
+    domctl.u.vm_event_op.type = type;
+
+    if ( xc_hypercall_bounce_pre(xch, sync_ports) )
+    {
+        PERROR("Could not bounce memory for XEN_DOMCTL_vm_event_op");
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    set_xen_guest_handle(domctl.u.vm_event_op.u.get_ports.sync, sync_ports);
+
+    if ( do_domctl(xch, &domctl) )
+    {
+        PERROR("Failed to get vm_event ports\n");
+        goto out;
+    }
+
+    xc_hypercall_bounce_post(xch, sync_ports);
+    *ring_port = domctl.u.vm_event_op.u.get_ports.async;
+
+    *_sync_buffer = buffer + ring_frames * PAGE_SIZE;
+    *_ring_buffer = buffer;
+
+    return fres;
+
+out:
+    xc_hypercall_bounce_post(xch, sync_ports);
+    if ( fres )
+        xenforeignmemory_unmap_resource(xch->fmem, fres);
+    return NULL;
+}
+
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1431f34..256c63b 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -103,6 +103,7 @@
 #include <xen/efi.h>
 #include <xen/grant_table.h>
 #include <xen/hypercall.h>
+#include <xen/vm_event.h>
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
@@ -4469,6 +4470,12 @@ int arch_acquire_resource(struct domain *d, unsigned int type,
     }
 #endif
 
+    case XENMEM_resource_vm_event:
+    {
+        rc = vm_event_get_frames(d, id, frame, nr_frames, mfn_list);
+        break;
+    }
+
     default:
         rc = -EOPNOTSUPP;
         break;
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 77da41b..a2712a0 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -28,6 +28,8 @@
 #include <asm/p2m.h>
 #include <asm/monitor.h>
 #include <asm/vm_event.h>
+#include <xen/guest_access.h>
+#include <xen/vmap.h>
 #include <xsm/xsm.h>
 
 /* for public/io/ring.h macros */
@@ -40,6 +42,7 @@
 #define vm_event_unlock(_ved)     spin_unlock(&(_ved)->lock)
 
 #define to_vm_event_domain_ring(_ved) container_of(_ved, struct vm_event_domain_ring, ved)
+#define to_vm_event_domain_channel(_ved) container_of(_ved, struct vm_event_domain_channel, ved)
 
 struct vm_event_domain
 {
@@ -48,7 +51,8 @@ struct vm_event_domain
     int (*claim_slot)(struct vm_event_domain *ved, bool allow_sleep);
     void (*release_slot)(struct vm_event_domain *ved);
     void (*put_request)(struct vm_event_domain *ved, vm_event_request_t *req);
-    int (*get_response)(struct vm_event_domain *ved, vm_event_response_t *rsp);
+    int (*get_response)(struct vm_event_domain *ved, struct vcpu *v,
+                        unsigned int port, vm_event_response_t *rsp);
     int (*disable)(struct vm_event_domain **_ved);
 
     /* The domain associated with the VM event */
@@ -58,11 +62,6 @@ struct vm_event_domain
     spinlock_t lock;
 };
 
-bool vm_event_check(struct vm_event_domain *ved)
-{
-    return (ved && ved->check(ved));
-}
-
 /* VM event domain ring implementation */
 struct vm_event_domain_ring
 {
@@ -78,22 +77,57 @@ struct vm_event_domain_ring
     vm_event_front_ring_t front_ring;
     /* event channel port (vcpu0 only) */
     int xen_port;
-    /* vm_event bit for vcpu->pause_flags */
-    int pause_flag;
     /* list of vcpus waiting for room in the ring */
     struct waitqueue_head wq;
     /* the number of vCPUs blocked */
     unsigned int blocked;
+    /* vm_event bit for vcpu->pause_flags */
+    int pause_flag;
     /* The last vcpu woken up */
     unsigned int last_vcpu_wake_up;
 };
 
+struct vm_event_buffer
+{
+    void *va;
+    unsigned int nr_frames;
+    mfn_t mfn[0];
+};
+
+struct vm_event_domain_channel
+{
+    /* VM event domain */
+    struct vm_event_domain ved;
+    /* ring for asynchronous vm events */
+    struct vm_event_buffer *ring;
+    /* front-end ring */
+    vm_event_front_ring_t front_ring;
+    /* per vcpu channels for synchronous vm events */
+    struct vm_event_buffer *channels;
+    /*
+     * event channels ports
+     * - one per vcpu for the synchronous channels.
+     * - one for the asynchronous ring.
+     */
+    uint32_t xen_ports[0];
+};
+
+bool vm_event_check(struct vm_event_domain *ved)
+{
+    return (ved && ved->check(ved));
+}
+
 static bool vm_event_ring_check(struct vm_event_domain *ved)
 {
     struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved);
     return impl->ring_page != NULL;
 }
 
+static bool is_vm_event_domain_ring(struct vm_event_domain *ved)
+{
+    return ved->check == vm_event_ring_check;
+}
+
 static unsigned int vm_event_ring_available(struct vm_event_domain_ring *ved)
 {
     int avail_req = RING_FREE_REQUESTS(&ved->front_ring);
@@ -317,12 +351,15 @@ static void vm_event_ring_put_request(struct vm_event_domain *ved,
     notify_via_xen_event_channel(d, impl->xen_port);
 }
 
-static int vm_event_ring_get_response(struct vm_event_domain *ved,
-                                      vm_event_response_t *rsp)
+static int vm_event_ring_get_response(
+    struct vm_event_domain *ved,
+    struct vcpu *v,
+    unsigned int port,
+    vm_event_response_t *rsp)
 {
     vm_event_front_ring_t *front_ring;
     RING_IDX rsp_cons;
-    struct vm_event_domain_ring *impl = (struct vm_event_domain_ring *)ved;
+    struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved);
 
     vm_event_lock(ved);
 
@@ -332,7 +369,7 @@ static int vm_event_ring_get_response(struct vm_event_domain *ved,
     if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
     {
         vm_event_unlock(ved);
-        return 0;
+        return -1;
     }
 
     /* Copy response */
@@ -353,6 +390,35 @@ static int vm_event_ring_get_response(struct vm_event_domain *ved,
 }
 
 /*
+ * The response is received only from the sync channels
+ */
+static int vm_event_channel_get_response(
+    struct vm_event_domain *ved,
+    struct vcpu *v,
+    unsigned int port,
+    vm_event_response_t *rsp)
+{
+    struct vm_event_domain_channel *impl = to_vm_event_domain_channel(ved);
+    struct vm_event_slot *slot = impl->channels->va + v->vcpu_id * sizeof(struct vm_event_slot);
+
+    vm_event_lock(ved);
+
+    if ( slot->state != VM_EVENT_SLOT_STATE_FINISH )
+    {
+        gdprintk(XENLOG_G_WARNING, "The VM event slot state for d%dv%d is 
invalid.\n",
+                 ved->d->domain_id, v->vcpu_id);
+        vm_event_unlock(ved);
+        return -1;
+    }
+
+    memcpy(rsp, &slot->u.rsp, sizeof(*rsp));
+    slot->state = VM_EVENT_SLOT_STATE_IDLE;
+
+    vm_event_unlock(ved);
+    return 0;
+}
+
+/*
  * Pull all responses from the given ring and unpause the corresponding vCPU
  * if required. Based on the response type, here we can also call custom
  * handlers.
@@ -360,10 +426,11 @@ static int vm_event_ring_get_response(struct vm_event_domain *ved,
  * Note: responses are handled the same way regardless of which ring they
  * arrive on.
  */
-static int vm_event_resume(struct vm_event_domain *ved)
+static int vm_event_resume(struct vm_event_domain *ved, struct vcpu *v, unsigned int port)
 {
     vm_event_response_t rsp;
     struct domain *d;
+    int rc;
 
     if (! vm_event_check(ved))
         return -ENODEV;
@@ -380,22 +447,25 @@ static int vm_event_resume(struct vm_event_domain *ved)
      */
     ASSERT(d != current->domain);
 
-    /* Pull all responses off the ring. */
-    while ( ved->get_response(ved, &rsp) )
+    /* Loop until all available responses are read. */
+    do
     {
-        struct vcpu *v;
+        struct vcpu *rsp_v;
+        rc = ved->get_response(ved, v, port, &rsp);
+        if ( rc < 0 )
+            break;
 
         if ( rsp.version != VM_EVENT_INTERFACE_VERSION )
         {
             printk(XENLOG_G_WARNING "vm_event interface version mismatch\n");
-            continue;
+            goto end_loop;
         }
 
         /* Validate the vcpu_id in the response. */
         if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
-            continue;
+            goto end_loop;
 
-        v = d->vcpu[rsp.vcpu_id];
+        rsp_v = d->vcpu[rsp.vcpu_id];
 
         /*
          * In some cases the response type needs extra handling, so here
@@ -403,7 +473,7 @@ static int vm_event_resume(struct vm_event_domain *ved)
          */
 
         /* Check flags which apply only when the vCPU is paused */
-        if ( atomic_read(&v->vm_event_pause_count) )
+        if ( atomic_read(&rsp_v->vm_event_pause_count) )
         {
 #ifdef CONFIG_HAS_MEM_PAGING
             if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
@@ -415,34 +485,36 @@ static int vm_event_resume(struct vm_event_domain *ved)
              * has to set arch-specific flags when supported, and to avoid
              * bitmask overhead when it isn't supported.
              */
-            vm_event_emulate_check(v, &rsp);
+            vm_event_emulate_check(rsp_v, &rsp);
 
             /*
              * Check in arch-specific handler to avoid bitmask overhead when
              * not supported.
              */
-            vm_event_register_write_resume(v, &rsp);
+            vm_event_register_write_resume(rsp_v, &rsp);
 
             /*
              * Check in arch-specific handler to avoid bitmask overhead when
              * not supported.
              */
-            vm_event_toggle_singlestep(d, v, &rsp);
+            vm_event_toggle_singlestep(d, rsp_v, &rsp);
 
             /* Check for altp2m switch */
             if ( rsp.flags & VM_EVENT_FLAG_ALTERNATE_P2M )
-                p2m_altp2m_check(v, rsp.altp2m_idx);
+                p2m_altp2m_check(rsp_v, rsp.altp2m_idx);
 
             if ( rsp.flags & VM_EVENT_FLAG_SET_REGISTERS )
-                vm_event_set_registers(v, &rsp);
+                vm_event_set_registers(rsp_v, &rsp);
 
             if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
-                vm_event_monitor_next_interrupt(v);
+                vm_event_monitor_next_interrupt(rsp_v);
 
             if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
-                vm_event_vcpu_unpause(v);
+                vm_event_vcpu_unpause(rsp_v);
         }
+end_loop: ;
     }
+    while ( rc > 0 );
 
     return 0;
 }
@@ -527,28 +599,28 @@ int __vm_event_claim_slot(struct vm_event_domain *ved, bool allow_sleep)
     if ( !vm_event_check(ved) )
         return -EOPNOTSUPP;
 
-    return ved->claim_slot(ved, allow_sleep);
+    return (ved->claim_slot) ? ved->claim_slot(ved, allow_sleep) : 0;
 }
 
 #ifdef CONFIG_HAS_MEM_PAGING
 /* Registered with Xen-bound event channel for incoming notifications. */
 static void mem_paging_notification(struct vcpu *v, unsigned int port)
 {
-    vm_event_resume(v->domain->vm_event_paging);
+    vm_event_resume(v->domain->vm_event_paging, v, port);
 }
 #endif
 
 /* Registered with Xen-bound event channel for incoming notifications. */
 static void monitor_notification(struct vcpu *v, unsigned int port)
 {
-    vm_event_resume(v->domain->vm_event_monitor);
+    vm_event_resume(v->domain->vm_event_monitor, v, port);
 }
 
 #ifdef CONFIG_HAS_MEM_SHARING
 /* Registered with Xen-bound event channel for incoming notifications. */
 static void mem_sharing_notification(struct vcpu *v, unsigned int port)
 {
-    vm_event_resume(v->domain->vm_event_share);
+    vm_event_resume(v->domain->vm_event_share, v, port);
 }
 #endif
 
@@ -565,19 +637,24 @@ void vm_event_cleanup(struct domain *d)
          * Finally, because this code path involves previously
          * pausing the domain (domain_kill), unpausing the
          * vcpus causes no harm. */
-        destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_paging)->wq);
+        if ( is_vm_event_domain_ring(d->vm_event_paging) )
+            destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_paging)->wq);
         (void)vm_event_disable(&d->vm_event_paging);
     }
 #endif
+
     if ( vm_event_check(d->vm_event_monitor) )
     {
-        destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_monitor)->wq);
+        if ( is_vm_event_domain_ring(d->vm_event_monitor) )
+            destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_monitor)->wq);
         (void)vm_event_disable(&d->vm_event_monitor);
     }
+
 #ifdef CONFIG_HAS_MEM_SHARING
     if ( vm_event_check(d->vm_event_share) )
     {
-        destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_share)->wq);
+        if ( is_vm_event_domain_ring(d->vm_event_share) )
+            destroy_waitqueue_head(&to_vm_event_domain_ring(d->vm_event_share)->wq);
         (void)vm_event_disable(&d->vm_event_share);
     }
 #endif
@@ -641,7 +718,7 @@ static int vm_event_ring_enable(
     if ( rc < 0 )
         goto err;
 
-    impl->xen_port = vec->port = rc;
+    impl->xen_port = vec->u.enable.port = rc;
 
     /* Prepare ring buffer */
     FRONT_RING_INIT(&impl->front_ring,
@@ -668,6 +745,294 @@ static int vm_event_ring_enable(
     return rc;
 }
 
+/*
+ * Helper functions for allocating / freeing vm_event buffers
+ */
+static int vm_event_alloc_buffer(struct domain *d, unsigned int nr_frames,
+                                 struct vm_event_buffer **_veb)
+{
+    struct vm_event_buffer *veb;
+    int i = 0, rc;
+
+    veb = _xzalloc(sizeof(struct vm_event_buffer) + nr_frames * sizeof(mfn_t),
+                   __alignof__(struct vm_event_buffer));
+    if ( unlikely(!veb) )
+    {
+        rc = -ENOMEM;
+        goto err;
+    }
+
+    veb->nr_frames = nr_frames;
+
+    for ( i = 0; i < nr_frames; i++ )
+    {
+        struct page_info *page = alloc_domheap_page(d, 0);
+
+        if ( !page )
+        {
+            rc = -ENOMEM;
+            goto err;
+        }
+
+        if ( !get_page_and_type(page, d, PGT_writable_page) )
+        {
+            domain_crash(d);
+            rc = -ENODATA;
+            goto err;
+        }
+
+        veb->mfn[i] = page_to_mfn(page);
+    }
+
+    veb->va = vmap(veb->mfn, nr_frames);
+    if ( !veb->va )
+    {
+        rc = -ENOMEM;
+        goto err;
+    }
+
+    for( i = 0; i < nr_frames; i++ )
+        clear_page(veb->va + i * PAGE_SIZE);
+
+    *_veb = veb;
+    return 0;
+
+err:
+    while ( --i >= 0 )
+    {
+        struct page_info *page = mfn_to_page(veb->mfn[i]);
+
+        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
+            put_page(page);
+        put_page_and_type(page);
+    }
+
+    xfree(veb);
+    return rc;
+}
+
+static void vm_event_free_buffer(struct vm_event_buffer **_veb)
+{
+    struct vm_event_buffer *veb = *_veb;
+
+    if ( !veb )
+        return;
+
+    if ( veb->va )
+    {
+        int i;
+
+        vunmap(veb->va);
+        for ( i = 0; i < veb->nr_frames; i++ )
+        {
+            struct page_info *page = mfn_to_page(veb->mfn[i]);
+
+            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
+                put_page(page);
+            put_page_and_type(page);
+        }
+    }
+    XFREE(*_veb);
+}
+
+static bool vm_event_channel_check(struct vm_event_domain *ved)
+{
+    struct vm_event_domain_channel *impl = to_vm_event_domain_channel(ved);
+    return impl->ring->va != NULL && impl->channels->va != NULL;
+}
+
+static void vm_event_channel_put_request(struct vm_event_domain *ved,
+                                         vm_event_request_t *req)
+{
+    struct vcpu *curr = current;
+    struct vm_event_domain_channel *impl = to_vm_event_domain_channel(ved);
+    struct domain *d;
+    struct vm_event_slot *slot;
+    bool sync;
+
+    if ( !vm_event_check(ved) )
+        return;
+
+    d = ved->d;
+    slot = impl->channels->va + req->vcpu_id * sizeof(struct vm_event_slot);
+
+    if ( curr->domain != d )
+    {
+        req->flags |= VM_EVENT_FLAG_FOREIGN;
+#ifndef NDEBUG
+        if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
+            gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n",
+                     d->domain_id, req->vcpu_id);
+#endif
+    }
+
+    req->version = VM_EVENT_INTERFACE_VERSION;
+
+    sync = req->flags & VM_EVENT_FLAG_VCPU_PAUSED;
+
+    vm_event_lock(ved);
+
+    if ( sync )
+    {
+        if ( slot->state != VM_EVENT_SLOT_STATE_IDLE )
+        {
+            gdprintk(XENLOG_G_WARNING, "The VM event slot for d%dv%d is not IDLE.\n",
+                     d->domain_id, req->vcpu_id);
+            vm_event_unlock(ved);
+            return;
+        }
+        memcpy(&slot->u.req, req, sizeof(*req));
+        slot->state = VM_EVENT_SLOT_STATE_SUBMIT;
+    }
+    else
+    {
+        vm_event_front_ring_t *front_ring;
+        RING_IDX req_prod;
+
+        /* Due to the reservations, this step must succeed. */
+        front_ring = &impl->front_ring;
+
+        /* Copy request */
+        req_prod = front_ring->req_prod_pvt;
+        memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
+        req_prod++;
+
+        /* Update ring */
+        front_ring->req_prod_pvt = req_prod;
+        RING_PUSH_REQUESTS(front_ring);
+    }
+
+    vm_event_unlock(ved);
+
+    notify_via_xen_event_channel(d, impl->xen_ports[(sync) ? req->vcpu_id : d->max_vcpus]);
+}
+
+static int vm_event_channel_disable(struct vm_event_domain **_ved)
+{
+    struct vm_event_domain_channel *ved = to_vm_event_domain_channel(*_ved);
+    struct domain *d = ved->ved.d;
+    struct vcpu *v;
+    int i;
+
+    vm_event_lock(&ved->ved);
+
+    for_each_vcpu ( d, v )
+    {
+        if ( atomic_read(&v->vm_event_pause_count) )
+            vm_event_vcpu_unpause(v);
+        /*
+        if ( test_and_clear_bit(ved->ved.pause_flag, &v->pause_flags) )
+        {
+            vcpu_unpause(v);
+        }
+        */
+    }
+
+    /* Free domU's event channels and leave the other one unbound */
+    for ( i = 0; i < d->max_vcpus; i++ )
+        evtchn_close(d, ved->xen_ports[i], 0);
+    evtchn_close(d, ved->xen_ports[d->max_vcpus], 0);
+
+    vm_event_free_buffer(&ved->ring);
+    vm_event_free_buffer(&ved->channels);
+
+    vm_event_cleanup_domain(d);
+
+    vm_event_unlock(&ved->ved);
+
+    XFREE(*_ved);
+
+    return 0;
+}
+
+static int vm_event_channel_enable(
+    struct domain *d,
+    struct vm_event_domain **_ved,
+    unsigned int nr_frames,
+    xen_event_channel_notification_t notification_fn)
+{
+    int i = 0, rc;
+    struct vm_event_domain_channel *impl;
+    unsigned int nr_ring_frames, nr_channel_frames;
+
+    if ( *_ved )
+        return -EBUSY;
+
+    if ( nr_frames <= PFN_UP(d->max_vcpus * sizeof(struct vm_event_slot)) )
+        return -EINVAL;
+
+    impl = _xzalloc(sizeof(struct vm_event_domain_channel) +
+                        ( d->max_vcpus + 1 ) * sizeof(uint32_t),
+                    __alignof__(struct vm_event_domain_channel));
+    if ( !impl )
+        return -ENOMEM;
+
+    impl->ved.d = d;
+    impl->ved.check = vm_event_channel_check;
+    impl->ved.claim_slot = NULL;
+    impl->ved.release_slot = NULL;
+    impl->ved.put_request = vm_event_channel_put_request;
+    impl->ved.get_response = vm_event_channel_get_response;
+    impl->ved.disable = vm_event_channel_disable;
+
+    nr_channel_frames = PFN_UP(d->max_vcpus * sizeof(struct vm_event_slot));
+    nr_ring_frames = nr_frames - nr_channel_frames;
+
+    vm_event_lock_init(&impl->ved);
+    vm_event_lock(&impl->ved);
+
+    rc = vm_event_init_domain(d);
+    if ( rc < 0 )
+        goto err;
+
+    rc = vm_event_alloc_buffer(d, nr_ring_frames, &impl->ring);
+    if ( rc )
+        goto err;
+
+    /* Allocate event channel for the async ring */
+    rc = alloc_unbound_xen_event_channel(d, 0, current->domain->domain_id,
+                                         notification_fn);
+    if ( rc < 0 )
+        goto err;
+
+    impl->xen_ports[d->max_vcpus] = rc;
+
+    /* Prepare ring buffer */
+    FRONT_RING_INIT(&impl->front_ring,
+                    (vm_event_sring_t *)impl->ring->va,
+                    impl->ring->nr_frames * PAGE_SIZE);
+
+    rc = vm_event_alloc_buffer(d, nr_channel_frames, &impl->channels);
+    if ( rc != 0 )
+        goto err;
+
+    for ( i = 0; i < d->max_vcpus; i++ )
+    {
+        rc = alloc_unbound_xen_event_channel(d, i, current->domain->domain_id,
+                                             notification_fn);
+        if ( rc < 0 )
+            goto err;
+
+        impl->xen_ports[i] = rc;
+    }
+
+    *_ved = &impl->ved;
+
+    vm_event_unlock(&impl->ved);
+    return 0;
+
+err:
+    while ( i-- )
+        evtchn_close(d, impl->xen_ports[i], 0);
+    evtchn_close(d, impl->xen_ports[d->max_vcpus], 0);
+    vm_event_free_buffer(&impl->ring);
+    vm_event_free_buffer(&impl->channels);
+    vm_event_cleanup_domain(d);
+    vm_event_unlock(&impl->ved);
+    xfree(impl);
+    return rc;
+}
+
 int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
                     XEN_GUEST_HANDLE_PARAM(void) u_domctl)
 {
@@ -748,7 +1113,9 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_resume(d->vm_event_paging);
+            if ( vm_event_check(d->vm_event_paging) &&
+                 is_vm_event_domain_ring(d->vm_event_paging) )
+                rc = vm_event_resume(d->vm_event_paging, NULL, 0);
             break;
 
         default:
@@ -786,7 +1153,30 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_resume(d->vm_event_monitor);
+            if ( vm_event_check(d->vm_event_monitor) &&
+                 is_vm_event_domain_ring(d->vm_event_monitor) )
+                rc = vm_event_resume(d->vm_event_monitor, NULL, 0);
+            break;
+
+        case XEN_VM_EVENT_GET_PORTS:
+            if ( !vm_event_check(d->vm_event_monitor) )
+                break;
+
+            if ( !is_vm_event_domain_ring(d->vm_event_monitor) )
+            {
+                struct vm_event_domain_channel *impl = to_vm_event_domain_channel(d->vm_event_monitor);
+
+                if ( copy_to_guest(vec->u.get_ports.sync,
+                                   impl->xen_ports,
+                                   d->max_vcpus) != 0 )
+                {
+                    rc = -EFAULT;
+                    break;
+                }
+
+                vec->u.get_ports.async = impl->xen_ports[d->max_vcpus];
+                rc = 0;
+            }
             break;
 
         default:
@@ -830,7 +1220,10 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
             break;
 
         case XEN_VM_EVENT_RESUME:
-            rc = vm_event_resume(d->vm_event_share);
+            if ( vm_event_check(d->vm_event_share) &&
+                 is_vm_event_domain_ring(d->vm_event_share) )
+                rc = vm_event_resume(d->vm_event_share, NULL, 0);
+            break;
 
         default:
             rc = -ENOSYS;
@@ -847,6 +1240,52 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
     return rc;
 }
 
+int vm_event_get_frames(struct domain *d, unsigned int id,
+                        unsigned long frame, unsigned int nr_frames,
+                        xen_pfn_t mfn_list[])
+{
+    int rc = 0, i, j;
+    struct vm_event_domain **_ved;
+    struct vm_event_domain_channel *impl;
+    xen_event_channel_notification_t fn;
+
+    switch ( id )
+    {
+    case XEN_VM_EVENT_TYPE_MONITOR:
+        /* domain_pause() not required here, see XSA-99 */
+        rc = arch_monitor_init_domain(d);
+        if ( rc )
+            return rc;
+        _ved = &d->vm_event_monitor;
+        fn = monitor_notification;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+
+    rc = vm_event_channel_enable(d, _ved, nr_frames, fn);
+    if ( rc )
+    {
+        switch ( id )
+        {
+            case XEN_VM_EVENT_TYPE_MONITOR:
+                arch_monitor_cleanup_domain(d);
+                break;
+        }
+        return rc;
+    }
+
+    impl = to_vm_event_domain_channel(*_ved);
+    j = 0;
+    for ( i = 0; i < impl->ring->nr_frames; i++ )
+        mfn_list[j++] = mfn_x(impl->ring->mfn[i]);
+    for ( i = 0; i < impl->channels->nr_frames; i++ )
+        mfn_list[j++] = mfn_x(impl->channels->mfn[i]);
+
+    return rc;
+}
+
 void vm_event_vcpu_pause(struct vcpu *v)
 {
     ASSERT(v == current);
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 26b1a55..78262a1 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -38,7 +38,7 @@
 #include "hvm/save.h"
 #include "memory.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000011
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000012
 
 /*
  * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -836,6 +836,7 @@ struct xen_domctl_gdbsx_domstatus {
 #define XEN_VM_EVENT_ENABLE               0
 #define XEN_VM_EVENT_DISABLE              1
 #define XEN_VM_EVENT_RESUME               2
+#define XEN_VM_EVENT_GET_PORTS            3
 
 /*
  * Use for teardown/setup of helper<->hypervisor interface for paging,
@@ -843,10 +844,26 @@ struct xen_domctl_gdbsx_domstatus {
  */
 /* XEN_DOMCTL_vm_event_op */
 struct xen_domctl_vm_event_op {
-    uint32_t        op;           /* XEN_VM_EVENT_* */
-    uint32_t        type;         /* XEN_VM_EVENT_TYPE_* */
+    /* IN: Xen vm_event opcode (XEN_VM_EVENT_*) */
+    uint32_t            op;
+    /* IN: Xen vm event ring type (XEN_VM_EVENT_TYPE_*) */
+    uint32_t            type;
 
-    uint32_t        port;         /* OUT: event channel for ring */
+    union {
+        struct {
+            /* OUT: remote port for event channel ring */
+            uint32_t    port;
+        } enable;
+        struct {
+            /* OUT: remote port for the async event channel ring */
+            uint32_t    async;
+            /*
+             * OUT: remote ports for the sync vm_event channels.
+             * The number of ports will be equal to the vcpu count.
+             */
+            XEN_GUEST_HANDLE_64(uint32) sync;
+        } get_ports;
+    } u;
 };
 
 /*
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 8638023..cfd280d 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -612,6 +612,7 @@ struct xen_mem_acquire_resource {
 
 #define XENMEM_resource_ioreq_server 0
 #define XENMEM_resource_grant_table 1
+#define XENMEM_resource_vm_event 2
 
     /*
      * IN - a type-specific resource identifier, which must be zero
@@ -619,6 +620,7 @@ struct xen_mem_acquire_resource {
      *
      * type == XENMEM_resource_ioreq_server -> id == ioreq server id
      * type == XENMEM_resource_grant_table -> id defined below
+     * type == XENMEM_resource_vm_event -> id == vm_event type
      */
     uint32_t id;
 
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index b2bafc0..499fbbc 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -388,6 +388,21 @@ typedef struct vm_event_st {
 
 DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
 
+struct vm_event_slot
+{
+    uint32_t state;
+    union {
+        vm_event_request_t req;
+        vm_event_response_t rsp;
+    } u;
+};
+
+enum vm_event_slot_state {
+    VM_EVENT_SLOT_STATE_IDLE,   /* no contents */
+    VM_EVENT_SLOT_STATE_SUBMIT, /* request ready */
+    VM_EVENT_SLOT_STATE_FINISH, /* response ready */
+};
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 #endif /* _XEN_PUBLIC_VM_EVENT_H */
 
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
index a5c82d6..d4bd184 100644
--- a/xen/include/xen/vm_event.h
+++ b/xen/include/xen/vm_event.h
@@ -64,6 +64,10 @@ void vm_event_put_request(struct vm_event_domain *ved,
 int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
                     XEN_GUEST_HANDLE_PARAM(void) u_domctl);
 
+int vm_event_get_frames(struct domain *d, unsigned int id,
+                        unsigned long frame, unsigned int nr_frames,
+                        xen_pfn_t mfn_list[]);
+
 void vm_event_vcpu_pause(struct vcpu *v);
 void vm_event_vcpu_unpause(struct vcpu *v);
 
-- 
2.7.4

