[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] mem_event: use wait queue when ring is full


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Olaf Hering <olaf@xxxxxxxxx>
  • Date: Mon, 05 Dec 2011 12:19:03 +0100
  • Delivery-date: Mon, 05 Dec 2011 11:20:10 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

# HG changeset patch
# User Olaf Hering <olaf@xxxxxxxxx>
# Date 1323083831 -3600
# Node ID cd163bcd0f066e52ee74e42090188d632f0086bf
# Parent  a4d7c27ec1f190ecbb9a909609f6ef0eca250c00
mem_event: use wait queue when ring is full

This change is based on an idea/patch from Adin Scannell.

If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.

A request from a foreign domain has to succeed once a slot has been
claimed, because such vcpus cannot sleep.

This change also fixes a bug in p2m_mem_paging_drop_page(). Up to now a
full ring would lead to a harmless inconsistency in the pager.


v5:
  - rename mem_event_check_ring() to mem_event_claim_slot()
  - rename mem_event_put_req_producers() to mem_event_release_slot()
  - add local/foreign request accounting
  - keep room for at least one guest request

v4:
 - fix off-by-one bug in _mem_event_put_request
 - add mem_event_wake_requesters() and use wake_up_nr()
 - rename mem_event_mark_and_pause() and mem_event_unpause_vcpus() functions
 - req_producers counts foreign request producers, rename member

v3:
 - rename ->mem_event_bit to ->bit
 - remove me_ from new VPF_ defines

v2:
 - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
   vcpu will not wake_up after a wait_event because the pause_count was
   increased twice. Fixes guest hangs.
 - update free space check in _mem_event_put_request()
 - simplify mem_event_put_request()

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p
     if ( (p & HVMPME_onchangeonly) && (value == old) )
         return 1;
     
-    rc = mem_event_check_ring(d, &d->mem_event->access);
-    if ( rc )
+    rc = mem_event_claim_slot(d, &d->mem_event->access);
+    if ( rc < 0 )
         return rc;
     
     memset(&req, 0, sizeof(req));
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
 
 #include <asm/domain.h>
 #include <xen/event.h>
+#include <xen/wait.h>
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
@@ -39,6 +40,7 @@
 
 static int mem_event_enable(struct domain *d,
                             xen_domctl_mem_event_op_t *mec,
+                            int bit,
                             struct mem_event_domain *med)
 {
     int rc;
@@ -107,8 +109,12 @@ static int mem_event_enable(struct domai
 
     mem_event_ring_lock_init(med);
 
+    med->bit = bit;
+
+    init_waitqueue_head(&med->wq);
+
     /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    mem_event_wake_waiters(d, med);
 
     return 0;
 
@@ -124,6 +130,9 @@ static int mem_event_enable(struct domai
 
 static int mem_event_disable(struct mem_event_domain *med)
 {
+    if (!list_empty(&med->wq.list))
+        return -EBUSY;
+
     unmap_domain_page(med->ring_page);
     med->ring_page = NULL;
 
@@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, 
mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+                                  struct mem_event_domain *med,
+                                  mem_event_request_t *req)
 {
     mem_event_front_ring_t *front_ring;
+    int free_req, claimed_req;
     RING_IDX req_prod;
 
     mem_event_ring_lock(med);
 
+    free_req = RING_FREE_REQUESTS(&med->front_ring);
+    /* Foreign requests must succeed because their vcpus can not sleep */
+    claimed_req = med->foreign_producers;
+    if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+        mem_event_ring_unlock(med);
+        return 0;
+    }
+
     front_ring = &med->front_ring;
     req_prod = front_ring->req_prod_pvt;
 
@@ -147,14 +167,35 @@ void mem_event_put_request(struct domain
     memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
     req_prod++;
 
+    /* Update accounting */
+    if ( current->domain == d )
+        med->target_producers--;
+    else
+        med->foreign_producers--;
+
     /* Update ring */
-    med->req_producers--;
     front_ring->req_prod_pvt = req_prod;
     RING_PUSH_REQUESTS(front_ring);
 
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
+
+    return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+                           mem_event_request_t *req)
+{
+    /* Go to sleep if request came from guest */
+    if (current->domain == d) {
+        wait_event(med->wq, _mem_event_put_request(d, med, req));
+        return;
+    }
+    /* Ring was full anyway, unable to sleep in non-guest context */
+    if (!_mem_event_put_request(d, med, req))
+        printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+                req->type, req->flags, (unsigned long)req->gfn);
 }
 
 void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t 
*rsp)
@@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e
     mem_event_ring_unlock(med);
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+    int free_req;
+
+    mem_event_ring_lock(med);
+
+    free_req = RING_FREE_REQUESTS(&med->front_ring);
+    if ( free_req )
+        wake_up_nr(&med->wq, free_req);
+
+    mem_event_ring_unlock(med);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to 
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
 {
     struct vcpu *v;
 
     for_each_vcpu ( d, v )
-        if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+        if ( test_and_clear_bit(med->bit, &v->pause_flags) )
             vcpu_wake(v);
 }
 
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
 {
-    set_bit(_VPF_mem_event, &v->pause_flags);
+    set_bit(med->bit, &v->pause_flags);
     vcpu_sleep_nosync(v);
 }
 
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
 {
     mem_event_ring_lock(med);
-    med->req_producers--;
+    if ( current->domain == d )
+        med->target_producers--;
+    else
+        med->foreign_producers--;
     mem_event_ring_unlock(med);
 }
 
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ *                 0: the ring has some room
+ *               > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *curr = current;
-    int free_requests;
+    int free_req;
     int ring_full = 1;
 
     if ( !med->ring_page )
@@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain *
 
     mem_event_ring_lock(med);
 
-    free_requests = RING_FREE_REQUESTS(&med->front_ring);
-    if ( med->req_producers < free_requests )
+    free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+    if ( current->domain == d ) {
+        med->target_producers++;
+        ring_full = 0;
+    } else if ( med->foreign_producers + med->target_producers + 1 < free_req )
     {
-        med->req_producers++;
+        med->foreign_producers++;
         ring_full = 0;
     }
 
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
-
     mem_event_ring_unlock(med);
 
     return ring_full;
@@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x
             if ( p2m->pod.entry_count )
                 break;
 
-            rc = mem_event_enable(d, mec, med);
+            rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
         }
         break;
 
@@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x
             if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
                 break;
 
-            rc = mem_event_enable(d, mec, med);
+            rc = mem_event_enable(d, mec, _VPF_mem_access, med);
         }
         break;
 
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
 #endif
 
 
-static struct page_info* mem_sharing_alloc_page(struct domain *d, 
-                                                unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
 {
-    struct page_info* page;
     struct vcpu *v = current;
-    mem_event_request_t req;
-
-    page = alloc_domheap_page(d, 0); 
-    if(page != NULL) return page;
-
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_SHARED;
+    mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
 
     if ( v->domain != d )
     {
@@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all
         gdprintk(XENLOG_ERR, 
                  "Failed alloc on unshare path for foreign (%d) lookup\n",
                  d->domain_id);
-        return page;
+        return;
     }
 
-    vcpu_pause_nosync(v);
-    req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+    if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+        return;
 
-    if(mem_event_check_ring(d, &d->mem_event->share)) return page;
-
+    req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
     req.gfn = gfn;
     req.p2mt = p2m_ram_shared;
     req.vcpu_id = v->vcpu_id;
     mem_event_put_request(d, &d->mem_event->share, &req);
-
-    return page;
+    vcpu_pause_nosync(v);
 }
 
 unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -653,7 +643,7 @@ gfn_found:
     if(ret == 0) goto private_page_found;
         
     old_page = page;
-    page = mem_sharing_alloc_page(d, gfn);
+    page = alloc_domheap_page(d, 0); 
     if(!page) 
     {
         /* We've failed to obtain memory for private page. Need to re-add the
@@ -661,6 +651,7 @@ gfn_found:
         list_add(&gfn_info->list, &hash_entry->gfns);
         put_gfn(d, gfn);
         shr_unlock();
+        mem_sharing_notify_helper(d, gfn);
         return -ENOMEM;
     }
 
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain *
  */
 void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
 {
-    struct vcpu *v = current;
-    mem_event_request_t req;
+    mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
 
-    /* Check that there's space on the ring for this request */
-    if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
-    {
-        /* Send release notification to pager */
-        memset(&req, 0, sizeof(req));
-        req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
-        req.gfn = gfn;
-        req.vcpu_id = v->vcpu_id;
+    /* Send release notification to pager */
+    req.flags = MEM_EVENT_FLAG_DROP_PAGE;
 
-        mem_event_put_request(d, &d->mem_event->paging, &req);
-    }
+    mem_event_put_request(d, &d->mem_event->paging, &req);
 }
 
 /**
@@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     /* Check that there's space on the ring for this request */
-    if ( mem_event_check_ring(d, &d->mem_event->paging) )
+    if ( mem_event_claim_slot(d, &d->mem_event->paging) )
         return;
 
     memset(&req, 0, sizeof(req));
@@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma
     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
     {
         /* gfn is already on its way back and vcpu is not paused */
-        mem_event_put_req_producers(&d->mem_event->paging);
+        mem_event_release_slot(d, &d->mem_event->paging);
         return;
     }
 
@@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain
     if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
         vcpu_unpause(d->vcpu[rsp.vcpu_id]);
 
-    /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
+    /* Wake vcpus waiting for room in the ring */
+    mem_event_wake_requesters(&d->mem_event->paging);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long 
gla, 
@@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long 
     p2m_unlock(p2m);
 
     /* Otherwise, check if there is a memory event listener, and send the 
message along */
-    res = mem_event_check_ring(d, &d->mem_event->access);
+    res = mem_event_claim_slot(d, &d->mem_event->access);
     if ( res < 0 ) 
     {
         /* No listener */
@@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long 
                    "Memory access permissions failure, no mem_event listener: 
pausing VCPU %d, dom %d\n",
                    v->vcpu_id, d->domain_id);
 
-            mem_event_mark_and_pause(v);
+            mem_event_mark_and_sleep(v, &d->mem_event->access);
         }
         else
         {
@@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain
     if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
         vcpu_unpause(d->vcpu[rsp.vcpu_id]);
 
-    /* Unpause any domains that were paused because the ring was full or no 
listener 
-     * was available */
-    mem_event_unpause_vcpus(d);
+    /* Wake vcpus waiting for room in the ring */
+    mem_event_wake_requesters(&d->mem_event->access);
+
+    /* Unpause all vcpus that were paused because no listener was available */
+    mem_event_wake_waiters(d, &d->mem_event->access);
 }
 
 
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__
 
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
 void mem_event_put_request(struct domain *d, struct mem_event_domain *med, 
mem_event_request_t *req);
 void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t 
*rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
 #include <xen/nodemask.h>
 #include <xen/radix-tree.h>
 #include <xen/multicall.h>
+#include <xen/wait.h>
 #include <public/xen.h>
 #include <public/domctl.h>
 #include <public/sysctl.h>
@@ -183,7 +184,8 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned int req_producers;
+    unsigned short foreign_producers;
+    unsigned short target_producers;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
@@ -192,6 +194,10 @@ struct mem_event_domain
     mem_event_front_ring_t front_ring;
     /* event channel port (vcpu0 only) */
     int xen_port;
+    /* mem_event bit for vcpu->pause_flags */
+    int bit;
+    /* list of vcpus waiting for room in the ring */
+    struct waitqueue_head wq;
 };
 
 struct mem_event_per_domain
@@ -615,9 +621,12 @@ static inline struct domain *next_domain
  /* VCPU affinity has changed: migrating to a new CPU. */
 #define _VPF_migrating       3
 #define VPF_migrating        (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event       4
-#define VPF_mem_event        (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging      4
+#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access      5
+#define VPF_mem_access       (1UL<<_VPF_mem_access)
 
 static inline int vcpu_runnable(struct vcpu *v)
 {

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.