
[Xen-devel] [PATCH 2 of 4] xenpaging: use wait queues


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Olaf Hering <olaf@xxxxxxxxx>
  • Date: Thu, 01 Dec 2011 12:09:18 +0100
  • Delivery-date: Thu, 01 Dec 2011 12:22:44 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

# HG changeset patch
# User Olaf Hering <olaf@xxxxxxxxx>
# Date 1322737507 -3600
# Node ID 8147822efdee65d1f5b94656ab2032aedb76979f
# Parent  612f69531fd15cf59c58404f6e4762733a9a268c
xenpaging: use wait queues

Use a wait queue to put a guest vcpu to sleep while the requested gfn is
in paging state. This adds missing p2m_mem_paging_populate() calls to
some callers of the new get_gfn* variants, which would otherwise crash
because they receive an invalid mfn. It also fixes guest crashes caused
by unexpected returns from do_memory_op when copy_to/from_guest runs
into a paged gfn. Now those places will always get a valid mfn.
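
For illustration, from a guest-context caller's point of view the change
boils down to the following sketch (p2m, gfn, p2mt and a are just
placeholders for whatever the caller already has at hand):

    /* guest vcpu context; q is any query type other than p2m_query,
     * for p2m_query the entry is still returned as-is */
    mfn = get_gfn_type_access(p2m, gfn, &p2mt, &a, q, NULL);
    /* if the gfn was paged out, the vcpu slept on the per-gfn wait
     * queue until the pager restored it, so p2mt is no longer one of
     * the p2m_ram_paging_* states and mfn is valid here */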

Since each gfn could be requested by several guest vcpus at the same
time, a queue of paged gfns is maintained and each waiting vcpu is
attached to the queue entry for its gfn. Once p2m_mem_paging_resume()
has restored the gfn, the waiting vcpus resume execution.
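
The sleep/wake pairing itself is built on Xen's generic wait queues from
xen/wait.h; stripped of the queue bookkeeping it is roughly:

    /* waiting guest vcpu, in p2m_mem_paging_wait(): block until the
     * p2m entry for gfn has left the paging states */
    wait_event(pmpq->wq,
               p2m_mem_paging_get_entry(mfn, p2m, gfn, t, a, q, page_order));

    /* pager response path, in p2m_mem_paging_resume(): wake every vcpu
     * attached to the queue entry for rsp.gfn (wake_up_all) */
    p2m_mem_paging_wake_queue(d, rsp.gfn);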

There is untested code in p2m_mem_paging_init_queue() to allow cpu
hotplug. Since each vcpu may wait on a different gfn, there have to be
as many queues as vcpus. But 'xl vcpu-set' does not seem to work at the
moment, so this code path cannot be exercised yet.


Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

diff -r 612f69531fd1 -r 8147822efdee xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -454,6 +454,8 @@ int hvm_domain_initialise(struct domain 
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    spin_lock_init(&d->arch.hvm_domain.gfn_lock);
+
     INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
     spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
 
diff -r 612f69531fd1 -r 8147822efdee xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -30,6 +30,7 @@
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
 #include <xen/iommu.h>
+#include <xen/wait.h>
 #include <asm/mem_event.h>
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
@@ -144,6 +145,182 @@ void p2m_change_entry_type_global(struct
     p2m_unlock(p2m);
 }
 
+#ifdef __x86_64__
+struct p2m_mem_paging_queue {
+    struct list_head list;
+    struct waitqueue_head wq;
+    unsigned long gfn;
+    unsigned short waiters;
+    unsigned short woken;
+    unsigned short index;
+};
+
+struct p2m_mem_paging_queue_head {
+    struct list_head list;
+    unsigned int max;
+};
+
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q;
+    unsigned int i, nr;
+    int ret = 0;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    if (!d->arch.hvm_domain.gfn_queue) {
+        ret = -ENOMEM;
+        h = xzalloc(struct p2m_mem_paging_queue_head);
+        if (!h) {
+            domain_crash(d);
+            goto out;
+        }
+
+        INIT_LIST_HEAD(&h->list);
+        nr = max;
+    } else {
+        h = d->arch.hvm_domain.gfn_queue;
+        if (max <= h->max)
+            goto out;
+        nr = max - h->max;
+    }
+
+    ret = -ENOMEM;
+    q = xzalloc_array(struct p2m_mem_paging_queue, nr);
+    if (!q) {
+        if (!d->arch.hvm_domain.gfn_queue)
+            xfree(h);
+        domain_crash(d);
+        goto out;
+    }
+
+    for (i = 0; i < nr; i++) {
+        init_waitqueue_head(&q[i].wq);
+        INIT_LIST_HEAD(&q[i].list);
+        q[i].index = h->max + i + 1;
+        list_add_tail(&q[i].list, &h->list);
+    }
+
+    h->max = max;
+    d->arch.hvm_domain.gfn_queue = h;
+    ret = 0;
+
+out:
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+    return ret;
+}
+
+static struct p2m_mem_paging_queue *p2m_mem_paging_get_queue(struct domain *d, unsigned long gfn)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q, *q_match, *q_free;
+    
+    h = d->arch.hvm_domain.gfn_queue;
+    q_match = q_free = NULL;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    list_for_each_entry(q, &h->list, list) {
+        if (q->gfn == gfn) {
+            q_match = q;
+            break;
+        }
+        if (!q_free && !q->waiters)
+            q_free = q;
+    }
+
+    if (!q_match && q_free)
+        q_match = q_free;
+
+    if (q_match) {
+        if (q_match->woken)
+            printk("wq woken for gfn %u:%u %lx %u %u %u\n", 
current->domain->domain_id, current->vcpu_id, gfn, q_match->index, 
q_match->woken, q_match->waiters);
+        q_match->waiters++;
+        q_match->gfn = gfn;
+    }
+
+    if (!q_match)
+        printk("No wq_get for gfn %u:%u %lx\n", current->domain->domain_id, 
current->vcpu_id, gfn);
+
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+    return q_match;
+}
+
+static void p2m_mem_paging_put_queue(struct domain *d, struct p2m_mem_paging_queue *q_match)
+{
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    if (q_match->waiters == 0)
+        printk("wq_put no waiters, gfn %u:%u %lx %u\n", 
current->domain->domain_id, current->vcpu_id, q_match->gfn, q_match->woken);
+    else if (--q_match->waiters == 0)
+        q_match->gfn = q_match->woken = 0;
+
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+static void p2m_mem_paging_wake_queue(struct domain *d, unsigned long gfn)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q, *q_match = NULL;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    h = d->arch.hvm_domain.gfn_queue;
+    list_for_each_entry(q, &h->list, list) {
+        if (q->gfn == gfn) {
+            q_match = q;
+            break;
+        }
+    }
+    if (q_match) {
+        if (q_match->woken || q_match->waiters == 0)
+            printk("Wrong wake for gfn %u:%u %p %lx %u %u\n", 
current->domain->domain_id, current->vcpu_id, q_match, gfn, q_match->woken, 
q_match->waiters);
+        q_match->woken++;
+        wake_up_all(&q_match->wq);
+    }
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+/* Returns 0 if the gfn is still paged */
+static int p2m_mem_paging_get_entry(mfn_t *mfn,
+               struct p2m_domain *p2m, unsigned long gfn, 
+               p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+               unsigned int *page_order)
+{
+    *mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
+
+    return p2m_is_paging(*t) ? 0 : 1;
+}
+
+/* Go to sleep in case of guest access */
+static void p2m_mem_paging_wait(mfn_t *mfn,
+                    struct p2m_domain *p2m, unsigned long gfn,
+                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+                    unsigned int *page_order)
+{
+    struct p2m_mem_paging_queue *pmpq;
+
+    /* Return p2mt as is in case of query */
+    if ( q == p2m_query )
+        return;
+    /* Foreign domains can not go to sleep */
+    if ( current->domain != p2m->domain )
+        return;
+
+    pmpq = p2m_mem_paging_get_queue(p2m->domain, gfn);
+    if ( !pmpq )
+        return;
+
+    /* Populate the page once */
+    if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged )
+        p2m_mem_paging_populate(p2m->domain, gfn);
+
+    wait_event(pmpq->wq, p2m_mem_paging_get_entry(mfn, p2m, gfn, t, a, q, page_order));
+    p2m_mem_paging_put_queue(p2m->domain, pmpq);
+}
+#endif
+
 mfn_t get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                     unsigned int *page_order)
@@ -161,6 +338,11 @@ mfn_t get_gfn_type_access(struct p2m_dom
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
 
 #ifdef __x86_64__
+    if ( unlikely(p2m_is_paging(*t)) )
+        p2m_mem_paging_wait(&mfn, p2m, gfn, t, a, q, page_order);
+#endif
+
+#ifdef __x86_64__
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
@@ -914,54 +1096,42 @@ void p2m_mem_paging_drop_page(struct dom
 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
+    mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
     p2m_type_t p2mt;
     p2m_access_t a;
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int put_request = 0;
 
     /* Check that there's space on the ring for this request */
     if ( mem_event_check_ring(d, &d->mem_event->paging) )
         return;
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_PAGING;
-
     /* Fix p2m mapping */
     p2m_lock(p2m);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
-    /* Allow only nominated or evicted pages to enter page-in path */
-    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
-    {
-        /* Evict will fail now, tag this request for pager */
-        if ( p2mt == p2m_ram_paging_out )
-            req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
+    /* Forward the state only if gfn is in page-out path */
+    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged ) {
+        /* Ignore foreign requests to allow mmap in pager */
+        if ( mfn_valid(mfn) && p2mt == p2m_ram_paging_out && v->domain == d ) {
+            /* Restore gfn because it is needed by guest before evict */
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
+                       paging_mode_log_dirty(d) ? p2m_ram_logdirty : p2m_ram_rw, a);
+        } else {
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a);
+            put_request = 1;
+        }
+        /* Evict will fail now, the pager has to try another gfn */
 
-        set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a);
         audit_p2m(p2m, 1);
     }
     p2m_unlock(p2m);
 
-    /* Pause domain if request came from guest and gfn has paging type */
-    if (  p2m_is_paging(p2mt) && v->domain == d )
-    {
-        vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    }
-    /* No need to inform pager if the gfn is not in the page-out path */
-    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
-    {
-        /* gfn is already on its way back and vcpu is not paused */
+    /* One request per gfn, guest vcpus go to sleep, foreigners try again */
+    if ( put_request )
+        mem_event_put_request(d, &d->mem_event->paging, &req);
+    else
         mem_event_put_req_producers(d, &d->mem_event->paging);
-        return;
-    }
-
-    /* Send request to pager */
-    req.gfn = gfn;
-    req.p2mt = p2mt;
-    req.vcpu_id = v->vcpu_id;
-
-    mem_event_put_request(d, &d->mem_event->paging, &req);
 }
 
 /**
@@ -1062,12 +1232,11 @@ void p2m_mem_paging_resume(struct domain
         p2m_unlock(p2m);
     }
 
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
     /* Wake vcpus waiting for room in the ring */
     mem_event_wake_requesters(&d->mem_event->paging);
+
+    /* Unpause all vcpus that were paused because the gfn was paged */
+    p2m_mem_paging_wake_queue(d, rsp.gfn);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
diff -r 612f69531fd1 -r 8147822efdee xen/common/domctl.c
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -547,6 +547,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
                 goto maxvcpu_out;
         }
 
+        if ( p2m_mem_paging_init_queue(d, max) )
+            goto maxvcpu_out;
+
         ret = 0;
 
     maxvcpu_out:
diff -r 612f69531fd1 -r 8147822efdee xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -91,6 +91,9 @@ struct hvm_domain {
 
     struct viridian_domain viridian;
 
+    spinlock_t                        gfn_lock;
+    struct p2m_mem_paging_queue_head *gfn_queue;
+
     bool_t                 hap_enabled;
     bool_t                 mem_sharing_enabled;
     bool_t                 qemu_mapcache_invalidate;
diff -r 612f69531fd1 -r 8147822efdee xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -468,6 +468,8 @@ p2m_pod_offline_or_broken_replace(struct
 /* Modify p2m table for shared gfn */
 int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 
+/* Initialize per-gfn wait queue */
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max);
 /* Check if a nominated gfn is valid to be paged out */
 int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
 /* Evict a frame */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

