[Xen-devel] [PATCH 2 of 4] xenpaging: use wait queues
# HG changeset patch
# User Olaf Hering <olaf@xxxxxxxxx>
# Date 1322737507 -3600
# Node ID 8147822efdee65d1f5b94656ab2032aedb76979f
# Parent 612f69531fd15cf59c58404f6e4762733a9a268c
xenpaging: use wait queues

Use a wait queue to put a guest vcpu to sleep while the requested gfn is
in paging state. This adds missing p2m_mem_paging_populate() calls to some
callers of the new get_gfn* variants, which would crash now because they
get an invalid mfn. It also fixes guest crashes due to unexpected returns
from do_memory_op because copy_to/from_guest ran into a paged gfn. Now
those places will always get a valid mfn.

Since each gfn could be requested by several guest vcpus at the same time,
a queue of paged gfns is maintained. Each vcpu is attached to that queue.
Once p2m_mem_paging_resume() has restored the gfn, the waiting vcpus resume
execution.

There is untested code in p2m_mem_paging_init_queue() to allow cpu hotplug.
Since each vcpu may wait on a different gfn, there have to be as many queues
as vcpus. But xl vcpu-set does not seem to work right now, so this code path
cannot be exercised yet.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

diff -r 612f69531fd1 -r 8147822efdee xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -454,6 +454,8 @@ int hvm_domain_initialise(struct domain
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    spin_lock_init(&d->arch.hvm_domain.gfn_lock);
+
     INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
     spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
diff -r 612f69531fd1 -r 8147822efdee xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -30,6 +30,7 @@
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
 #include <xen/iommu.h>
+#include <xen/wait.h>
 #include <asm/mem_event.h>
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
@@ -144,6 +145,182 @@ void p2m_change_entry_type_global(struct
     p2m_unlock(p2m);
 }
 
+#ifdef __x86_64__
+struct p2m_mem_paging_queue {
+    struct list_head list;
+    struct waitqueue_head wq;
+    unsigned long gfn;
+    unsigned short waiters;
+    unsigned short woken;
+    unsigned short index;
+};
+
+struct p2m_mem_paging_queue_head {
+    struct list_head list;
+    unsigned int max;
+};
+
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q;
+    unsigned int i, nr;
+    int ret = 0;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    if (!d->arch.hvm_domain.gfn_queue) {
+        ret = -ENOMEM;
+        h = xzalloc(struct p2m_mem_paging_queue_head);
+        if (!h) {
+            domain_crash(d);
+            goto out;
+        }
+
+        INIT_LIST_HEAD(&h->list);
+        nr = max;
+    } else {
+        h = d->arch.hvm_domain.gfn_queue;
+        if (max <= h->max)
+            goto out;
+        nr = max - h->max;
+    }
+
+    ret = -ENOMEM;
+    q = xzalloc_array(struct p2m_mem_paging_queue, nr);
+    if (!q) {
+        if (!d->arch.hvm_domain.gfn_queue)
+            xfree(h);
+        domain_crash(d);
+        goto out;
+    }
+
+    for (i = 0; i < nr; i++) {
+        init_waitqueue_head(&q[i].wq);
+        INIT_LIST_HEAD(&q[i].list);
+        q[i].index = h->max + i + 1;
+        list_add_tail(&q[i].list, &h->list);
+    }
+
+    h->max = max;
+    d->arch.hvm_domain.gfn_queue = h;
+    ret = 0;
+
+out:
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+    return ret;
+}
+
+static struct p2m_mem_paging_queue *p2m_mem_paging_get_queue(struct domain *d, unsigned long gfn)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q, *q_match, *q_free;
+
+    h = d->arch.hvm_domain.gfn_queue;
+    q_match = q_free = NULL;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    list_for_each_entry(q, &h->list, list) {
+        if (q->gfn == gfn) {
+            q_match = q;
+            break;
+        }
+        if (!q_free && !q->waiters)
+            q_free = q;
+    }
+
+    if (!q_match && q_free)
+        q_match = q_free;
+
+    if (q_match) {
+        if (q_match->woken)
+            printk("wq woken for gfn %u:%u %lx %u %u %u\n", current->domain->domain_id, current->vcpu_id, gfn, q_match->index, q_match->woken, q_match->waiters);
+        q_match->waiters++;
+        q_match->gfn = gfn;
+    }
+
+    if (!q_match)
+        printk("No wq_get for gfn %u:%u %lx\n", current->domain->domain_id, current->vcpu_id, gfn);
+
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+    return q_match;
+}
+
+static void p2m_mem_paging_put_queue(struct domain *d, struct p2m_mem_paging_queue *q_match)
+{
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    if (q_match->waiters == 0)
+        printk("wq_put no waiters, gfn %u:%u %lx %u\n", current->domain->domain_id, current->vcpu_id, q_match->gfn, q_match->woken);
+    else if (--q_match->waiters == 0)
+        q_match->gfn = q_match->woken = 0;
+
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+static void p2m_mem_paging_wake_queue(struct domain *d, unsigned long gfn)
+{
+    struct p2m_mem_paging_queue_head *h;
+    struct p2m_mem_paging_queue *q, *q_match = NULL;
+
+    spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+    h = d->arch.hvm_domain.gfn_queue;
+    list_for_each_entry(q, &h->list, list) {
+        if (q->gfn == gfn) {
+            q_match = q;
+            break;
+        }
+    }
+    if (q_match) {
+        if (q_match->woken || q_match->waiters == 0)
+            printk("Wrong wake for gfn %u:%u %p %lx %u %u\n", current->domain->domain_id, current->vcpu_id, q_match, gfn, q_match->woken, q_match->waiters);
+        q_match->woken++;
+        wake_up_all(&q_match->wq);
+    }
+    spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+/* Returns 0 if the gfn is still paged */
+static int p2m_mem_paging_get_entry(mfn_t *mfn,
+                    struct p2m_domain *p2m, unsigned long gfn,
+                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+                    unsigned int *page_order)
+{
+    *mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
+
+    return p2m_is_paging(*t) ? 0 : 1;
+}
+
+/* Go to sleep in case of guest access */
+static void p2m_mem_paging_wait(mfn_t *mfn,
+                    struct p2m_domain *p2m, unsigned long gfn,
+                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+                    unsigned int *page_order)
+{
+    struct p2m_mem_paging_queue *pmpq;
+
+    /* Return p2mt as is in case of query */
+    if ( q == p2m_query )
+        return;
+    /* Foreign domains can not go to sleep */
+    if ( current->domain != p2m->domain )
+        return;
+
+    pmpq = p2m_mem_paging_get_queue(p2m->domain, gfn);
+    if ( !pmpq )
+        return;
+
+    /* Populate the page once */
+    if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged )
+        p2m_mem_paging_populate(p2m->domain, gfn);
+
+    wait_event(pmpq->wq, p2m_mem_paging_get_entry(mfn, p2m, gfn, t, a, q, page_order));
+    p2m_mem_paging_put_queue(p2m->domain, pmpq);
+}
+#endif
+
 mfn_t get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                     unsigned int *page_order)
@@ -161,6 +338,11 @@ mfn_t get_gfn_type_access(struct p2m_dom
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
 
 #ifdef __x86_64__
+    if ( unlikely(p2m_is_paging(*t)) )
+        p2m_mem_paging_wait(&mfn, p2m, gfn, t, a, q, page_order);
+#endif
+
+#ifdef __x86_64__
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
@@ -914,54 +1096,42 @@ void p2m_mem_paging_drop_page(struct dom
 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
+    mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
     p2m_type_t p2mt;
     p2m_access_t a;
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int put_request = 0;
 
     /* Check that there's space on the ring for this request */
     if ( mem_event_check_ring(d, &d->mem_event->paging) )
         return;
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_PAGING;
-
     /* Fix p2m mapping */
     p2m_lock(p2m);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
-    /* Allow only nominated or evicted pages to enter page-in path */
-    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
-    {
-        /* Evict will fail now, tag this request for pager */
-        if ( p2mt == p2m_ram_paging_out )
-            req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
+    /* Forward the state only if gfn is in page-out path */
+    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged ) {
+        /* Ignore foreign requests to allow mmap in pager */
+        if ( mfn_valid(mfn) && p2mt == p2m_ram_paging_out && v->domain == d ) {
+            /* Restore gfn because it is needed by guest before evict */
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
+                paging_mode_log_dirty(d) ? p2m_ram_logdirty : p2m_ram_rw, a);
+        } else {
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a);
+            put_request = 1;
+        }
+        /* Evict will fail now, the pager has to try another gfn */
-        set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a);
         audit_p2m(p2m, 1);
     }
     p2m_unlock(p2m);
 
-    /* Pause domain if request came from guest and gfn has paging type */
-    if ( p2m_is_paging(p2mt) && v->domain == d )
-    {
-        vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    }
-    /* No need to inform pager if the gfn is not in the page-out path */
-    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
-    {
-        /* gfn is already on its way back and vcpu is not paused */
+    /* One request per gfn, guest vcpus go to sleep, foreigners try again */
+    if ( put_request )
+        mem_event_put_request(d, &d->mem_event->paging, &req);
+    else
         mem_event_put_req_producers(d, &d->mem_event->paging);
-        return;
-    }
-
-    /* Send request to pager */
-    req.gfn = gfn;
-    req.p2mt = p2mt;
-    req.vcpu_id = v->vcpu_id;
-
-    mem_event_put_request(d, &d->mem_event->paging, &req);
 }
 
 /**
@@ -1062,12 +1232,11 @@ void p2m_mem_paging_resume(struct domain
         p2m_unlock(p2m);
     }
 
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
     /* Wake vcpus waiting for room in the ring */
     mem_event_wake_requesters(&d->mem_event->paging);
+
+    /* Unpause all vcpus that were paused because the gfn was paged */
+    p2m_mem_paging_wake_queue(d, rsp.gfn);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
diff -r 612f69531fd1 -r 8147822efdee xen/common/domctl.c
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -547,6 +547,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
                 goto maxvcpu_out;
         }
 
+        if ( p2m_mem_paging_init_queue(d, max) )
+            goto maxvcpu_out;
+
         ret = 0;
 
     maxvcpu_out:
diff -r 612f69531fd1 -r 8147822efdee xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -91,6 +91,9 @@ struct hvm_domain {
 
     struct viridian_domain viridian;
 
+    spinlock_t             gfn_lock;
+    struct p2m_mem_paging_queue_head *gfn_queue;
+
     bool_t                 hap_enabled;
     bool_t                 mem_sharing_enabled;
     bool_t                 qemu_mapcache_invalidate;
diff -r 612f69531fd1 -r 8147822efdee xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -468,6 +468,8 @@ p2m_pod_offline_or_broken_replace(struct
 /* Modify p2m table for shared gfn */
 int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 
+/* Initialize per-gfn wait queue */
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max);
 /* Check if a nominated gfn is valid to be paged out */
 int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
 /* Evict a frame */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
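
For reference, the sleep/wake mechanism the patch builds on is the generic
wait-queue API from xen/wait.h. The following is a minimal sketch of that
flow, independent of the patch and with one global queue instead of the
per-gfn queues above; the demo_* names are hypothetical and exist only for
illustration, while init_waitqueue_head(), wait_event(), wake_up_all() and
p2m_mem_paging_populate() are the interfaces the patch itself uses:

    /* Minimal sketch (not part of the patch): one global queue instead of
     * the per-gfn queues the patch maintains. All demo_* names are
     * hypothetical. */
    #include <xen/wait.h>
    #include <xen/sched.h>
    #include <asm/p2m.h>

    static struct waitqueue_head demo_queue;

    static void demo_init(void)
    {
        init_waitqueue_head(&demo_queue);
    }

    /* Hypothetical predicate, stubbed out here; in the patch the condition
     * is p2m_mem_paging_get_entry(), i.e. "the gfn no longer has a paging
     * type". */
    static int demo_gfn_present(struct domain *d, unsigned long gfn)
    {
        return 1;   /* placeholder so the sketch is self-contained */
    }

    /* Guest vcpu path: ask the pager once, then sleep until the gfn is back. */
    static void demo_touch_paged_gfn(struct domain *d, unsigned long gfn)
    {
        p2m_mem_paging_populate(d, gfn);    /* send one request to the pager */
        wait_event(demo_queue, demo_gfn_present(d, gfn));  /* vcpu sleeps here */
    }

    /* Pager response path (what p2m_mem_paging_resume() does per gfn). */
    static void demo_gfn_resumed(void)
    {
        wake_up_all(&demo_queue);   /* waiters re-evaluate the predicate */
    }

The patch applies the same pattern per gfn: a vcpu faulting on a paged gfn
attaches to that gfn's queue and sleeps in wait_event() until
p2m_mem_paging_resume() calls p2m_mem_paging_wake_queue(), with the repeated
p2m lookup in p2m_mem_paging_get_entry() serving as the wake-up condition.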