
[Xen-devel] [PATCH 4 of 7] Re-order calls to put_gfn() around wait queue invocations



 xen/arch/x86/hvm/emulate.c       |   2 +-
 xen/arch/x86/hvm/hvm.c           |  25 +++++++++++++------
 xen/arch/x86/mm.c                |  10 +++---
 xen/arch/x86/mm/guest_walk.c     |   2 +-
 xen/arch/x86/mm/hap/guest_walk.c |   6 +---
 xen/arch/x86/mm/mem_access.c     |  10 +++++++
 xen/arch/x86/mm/mem_event.c      |   5 +++
 xen/arch/x86/mm/p2m.c            |  52 ++++++++++++++++++++++-----------------
 xen/common/grant_table.c         |   2 +-
 xen/common/memory.c              |   2 +-
 xen/include/asm-x86/mem_access.h |   1 +
 xen/include/asm-x86/mem_event.h  |   3 ++
 xen/include/asm-x86/p2m.h        |  10 +++++--
 13 files changed, 83 insertions(+), 47 deletions(-)


Since we use wait queues to handle potential ring congestion, code paths
that try to generate a mem event while holding a gfn lock may go to sleep
in non-preemptible mode.

Most such code paths can be fixed by simply postponing event generation until
locks are released.

Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
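
To make the re-ordering concrete, the recurring pattern in the hunks below
looks roughly like this sketch (illustration only, not part of the patch;
the names are taken from the hunks). The gfn reference is dropped first, so
that p2m_mem_paging_populate() may block the vcpu on a wait queue without
holding the gfn lock:

    /* Illustration only -- drop the gfn reference before any call that may
     * sleep on a mem_event wait queue. */
    mfn = get_gfn_unshare(d, gfn, &p2mt);
    if ( p2m_is_paging(p2mt) )
    {
        put_gfn(d, gfn);                  /* release the gfn reference first */
        p2m_mem_paging_populate(d, gfn);  /* may now sleep safely            */
        return -ENOENT;                   /* caller retries once populated   */
    }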

diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -67,8 +67,8 @@ static int hvmemul_do_io(
     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(curr->domain, ram_gfn); 
         p2m_mem_paging_populate(curr->domain, ram_gfn);
-        put_gfn(curr->domain, ram_gfn); 
         return X86EMUL_RETRY;
     }
     if ( p2m_is_shared(p2mt) )
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -64,6 +64,7 @@
 #include <public/version.h>
 #include <public/memory.h>
 #include <asm/mem_event.h>
+#include <asm/mem_access.h>
 #include <public/mem_event.h>
 
 bool_t __read_mostly hvm_enabled;
@@ -363,8 +364,8 @@ static int hvm_set_ioreq_page(
     }
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(d, gmfn);
         p2m_mem_paging_populate(d, gmfn);
-        put_gfn(d, gmfn);
         return -ENOENT;
     }
     if ( p2m_is_shared(p2mt) )
@@ -1195,7 +1196,8 @@ int hvm_hap_nested_page_fault(unsigned l
     mfn_t mfn;
     struct vcpu *v = current;
     struct p2m_domain *p2m;
-    int rc, fall_through = 0;
+    int rc, fall_through = 0, paged = 0;
+    mem_event_request_t *req_ptr = NULL;
 
     /* On Nested Virtualization, walk the guest page table.
      * If this succeeds, all is fine.
@@ -1270,7 +1272,7 @@ int hvm_hap_nested_page_fault(unsigned l
         if ( violation )
         {
             if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, 
-                                        access_w, access_x) )
+                                        access_w, access_x, &req_ptr) )
             {
                 fall_through = 1;
             } else {
@@ -1297,7 +1299,7 @@ int hvm_hap_nested_page_fault(unsigned l
 #ifdef __x86_64__
     /* Check if the page has been paged out */
     if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
-        p2m_mem_paging_populate(v->domain, gfn);
+        paged = 1;
 
     /* Mem sharing: unshare the page and try again */
     if ( access_w && (p2mt == p2m_ram_shared) )
@@ -1343,6 +1345,13 @@ int hvm_hap_nested_page_fault(unsigned l
 
 out_put_gfn:
     put_gfn(p2m->domain, gfn);
+    if ( paged )
+        p2m_mem_paging_populate(v->domain, gfn);
+    if ( req_ptr )
+    {
+        mem_access_send_req(v->domain, req_ptr);
+        xfree(req_ptr);
+    }
     return rc;
 }
 
@@ -1849,8 +1858,8 @@ static void *__hvm_map_guest_frame(unsig
     }
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(d, gfn);
         p2m_mem_paging_populate(d, gfn);
-        put_gfn(d, gfn);
         return NULL;
     }
 
@@ -2325,8 +2334,8 @@ static enum hvm_copy_result __hvm_copy(
 
         if ( p2m_is_paging(p2mt) )
         {
+            put_gfn(curr->domain, gfn);
             p2m_mem_paging_populate(curr->domain, gfn);
-            put_gfn(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
         if ( p2m_is_shared(p2mt) )
@@ -3923,8 +3932,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn_t mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
+                put_gfn(d, pfn);
                 p2m_mem_paging_populate(d, pfn);
-                put_gfn(d, pfn);
                 rc = -EINVAL;
                 goto param_fail3;
             }
@@ -4040,8 +4049,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
+                put_gfn(d, pfn);
                 p2m_mem_paging_populate(d, pfn);
-                put_gfn(d, pfn);
                 rc = -EINVAL;
                 goto param_fail4;
             }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3536,8 +3536,8 @@ int do_mmu_update(
 
             if ( p2m_is_paged(p2mt) )
             {
+                put_gfn(pt_owner, gmfn);
                 p2m_mem_paging_populate(pg_owner, gmfn);
-                put_gfn(pt_owner, gmfn);
                 rc = -ENOENT;
                 break;
             }
@@ -3568,8 +3568,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l1e_p2mt) )
                     {
+                        put_gfn(pg_owner, l1egfn);
                         p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
-                        put_gfn(pg_owner, l1egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3617,8 +3617,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l2e_p2mt) )
                     {
+                        put_gfn(pg_owner, l2egfn);
                         p2m_mem_paging_populate(pg_owner, l2egfn);
-                        put_gfn(pg_owner, l2egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3652,8 +3652,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l3e_p2mt) )
                     {
+                        put_gfn(pg_owner, l3egfn);
                         p2m_mem_paging_populate(pg_owner, l3egfn);
-                        put_gfn(pg_owner, l3egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3687,8 +3687,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l4e_p2mt) )
                     {
+                        put_gfn(pg_owner, l4egfn);
                         p2m_mem_paging_populate(pg_owner, l4egfn);
-                        put_gfn(pg_owner, l4egfn);
                         rc = -ENOENT;
                         break;
                     }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -102,8 +102,8 @@ static inline void *map_domain_gfn(struc
     if ( p2m_is_paging(*p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
+        __put_gfn(p2m, gfn_x(gfn));
         p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
-        __put_gfn(p2m, gfn_x(gfn));
         *rc = _PAGE_PAGED;
         return NULL;
     }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -64,10 +64,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     if ( p2m_is_paging(p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
-
         pfec[0] = PFEC_page_paged;
         __put_gfn(p2m, top_gfn);
+        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
         return INVALID_GFN;
     }
     if ( p2m_is_shared(p2mt) )
@@ -101,10 +100,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
         if ( p2m_is_paging(p2mt) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
-            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
-
             pfec[0] = PFEC_page_paged;
             __put_gfn(p2m, gfn_x(gfn));
+            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
             return INVALID_GFN;
         }
         if ( p2m_is_shared(p2mt) )
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/mem_access.c
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -47,6 +47,16 @@ int mem_access_domctl(struct domain *d, 
     return rc;
 }
 
+int mem_access_send_req(struct domain *d, mem_event_request_t *req)
+{
+    int rc = mem_event_claim_slot(d, &d->mem_event->access);
+    if ( rc < 0 )
+        return rc;
+
+    mem_event_put_request(d, &d->mem_event->access, req);
+
+    return 0;
+} 
 
 /*
  * Local variables:
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -423,6 +423,11 @@ static int mem_event_wait_slot(struct me
     return rc;
 }
 
+bool_t mem_event_check_ring(struct mem_event_domain *med)
+{
+    return (med->ring_page != NULL);
+}
+
 /*
  * Determines whether or not the current vCPU belongs to the target domain,
  * and calls the appropriate wait function.  If it is a guest vCPU, then we
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1130,17 +1130,18 @@ void p2m_mem_paging_resume(struct domain
 }
 
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
-                          bool_t access_r, bool_t access_w, bool_t access_x)
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
     unsigned long gfn = gpa >> PAGE_SHIFT;
     struct domain *d = v->domain;    
     struct p2m_domain* p2m = p2m_get_hostp2m(d);
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
-    
+    mem_event_request_t *req;
+
     /* First, handle rx2rw conversion automatically */
     gfn_lock(p2m, gfn, 0);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
@@ -1159,7 +1160,7 @@ bool_t p2m_mem_access_check(unsigned lon
     gfn_unlock(p2m, gfn, 0);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
-    if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
+    if ( !mem_event_check_ring(&d->mem_event->access) || !req_ptr ) 
     {
         /* No listener */
         if ( p2m->access_required ) 
@@ -1183,29 +1184,34 @@ bool_t p2m_mem_access_check(unsigned lon
         }
     }
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_ACCESS;
-    req.reason = MEM_EVENT_REASON_VIOLATION;
+    *req_ptr = NULL;
+    req = xmalloc(mem_event_request_t);
+    if ( req )
+    {
+        *req_ptr = req;
+        memset(req, 0, sizeof(*req));
+        req->type = MEM_EVENT_TYPE_ACCESS;
+        req->reason = MEM_EVENT_REASON_VIOLATION;
+
+        /* Pause the current VCPU */
+        if ( p2ma != p2m_access_n2rwx )
+            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+
+        /* Send request to mem event */
+        req->gfn = gfn;
+        req->offset = gpa & ((1 << PAGE_SHIFT) - 1);
+        req->gla_valid = gla_valid;
+        req->gla = gla;
+        req->access_r = access_r;
+        req->access_w = access_w;
+        req->access_x = access_x;
+    
+        req->vcpu_id = v->vcpu_id;
+    }
 
     /* Pause the current VCPU */
     if ( p2ma != p2m_access_n2rwx )
-    {
         vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    } 
-
-    /* Send request to mem event */
-    req.gfn = gfn;
-    req.offset = gpa & ((1 << PAGE_SHIFT) - 1);
-    req.gla_valid = gla_valid;
-    req.gla = gla;
-    req.access_r = access_r;
-    req.access_w = access_w;
-    req.access_x = access_x;
-    
-    req.vcpu_id = v->vcpu_id;
-
-    mem_event_put_request(d, &d->mem_event->access, &req);
 
     /* VCPU may be paused, return whether we promoted automatically */
     return (p2ma == p2m_access_n2rwx);
diff -r 5416a3b371f2 -r f2d967332acc xen/common/grant_table.c
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -164,8 +164,8 @@ static int __get_paged_frame(unsigned lo
         *frame = mfn_x(mfn);
         if ( p2m_is_paging(p2mt) )
         {
+            put_gfn(rd, gfn);
             p2m_mem_paging_populate(rd, gfn);
-            put_gfn(rd, gfn);
             rc = GNTST_eagain;
         }
     } else {
diff -r 5416a3b371f2 -r f2d967332acc xen/common/memory.c
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -166,8 +166,8 @@ int guest_remove_page(struct domain *d, 
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
         guest_physmap_remove_page(d, gmfn, mfn, 0);
+        put_gfn(d, gmfn);
         p2m_mem_paging_drop_page(d, gmfn, p2mt);
-        put_gfn(d, gmfn);
         return 1;
     }
 #else
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/mem_access.h
--- a/xen/include/asm-x86/mem_access.h
+++ b/xen/include/asm-x86/mem_access.h
@@ -23,6 +23,7 @@
 
 int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                       XEN_GUEST_HANDLE(void) u_domctl);
+int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
 
 /*
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,6 +24,9 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__
 
+/* Returns whether a ring has been set up */
+bool_t mem_event_check_ring(struct mem_event_domain *med);
+
 /* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
  * available space. For success or -EBUSY, the vCPU may be left blocked
  * temporarily to ensure that the ring does not lose future events.  In
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -494,9 +494,12 @@ static inline void p2m_mem_paging_popula
 #ifdef __x86_64__
 /* Send mem event based on the access (gla is -1ull if not available).  Handles
  * the rw2rx conversion. Boolean return value indicates if access rights have 
- * been promoted with no underlying vcpu pause. */
+ * been promoted with no underlying vcpu pause. If the req_ptr has been populated,
+ * then the caller must put the event in the ring (once having released get_gfn*
+ * locks) -- the caller must also xfree the request. */
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
-                          bool_t access_r, bool_t access_w, bool_t access_x);
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr);
 /* Resumes the running of the VCPU, restarting the last instruction */
 void p2m_mem_access_resume(struct domain *d);
 
@@ -513,7 +516,8 @@ int p2m_get_mem_access(struct domain *d,
 #else
 static inline bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, 
                                         unsigned long gla, bool_t access_r, 
-                                        bool_t access_w, bool_t access_x)
+                                        bool_t access_w, bool_t access_x,
+                                        mem_event_request_t **req_ptr)
 { return 1; }
 static inline int p2m_set_mem_access(struct domain *d, 
                                      unsigned long start_pfn, 

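The mem_access path cannot simply be re-ordered, because the check itself runs
under the gfn lock; instead the request is handed back to the caller and sent
once the lock has been dropped. A rough sketch of the resulting calling
convention, using the same names as the hvm.c hunk above (illustration only):

    mem_event_request_t *req_ptr = NULL;

    /* Under the gfn lock: may allocate and fill a request via *req_ptr. */
    p2m_mem_access_check(gpa, gla_valid, gla, access_r, access_w, access_x,
                         &req_ptr);

    put_gfn(p2m->domain, gfn);        /* release the gfn reference ...       */

    if ( req_ptr )                    /* ... then deliver the deferred event */
    {
        mem_access_send_req(v->domain, req_ptr);  /* may sleep on a full ring */
        xfree(req_ptr);
    }
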
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
