Re: [Xen-devel] [RFC PATCH 4/6] vm_event: Use slotted channels for sync requests.
> -----Original Message-----
> From: Xen-devel [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxxx] On Behalf
> Of Petre Pircalabu
> Sent: 19 December 2018 18:52
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>; Stefano Stabellini
> <sstabellini@xxxxxxxxxx>; Wei Liu <wei.liu2@xxxxxxxxxx>; Razvan Cojocaru
> <rcojocaru@xxxxxxxxxxxxxxx>; Konrad Rzeszutek Wilk
> <konrad.wilk@xxxxxxxxxx>; George Dunlap <George.Dunlap@xxxxxxxxxx>; Andrew
> Cooper <Andrew.Cooper3@xxxxxxxxxx>; Ian Jackson <Ian.Jackson@xxxxxxxxxx>;
> Tim (Xen.org) <tim@xxxxxxx>; Julien Grall <julien.grall@xxxxxxx>; Tamas K
> Lengyel <tamas@xxxxxxxxxxxxx>; Jan Beulich <jbeulich@xxxxxxxx>; Roger Pau
> Monne <roger.pau@xxxxxxxxxx>
> Subject: [Xen-devel] [RFC PATCH 4/6] vm_event: Use slotted channels for
> sync requests.
>
> In high-throughput introspection scenarios where many monitor vm_events
> are generated, the ring buffer can fill up before the monitor application
> gets a chance to handle all the requests, thus blocking other vcpus,
> which then have to wait for a slot to become available.
>
> This patch adds support for a different mechanism to handle synchronous
> vm_event requests / responses. As each synchronous request pauses the
> vcpu until the corresponding response is handled, it can be stored in a
> slotted memory buffer (one slot per vcpu) shared between the hypervisor
> and the controlling domain. Asynchronous vm_event requests will still be
> sent to the controlling domain using a ring buffer, but without blocking
> the vcpu, as no response is required.
>
> The memory for the asynchronous ring and the synchronous channels will be
> allocated from the domheap and mapped into the controlling domain using
> the foreignmemory_map_resource interface. Unlike the current
> implementation, the allocated pages are not part of the target DomU, so
> they will not be reclaimed when the vm_event domain is disabled.

Why re-invent the wheel here? The ioreq infrastructure already does pretty
much everything you need AFAICT.

  Paul

> Signed-off-by: Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>
> ---
>  tools/libxc/include/xenctrl.h |  11 +
>  tools/libxc/xc_monitor.c      |  36 +++
>  tools/libxc/xc_private.h      |  14 ++
>  tools/libxc/xc_vm_event.c     |  74 +++++-
>  xen/arch/x86/mm.c             |   7 +
>  xen/common/vm_event.c         | 515 ++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/domctl.h   |  25 +-
>  xen/include/public/memory.h   |   2 +
>  xen/include/public/vm_event.h |  15 ++
>  xen/include/xen/vm_event.h    |   4 +
>  10 files changed, 660 insertions(+), 43 deletions(-)
>
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h > index de0b990..fad8bc4 100644 > --- a/tools/libxc/include/xenctrl.h > +++ b/tools/libxc/include/xenctrl.h > @@ -2012,6 +2012,17 @@ int xc_get_mem_access(xc_interface *xch, uint32_t > domain_id, > * Caller has to unmap this page when done.
> */ > void *xc_monitor_enable(xc_interface *xch, uint32_t domain_id, uint32_t > *port); > + > +struct xenforeignmemory_resource_handle *xc_monitor_enable_ex( > + xc_interface *xch, > + uint32_t domain_id, > + void **_ring_buffer, > + uint32_t ring_frames, > + uint32_t *ring_port, > + void **_sync_buffer, > + uint32_t *sync_ports, > + uint32_t nr_sync_channels); > + > int xc_monitor_disable(xc_interface *xch, uint32_t domain_id); > int xc_monitor_resume(xc_interface *xch, uint32_t domain_id); > /* > diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c > index 718fe8b..4ceb528 100644 > --- a/tools/libxc/xc_monitor.c > +++ b/tools/libxc/xc_monitor.c > @@ -49,6 +49,42 @@ void *xc_monitor_enable(xc_interface *xch, uint32_t > domain_id, uint32_t *port) > return buffer; > } > > +struct xenforeignmemory_resource_handle *xc_monitor_enable_ex( > + xc_interface *xch, > + uint32_t domain_id, > + void **_ring_buffer, > + uint32_t ring_frames, > + uint32_t *ring_port, > + void **_sync_buffer, > + uint32_t *sync_ports, > + uint32_t nr_sync_channels) > +{ > + xenforeignmemory_resource_handle *fres; > + int saved_errno; > + > + /* Pause the domain for ring page setup */ > + if ( xc_domain_pause(xch, domain_id) ) > + { > + PERROR("Unable to pause domain\n"); > + return NULL; > + } > + > + fres = xc_vm_event_enable_ex(xch, domain_id, > XEN_VM_EVENT_TYPE_MONITOR, > + _ring_buffer, ring_frames, ring_port, > + _sync_buffer, sync_ports, > nr_sync_channels); > + > + saved_errno = errno; > + if ( xc_domain_unpause(xch, domain_id) ) > + { > + if ( fres ) > + saved_errno = errno; > + PERROR("Unable to unpause domain"); > + } > + > + errno = saved_errno; > + return fres; > +} > + > int xc_monitor_disable(xc_interface *xch, uint32_t domain_id) > { > return xc_vm_event_control(xch, domain_id, > diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h > index 482451c..1f70223 100644 > --- a/tools/libxc/xc_private.h > +++ b/tools/libxc/xc_private.h > @@ -420,6 +420,20 @@ int xc_vm_event_control(xc_interface *xch, uint32_t > domain_id, unsigned int op, > void *xc_vm_event_enable(xc_interface *xch, uint32_t domain_id, int type, > uint32_t *port); > > +/* > + * Enables vm_event for using the xenforeignmemory_map_resource > interface. > + * The vm_event type can be XEN_VM_EVENT_TYPE_(PAGING/MONITOR/SHARING). > + * > + * The function returns: > + * - A ring for asynchronous vm_events. 
> + * - A slotted buffer for synchronous vm_events (one slot per vcpu) > + * - xenforeignmemory_resource_handle used exclusively for resource > cleanup > + */ > +xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface > *xch, > + uint32_t domain_id, int type, > + void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port, > + void **_sync_buffer, uint32_t *sync_ports, uint32_t > nr_sync_channels); > + > int do_dm_op(xc_interface *xch, uint32_t domid, unsigned int nr_bufs, > ...); > > #endif /* __XC_PRIVATE_H__ */ > diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c > index 4fc2548..0a976b4 100644 > --- a/tools/libxc/xc_vm_event.c > +++ b/tools/libxc/xc_vm_event.c > @@ -22,6 +22,12 @@ > > #include "xc_private.h" > > +#include <xen/vm_event.h> > + > +#ifndef PFN_UP > +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) > +#endif /* PFN_UP */ > + > int xc_vm_event_control(xc_interface *xch, uint32_t domain_id, unsigned > int op, > unsigned int type) > { > @@ -120,7 +126,7 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t > domain_id, int type, > goto out; > } > > - *port = domctl.u.vm_event_op.port; > + *port = domctl.u.vm_event_op.u.enable.port; > > /* Remove the ring_pfn from the guest's physmap */ > rc = xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0, > &ring_pfn); > @@ -138,6 +144,72 @@ void *xc_vm_event_enable(xc_interface *xch, uint32_t > domain_id, int type, > return ring_page; > } > > +xenforeignmemory_resource_handle *xc_vm_event_enable_ex(xc_interface > *xch, > + uint32_t domain_id, int type, > + void **_ring_buffer, uint32_t ring_frames, uint32_t *ring_port, > + void **_sync_buffer, uint32_t *sync_ports, uint32_t nr_sync_channels) > +{ > + DECLARE_DOMCTL; > + DECLARE_HYPERCALL_BOUNCE(sync_ports, nr_sync_channels * > sizeof(uint32_t), > + XC_HYPERCALL_BUFFER_BOUNCE_OUT); > + xenforeignmemory_resource_handle *fres; > + unsigned long nr_frames; > + void *buffer; > + > + if ( !_ring_buffer || !ring_port || !_sync_buffer || !sync_ports ) > + { > + errno = EINVAL; > + return NULL; > + } > + > + nr_frames = ring_frames + PFN_UP(nr_sync_channels * sizeof(struct > vm_event_slot)); > + > + fres = xenforeignmemory_map_resource(xch->fmem, domain_id, > + XENMEM_resource_vm_event, type, > 0, > + nr_frames, &buffer, > + PROT_READ | PROT_WRITE, 0); > + if ( !fres ) > + { > + PERROR("Could not map the vm_event pages\n"); > + return NULL; > + } > + > + domctl.cmd = XEN_DOMCTL_vm_event_op; > + domctl.domain = domain_id; > + domctl.u.vm_event_op.op = XEN_VM_EVENT_GET_PORTS; > + domctl.u.vm_event_op.type = type; > + > + if ( xc_hypercall_bounce_pre(xch, sync_ports) ) > + { > + PERROR("Could not bounce memory for XEN_DOMCTL_vm_event_op"); > + errno = ENOMEM; > + return NULL; > + } > + > + set_xen_guest_handle(domctl.u.vm_event_op.u.get_ports.sync, > sync_ports); > + > + if ( do_domctl(xch, &domctl) ) > + { > + PERROR("Failed to get vm_event ports\n"); > + goto out; > + } > + > + xc_hypercall_bounce_post(xch, sync_ports); > + *ring_port = domctl.u.vm_event_op.u.get_ports.async; > + > + *_sync_buffer = buffer + ring_frames * PAGE_SIZE; > + *_ring_buffer = buffer; > + > + return fres; > + > +out: > + xc_hypercall_bounce_post(xch, sync_ports); > + if ( fres ) > + xenforeignmemory_unmap_resource(xch->fmem, fres); > + return NULL; > +} > + > + > /* > * Local variables: > * mode: C > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c > index 1431f34..256c63b 100644 > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -103,6 +103,7 @@ > #include 
<xen/efi.h> > #include <xen/grant_table.h> > #include <xen/hypercall.h> > +#include <xen/vm_event.h> > #include <asm/paging.h> > #include <asm/shadow.h> > #include <asm/page.h> > @@ -4469,6 +4470,12 @@ int arch_acquire_resource(struct domain *d, > unsigned int type, > } > #endif > > + case XENMEM_resource_vm_event: > + { > + rc = vm_event_get_frames(d, id, frame, nr_frames, mfn_list); > + break; > + } > + > default: > rc = -EOPNOTSUPP; > break; > diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c > index 77da41b..a2712a0 100644 > --- a/xen/common/vm_event.c > +++ b/xen/common/vm_event.c > @@ -28,6 +28,8 @@ > #include <asm/p2m.h> > #include <asm/monitor.h> > #include <asm/vm_event.h> > +#include <xen/guest_access.h> > +#include <xen/vmap.h> > #include <xsm/xsm.h> > > /* for public/io/ring.h macros */ > @@ -40,6 +42,7 @@ > #define vm_event_unlock(_ved) spin_unlock(&(_ved)->lock) > > #define to_vm_event_domain_ring(_ved) container_of(_ved, struct > vm_event_domain_ring, ved) > +#define to_vm_event_domain_channel(_ved) container_of(_ved, struct > vm_event_domain_channel, ved) > > struct vm_event_domain > { > @@ -48,7 +51,8 @@ struct vm_event_domain > int (*claim_slot)(struct vm_event_domain *ved, bool allow_sleep); > void (*release_slot)(struct vm_event_domain *ved); > void (*put_request)(struct vm_event_domain *ved, vm_event_request_t > *req); > - int (*get_response)(struct vm_event_domain *ved, vm_event_response_t > *rsp); > + int (*get_response)(struct vm_event_domain *ved, struct vcpu *v, > + unsigned int port, vm_event_response_t *rsp); > int (*disable)(struct vm_event_domain **_ved); > > /* The domain associated with the VM event */ > @@ -58,11 +62,6 @@ struct vm_event_domain > spinlock_t lock; > }; > > -bool vm_event_check(struct vm_event_domain *ved) > -{ > - return (ved && ved->check(ved)); > -} > - > /* VM event domain ring implementation */ > struct vm_event_domain_ring > { > @@ -78,22 +77,57 @@ struct vm_event_domain_ring > vm_event_front_ring_t front_ring; > /* event channel port (vcpu0 only) */ > int xen_port; > - /* vm_event bit for vcpu->pause_flags */ > - int pause_flag; > /* list of vcpus waiting for room in the ring */ > struct waitqueue_head wq; > /* the number of vCPUs blocked */ > unsigned int blocked; > + /* vm_event bit for vcpu->pause_flags */ > + int pause_flag; > /* The last vcpu woken up */ > unsigned int last_vcpu_wake_up; > }; > > +struct vm_event_buffer > +{ > + void *va; > + unsigned int nr_frames; > + mfn_t mfn[0]; > +}; > + > +struct vm_event_domain_channel > +{ > + /* VM event domain */ > + struct vm_event_domain ved; > + /* ring for asynchronous vm events */ > + struct vm_event_buffer *ring; > + /* front-end ring */ > + vm_event_front_ring_t front_ring; > + /* per vcpu channels for synchronous vm events */ > + struct vm_event_buffer *channels; > + /* > + * event channels ports > + * - one per vcpu for the synchronous channels. > + * - one for the asynchronous ring. 
> + */ > + uint32_t xen_ports[0]; > +}; > + > +bool vm_event_check(struct vm_event_domain *ved) > +{ > + return (ved && ved->check(ved)); > +} > + > static bool vm_event_ring_check(struct vm_event_domain *ved) > { > struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved); > return impl->ring_page != NULL; > } > > +static bool is_vm_event_domain_ring(struct vm_event_domain *ved) > +{ > + return ved->check == vm_event_ring_check; > +} > + > static unsigned int vm_event_ring_available(struct vm_event_domain_ring > *ved) > { > int avail_req = RING_FREE_REQUESTS(&ved->front_ring); > @@ -317,12 +351,15 @@ static void vm_event_ring_put_request(struct > vm_event_domain *ved, > notify_via_xen_event_channel(d, impl->xen_port); > } > > -static int vm_event_ring_get_response(struct vm_event_domain *ved, > - vm_event_response_t *rsp) > +static int vm_event_ring_get_response( > + struct vm_event_domain *ved, > + struct vcpu *v, > + unsigned int port, > + vm_event_response_t *rsp) > { > vm_event_front_ring_t *front_ring; > RING_IDX rsp_cons; > - struct vm_event_domain_ring *impl = (struct vm_event_domain_ring > *)ved; > + struct vm_event_domain_ring *impl = to_vm_event_domain_ring(ved); > > vm_event_lock(ved); > > @@ -332,7 +369,7 @@ static int vm_event_ring_get_response(struct > vm_event_domain *ved, > if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) ) > { > vm_event_unlock(ved); > - return 0; > + return -1; > } > > /* Copy response */ > @@ -353,6 +390,35 @@ static int vm_event_ring_get_response(struct > vm_event_domain *ved, > } > > /* > + * The response is received only from the sync channels > + */ > +static int vm_event_channel_get_response( > + struct vm_event_domain *ved, > + struct vcpu *v, > + unsigned int port, > + vm_event_response_t *rsp) > +{ > + struct vm_event_domain_channel *impl = > to_vm_event_domain_channel(ved); > + struct vm_event_slot *slot = impl->channels->va + v->vcpu_id * > sizeof(struct vm_event_slot); > + > + vm_event_lock(ved); > + > + if ( slot->state != VM_EVENT_SLOT_STATE_FINISH ) > + { > + gdprintk(XENLOG_G_WARNING, "The VM event slot state for d%dv%d is > invalid.\n", > + ved->d->domain_id, v->vcpu_id); > + vm_event_unlock(ved); > + return -1; > + } > + > + memcpy(rsp, &slot->u.rsp, sizeof(*rsp)); > + slot->state = VM_EVENT_SLOT_STATE_IDLE; > + > + vm_event_unlock(ved); > + return 0; > +} > + > +/* > * Pull all responses from the given ring and unpause the corresponding > vCPU > * if required. Based on the response type, here we can also call custom > * handlers. > @@ -360,10 +426,11 @@ static int vm_event_ring_get_response(struct > vm_event_domain *ved, > * Note: responses are handled the same way regardless of which ring they > * arrive on. > */ > -static int vm_event_resume(struct vm_event_domain *ved) > +static int vm_event_resume(struct vm_event_domain *ved, struct vcpu *v, > unsigned int port) > { > vm_event_response_t rsp; > struct domain *d; > + int rc; > > if (! vm_event_check(ved)) > return -ENODEV; > @@ -380,22 +447,25 @@ static int vm_event_resume(struct vm_event_domain > *ved) > */ > ASSERT(d != current->domain); > > - /* Pull all responses off the ring. */ > - while ( ved->get_response(ved, &rsp) ) > + /* Loop until all available responses are read. 
*/ > + do > { > - struct vcpu *v; > + struct vcpu *rsp_v; > + rc = ved->get_response(ved, v, port, &rsp); > + if ( rc < 0 ) > + break; > > if ( rsp.version != VM_EVENT_INTERFACE_VERSION ) > { > printk(XENLOG_G_WARNING "vm_event interface version > mismatch\n"); > - continue; > + goto end_loop; > } > > /* Validate the vcpu_id in the response. */ > if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] ) > - continue; > + goto end_loop; > > - v = d->vcpu[rsp.vcpu_id]; > + rsp_v = d->vcpu[rsp.vcpu_id]; > > /* > * In some cases the response type needs extra handling, so here > @@ -403,7 +473,7 @@ static int vm_event_resume(struct vm_event_domain > *ved) > */ > > /* Check flags which apply only when the vCPU is paused */ > - if ( atomic_read(&v->vm_event_pause_count) ) > + if ( atomic_read(&rsp_v->vm_event_pause_count) ) > { > #ifdef CONFIG_HAS_MEM_PAGING > if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING ) > @@ -415,34 +485,36 @@ static int vm_event_resume(struct vm_event_domain > *ved) > * has to set arch-specific flags when supported, and to > avoid > * bitmask overhead when it isn't supported. > */ > - vm_event_emulate_check(v, &rsp); > + vm_event_emulate_check(rsp_v, &rsp); > > /* > * Check in arch-specific handler to avoid bitmask overhead > when > * not supported. > */ > - vm_event_register_write_resume(v, &rsp); > + vm_event_register_write_resume(rsp_v, &rsp); > > /* > * Check in arch-specific handler to avoid bitmask overhead > when > * not supported. > */ > - vm_event_toggle_singlestep(d, v, &rsp); > + vm_event_toggle_singlestep(d, rsp_v, &rsp); > > /* Check for altp2m switch */ > if ( rsp.flags & VM_EVENT_FLAG_ALTERNATE_P2M ) > - p2m_altp2m_check(v, rsp.altp2m_idx); > + p2m_altp2m_check(rsp_v, rsp.altp2m_idx); > > if ( rsp.flags & VM_EVENT_FLAG_SET_REGISTERS ) > - vm_event_set_registers(v, &rsp); > + vm_event_set_registers(rsp_v, &rsp); > > if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT ) > - vm_event_monitor_next_interrupt(v); > + vm_event_monitor_next_interrupt(rsp_v); > > if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED ) > - vm_event_vcpu_unpause(v); > + vm_event_vcpu_unpause(rsp_v); > } > +end_loop: ; > } > + while ( rc > 0 ); > > return 0; > } > @@ -527,28 +599,28 @@ int __vm_event_claim_slot(struct vm_event_domain > *ved, bool allow_sleep) > if ( !vm_event_check(ved) ) > return -EOPNOTSUPP; > > - return ved->claim_slot(ved, allow_sleep); > + return (ved->claim_slot) ? ved->claim_slot(ved, allow_sleep) : 0; > } > > #ifdef CONFIG_HAS_MEM_PAGING > /* Registered with Xen-bound event channel for incoming notifications. */ > static void mem_paging_notification(struct vcpu *v, unsigned int port) > { > - vm_event_resume(v->domain->vm_event_paging); > + vm_event_resume(v->domain->vm_event_paging, v, port); > } > #endif > > /* Registered with Xen-bound event channel for incoming notifications. */ > static void monitor_notification(struct vcpu *v, unsigned int port) > { > - vm_event_resume(v->domain->vm_event_monitor); > + vm_event_resume(v->domain->vm_event_monitor, v, port); > } > > #ifdef CONFIG_HAS_MEM_SHARING > /* Registered with Xen-bound event channel for incoming notifications. */ > static void mem_sharing_notification(struct vcpu *v, unsigned int port) > { > - vm_event_resume(v->domain->vm_event_share); > + vm_event_resume(v->domain->vm_event_share, v, port); > } > #endif > > @@ -565,19 +637,24 @@ void vm_event_cleanup(struct domain *d) > * Finally, because this code path involves previously > * pausing the domain (domain_kill), unpausing the > * vcpus causes no harm. 
*/ > - destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_paging)->wq); > + if ( is_vm_event_domain_ring(d->vm_event_paging) ) > + destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_paging)->wq); > (void)vm_event_disable(&d->vm_event_paging); > } > #endif > + > if ( vm_event_check(d->vm_event_monitor) ) > { > - destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_monitor)->wq); > + if ( is_vm_event_domain_ring(d->vm_event_monitor) ) > + destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_monitor)->wq); > (void)vm_event_disable(&d->vm_event_monitor); > } > + > #ifdef CONFIG_HAS_MEM_SHARING > if ( vm_event_check(d->vm_event_share) ) > { > - destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_share)->wq); > + if ( is_vm_event_domain_ring(d->vm_event_share) ) > + destroy_waitqueue_head(&to_vm_event_domain_ring(d- > >vm_event_share)->wq); > (void)vm_event_disable(&d->vm_event_share); > } > #endif > @@ -641,7 +718,7 @@ static int vm_event_ring_enable( > if ( rc < 0 ) > goto err; > > - impl->xen_port = vec->port = rc; > + impl->xen_port = vec->u.enable.port = rc; > > /* Prepare ring buffer */ > FRONT_RING_INIT(&impl->front_ring, > @@ -668,6 +745,294 @@ static int vm_event_ring_enable( > return rc; > } > > +/* > + * Helper functions for allocating / freeing vm_event buffers > + */ > +static int vm_event_alloc_buffer(struct domain *d, unsigned int > nr_frames, > + struct vm_event_buffer **_veb) > +{ > + struct vm_event_buffer *veb; > + int i = 0, rc; > + > + veb = _xzalloc(sizeof(struct vm_event_buffer) + nr_frames * > sizeof(mfn_t), > + __alignof__(struct vm_event_buffer)); > + if ( unlikely(!veb) ) > + { > + rc = -ENOMEM; > + goto err; > + } > + > + veb->nr_frames = nr_frames; > + > + for ( i = 0; i < nr_frames; i++ ) > + { > + struct page_info *page = alloc_domheap_page(d, 0); > + > + if ( !page ) > + { > + rc = -ENOMEM; > + goto err; > + } > + > + if ( !get_page_and_type(page, d, PGT_writable_page) ) > + { > + domain_crash(d); > + rc = -ENODATA; > + goto err; > + } > + > + veb->mfn[i] = page_to_mfn(page); > + } > + > + veb->va = vmap(veb->mfn, nr_frames); > + if ( !veb->va ) > + { > + rc = -ENOMEM; > + goto err; > + } > + > + for( i = 0; i < nr_frames; i++ ) > + clear_page(veb->va + i * PAGE_SIZE); > + > + *_veb = veb; > + return 0; > + > +err: > + while ( --i >= 0 ) > + { > + struct page_info *page = mfn_to_page(veb->mfn[i]); > + > + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) > + put_page(page); > + put_page_and_type(page); > + } > + > + xfree(veb); > + return rc; > +} > + > +static void vm_event_free_buffer(struct vm_event_buffer **_veb) > +{ > + struct vm_event_buffer *veb = *_veb; > + > + if ( !veb ) > + return; > + > + if ( veb->va ) > + { > + int i; > + > + vunmap(veb->va); > + for ( i = 0; i < veb->nr_frames; i++ ) > + { > + struct page_info *page = mfn_to_page(veb->mfn[i]); > + > + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) > + put_page(page); > + put_page_and_type(page); > + } > + } > + XFREE(*_veb); > +} > + > +static bool vm_event_channel_check(struct vm_event_domain *ved) > +{ > + struct vm_event_domain_channel *impl = > to_vm_event_domain_channel(ved); > + return impl->ring->va != NULL && impl->channels->va != NULL; > +} > + > +static void vm_event_channel_put_request(struct vm_event_domain *ved, > + vm_event_request_t *req) > +{ > + struct vcpu *curr = current; > + struct vm_event_domain_channel *impl = > to_vm_event_domain_channel(ved); > + struct domain *d; > + struct vm_event_slot *slot; 
> + bool sync; > + > + if ( !vm_event_check(ved) ) > + return; > + > + d = ved->d; > + slot = impl->channels->va + req->vcpu_id * sizeof(struct > vm_event_slot); > + > + if ( curr->domain != d ) > + { > + req->flags |= VM_EVENT_FLAG_FOREIGN; > +#ifndef NDEBUG > + if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) ) > + gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n", > + d->domain_id, req->vcpu_id); > +#endif > + } > + > + req->version = VM_EVENT_INTERFACE_VERSION; > + > + sync = req->flags & VM_EVENT_FLAG_VCPU_PAUSED; > + > + vm_event_lock(ved); > + > + if ( sync ) > + { > + if ( slot->state != VM_EVENT_SLOT_STATE_IDLE ) > + { > + gdprintk(XENLOG_G_WARNING, "The VM event slot for d%dv%d is > not IDLE.\n", > + d->domain_id, req->vcpu_id); > + vm_event_unlock(ved); > + return; > + } > + memcpy( &slot->u.req, req, sizeof(*req) ); > + slot->state = VM_EVENT_SLOT_STATE_SUBMIT; > + } > + else > + { > + vm_event_front_ring_t *front_ring; > + RING_IDX req_prod; > + > + /* Due to the reservations, this step must succeed. */ > + front_ring = &impl->front_ring; > + > + /* Copy request */ > + req_prod = front_ring->req_prod_pvt; > + memcpy(RING_GET_REQUEST(front_ring, req_prod), req, > sizeof(*req)); > + req_prod++; > + > + /* Update ring */ > + front_ring->req_prod_pvt = req_prod; > + RING_PUSH_REQUESTS(front_ring); > + } > + > + vm_event_unlock(ved); > + > + notify_via_xen_event_channel(d, impl->xen_ports[(sync) ? req->vcpu_id > : d->max_vcpus]); > +} > + > +static int vm_event_channel_disable(struct vm_event_domain **_ved) > +{ > + struct vm_event_domain_channel *ved = > to_vm_event_domain_channel(*_ved); > + struct domain *d = ved->ved.d; > + struct vcpu *v; > + int i; > + > + vm_event_lock(&ved->ved); > + > + for_each_vcpu ( d, v ) > + { > + if ( atomic_read(&v->vm_event_pause_count) ) > + vm_event_vcpu_unpause(v); > + /* > + if ( test_and_clear_bit(ved->ved.pause_flag, &v->pause_flags) ) > + { > + vcpu_unpause(v); > + } > + */ > + } > + > + /* Free domU's event channels and leave the other one unbound */ > + for ( i = 0; i < d->max_vcpus; i++ ) > + evtchn_close(d, ved->xen_ports[i], 0); > + evtchn_close(d, ved->xen_ports[d->max_vcpus], 0); > + > + vm_event_free_buffer(&ved->ring); > + vm_event_free_buffer(&ved->channels); > + > + vm_event_cleanup_domain(d); > + > + vm_event_unlock(&ved->ved); > + > + XFREE(*_ved); > + > + return 0; > +} > + > +static int vm_event_channel_enable( > + struct domain *d, > + struct vm_event_domain **_ved, > + unsigned int nr_frames, > + xen_event_channel_notification_t notification_fn) > +{ > + int i = 0, rc; > + struct vm_event_domain_channel *impl; > + unsigned int nr_ring_frames, nr_channel_frames; > + > + if ( *_ved ) > + return -EBUSY; > + > + if ( nr_frames <= PFN_UP(d->max_vcpus * sizeof(struct vm_event_slot)) > ) > + return -EINVAL; > + > + impl = _xzalloc(sizeof(struct vm_event_domain_channel) + > + ( d->max_vcpus + 1 ) * sizeof(uint32_t), > + __alignof__(struct vm_event_domain_channel)); > + if ( !impl ) > + return -ENOMEM; > + > + impl->ved.d = d; > + impl->ved.check = vm_event_channel_check; > + impl->ved.claim_slot = NULL; > + impl->ved.release_slot = NULL; > + impl->ved.put_request = vm_event_channel_put_request; > + impl->ved.get_response = vm_event_channel_get_response; > + impl->ved.disable = vm_event_channel_disable; > + > + nr_channel_frames = PFN_UP(d->max_vcpus * > sizeof(vm_event_request_t)); > + nr_ring_frames = nr_frames - nr_channel_frames; > + > + vm_event_lock_init(&impl->ved); > + vm_event_lock(&impl->ved); > + > + rc = 
vm_event_init_domain(d); > + if ( rc < 0 ) > + goto err; > + > + rc = vm_event_alloc_buffer(d, nr_ring_frames, &impl->ring); > + if ( rc ) > + goto err; > + > + /* Allocate event channel for the async ring*/ > + rc = alloc_unbound_xen_event_channel(d, 0, current->domain- > >domain_id, > + notification_fn); > + if ( rc < 0 ) > + goto err; > + > + impl->xen_ports[d->max_vcpus] = rc; > + > + /* Prepare ring buffer */ > + FRONT_RING_INIT(&impl->front_ring, > + (vm_event_sring_t *)impl->ring->va, > + impl->ring->nr_frames * PAGE_SIZE); > + > + rc = vm_event_alloc_buffer(d, nr_channel_frames, &impl->channels); > + if ( rc != 0) > + goto err; > + > + for ( i = 0; i < d->max_vcpus; i++) > + { > + rc = alloc_unbound_xen_event_channel(d, i, current->domain- > >domain_id, > + notification_fn); > + if ( rc < 0 ) > + goto err; > + > + impl->xen_ports[i] = rc; > + } > + > + *_ved = &impl->ved; > + > + vm_event_unlock(&impl->ved); > + return 0; > + > +err: > + while (i--) > + evtchn_close(d, impl->xen_ports[i], 0); > + evtchn_close(d, impl->xen_ports[d->max_vcpus], 0); > + vm_event_free_buffer(&impl->ring); > + vm_event_free_buffer(&impl->channels); > + vm_event_cleanup_domain(d); > + vm_event_unlock(&impl->ved); > + xfree(impl); > + return rc; > +} > + > int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec, > XEN_GUEST_HANDLE_PARAM(void) u_domctl) > { > @@ -748,7 +1113,9 @@ int vm_event_domctl(struct domain *d, struct > xen_domctl_vm_event_op *vec, > break; > > case XEN_VM_EVENT_RESUME: > - rc = vm_event_resume(d->vm_event_paging); > + if ( vm_event_check(d->vm_event_paging) && > + is_vm_event_domain_ring(d->vm_event_paging) ) > + rc = vm_event_resume(d->vm_event_paging, NULL, 0); > break; > > default: > @@ -786,7 +1153,30 @@ int vm_event_domctl(struct domain *d, struct > xen_domctl_vm_event_op *vec, > break; > > case XEN_VM_EVENT_RESUME: > - rc = vm_event_resume(d->vm_event_monitor); > + if ( vm_event_check(d->vm_event_monitor) && > + is_vm_event_domain_ring(d->vm_event_monitor) ) > + rc = vm_event_resume(d->vm_event_monitor, NULL, 0); > + break; > + > + case XEN_VM_EVENT_GET_PORTS: > + if ( !vm_event_check(d->vm_event_monitor) ) > + break; > + > + if ( !is_vm_event_domain_ring(d->vm_event_monitor) ) > + { > + struct vm_event_domain_channel *impl = > to_vm_event_domain_channel(d->vm_event_monitor); > + > + if ( copy_to_guest(vec->u.get_ports.sync, > + impl->xen_ports, > + d->max_vcpus) != 0 ) > + { > + rc = -EFAULT; > + break; > + } > + > + vec->u.get_ports.async = impl->xen_ports[d->max_vcpus]; > + rc = 0; > + } > break; > > default: > @@ -830,7 +1220,10 @@ int vm_event_domctl(struct domain *d, struct > xen_domctl_vm_event_op *vec, > break; > > case XEN_VM_EVENT_RESUME: > - rc = vm_event_resume(d->vm_event_share); > + if ( vm_event_check(d->vm_event_monitor) && > + is_vm_event_domain_ring(d->vm_event_monitor) ) > + rc = vm_event_resume(d->vm_event_share, NULL, 0); > + break; > > default: > rc = -ENOSYS; > @@ -847,6 +1240,52 @@ int vm_event_domctl(struct domain *d, struct > xen_domctl_vm_event_op *vec, > return rc; > } > > +int vm_event_get_frames(struct domain *d, unsigned int id, > + unsigned long frame, unsigned int nr_frames, > + xen_pfn_t mfn_list[]) > +{ > + int rc = 0, i, j; > + struct vm_event_domain **_ved; > + struct vm_event_domain_channel *impl; > + xen_event_channel_notification_t fn; > + > + switch ( id ) > + { > + case XEN_VM_EVENT_TYPE_MONITOR: > + /* domain_pause() not required here, see XSA-99 */ > + rc = arch_monitor_init_domain(d); > + if ( rc ) > + return rc; > 
+ _ved = &d->vm_event_monitor; > + fn = monitor_notification; > + break; > + > + default: > + return -ENOSYS; > + } > + > + rc = vm_event_channel_enable(d, _ved, nr_frames, fn); > + if ( rc ) > + { > + switch ( id ) > + { > + case XEN_VM_EVENT_TYPE_MONITOR: > + arch_monitor_cleanup_domain(d); > + break; > + } > + return rc; > + } > + > + impl = to_vm_event_domain_channel(*_ved); > + j = 0; > + for ( i = 0; i < impl->ring->nr_frames; i++ ) > + mfn_list[j++] = mfn_x(impl->ring->mfn[i]); > + for ( i = 0; i < impl->channels->nr_frames; i++ ) > + mfn_list[j++] = mfn_x(impl->channels->mfn[i]); > + > + return rc; > +} > + > void vm_event_vcpu_pause(struct vcpu *v) > { > ASSERT(v == current); > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h > index 26b1a55..78262a1 100644 > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > @@ -38,7 +38,7 @@ > #include "hvm/save.h" > #include "memory.h" > > -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000011 > +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000012 > > /* > * NB. xen_domctl.domain is an IN/OUT parameter for this operation. > @@ -836,6 +836,7 @@ struct xen_domctl_gdbsx_domstatus { > #define XEN_VM_EVENT_ENABLE 0 > #define XEN_VM_EVENT_DISABLE 1 > #define XEN_VM_EVENT_RESUME 2 > +#define XEN_VM_EVENT_GET_PORTS 3 > > /* > * Use for teardown/setup of helper<->hypervisor interface for paging, > @@ -843,10 +844,26 @@ struct xen_domctl_gdbsx_domstatus { > */ > /* XEN_DOMCTL_vm_event_op */ > struct xen_domctl_vm_event_op { > - uint32_t op; /* XEN_VM_EVENT_* */ > - uint32_t type; /* XEN_VM_EVENT_TYPE_* */ > + /* IN: Xen vm_event opcode (XEN_VM_EVENT_*) */ > + uint32_t op; > + /* IN: Xen vm event ring type (XEN_VM_EVENT_TYPE_*) */ > + uint32_t type; > > - uint32_t port; /* OUT: event channel for ring */ > + union { > + struct { > + /* OUT: remote port for event channel ring */ > + uint32_t port; > + } enable; > + struct { > + /* OUT: remote port for the async event channel ring */ > + uint32_t async; > + /* > + * OUT: remote ports for the sync event vm_event channels > + * The number for ports will be equal with the vcpu count. 
> + */ > + XEN_GUEST_HANDLE_64(uint32) sync; > + } get_ports; > + } u; > }; > > /* > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h > index 8638023..cfd280d 100644 > --- a/xen/include/public/memory.h > +++ b/xen/include/public/memory.h > @@ -612,6 +612,7 @@ struct xen_mem_acquire_resource { > > #define XENMEM_resource_ioreq_server 0 > #define XENMEM_resource_grant_table 1 > +#define XENMEM_resource_vm_event 2 > > /* > * IN - a type-specific resource identifier, which must be zero > @@ -619,6 +620,7 @@ struct xen_mem_acquire_resource { > * > * type == XENMEM_resource_ioreq_server -> id == ioreq server id > * type == XENMEM_resource_grant_table -> id defined below > + * type == XENMEM_resource_vm_event -> id == vm_event type > */ > uint32_t id; > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h > index b2bafc0..499fbbc 100644 > --- a/xen/include/public/vm_event.h > +++ b/xen/include/public/vm_event.h > @@ -388,6 +388,21 @@ typedef struct vm_event_st { > > DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t); > > +struct vm_event_slot > +{ > + uint32_t state; > + union { > + vm_event_request_t req; > + vm_event_response_t rsp; > + } u; > +}; > + > +enum vm_event_slot_state { > + VM_EVENT_SLOT_STATE_IDLE, /* no contents */ > + VM_EVENT_SLOT_STATE_SUBMIT, /* request ready */ > + VM_EVENT_SLOT_STATE_FINISH, /* response ready */ > +}; > + > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ > #endif /* _XEN_PUBLIC_VM_EVENT_H */ > > diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h > index a5c82d6..d4bd184 100644 > --- a/xen/include/xen/vm_event.h > +++ b/xen/include/xen/vm_event.h > @@ -64,6 +64,10 @@ void vm_event_put_request(struct vm_event_domain *ved, > int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec, > XEN_GUEST_HANDLE_PARAM(void) u_domctl); > > +int vm_event_get_frames(struct domain *d, unsigned int id, > + unsigned long frame, unsigned int nr_frames, > + xen_pfn_t mfn_list[]); > + > void vm_event_vcpu_pause(struct vcpu *v); > void vm_event_vcpu_unpause(struct vcpu *v); > > -- > 2.7.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxxx > https://lists.xenproject.org/mailman/listinfo/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
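
A rough consumer-side sketch of how a monitor application might use the
slotted interface proposed in this patch. It assumes the
xc_monitor_enable_ex() helper, struct vm_event_slot and the
VM_EVENT_SLOT_STATE_* values introduced above, together with the existing
libxenctrl / libxenevtchn calls; error handling, teardown and the
asynchronous ring path are omitted.

#include <string.h>
#include <xenctrl.h>
#include <xenevtchn.h>
#include <xenforeignmemory.h>
#include <xen/vm_event.h>

#define RING_FRAMES 1 /* frames for the async ring (illustrative value) */

static void monitor_sync_loop(uint32_t domid, uint32_t nr_vcpus)
{
    xc_interface *xch = xc_interface_open(NULL, NULL, 0);
    xenevtchn_handle *xce = xenevtchn_open(NULL, 0);
    uint32_t ring_port, sync_ports[nr_vcpus];
    int local_port[nr_vcpus];
    void *ring_buf, *sync_buf;

    /* Map the async ring plus the per-vcpu sync slots and get the ports. */
    xenforeignmemory_resource_handle *fres =
        xc_monitor_enable_ex(xch, domid, &ring_buf, RING_FRAMES, &ring_port,
                             &sync_buf, sync_ports, nr_vcpus);
    if ( !fres )
        return;

    for ( uint32_t i = 0; i < nr_vcpus; i++ )
        local_port[i] = xenevtchn_bind_interdomain(xce, domid, sync_ports[i]);

    for ( ;; )
    {
        int port = xenevtchn_pending(xce); /* blocks until an event fires */

        xenevtchn_unmask(xce, port);

        for ( uint32_t i = 0; i < nr_vcpus; i++ )
        {
            struct vm_event_slot *slot = (struct vm_event_slot *)sync_buf + i;
            vm_event_response_t rsp;

            if ( local_port[i] != port ||
                 slot->state != VM_EVENT_SLOT_STATE_SUBMIT )
                continue;

            /* ... policy decision based on slot->u.req goes here ... */

            memset(&rsp, 0, sizeof(rsp));
            rsp.version = VM_EVENT_INTERFACE_VERSION;
            rsp.vcpu_id = slot->u.req.vcpu_id;
            rsp.reason  = slot->u.req.reason;
            rsp.flags   = slot->u.req.flags & VM_EVENT_FLAG_VCPU_PAUSED;

            memcpy(&slot->u.rsp, &rsp, sizeof(rsp));
            /* Publish the response body before flipping the slot state. */
            __atomic_store_n(&slot->state, VM_EVENT_SLOT_STATE_FINISH,
                             __ATOMIC_RELEASE);
            /* Kick Xen so vm_event_resume() reads the slot and unpauses. */
            xenevtchn_notify(xce, local_port[i]);
        }
    }
}

Under the scheme above, each slot cycles IDLE -> SUBMIT (written by Xen in
vm_event_channel_put_request) -> FINISH (written by the monitor) -> IDLE
(reset by vm_event_channel_get_response), with the per-vcpu event channel
used to signal each transition.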