Xen project Mailing List

Re: [Xen-devel] [PATCH v4 2/8] ioreq-server: centralize access to ioreq structures

To: Paul Durrant <paul.durrant@xxxxxxxxxx>

From: George Dunlap <dunlapg@xxxxxxxxx>

Date: Thu, 3 Apr 2014 12:22:56 +0100

Cc: Kevin Tian <kevin.tian@xxxxxxxxx>, Keir Fraser <keir@xxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxx>, Jun Nakajima <jun.nakajima@xxxxxxxxx>

Delivery-date: Thu, 03 Apr 2014 11:23:04 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On Wed, Apr 2, 2014 at 4:11 PM, Paul Durrant <paul.durrant@xxxxxxxxxx> wrote: > To simplify creation of the ioreq server abstraction in a subsequent patch, > this patch centralizes all use of the shared ioreq structure and the > buffered ioreq ring to the source module xen/arch/x86/hvm/hvm.c. > > The patch moves an rmb() from inside hvm_io_assist() to hvm_do_resume() > because the former may now be passed a data structure on stack, in which > case the barrier is unnecessary. > > Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> > Cc: Keir Fraser <keir@xxxxxxx> > Cc: Jan Beulich <jbeulich@xxxxxxxx> > Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx> > Cc: Eddie Dong <eddie.dong@xxxxxxxxx> > Cc: Kevin Tian <kevin.tian@xxxxxxxxx> Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx> > --- > xen/arch/x86/hvm/emulate.c | 70 +++++++++------------- > xen/arch/x86/hvm/hvm.c | 118 > +++++++++++++++++++++++++++++++++++-- > xen/arch/x86/hvm/io.c | 104 +++----------------------------- > xen/arch/x86/hvm/vmx/vvmx.c | 13 +++- > xen/include/asm-x86/hvm/hvm.h | 15 ++++- > xen/include/asm-x86/hvm/support.h | 21 ++++--- > 6 files changed, 185 insertions(+), 156 deletions(-) > > diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c > index 868aa1d..1c71902 100644 > --- a/xen/arch/x86/hvm/emulate.c > +++ b/xen/arch/x86/hvm/emulate.c > @@ -57,24 +57,11 @@ static int hvmemul_do_io( > int value_is_ptr = (p_data == NULL); > struct vcpu *curr = current; > struct hvm_vcpu_io *vio; > - ioreq_t *p = get_ioreq(curr); > - ioreq_t _ioreq; > + ioreq_t p; > unsigned long ram_gfn = paddr_to_pfn(ram_gpa); > p2m_type_t p2mt; > struct page_info *ram_page; > int rc; > - bool_t has_dm = 1; > - > - /* > - * Domains without a backing DM, don't have an ioreq page. Just > - * point to a struct on the stack, initialising the state as needed. > - */ > - if ( !p ) > - { > - has_dm = 0; > - p = &_ioreq; > - p->state = STATE_IOREQ_NONE; > - } > > /* Check for paged out page */ > ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE); > @@ -173,15 +160,6 @@ static int hvmemul_do_io( > return X86EMUL_UNHANDLEABLE; > } > > - if ( p->state != STATE_IOREQ_NONE ) > - { > - gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n", > - p->state); > - if ( ram_page ) > - put_page(ram_page); > - return X86EMUL_UNHANDLEABLE; > - } > - > vio->io_state = > (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; > vio->io_size = size; > @@ -193,38 +171,38 @@ static int hvmemul_do_io( > if ( vio->mmio_retrying ) > *reps = 1; > > - p->dir = dir; > - p->data_is_ptr = value_is_ptr; > - p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; > - p->size = size; > - p->addr = addr; > - p->count = *reps; > - p->df = df; > - p->data = value; > + p.dir = dir; > + p.data_is_ptr = value_is_ptr; > + p.type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; > + p.size = size; > + p.addr = addr; > + p.count = *reps; > + p.df = df; > + p.data = value; > > if ( dir == IOREQ_WRITE ) > - hvmtrace_io_assist(is_mmio, p); > + hvmtrace_io_assist(is_mmio, &p); > > if ( is_mmio ) > { > - rc = hvm_mmio_intercept(p); > + rc = hvm_mmio_intercept(&p); > if ( rc == X86EMUL_UNHANDLEABLE ) > - rc = hvm_buffered_io_intercept(p); > + rc = hvm_buffered_io_intercept(&p); > } > else > { > - rc = hvm_portio_intercept(p); > + rc = hvm_portio_intercept(&p); > } > > switch ( rc ) > { > case X86EMUL_OKAY: > case X86EMUL_RETRY: > - *reps = p->count; > - p->state = STATE_IORESP_READY; > + *reps = p.count; > + p.state = STATE_IORESP_READY; > if ( !vio->mmio_retry ) > { > - hvm_io_assist(p); > + hvm_io_assist(&p); > vio->io_state = HVMIO_none; > } > else > @@ -233,7 +211,7 @@ static int hvmemul_do_io( > break; > case X86EMUL_UNHANDLEABLE: > /* If there is no backing DM, just ignore accesses */ > - if ( !has_dm ) > + if ( !hvm_has_dm(curr->domain) ) > { > rc = X86EMUL_OKAY; > vio->io_state = HVMIO_none; > @@ -241,7 +219,7 @@ static int hvmemul_do_io( > else > { > rc = X86EMUL_RETRY; > - if ( !hvm_send_assist_req(curr) ) > + if ( !hvm_send_assist_req(curr, &p) ) > vio->io_state = HVMIO_none; > else if ( p_data == NULL ) > rc = X86EMUL_OKAY; > @@ -260,7 +238,7 @@ static int hvmemul_do_io( > > finish_access: > if ( dir == IOREQ_READ ) > - hvmtrace_io_assist(is_mmio, p); > + hvmtrace_io_assist(is_mmio, &p); > > if ( p_data != NULL ) > memcpy(p_data, &vio->io_data, size); > @@ -1292,3 +1270,13 @@ struct segment_register *hvmemul_get_seg_reg( > hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); > return &hvmemul_ctxt->seg_reg[seg]; > } > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index 69d0a44..573f845 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -352,6 +352,26 @@ void hvm_migrate_pirqs(struct vcpu *v) > spin_unlock(&d->event_lock); > } > > +static ioreq_t *get_ioreq(struct vcpu *v) > +{ > + struct domain *d = v->domain; > + shared_iopage_t *p = d->arch.hvm_domain.ioreq.va; > + > + ASSERT((v == current) || spin_is_locked(&d->arch.hvm_domain.ioreq.lock)); > + > + return p ? &p->vcpu_ioreq[v->vcpu_id] : NULL; > +} > + > +bool_t hvm_io_pending(struct vcpu *v) > +{ > + ioreq_t *p = get_ioreq(v); > + > + if ( !p ) > + return 0; > + > + return ( p->state != STATE_IOREQ_NONE ); > +} > + > void hvm_do_resume(struct vcpu *v) > { > ioreq_t *p = get_ioreq(v); > @@ -370,11 +390,12 @@ void hvm_do_resume(struct vcpu *v) > switch ( p->state ) > { > case STATE_IORESP_READY: /* IORESP_READY -> NONE */ > + rmb(); /* see IORESP_READY /then/ read contents of ioreq */ > hvm_io_assist(p); > break; > case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY > */ > case STATE_IOREQ_INPROCESS: > - wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port, > + wait_on_xen_event_channel(p->vp_eport, > (p->state != STATE_IOREQ_READY) && > (p->state != STATE_IOREQ_INPROCESS)); > break; > @@ -1414,7 +1435,87 @@ void hvm_vcpu_down(struct vcpu *v) > } > } > > -bool_t hvm_send_assist_req(struct vcpu *v) > +int hvm_buffered_io_send(struct domain *d, const ioreq_t *p) > +{ > + struct hvm_ioreq_page *iorp = &d->arch.hvm_domain.buf_ioreq; > + buffered_iopage_t *pg = iorp->va; > + buf_ioreq_t bp; > + /* Timeoffset sends 64b data, but no address. Use two consecutive slots. > */ > + int qw = 0; > + > + /* Ensure buffered_iopage fits in a page */ > + BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE); > + > + /* > + * Return 0 for the cases we can't deal with: > + * - 'addr' is only a 20-bit field, so we cannot address beyond 1MB > + * - we cannot buffer accesses to guest memory buffers, as the guest > + * may expect the memory buffer to be synchronously accessed > + * - the count field is usually used with data_is_ptr and since we don't > + * support data_is_ptr we do not waste space for the count field > either > + */ > + if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) ) > + return 0; > + > + bp.type = p->type; > + bp.dir = p->dir; > + switch ( p->size ) > + { > + case 1: > + bp.size = 0; > + break; > + case 2: > + bp.size = 1; > + break; > + case 4: > + bp.size = 2; > + break; > + case 8: > + bp.size = 3; > + qw = 1; > + break; > + default: > + gdprintk(XENLOG_WARNING, "unexpected ioreq size: %u\n", p->size); > + return 0; > + } > + > + bp.data = p->data; > + bp.addr = p->addr; > + > + spin_lock(&iorp->lock); > + > + if ( (pg->write_pointer - pg->read_pointer) >= > + (IOREQ_BUFFER_SLOT_NUM - qw) ) > + { > + /* The queue is full: send the iopacket through the normal path. */ > + spin_unlock(&iorp->lock); > + return 0; > + } > + > + pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp; > + > + if ( qw ) > + { > + bp.data = p->data >> 32; > + pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp; > + } > + > + /* Make the ioreq_t visible /before/ write_pointer. */ > + wmb(); > + pg->write_pointer += qw ? 2 : 1; > + > + notify_via_xen_event_channel(d, > d->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN]); > + spin_unlock(&iorp->lock); > + > + return 1; > +} > + > +bool_t hvm_has_dm(struct domain *d) > +{ > + return !!d->arch.hvm_domain.ioreq.va; > +} > + > +bool_t hvm_send_assist_req(struct vcpu *v, const ioreq_t *proto_p) > { > ioreq_t *p = get_ioreq(v); > > @@ -1432,14 +1533,23 @@ bool_t hvm_send_assist_req(struct vcpu *v) > return 0; > } > > - prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port); > + p->dir = proto_p->dir; > + p->data_is_ptr = proto_p->data_is_ptr; > + p->type = proto_p->type; > + p->size = proto_p->size; > + p->addr = proto_p->addr; > + p->count = proto_p->count; > + p->df = proto_p->df; > + p->data = proto_p->data; > + > + prepare_wait_on_xen_event_channel(p->vp_eport); > > /* > * Following happens /after/ blocking and setting up ioreq contents. > * prepare_wait_on_xen_event_channel() is an implicit barrier. > */ > p->state = STATE_IOREQ_READY; > - notify_via_xen_event_channel(v->domain, v->arch.hvm_vcpu.xen_port); > + notify_via_xen_event_channel(v->domain, p->vp_eport); > > return 1; > } > diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c > index 5ba38d2..8db300d 100644 > --- a/xen/arch/x86/hvm/io.c > +++ b/xen/arch/x86/hvm/io.c > @@ -46,82 +46,6 @@ > #include <xen/iocap.h> > #include <public/hvm/ioreq.h> > > -int hvm_buffered_io_send(struct domain *d, const ioreq_t *p) > -{ > - struct hvm_ioreq_page *iorp = &d->arch.hvm_domain.buf_ioreq; > - buffered_iopage_t *pg = iorp->va; > - buf_ioreq_t bp; > - /* Timeoffset sends 64b data, but no address. Use two consecutive slots. > */ > - int qw = 0; > - > - /* Ensure buffered_iopage fits in a page */ > - BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE); > - > - /* > - * Return 0 for the cases we can't deal with: > - * - 'addr' is only a 20-bit field, so we cannot address beyond 1MB > - * - we cannot buffer accesses to guest memory buffers, as the guest > - * may expect the memory buffer to be synchronously accessed > - * - the count field is usually used with data_is_ptr and since we don't > - * support data_is_ptr we do not waste space for the count field > either > - */ > - if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) ) > - return 0; > - > - bp.type = p->type; > - bp.dir = p->dir; > - switch ( p->size ) > - { > - case 1: > - bp.size = 0; > - break; > - case 2: > - bp.size = 1; > - break; > - case 4: > - bp.size = 2; > - break; > - case 8: > - bp.size = 3; > - qw = 1; > - break; > - default: > - gdprintk(XENLOG_WARNING, "unexpected ioreq size: %u\n", p->size); > - return 0; > - } > - > - bp.data = p->data; > - bp.addr = p->addr; > - > - spin_lock(&iorp->lock); > - > - if ( (pg->write_pointer - pg->read_pointer) >= > - (IOREQ_BUFFER_SLOT_NUM - qw) ) > - { > - /* The queue is full: send the iopacket through the normal path. */ > - spin_unlock(&iorp->lock); > - return 0; > - } > - > - pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp; > - > - if ( qw ) > - { > - bp.data = p->data >> 32; > - pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp; > - } > - > - /* Make the ioreq_t visible /before/ write_pointer. */ > - wmb(); > - pg->write_pointer += qw ? 2 : 1; > - > - notify_via_xen_event_channel(d, > - d->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN]); > - spin_unlock(&iorp->lock); > - > - return 1; > -} > - > void send_timeoffset_req(unsigned long timeoff) > { > ioreq_t p = { > @@ -143,26 +67,14 @@ void send_timeoffset_req(unsigned long timeoff) > /* Ask ioemu mapcache to invalidate mappings. */ > void send_invalidate_req(void) > { > - struct vcpu *v = current; > - ioreq_t *p = get_ioreq(v); > - > - if ( !p ) > - return; > - > - if ( p->state != STATE_IOREQ_NONE ) > - { > - gdprintk(XENLOG_ERR, "WARNING: send invalidate req with something " > - "already pending (%d)?\n", p->state); > - domain_crash(v->domain); > - return; > - } > - > - p->type = IOREQ_TYPE_INVALIDATE; > - p->size = 4; > - p->dir = IOREQ_WRITE; > - p->data = ~0UL; /* flush all */ > + ioreq_t p = { > + .type = IOREQ_TYPE_INVALIDATE, > + .size = 4, > + .dir = IOREQ_WRITE, > + .data = ~0UL, /* flush all */ > + }; > > - (void)hvm_send_assist_req(v); > + (void)hvm_send_assist_req(current, &p); > } > > int handle_mmio(void) > @@ -265,8 +177,6 @@ void hvm_io_assist(ioreq_t *p) > struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; > enum hvm_io_state io_state; > > - rmb(); /* see IORESP_READY /then/ read contents of ioreq */ > - > p->state = STATE_IOREQ_NONE; > > io_state = vio->io_state; > diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c > index 40167d6..0421623 100644 > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1394,7 +1394,6 @@ void nvmx_switch_guest(void) > struct vcpu *v = current; > struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); > struct cpu_user_regs *regs = guest_cpu_user_regs(); > - const ioreq_t *ioreq = get_ioreq(v); > > /* > * A pending IO emulation may still be not finished. In this case, no > @@ -1404,7 +1403,7 @@ void nvmx_switch_guest(void) > * don't want to continue as this setup is not implemented nor supported > * as of right now. > */ > - if ( !ioreq || ioreq->state != STATE_IOREQ_NONE ) > + if ( hvm_io_pending(v) ) > return; > /* > * a softirq may interrupt us between a virtual vmentry is > @@ -2522,3 +2521,13 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned > int cr) > /* nvcpu.guest_cr is what L2 write to cr actually. */ > __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); > } > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h > index dcc3483..08a62ea 100644 > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -26,6 +26,7 @@ > #include <asm/hvm/asid.h> > #include <public/domctl.h> > #include <public/hvm/save.h> > +#include <public/hvm/ioreq.h> > #include <asm/mm.h> > > /* Interrupt acknowledgement sources. */ > @@ -227,7 +228,7 @@ int prepare_ring_for_helper(struct domain *d, unsigned > long gmfn, > struct page_info **_page, void **_va); > void destroy_ring_for_helper(void **_va, struct page_info *page); > > -bool_t hvm_send_assist_req(struct vcpu *v); > +bool_t hvm_send_assist_req(struct vcpu *v, const ioreq_t *p); > > void hvm_get_guest_pat(struct vcpu *v, u64 *guest_pat); > int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat); > @@ -339,6 +340,8 @@ static inline unsigned long hvm_get_shadow_gs_base(struct > vcpu *v) > void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, > unsigned int *ecx, unsigned int *edx); > void hvm_migrate_timers(struct vcpu *v); > +bool_t hvm_has_dm(struct domain *d); > +bool_t hvm_io_pending(struct vcpu *v); > void hvm_do_resume(struct vcpu *v); > void hvm_migrate_pirqs(struct vcpu *v); > > @@ -522,3 +525,13 @@ bool_t nhvm_vmcx_hap_enabled(struct vcpu *v); > enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v); > > #endif /* __ASM_X86_HVM_HVM_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/include/asm-x86/hvm/support.h > b/xen/include/asm-x86/hvm/support.h > index 1dc2f2d..05ef5c5 100644 > --- a/xen/include/asm-x86/hvm/support.h > +++ b/xen/include/asm-x86/hvm/support.h > @@ -22,21 +22,10 @@ > #define __ASM_X86_HVM_SUPPORT_H__ > > #include <xen/types.h> > -#include <public/hvm/ioreq.h> > #include <xen/sched.h> > #include <xen/hvm/save.h> > #include <asm/processor.h> > > -static inline ioreq_t *get_ioreq(struct vcpu *v) > -{ > - struct domain *d = v->domain; > - shared_iopage_t *p = d->arch.hvm_domain.ioreq.va; > - > - ASSERT((v == current) || spin_is_locked(&d->arch.hvm_domain.ioreq.lock)); > - > - return p ? &p->vcpu_ioreq[v->vcpu_id] : NULL; > -} > - > #define HVM_DELIVER_NO_ERROR_CODE -1 > > #ifndef NDEBUG > @@ -144,3 +133,13 @@ int hvm_mov_to_cr(unsigned int cr, unsigned int gpr); > int hvm_mov_from_cr(unsigned int cr, unsigned int gpr); > > #endif /* __ASM_X86_HVM_SUPPORT_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > -- > 1.7.10.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.