[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 2/8] ioreq-server: centralize access to ioreq structures



On Wed, Apr 2, 2014 at 4:11 PM, Paul Durrant <paul.durrant@xxxxxxxxxx> wrote:
> To simplify creation of the ioreq server abstraction in a subsequent patch,
> this patch centralizes all use of the shared ioreq structure and the
> buffered ioreq ring to the source module xen/arch/x86/hvm/hvm.c.
>
> The patch moves an rmb() from inside hvm_io_assist() to hvm_do_resume()
> because the former may now be passed a data structure on stack, in which
> case the barrier is unnecessary.
>
> Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
> Cc: Keir Fraser <keir@xxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Cc: Eddie Dong <eddie.dong@xxxxxxxxx>
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>

Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

> ---
>  xen/arch/x86/hvm/emulate.c        |   70 +++++++++-------------
>  xen/arch/x86/hvm/hvm.c            |  118 
> +++++++++++++++++++++++++++++++++++--
>  xen/arch/x86/hvm/io.c             |  104 +++-----------------------------
>  xen/arch/x86/hvm/vmx/vvmx.c       |   13 +++-
>  xen/include/asm-x86/hvm/hvm.h     |   15 ++++-
>  xen/include/asm-x86/hvm/support.h |   21 ++++---
>  6 files changed, 185 insertions(+), 156 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index 868aa1d..1c71902 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -57,24 +57,11 @@ static int hvmemul_do_io(
>      int value_is_ptr = (p_data == NULL);
>      struct vcpu *curr = current;
>      struct hvm_vcpu_io *vio;
> -    ioreq_t *p = get_ioreq(curr);
> -    ioreq_t _ioreq;
> +    ioreq_t p;
>      unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
>      p2m_type_t p2mt;
>      struct page_info *ram_page;
>      int rc;
> -    bool_t has_dm = 1;
> -
> -    /*
> -     * Domains without a backing DM, don't have an ioreq page.  Just
> -     * point to a struct on the stack, initialising the state as needed.
> -     */
> -    if ( !p )
> -    {
> -        has_dm = 0;
> -        p = &_ioreq;
> -        p->state = STATE_IOREQ_NONE;
> -    }
>
>      /* Check for paged out page */
>      ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
> @@ -173,15 +160,6 @@ static int hvmemul_do_io(
>          return X86EMUL_UNHANDLEABLE;
>      }
>
> -    if ( p->state != STATE_IOREQ_NONE )
> -    {
> -        gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
> -                 p->state);
> -        if ( ram_page )
> -            put_page(ram_page);
> -        return X86EMUL_UNHANDLEABLE;
> -    }
> -
>      vio->io_state =
>          (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
>      vio->io_size = size;
> @@ -193,38 +171,38 @@ static int hvmemul_do_io(
>      if ( vio->mmio_retrying )
>          *reps = 1;
>
> -    p->dir = dir;
> -    p->data_is_ptr = value_is_ptr;
> -    p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
> -    p->size = size;
> -    p->addr = addr;
> -    p->count = *reps;
> -    p->df = df;
> -    p->data = value;
> +    p.dir = dir;
> +    p.data_is_ptr = value_is_ptr;
> +    p.type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
> +    p.size = size;
> +    p.addr = addr;
> +    p.count = *reps;
> +    p.df = df;
> +    p.data = value;
>
>      if ( dir == IOREQ_WRITE )
> -        hvmtrace_io_assist(is_mmio, p);
> +        hvmtrace_io_assist(is_mmio, &p);
>
>      if ( is_mmio )
>      {
> -        rc = hvm_mmio_intercept(p);
> +        rc = hvm_mmio_intercept(&p);
>          if ( rc == X86EMUL_UNHANDLEABLE )
> -            rc = hvm_buffered_io_intercept(p);
> +            rc = hvm_buffered_io_intercept(&p);
>      }
>      else
>      {
> -        rc = hvm_portio_intercept(p);
> +        rc = hvm_portio_intercept(&p);
>      }
>
>      switch ( rc )
>      {
>      case X86EMUL_OKAY:
>      case X86EMUL_RETRY:
> -        *reps = p->count;
> -        p->state = STATE_IORESP_READY;
> +        *reps = p.count;
> +        p.state = STATE_IORESP_READY;
>          if ( !vio->mmio_retry )
>          {
> -            hvm_io_assist(p);
> +            hvm_io_assist(&p);
>              vio->io_state = HVMIO_none;
>          }
>          else
> @@ -233,7 +211,7 @@ static int hvmemul_do_io(
>          break;
>      case X86EMUL_UNHANDLEABLE:
>          /* If there is no backing DM, just ignore accesses */
> -        if ( !has_dm )
> +        if ( !hvm_has_dm(curr->domain) )
>          {
>              rc = X86EMUL_OKAY;
>              vio->io_state = HVMIO_none;
> @@ -241,7 +219,7 @@ static int hvmemul_do_io(
>          else
>          {
>              rc = X86EMUL_RETRY;
> -            if ( !hvm_send_assist_req(curr) )
> +            if ( !hvm_send_assist_req(curr, &p) )
>                  vio->io_state = HVMIO_none;
>              else if ( p_data == NULL )
>                  rc = X86EMUL_OKAY;
> @@ -260,7 +238,7 @@ static int hvmemul_do_io(
>
>   finish_access:
>      if ( dir == IOREQ_READ )
> -        hvmtrace_io_assist(is_mmio, p);
> +        hvmtrace_io_assist(is_mmio, &p);
>
>      if ( p_data != NULL )
>          memcpy(p_data, &vio->io_data, size);
> @@ -1292,3 +1270,13 @@ struct segment_register *hvmemul_get_seg_reg(
>          hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
>      return &hvmemul_ctxt->seg_reg[seg];
>  }
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 69d0a44..573f845 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -352,6 +352,26 @@ void hvm_migrate_pirqs(struct vcpu *v)
>      spin_unlock(&d->event_lock);
>  }
>
> +static ioreq_t *get_ioreq(struct vcpu *v)
> +{
> +    struct domain *d = v->domain;
> +    shared_iopage_t *p = d->arch.hvm_domain.ioreq.va;
> +
> +    ASSERT((v == current) || spin_is_locked(&d->arch.hvm_domain.ioreq.lock));
> +
> +    return p ? &p->vcpu_ioreq[v->vcpu_id] : NULL;
> +}
> +
> +bool_t hvm_io_pending(struct vcpu *v)
> +{
> +    ioreq_t *p = get_ioreq(v);
> +
> +    if ( !p )
> +        return 0;
> +
> +    return ( p->state != STATE_IOREQ_NONE );
> +}
> +
>  void hvm_do_resume(struct vcpu *v)
>  {
>      ioreq_t *p = get_ioreq(v);
> @@ -370,11 +390,12 @@ void hvm_do_resume(struct vcpu *v)
>          switch ( p->state )
>          {
>          case STATE_IORESP_READY: /* IORESP_READY -> NONE */
> +            rmb(); /* see IORESP_READY /then/ read contents of ioreq */
>              hvm_io_assist(p);
>              break;
>          case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY 
> */
>          case STATE_IOREQ_INPROCESS:
> -            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
> +            wait_on_xen_event_channel(p->vp_eport,
>                                        (p->state != STATE_IOREQ_READY) &&
>                                        (p->state != STATE_IOREQ_INPROCESS));
>              break;
> @@ -1414,7 +1435,87 @@ void hvm_vcpu_down(struct vcpu *v)
>      }
>  }
>
> -bool_t hvm_send_assist_req(struct vcpu *v)
> +int hvm_buffered_io_send(struct domain *d, const ioreq_t *p)
> +{
> +    struct hvm_ioreq_page *iorp = &d->arch.hvm_domain.buf_ioreq;
> +    buffered_iopage_t *pg = iorp->va;
> +    buf_ioreq_t bp;
> +    /* Timeoffset sends 64b data, but no address. Use two consecutive slots. 
> */
> +    int qw = 0;
> +
> +    /* Ensure buffered_iopage fits in a page */
> +    BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE);
> +
> +    /*
> +     * Return 0 for the cases we can't deal with:
> +     *  - 'addr' is only a 20-bit field, so we cannot address beyond 1MB
> +     *  - we cannot buffer accesses to guest memory buffers, as the guest
> +     *    may expect the memory buffer to be synchronously accessed
> +     *  - the count field is usually used with data_is_ptr and since we don't
> +     *    support data_is_ptr we do not waste space for the count field 
> either
> +     */
> +    if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) )
> +        return 0;
> +
> +    bp.type = p->type;
> +    bp.dir  = p->dir;
> +    switch ( p->size )
> +    {
> +    case 1:
> +        bp.size = 0;
> +        break;
> +    case 2:
> +        bp.size = 1;
> +        break;
> +    case 4:
> +        bp.size = 2;
> +        break;
> +    case 8:
> +        bp.size = 3;
> +        qw = 1;
> +        break;
> +    default:
> +        gdprintk(XENLOG_WARNING, "unexpected ioreq size: %u\n", p->size);
> +        return 0;
> +    }
> +
> +    bp.data = p->data;
> +    bp.addr = p->addr;
> +
> +    spin_lock(&iorp->lock);
> +
> +    if ( (pg->write_pointer - pg->read_pointer) >=
> +         (IOREQ_BUFFER_SLOT_NUM - qw) )
> +    {
> +        /* The queue is full: send the iopacket through the normal path. */
> +        spin_unlock(&iorp->lock);
> +        return 0;
> +    }
> +
> +    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +
> +    if ( qw )
> +    {
> +        bp.data = p->data >> 32;
> +        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    }
> +
> +    /* Make the ioreq_t visible /before/ write_pointer. */
> +    wmb();
> +    pg->write_pointer += qw ? 2 : 1;
> +
> +    notify_via_xen_event_channel(d, 
> d->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN]);
> +    spin_unlock(&iorp->lock);
> +
> +    return 1;
> +}
> +
> +bool_t hvm_has_dm(struct domain *d)
> +{
> +    return !!d->arch.hvm_domain.ioreq.va;
> +}
> +
> +bool_t hvm_send_assist_req(struct vcpu *v, const ioreq_t *proto_p)
>  {
>      ioreq_t *p = get_ioreq(v);
>
> @@ -1432,14 +1533,23 @@ bool_t hvm_send_assist_req(struct vcpu *v)
>          return 0;
>      }
>
> -    prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
> +    p->dir = proto_p->dir;
> +    p->data_is_ptr = proto_p->data_is_ptr;
> +    p->type = proto_p->type;
> +    p->size = proto_p->size;
> +    p->addr = proto_p->addr;
> +    p->count = proto_p->count;
> +    p->df = proto_p->df;
> +    p->data = proto_p->data;
> +
> +    prepare_wait_on_xen_event_channel(p->vp_eport);
>
>      /*
>       * Following happens /after/ blocking and setting up ioreq contents.
>       * prepare_wait_on_xen_event_channel() is an implicit barrier.
>       */
>      p->state = STATE_IOREQ_READY;
> -    notify_via_xen_event_channel(v->domain, v->arch.hvm_vcpu.xen_port);
> +    notify_via_xen_event_channel(v->domain, p->vp_eport);
>
>      return 1;
>  }
> diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
> index 5ba38d2..8db300d 100644
> --- a/xen/arch/x86/hvm/io.c
> +++ b/xen/arch/x86/hvm/io.c
> @@ -46,82 +46,6 @@
>  #include <xen/iocap.h>
>  #include <public/hvm/ioreq.h>
>
> -int hvm_buffered_io_send(struct domain *d, const ioreq_t *p)
> -{
> -    struct hvm_ioreq_page *iorp = &d->arch.hvm_domain.buf_ioreq;
> -    buffered_iopage_t *pg = iorp->va;
> -    buf_ioreq_t bp;
> -    /* Timeoffset sends 64b data, but no address. Use two consecutive slots. 
> */
> -    int qw = 0;
> -
> -    /* Ensure buffered_iopage fits in a page */
> -    BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE);
> -
> -    /*
> -     * Return 0 for the cases we can't deal with:
> -     *  - 'addr' is only a 20-bit field, so we cannot address beyond 1MB
> -     *  - we cannot buffer accesses to guest memory buffers, as the guest
> -     *    may expect the memory buffer to be synchronously accessed
> -     *  - the count field is usually used with data_is_ptr and since we don't
> -     *    support data_is_ptr we do not waste space for the count field 
> either
> -     */
> -    if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) )
> -        return 0;
> -
> -    bp.type = p->type;
> -    bp.dir  = p->dir;
> -    switch ( p->size )
> -    {
> -    case 1:
> -        bp.size = 0;
> -        break;
> -    case 2:
> -        bp.size = 1;
> -        break;
> -    case 4:
> -        bp.size = 2;
> -        break;
> -    case 8:
> -        bp.size = 3;
> -        qw = 1;
> -        break;
> -    default:
> -        gdprintk(XENLOG_WARNING, "unexpected ioreq size: %u\n", p->size);
> -        return 0;
> -    }
> -
> -    bp.data = p->data;
> -    bp.addr = p->addr;
> -
> -    spin_lock(&iorp->lock);
> -
> -    if ( (pg->write_pointer - pg->read_pointer) >=
> -         (IOREQ_BUFFER_SLOT_NUM - qw) )
> -    {
> -        /* The queue is full: send the iopacket through the normal path. */
> -        spin_unlock(&iorp->lock);
> -        return 0;
> -    }
> -
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> -
> -    if ( qw )
> -    {
> -        bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> -    }
> -
> -    /* Make the ioreq_t visible /before/ write_pointer. */
> -    wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> -
> -    notify_via_xen_event_channel(d,
> -            d->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN]);
> -    spin_unlock(&iorp->lock);
> -
> -    return 1;
> -}
> -
>  void send_timeoffset_req(unsigned long timeoff)
>  {
>      ioreq_t p = {
> @@ -143,26 +67,14 @@ void send_timeoffset_req(unsigned long timeoff)
>  /* Ask ioemu mapcache to invalidate mappings. */
>  void send_invalidate_req(void)
>  {
> -    struct vcpu *v = current;
> -    ioreq_t *p = get_ioreq(v);
> -
> -    if ( !p )
> -        return;
> -
> -    if ( p->state != STATE_IOREQ_NONE )
> -    {
> -        gdprintk(XENLOG_ERR, "WARNING: send invalidate req with something "
> -                 "already pending (%d)?\n", p->state);
> -        domain_crash(v->domain);
> -        return;
> -    }
> -
> -    p->type = IOREQ_TYPE_INVALIDATE;
> -    p->size = 4;
> -    p->dir = IOREQ_WRITE;
> -    p->data = ~0UL; /* flush all */
> +    ioreq_t p = {
> +        .type = IOREQ_TYPE_INVALIDATE,
> +        .size = 4,
> +        .dir = IOREQ_WRITE,
> +        .data = ~0UL, /* flush all */
> +    };
>
> -    (void)hvm_send_assist_req(v);
> +    (void)hvm_send_assist_req(current, &p);
>  }
>
>  int handle_mmio(void)
> @@ -265,8 +177,6 @@ void hvm_io_assist(ioreq_t *p)
>      struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
>      enum hvm_io_state io_state;
>
> -    rmb(); /* see IORESP_READY /then/ read contents of ioreq */
> -
>      p->state = STATE_IOREQ_NONE;
>
>      io_state = vio->io_state;
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 40167d6..0421623 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -1394,7 +1394,6 @@ void nvmx_switch_guest(void)
>      struct vcpu *v = current;
>      struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
> -    const ioreq_t *ioreq = get_ioreq(v);
>
>      /*
>       * A pending IO emulation may still be not finished. In this case, no
> @@ -1404,7 +1403,7 @@ void nvmx_switch_guest(void)
>       * don't want to continue as this setup is not implemented nor supported
>       * as of right now.
>       */
> -    if ( !ioreq || ioreq->state != STATE_IOREQ_NONE )
> +    if ( hvm_io_pending(v) )
>          return;
>      /*
>       * a softirq may interrupt us between a virtual vmentry is
> @@ -2522,3 +2521,13 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned 
> int cr)
>      /* nvcpu.guest_cr is what L2 write to cr actually. */
>      __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]);
>  }
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index dcc3483..08a62ea 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -26,6 +26,7 @@
>  #include <asm/hvm/asid.h>
>  #include <public/domctl.h>
>  #include <public/hvm/save.h>
> +#include <public/hvm/ioreq.h>
>  #include <asm/mm.h>
>
>  /* Interrupt acknowledgement sources. */
> @@ -227,7 +228,7 @@ int prepare_ring_for_helper(struct domain *d, unsigned 
> long gmfn,
>                              struct page_info **_page, void **_va);
>  void destroy_ring_for_helper(void **_va, struct page_info *page);
>
> -bool_t hvm_send_assist_req(struct vcpu *v);
> +bool_t hvm_send_assist_req(struct vcpu *v, const ioreq_t *p);
>
>  void hvm_get_guest_pat(struct vcpu *v, u64 *guest_pat);
>  int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat);
> @@ -339,6 +340,8 @@ static inline unsigned long hvm_get_shadow_gs_base(struct 
> vcpu *v)
>  void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                     unsigned int *ecx, unsigned int *edx);
>  void hvm_migrate_timers(struct vcpu *v);
> +bool_t hvm_has_dm(struct domain *d);
> +bool_t hvm_io_pending(struct vcpu *v);
>  void hvm_do_resume(struct vcpu *v);
>  void hvm_migrate_pirqs(struct vcpu *v);
>
> @@ -522,3 +525,13 @@ bool_t nhvm_vmcx_hap_enabled(struct vcpu *v);
>  enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v);
>
>  #endif /* __ASM_X86_HVM_HVM_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-x86/hvm/support.h 
> b/xen/include/asm-x86/hvm/support.h
> index 1dc2f2d..05ef5c5 100644
> --- a/xen/include/asm-x86/hvm/support.h
> +++ b/xen/include/asm-x86/hvm/support.h
> @@ -22,21 +22,10 @@
>  #define __ASM_X86_HVM_SUPPORT_H__
>
>  #include <xen/types.h>
> -#include <public/hvm/ioreq.h>
>  #include <xen/sched.h>
>  #include <xen/hvm/save.h>
>  #include <asm/processor.h>
>
> -static inline ioreq_t *get_ioreq(struct vcpu *v)
> -{
> -    struct domain *d = v->domain;
> -    shared_iopage_t *p = d->arch.hvm_domain.ioreq.va;
> -
> -    ASSERT((v == current) || spin_is_locked(&d->arch.hvm_domain.ioreq.lock));
> -
> -    return p ? &p->vcpu_ioreq[v->vcpu_id] : NULL;
> -}
> -
>  #define HVM_DELIVER_NO_ERROR_CODE  -1
>
>  #ifndef NDEBUG
> @@ -144,3 +133,13 @@ int hvm_mov_to_cr(unsigned int cr, unsigned int gpr);
>  int hvm_mov_from_cr(unsigned int cr, unsigned int gpr);
>
>  #endif /* __ASM_X86_HVM_SUPPORT_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> --
> 1.7.10.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.