
Re: [Xen-devel] [PATCH v4 21/28] vvtd: update hvm_gmsi_info when binding guest msi with pirq or



On Fri, Nov 17, 2017 at 02:22:28PM +0800, Chao Gao wrote:
> ... handling guest's invalidation request.
> 
> To support the pirq migration optimization and the use of VT-d posted
> interrupts to inject msi from assigned devices, the struct hvm_gmsi_info must
> be updated each time the guest programs msi information (affinity, vector).
> But after introducing vvtd, the guest only needs to update an IRTE, which is
> in guest memory, to program msi information.  vvtd doesn't trap r/w accesses
> to that memory range. Instead, it traps queue invalidation, the method used to
> notify VT-d hardware that an IRTE has changed.
> 
> This patch updates the hvm_gmsi_info structure and programs physical IRTEs to
> use VT-d posted interrupts if possible when binding a guest msi with a pirq or
> handling the guest's invalidation request. For the latter, all physical
> interrupts bound to the domain are iterated over to find the ones matching the
> IRTE.
> 
> Note: vvtd_process_iq() is called in vvtd_read() rather than in
> vvtd_handle_irq_request() to avoid an ABBA deadlock between d->event_lock and
> vvtd->ie_lock.
> 
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
> v4:
>  - new
> ---
>  xen/arch/x86/hvm/hvm.c             |  2 +-
>  xen/drivers/passthrough/io.c       | 89 ++++++++++++++++++++++++++++----------
>  xen/drivers/passthrough/vtd/vvtd.c | 70 ++++++++++++++++++++++++++++--
>  xen/include/asm-x86/hvm/hvm.h      |  2 +
>  xen/include/asm-x86/hvm/irq.h      |  1 +
>  xen/include/asm-x86/viommu.h       | 11 +++++
>  6 files changed, 147 insertions(+), 28 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 964418a..d2c1372 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -462,7 +462,7 @@ void hvm_migrate_timers(struct vcpu *v)
>      pt_migrate(v);
>  }
>  
> -static int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
>                              void *arg)
>  {
>      struct vcpu *v = arg;
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index d8c66bf..9198ef5 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -21,6 +21,7 @@
>  #include <xen/iommu.h>
>  #include <xen/cpu.h>
>  #include <xen/irq.h>
> +#include <xen/viommu.h>
>  #include <asm/hvm/irq.h>
>  #include <asm/hvm/support.h>
>  #include <asm/io_apic.h>
> @@ -275,6 +276,61 @@ static struct vcpu *vector_hashing_dest(const struct domain *d,
>      return dest;
>  }
>  
> +void pt_update_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
> +{
> +    uint8_t dest, delivery_mode;
> +    bool dest_mode;
> +    int dest_vcpu_id;
> +    const struct vcpu *vcpu;
> +    struct arch_irq_remapping_request request;
> +    struct arch_irq_remapping_info remap_info;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> +    irq_request_msi_fill(&request, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
> +    if ( viommu_check_irq_remapping(d, &request) )
> +    {
> +        /* An error in IRTE, don't perform the optimization */
> +        if ( viommu_get_irq_info(d, &request, &remap_info) )
> +        {
> +            pirq_dpci->gmsi.posted = false;
> +            pirq_dpci->gmsi.dest_vcpu_id = -1;
> +            pirq_dpci->gmsi.gvec = 0;
> +            return;
> +        }
> +
> +        dest = remap_info.dest;
> +        dest_mode = remap_info.dest_mode;
> +        delivery_mode = remap_info.delivery_mode;
> +        pirq_dpci->gmsi.gvec = remap_info.vector;
> +    }
> +    else
> +    {
> +        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
> +        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
> +        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
> +                                  MSI_DATA_DELIVERY_MODE_MASK);
> +        pirq_dpci->gmsi.gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
> +    }
> +
> +    dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> +    pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
> +
> +    pirq_dpci->gmsi.posted = false;
> +    vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;

So you use dest_vcpu_id to get the vcpu here...

> +    if ( iommu_intpost )
> +    {
> +        if ( delivery_mode == dest_LowestPrio )
> +            vcpu = vector_hashing_dest(d, dest, dest_mode, pirq_dpci->gmsi.gvec);
> +        if ( vcpu )
> +        {
> +            pirq_dpci->gmsi.posted = true;
> +            pirq_dpci->gmsi.dest_vcpu_id = vcpu->vcpu_id;

... which is only used here in order to get the dest_vcpu_id back. Is
this really needed? Can't you just use dest_vcpu_id?

I would rather do:

if ( iommu_intpost && delivery_mode == dest_LowestPrio )
{
    const struct vcpu *vcpu = vector_hashing_dest(d, dest, dest_mode,
                                                  pirq_dpci->gmsi.gvec);

    if ( vcpu )
    {
        ....
    }
}

> +        }
> +    }
> +}
> +
>  int pt_irq_create_bind(
>      struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
>  {
> @@ -339,9 +395,6 @@ int pt_irq_create_bind(
>      {
>      case PT_IRQ_TYPE_MSI:
>      {
> -        uint8_t dest, delivery_mode, gvec;
> -        bool dest_mode;
> -        int dest_vcpu_id;
>          const struct vcpu *vcpu;
>  
>          if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
> @@ -411,35 +464,23 @@ int pt_irq_create_bind(
>                  pirq_dpci->gmsi.addr = pt_irq_bind->u.msi.addr;
>              }
>          }
> -        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> -        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
> -        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
> -        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
> -                                  MSI_DATA_DELIVERY_MODE_MASK);
> -        gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
> -        pirq_dpci->gmsi.gvec = gvec;
>  
> -        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> -        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
> +        pt_update_gmsi(d, pirq_dpci);
>          spin_unlock(&d->event_lock);
>  
> -        pirq_dpci->gmsi.posted = false;
> -        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> -        if ( iommu_intpost )
> -        {
> -            if ( delivery_mode == dest_LowestPrio )
> -                vcpu = vector_hashing_dest(d, dest, dest_mode,
> -                                           pirq_dpci->gmsi.gvec);
> -            if ( vcpu )
> -                pirq_dpci->gmsi.posted = true;
> -        }
> -        if ( dest_vcpu_id >= 0 )
> -            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
> +        if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
> +            hvm_migrate_pirqs(d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
>  
>          /* Use interrupt posting if it is supported. */
>          if ( iommu_intpost )
> +        {
> +            if ( pirq_dpci->gmsi.posted )
> +                vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
> +            else
> +                vcpu = NULL;
>              pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
>                             info, pirq_dpci->gmsi.gvec);

If vcpu is now only used inside this if condition, please move its
declaration here to reduce the scope.
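
Something like this maybe (untested, just to show the narrower scope):

if ( iommu_intpost )
{
    const struct vcpu *vcpu = pirq_dpci->gmsi.posted
                              ? d->vcpu[pirq_dpci->gmsi.dest_vcpu_id] : NULL;

    pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
                   info, pirq_dpci->gmsi.gvec);
}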

> +        }
>  
>          if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
>          {
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index f6bde69..d12ad1d 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -477,6 +477,50 @@ static int vvtd_record_fault(struct vvtd *vvtd,
>  }
>  
>  /*
> + * 'arg' is an index into the interrupt remapping table. This index is used
> + * to search for physical irqs whose bound gmsi is translated by the IRTE
> + * referred to by the index. The struct hvm_gmsi_info contains some fields
> + * that are inferred from a virtual IRTE; these fields should be updated when
> + * the guest invalidates an IRTE. Furthermore, the physical IRTE is updated
> + * accordingly to reduce IPIs or to utilize VT-d posted interrupts.
> + *
> + * If 'arg' is -1, perform a global invalidation.
> + */
> +static int invalidate_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +                         void *arg)
> +{
> +    if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
> +    {
> +        uint32_t index, target = (long)arg;
> +        struct arch_irq_remapping_request req;
> +        const struct vcpu *vcpu;
> +
> +        irq_request_msi_fill(&req, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
> +        if ( !irq_remapping_request_index(&req, &index) &&
> +             ((target == -1) || (target == index)) )

Shouldn't this -1 be some kind of define, like GMSI_ALL or similar?
Also isn't it possible to use -1 as a valid target?
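
If you keep -1, something along these lines would at least give it a name
(GMSI_ALL being just the placeholder name from above):

#define GMSI_ALL ((uint32_t)-1)
...
        if ( !irq_remapping_request_index(&req, &index) &&
             (target == GMSI_ALL || target == index) )

with process_iqe() passing (void *)(long)GMSI_ALL for the global case, so the
magic value only appears in one place. IIRC the interrupt index is only 16
bits wide, so -1 shouldn't clash with a real index, but a named constant would
make that explicit.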

> +        {
> +            pt_update_gmsi(d, pirq_dpci);
> +            if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
> +                hvm_migrate_pirq(d, pirq_dpci,
> +                                 d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
> +
> +            /* Use interrupt posting if it is supported. */
> +            if ( iommu_intpost )
> +            {
> +                if ( pirq_dpci->gmsi.posted )
> +                    vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
> +                else
> +                    vcpu = NULL;
> +                pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
> +                               dpci_pirq(pirq_dpci), pirq_dpci->gmsi.gvec);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/*
>   * Process an invalidation descriptor. Currently, only two types descriptors,
>   * Interrupt Entry Cache Invalidation Descritor and Invalidation Wait
>   * Descriptor are handled.
> @@ -530,7 +574,26 @@ static int process_iqe(struct vvtd *vvtd, uint32_t i)
>          break;
>  
>      case TYPE_INVAL_IEC:
> -        /* No cache is preserved in vvtd, nothing is needed to be flushed */
> +        /*
> +         * If VT-d pi is enabled, pi_update_irte() may be called. It assumes
> +         * pcidevs_locked().
> +         */
> +        pcidevs_lock();
> +        spin_lock(&vvtd->domain->event_lock);
> +        /* A global invalidation of the cache is requested */
> +        if ( !qinval.q.iec_inv_dsc.lo.granu )
> +            pt_pirq_iterate(vvtd->domain, invalidate_gmsi, (void *)(long)-1);
> +        else
> +        {
> +            uint32_t iidx = qinval.q.iec_inv_dsc.lo.iidx;
> +            uint32_t nr = 1 << qinval.q.iec_inv_dsc.lo.im;
> +
> +            for ( ; nr; nr--, iidx++)

You can initialize nr in the for loop.
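
I.e. something like:

            uint32_t iidx = qinval.q.iec_inv_dsc.lo.iidx;
            uint32_t nr;

            for ( nr = 1 << qinval.q.iec_inv_dsc.lo.im; nr; nr--, iidx++ )
                pt_pirq_iterate(vvtd->domain, invalidate_gmsi,
                                (void *)(long)iidx);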

> +                pt_pirq_iterate(vvtd->domain, invalidate_gmsi,
> +                                (void *)(long)iidx);
> +        }
> +        spin_unlock(&vvtd->domain->event_lock);
> +        pcidevs_unlock();
>          break;
>  
>      default:
> @@ -839,6 +902,8 @@ static int vvtd_read(struct vcpu *v, unsigned long addr,
>      else
>          *pval = vvtd_get_reg_quad(vvtd, offset);
>  
> +    if ( !atomic_read(&vvtd->inflight_intr) )
> +        vvtd_process_iq(vvtd);
>      return X86EMUL_OKAY;
>  }
>  
> @@ -1088,8 +1153,7 @@ static int vvtd_handle_irq_request(const struct domain *d,
>                          irte.remap.tm);
>  
>   out:
> -    if ( !atomic_dec_and_test(&vvtd->inflight_intr) )
> -        vvtd_process_iq(vvtd);
> +    atomic_dec(&vvtd->inflight_intr);

Why is this removed? It was changed about 4 patches earlier in this series,
and is reverted here.

>      return ret;
>  }
>  
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index b687e03..f276ab6 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -394,6 +394,8 @@ bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
>  bool hvm_check_cpuid_faulting(struct vcpu *v);
>  void hvm_migrate_timers(struct vcpu *v);
>  void hvm_do_resume(struct vcpu *v);
> +int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +                            void *arg);

Please align.
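
I.e.:

int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                     void *arg);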

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 

