
Re: [Xen-devel] [PATCH v4 21/28] vvtd: update hvm_gmsi_info when binding guest msi with pirq or



On Fri, Nov 17, 2017 at 02:22:28PM +0800, Chao Gao wrote:
> ... handling guest's invalidation request.
> 
> To support the pirq migration optimization and the use of VT-d posted
> interrupts to inject msi from assigned devices, the struct hvm_gmsi_info must
> be updated each time the guest programs msi information (affinity, vector).
> But after introducing vvtd, the guest only needs to update an IRTE, which is
> in guest memory, to program msi information.  vvtd doesn't trap r/w accesses
> to that memory range. Instead, it traps queue invalidation, the method used to
> notify VT-d hardware that an IRTE has changed.
> 
> This patch updates the hvm_gmsi_info structure and programs physical IRTEs to
> use VT-d posted interrupts if possible when binding a guest msi with a pirq or
> handling the guest's invalidation request. For the latter, all physical
> interrupts bound to the domain are iterated over to find the ones matching the
> IRTE.
> 
> Note: vvtd_process_iq() is called in vvtd_read() rather than in
> vvtd_handle_irq_request() to avoid an ABBA deadlock between d->event_lock and
> vvtd->ie_lock.
> 
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
> v4:
>  - new
> ---
>  xen/arch/x86/hvm/hvm.c             |  2 +-
>  xen/drivers/passthrough/io.c       | 89 ++++++++++++++++++++++++++++----------
>  xen/drivers/passthrough/vtd/vvtd.c | 70 ++++++++++++++++++++++++++++--
>  xen/include/asm-x86/hvm/hvm.h      |  2 +
>  xen/include/asm-x86/hvm/irq.h      |  1 +
>  xen/include/asm-x86/viommu.h       | 11 +++++
>  6 files changed, 147 insertions(+), 28 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 964418a..d2c1372 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -462,7 +462,7 @@ void hvm_migrate_timers(struct vcpu *v)
>      pt_migrate(v);
>  }
>  
> -static int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
>                              void *arg)
>  {
>      struct vcpu *v = arg;
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index d8c66bf..9198ef5 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -21,6 +21,7 @@
>  #include <xen/iommu.h>
>  #include <xen/cpu.h>
>  #include <xen/irq.h>
> +#include <xen/viommu.h>
>  #include <asm/hvm/irq.h>
>  #include <asm/hvm/support.h>
>  #include <asm/io_apic.h>
> @@ -275,6 +276,61 @@ static struct vcpu *vector_hashing_dest(const struct domain *d,
>      return dest;
>  }
>  
> +void pt_update_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
> +{
> +    uint8_t dest, delivery_mode;
> +    bool dest_mode;
> +    int dest_vcpu_id;
> +    const struct vcpu *vcpu;
> +    struct arch_irq_remapping_request request;
> +    struct arch_irq_remapping_info remap_info;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> +    irq_request_msi_fill(&request, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
> +    if ( viommu_check_irq_remapping(d, &request) )
> +    {
> +        /* An error in IRTE, don't perform the optimization */
> +        if ( viommu_get_irq_info(d, &request, &remap_info) )
> +        {
> +            pirq_dpci->gmsi.posted = false;
> +            pirq_dpci->gmsi.dest_vcpu_id = -1;
> +            pirq_dpci->gmsi.gvec = 0;
> +            return;
> +        }
> +
> +        dest = remap_info.dest;
> +        dest_mode = remap_info.dest_mode;
> +        delivery_mode = remap_info.delivery_mode;
> +        pirq_dpci->gmsi.gvec = remap_info.vector;
> +    }
> +    else
> +    {
> +        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
> +        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
> +        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
> +                                  MSI_DATA_DELIVERY_MODE_MASK);
> +        pirq_dpci->gmsi.gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
> +    }
> +
> +    dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> +    pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
> +
> +    pirq_dpci->gmsi.posted = false;
> +    vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;

So you use dest_vcpu_id to get the vcpu here...

> +    if ( iommu_intpost )
> +    {
> +        if ( delivery_mode == dest_LowestPrio )
> +            vcpu = vector_hashing_dest(d, dest, dest_mode, pirq_dpci->gmsi.gvec);
> +        if ( vcpu )
> +        {
> +            pirq_dpci->gmsi.posted = true;
> +            pirq_dpci->gmsi.dest_vcpu_id = vcpu->vcpu_id;

... which is only used here in order to get the dest_vcpu_id back. Is
this really needed? Can't you just use dest_vcpu_id?

I would rather do:

if ( iommu_intpost && delivery_mode == dest_LowestPrio )
{
    const struct vcpu *vcpu = vector_hashing_dest(d, dest, dest_mode,
                                                  pirq_dpci->gmsi.gvec);

    if ( vcpu )
    {
        ....
    }
}

> +        }
> +    }
> +}
> +
>  int pt_irq_create_bind(
>      struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
>  {
> @@ -339,9 +395,6 @@ int pt_irq_create_bind(
>      {
>      case PT_IRQ_TYPE_MSI:
>      {
> -        uint8_t dest, delivery_mode, gvec;
> -        bool dest_mode;
> -        int dest_vcpu_id;
>          const struct vcpu *vcpu;
>  
>          if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
> @@ -411,35 +464,23 @@ int pt_irq_create_bind(
>                  pirq_dpci->gmsi.addr = pt_irq_bind->u.msi.addr;
>              }
>          }
> -        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> -        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
> -        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
> -        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
> -                                  MSI_DATA_DELIVERY_MODE_MASK);
> -        gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
> -        pirq_dpci->gmsi.gvec = gvec;
>  
> -        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> -        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
> +        pt_update_gmsi(d, pirq_dpci);
>          spin_unlock(&d->event_lock);
>  
> -        pirq_dpci->gmsi.posted = false;
> -        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> -        if ( iommu_intpost )
> -        {
> -            if ( delivery_mode == dest_LowestPrio )
> -                vcpu = vector_hashing_dest(d, dest, dest_mode,
> -                                           pirq_dpci->gmsi.gvec);
> -            if ( vcpu )
> -                pirq_dpci->gmsi.posted = true;
> -        }
> -        if ( dest_vcpu_id >= 0 )
> -            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
> +        if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
> +            hvm_migrate_pirqs(d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
>  
>          /* Use interrupt posting if it is supported. */
>          if ( iommu_intpost )
> +        {
> +            if ( pirq_dpci->gmsi.posted )
> +                vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
> +            else
> +                vcpu = NULL;
>              pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
>                             info, pirq_dpci->gmsi.gvec);

If vcpu is now only used inside this if condition, please move its
declaration here to reduce the scope.
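
Something like this maybe (untested, just to show the narrower scope):

if ( iommu_intpost )
{
    const struct vcpu *vcpu = pirq_dpci->gmsi.posted
                              ? d->vcpu[pirq_dpci->gmsi.dest_vcpu_id] : NULL;

    pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
                   info, pirq_dpci->gmsi.gvec);
}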

> +        }
>  
>          if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
>          {
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index f6bde69..d12ad1d 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -477,6 +477,50 @@ static int vvtd_record_fault(struct vvtd *vvtd,
>  }
>  
>  /*
> + * 'arg' is an index into the interrupt remapping table. This index is used
> + * to search for physical irqs whose bound gmsi is translated by the IRTE
> + * referred to by the index. The struct hvm_gmsi_info contains some fields
> + * that are inferred from a virtual IRTE; these fields should be updated when
> + * the guest invalidates an IRTE. Furthermore, the physical IRTE is updated
> + * accordingly to reduce IPIs or to utilize VT-d posted interrupts.
> + *
> + * If 'arg' is -1, perform a global invalidation.
> + */
> +static int invalidate_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +                         void *arg)
> +{
> +    if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
> +    {
> +        uint32_t index, target = (long)arg;
> +        struct arch_irq_remapping_request req;
> +        const struct vcpu *vcpu;
> +
> +        irq_request_msi_fill(&req, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
> +        if ( !irq_remapping_request_index(&req, &index) &&
> +             ((target == -1) || (target == index)) )

Shouldn't this -1 be some kind of define, like GMSI_ALL or similar?
Also isn't it possible to use -1 as a valid target?
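
If you keep -1, something along these lines would at least give it a name
(GMSI_ALL being just the placeholder name from above):

#define GMSI_ALL ((uint32_t)-1)
...
        if ( !irq_remapping_request_index(&req, &index) &&
             (target == GMSI_ALL || target == index) )

with process_iqe() passing (void *)(long)GMSI_ALL for the global case, so the
magic value only appears in one place. IIRC the interrupt index is only 16
bits wide, so -1 shouldn't clash with a real index, but a named constant would
make that explicit.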

> +        {
> +            pt_update_gmsi(d, pirq_dpci);
> +            if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
> +                hvm_migrate_pirq(d, pirq_dpci,
> +                                 d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
> +
> +            /* Use interrupt posting if it is supported. */
> +            if ( iommu_intpost )
> +            {
> +                if ( pirq_dpci->gmsi.posted )
> +                    vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
> +                else
> +                    vcpu = NULL;
> +                pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
> +                               dpci_pirq(pirq_dpci), pirq_dpci->gmsi.gvec);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/*
>   * Process an invalidation descriptor. Currently, only two types descriptors,
>   * Interrupt Entry Cache Invalidation Descritor and Invalidation Wait
>   * Descriptor are handled.
> @@ -530,7 +574,26 @@ static int process_iqe(struct vvtd *vvtd, uint32_t i)
>          break;
>  
>      case TYPE_INVAL_IEC:
> -        /* No cache is preserved in vvtd, nothing is needed to be flushed */
> +        /*
> +         * If VT-d pi is enabled, pi_update_irte() may be called. It assumes
> +         * pcidevs_locked().
> +         */
> +        pcidevs_lock();
> +        spin_lock(&vvtd->domain->event_lock);
> +        /* A global invalidation of the cache is requested */
> +        if ( !qinval.q.iec_inv_dsc.lo.granu )
> +            pt_pirq_iterate(vvtd->domain, invalidate_gmsi, (void *)(long)-1);
> +        else
> +        {
> +            uint32_t iidx = qinval.q.iec_inv_dsc.lo.iidx;
> +            uint32_t nr = 1 << qinval.q.iec_inv_dsc.lo.im;
> +
> +            for ( ; nr; nr--, iidx++)

You can initialize nr in the for loop.
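
I.e. something like:

            uint32_t iidx = qinval.q.iec_inv_dsc.lo.iidx;
            uint32_t nr;

            for ( nr = 1 << qinval.q.iec_inv_dsc.lo.im; nr; nr--, iidx++ )
                pt_pirq_iterate(vvtd->domain, invalidate_gmsi,
                                (void *)(long)iidx);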

> +                pt_pirq_iterate(vvtd->domain, invalidate_gmsi,
> +                                (void *)(long)iidx);
> +        }
> +        spin_unlock(&vvtd->domain->event_lock);
> +        pcidevs_unlock();
>          break;
>  
>      default:
> @@ -839,6 +902,8 @@ static int vvtd_read(struct vcpu *v, unsigned long addr,
>      else
>          *pval = vvtd_get_reg_quad(vvtd, offset);
>  
> +    if ( !atomic_read(&vvtd->inflight_intr) )
> +        vvtd_process_iq(vvtd);
>      return X86EMUL_OKAY;
>  }
>  
> @@ -1088,8 +1153,7 @@ static int vvtd_handle_irq_request(const struct domain *d,
>                          irte.remap.tm);
>  
>   out:
> -    if ( !atomic_dec_and_test(&vvtd->inflight_intr) )
> -        vvtd_process_iq(vvtd);
> +    atomic_dec(&vvtd->inflight_intr);

Why is this removed? It was changed about 4 patches earlier in this series,
and is reverted here.

>      return ret;
>  }
>  
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index b687e03..f276ab6 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -394,6 +394,8 @@ bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
>  bool hvm_check_cpuid_faulting(struct vcpu *v);
>  void hvm_migrate_timers(struct vcpu *v);
>  void hvm_do_resume(struct vcpu *v);
> +int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
> +                            void *arg);

Please align.
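
I.e.:

int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                     void *arg);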

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 

