
Re: [Xen-devel] [PATCH for-4.13 v4 2/3] x86/passthrough: fix migration of MSI when using posted interrupts



Ran the same test and did not hit the issue.

Tested-by: Joe Jin <joe.jin@xxxxxxxxxx>

On 11/13/19 7:59 AM, Roger Pau Monne wrote:
> When using posted interrupts and the guest migrates MSI between vCPUs,
> Xen needs to flush any pending PIRR vectors on the previous vCPU, or
> else those vectors could get wrongly injected at a later point when the
> MSI fields are already updated.
> 
> The use of a fixed vCPU in lowest priority mode when using VT-d posted
> interrupts is also removed; as a result, VT-d posted interrupts are no
> longer used together with lowest priority mode and multiple
> destinations. This forces vlapic_lowest_prio to be called in order to
> select the destination vCPU at interrupt dispatch time.
> 
> Note that PIRR is synced to IRR in both pt_irq_destroy_bind and
> pt_irq_create_bind when the interrupt delivery data is being updated.
> 
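FWIW, for anyone following along: the flush described above is what the
new domain_sync_vlapic_pir() below does for every recorded destination
vCPU. Condensed to a rough sketch (sync_pir_to_irr() being the local
helper in vlapic.c, not new API):

    if ( d->vcpu[id] != current )
        vcpu_pause(d->vcpu[id]);    /* vCPU can no longer write PIR */
    sync_pir_to_irr(d->vcpu[id]);   /* fold pending PIR bits into IRR */
    if ( d->vcpu[id] != current )
        vcpu_unpause(d->vcpu[id]);
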
> Reported-by: Joe Jin <joe.jin@xxxxxxxxxx>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> Cc: Joe Jin <joe.jin@xxxxxxxxxx>
> Cc: Juergen Gross <jgross@xxxxxxxx>
> ---
> Changes since v3:
>  - In multi-destination mode make sure all destination vCPUs have PIR
>    synced to IRR by using a bitmap.
>  - Drop the bogus selection of a fixed vCPU when using lowest priority
>    mode.
> 
> Changes since v2:
>  - Also sync PIRR with IRR when using CPU posted interrupts.
>  - Force the selection of a specific vCPU when using posted interrupts
>    for multi-dest.
>  - Change vmsi_deliver_pirq to honor dest_vcpu_id.
> 
> Changes since v1:
>  - Store the vcpu id also in multi-dest mode if the interrupt is bound
>    to a vcpu for posted delivery.
>  - s/#if/#ifdef/.
> ---
>  xen/arch/x86/hvm/hvm.c           |  31 ++++++++
>  xen/arch/x86/hvm/vlapic.c        |  19 +++++
>  xen/arch/x86/hvm/vmsi.c          |  23 ------
>  xen/drivers/passthrough/io.c     | 118 ++++++++++++++-----------------
>  xen/include/asm-x86/hvm/hvm.h    |   5 +-
>  xen/include/asm-x86/hvm/vlapic.h |   3 +
>  6 files changed, 110 insertions(+), 89 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 06a7b40107..0e3379fa6f 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -43,6 +43,7 @@
>  #include <asm/current.h>
>  #include <asm/e820.h>
>  #include <asm/io.h>
> +#include <asm/io_apic.h>
>  #include <asm/regs.h>
>  #include <asm/cpufeature.h>
>  #include <asm/processor.h>
> @@ -5266,6 +5267,36 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
>      alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
>  }
>  
> +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
> +                       uint8_t delivery_mode, unsigned long *vcpus)
> +{
> +    struct vcpu *v;
> +
> +    switch ( delivery_mode )
> +    {
> +    case dest_LowestPrio:
> +        /*
> +         * Get all the possible destinations, but note that lowest priority
> +         * mode is only going to inject the interrupt into the vCPU
> +         * running at the lowest priority level.
> +         *
> +         * Fallthrough
> +         */
> +    case dest_Fixed:
> +        for_each_vcpu ( d, v )
> +            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) 
> )
> +                __set_bit(v->vcpu_id, vcpus);
> +        break;
> +
> +    default:
> +        gprintk(XENLOG_WARNING, "unsupported interrupt delivery mode %u\n",
> +                delivery_mode);
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
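
Usage note on the new helper: callers pass in a zeroed bitmap sized for
the domain and get back all the possible destination vCPU ids.
Condensed from the io.c hunks further down (no new API assumed here):

    DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { };

    hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus);
    /* Exactly one destination yields a usable id, otherwise -1. */
    dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) == 1
                   ? find_first_bit(dest_vcpus, d->max_vcpus) : -1;
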
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 9466258d6f..9d9c6d391a 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -112,6 +112,25 @@ static void sync_pir_to_irr(struct vcpu *v)
>          alternative_vcall(hvm_funcs.sync_pir_to_irr, v);
>  }
>  
> +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus)
> +{
> +    unsigned int id;
> +
> +    if ( !bitmap_weight(vcpus, d->max_vcpus) )
> +        return;
> +
> +    for ( id = find_first_bit(vcpus, d->max_vcpus);
> +          id < d->max_vcpus;
> +          id = find_next_bit(vcpus, d->max_vcpus, id + 1) )
> +    {
> +        if ( d->vcpu[id] != current )
> +            vcpu_pause(d->vcpu[id]);
> +        sync_pir_to_irr(d->vcpu[id]);
> +        if ( d->vcpu[id] != current )
> +            vcpu_unpause(d->vcpu[id]);
> +    }
> +}
> +
>  static int vlapic_find_highest_irr(struct vlapic *vlapic)
>  {
>      sync_pir_to_irr(vlapic_vcpu(vlapic));
> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
> index 6597d9f719..66891d7d20 100644
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -121,29 +121,6 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
>      vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
>  }
>  
> -/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
> -{
> -    int dest_vcpu_id = -1, w = 0;
> -    struct vcpu *v;
> -    
> -    if ( d->max_vcpus == 1 )
> -        return 0;
> - 
> -    for_each_vcpu ( d, v )
> -    {
> -        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) 
> -        {
> -            w++;
> -            dest_vcpu_id = v->vcpu_id;
> -        }
> -    }
> -    if ( w > 1 )
> -        return -1;
> -
> -    return dest_vcpu_id;
> -}
> -
>  /* MSI-X mask bit hypervisor interception */
>  struct msixtbl_entry
>  {
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index b292e79382..5289e89bc1 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -219,62 +219,6 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
>      xfree(dpci);
>  }
>  
> -/*
> - * This routine handles lowest-priority interrupts using vector-hashing
> - * mechanism. As an example, modern Intel CPUs use this method to handle
> - * lowest-priority interrupts.
> - *
> - * Here is the details about the vector-hashing mechanism:
> - * 1. For lowest-priority interrupts, store all the possible destination
> - *    vCPUs in an array.
> - * 2. Use "gvec % max number of destination vCPUs" to find the right
> - *    destination vCPU in the array for the lowest-priority interrupt.
> - */
> -static struct vcpu *vector_hashing_dest(const struct domain *d,
> -                                        uint32_t dest_id,
> -                                        bool dest_mode,
> -                                        uint8_t gvec)
> -
> -{
> -    unsigned long *dest_vcpu_bitmap;
> -    unsigned int dest_vcpus = 0;
> -    struct vcpu *v, *dest = NULL;
> -    unsigned int i;
> -
> -    dest_vcpu_bitmap = xzalloc_array(unsigned long,
> -                                     BITS_TO_LONGS(d->max_vcpus));
> -    if ( !dest_vcpu_bitmap )
> -        return NULL;
> -
> -    for_each_vcpu ( d, v )
> -    {
> -        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
> -                                dest_id, dest_mode) )
> -            continue;
> -
> -        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
> -        dest_vcpus++;
> -    }
> -
> -    if ( dest_vcpus != 0 )
> -    {
> -        unsigned int mod = gvec % dest_vcpus;
> -        unsigned int idx = 0;
> -
> -        for ( i = 0; i <= mod; i++ )
> -        {
> -            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
> -            BUG_ON(idx > d->max_vcpus);
> -        }
> -
> -        dest = d->vcpu[idx - 1];
> -    }
> -
> -    xfree(dest_vcpu_bitmap);
> -
> -    return dest;
> -}
> -
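
For readers wondering what behaviour is dropped: the helper removed
above picked the (gvec % N + 1)-th matching vCPU, N being the number of
destinations. E.g. with matching vCPUs {1, 2, 5} and gvec 0xb3 (179):
179 % 3 == 2, so the third set bit, vCPU 5, got picked. After this
patch, lowest-priority interrupts with multiple possible destinations
are instead resolved at injection time via vlapic_lowest_prio().
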
>  int pt_irq_create_bind(
>      struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
>  {
> @@ -345,6 +289,8 @@ int pt_irq_create_bind(
>          const struct vcpu *vcpu;
>          uint32_t gflags = pt_irq_bind->u.msi.gflags &
>                            ~XEN_DOMCTL_VMSI_X86_UNMASKED;
> +        DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { };
> +        DECLARE_BITMAP(prev_vcpus, MAX_VIRT_CPUS) = { };
>  
>          if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          {
> @@ -411,6 +357,24 @@ int pt_irq_create_bind(
>  
>                  pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
>                  pirq_dpci->gmsi.gflags = gflags;
> +                if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
> +                    __set_bit(pirq_dpci->gmsi.dest_vcpu_id, prev_vcpus);
> +                else
> +                {
> +                    /*
> +                     * If the previous configuration has multiple possible
> +                     * destinations, record them in order to sync the PIR to
> +                     * IRR afterwards.
> +                     */
> +                    dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> +                    dest_mode = pirq_dpci->gmsi.gflags &
> +                                XEN_DOMCTL_VMSI_X86_DM_MASK;
> +                    delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> +                    hvm_intr_get_dests(d, dest, dest_mode, delivery_mode,
> +                                       prev_vcpus);
> +                }
>              }
>          }
>          /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> @@ -420,20 +384,16 @@ int pt_irq_create_bind(
>          delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
>                                    XEN_DOMCTL_VMSI_X86_DELIV_MASK);
>  
> -        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> +        hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus);
> +        dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) != 1 ?
> +            -1 : find_first_bit(dest_vcpus, d->max_vcpus);
>          pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
>          spin_unlock(&d->event_lock);
>  
>          pirq_dpci->gmsi.posted = false;
>          vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> -        if ( iommu_intpost )
> -        {
> -            if ( delivery_mode == dest_LowestPrio )
> -                vcpu = vector_hashing_dest(d, dest, dest_mode,
> -                                           pirq_dpci->gmsi.gvec);
> -            if ( vcpu )
> -                pirq_dpci->gmsi.posted = true;
> -        }
> +        if ( vcpu && iommu_intpost )
> +            pirq_dpci->gmsi.posted = true;
>          if ( vcpu && is_iommu_enabled(d) )
>              hvm_migrate_pirq(pirq_dpci, vcpu);
>  
> @@ -442,6 +402,9 @@ int pt_irq_create_bind(
>              pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
>                             info, pirq_dpci->gmsi.gvec);
>  
> +        if ( hvm_funcs.deliver_posted_intr )
> +            domain_sync_vlapic_pir(d, prev_vcpus);
> +
>          if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
>          {
>              unsigned long flags;
> @@ -731,6 +694,31 @@ int pt_irq_destroy_bind(
>      else if ( pirq_dpci && pirq_dpci->gmsi.posted )
>          pi_update_irte(NULL, pirq, 0);
>  
> +    if ( hvm_funcs.deliver_posted_intr )
> +    {
> +        DECLARE_BITMAP(vcpus, MAX_VIRT_CPUS) = { };
> +
> +        if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
> +            __set_bit(pirq_dpci->gmsi.dest_vcpu_id, vcpus);
> +        else
> +        {
> +            /*
> +             * If the previous configuration has multiple possible
> +             * destinations, record them in order to sync the PIR to IRR.
> +             */
> +            uint8_t dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> +            uint8_t dest_mode = pirq_dpci->gmsi.gflags &
> +                                XEN_DOMCTL_VMSI_X86_DM_MASK;
> +            uint8_t delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> +
> +            hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, vcpus);
> +        }
> +
> +        domain_sync_vlapic_pir(d, vcpus);
> +    }
> +
>      if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
>           list_empty(&pirq_dpci->digl_list) )
>      {
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index f86af09898..899665fed8 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -266,7 +266,6 @@ int vmsi_deliver(
>      uint8_t delivery_mode, uint8_t trig_mode);
>  struct hvm_pirq_dpci;
>  void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *);
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
>  
>  enum hvm_intblk
>  hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
> @@ -336,6 +335,10 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
>  bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
>                          void *ctxt);
>  
> +/* Get all the possible destination vCPUs of an interrupt. */
> +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
> +                       uint8_t delivery_mode, unsigned long *vcpus);
> +
>  #ifdef CONFIG_HVM
>  
>  #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index dde66b4f0f..2bc2ebadf0 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -150,4 +150,7 @@ bool_t vlapic_match_dest(
>      const struct vlapic *target, const struct vlapic *source,
>      int short_hand, uint32_t dest, bool_t dest_mode);
>  
> +/* Sync the PIR to IRR of all vlapics in the vcpus bitmap. */
> +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus);
> +
>  #endif /* __ASM_X86_HVM_VLAPIC_H__ */
> 

