
Re: [Xen-devel] [PATCH for-4.13 v4 2/3] x86/passthrough: fix migration of MSI when using posted interrupts



Ran the same test and did not hit the issue.

Tested-by: Joe Jin <joe.jin@xxxxxxxxxx>

On 11/13/19 7:59 AM, Roger Pau Monne wrote:
> When using posted interrupts and the guest migrates MSI between vCPUs,
> Xen needs to flush any pending PIRR vectors on the previous vCPU, or
> else those vectors could get wrongly injected at a later point when the
> MSI fields are already updated.
> 
> The use of a fixed vCPU in lowest priority mode when using VT-d posted
> interrupts is also removed; as a result, VT-d posted interrupts are no
> longer used together with lowest priority mode and multiple
> destinations. This forces vlapic_lowest_prio to be called in order to
> select the destination vCPU at interrupt dispatch time.
> 
> Note that PIRR is synced to IRR in both pt_irq_destroy_bind and
> pt_irq_create_bind when the interrupt delivery data is being updated.
> 
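FWIW, for anyone following along: the flush described above is what the
new domain_sync_vlapic_pir() below does for every recorded destination
vCPU. Condensed to a rough sketch (sync_pir_to_irr() being the local
helper in vlapic.c, not new API):

    if ( d->vcpu[id] != current )
        vcpu_pause(d->vcpu[id]);    /* vCPU can no longer write PIR */
    sync_pir_to_irr(d->vcpu[id]);   /* fold pending PIR bits into IRR */
    if ( d->vcpu[id] != current )
        vcpu_unpause(d->vcpu[id]);
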
> Reported-by: Joe Jin <joe.jin@xxxxxxxxxx>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> Cc: Joe Jin <joe.jin@xxxxxxxxxx>
> Cc: Juergen Gross <jgross@xxxxxxxx>
> ---
> Changes since v3:
>  - In multi-destination mode make sure all destination vCPUs have PIR
>    synced to IRR by using a bitmap.
>  - Drop the bogus selection of a fixed vCPU when using lowest priority
>    mode.
> 
> Changes since v2:
>  - Also sync PIRR with IRR when using CPU posted interrupts.
>  - Force the selection of a specific vCPU when using posted interrupts
>    for multi-dest.
>  - Change vmsi_deliver_pirq to honor dest_vcpu_id.
> 
> Changes since v1:
>  - Store the vcpu id also in multi-dest mode if the interrupt is bound
>    to a vcpu for posted delivery.
>  - s/#if/#ifdef/.
> ---
>  xen/arch/x86/hvm/hvm.c           |  31 ++++++++
>  xen/arch/x86/hvm/vlapic.c        |  19 +++++
>  xen/arch/x86/hvm/vmsi.c          |  23 ------
>  xen/drivers/passthrough/io.c     | 118 ++++++++++++++-----------------
>  xen/include/asm-x86/hvm/hvm.h    |   5 +-
>  xen/include/asm-x86/hvm/vlapic.h |   3 +
>  6 files changed, 110 insertions(+), 89 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 06a7b40107..0e3379fa6f 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -43,6 +43,7 @@
>  #include <asm/current.h>
>  #include <asm/e820.h>
>  #include <asm/io.h>
> +#include <asm/io_apic.h>
>  #include <asm/regs.h>
>  #include <asm/cpufeature.h>
>  #include <asm/processor.h>
> @@ -5266,6 +5267,36 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
>      alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
>  }
>  
> +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
> +                       uint8_t delivery_mode, unsigned long *vcpus)
> +{
> +    struct vcpu *v;
> +
> +    switch ( delivery_mode )
> +    {
> +    case dest_LowestPrio:
> +        /*
> +         * Get all the possible destinations, but note that lowest priority
> +         * mode is only going to inject the interrupt into the vCPU
> +         * running at the lowest priority level.
> +         *
> +         * Fallthrough
> +         */
> +    case dest_Fixed:
> +        for_each_vcpu ( d, v )
> +            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) 
> )
> +                __set_bit(v->vcpu_id, vcpus);
> +        break;
> +
> +    default:
> +        gprintk(XENLOG_WARNING, "unsupported interrupt delivery mode %u\n",
> +                delivery_mode);
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
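
Usage note on the new helper: callers pass in a zeroed bitmap sized for
the domain and get back all the possible destination vCPU ids.
Condensed from the io.c hunks further down (no new API assumed here):

    DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { };

    hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus);
    /* Exactly one destination yields a usable id, otherwise -1. */
    dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) == 1
                   ? find_first_bit(dest_vcpus, d->max_vcpus) : -1;
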
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 9466258d6f..9d9c6d391a 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -112,6 +112,25 @@ static void sync_pir_to_irr(struct vcpu *v)
>          alternative_vcall(hvm_funcs.sync_pir_to_irr, v);
>  }
>  
> +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus)
> +{
> +    unsigned int id;
> +
> +    if ( !bitmap_weight(vcpus, d->max_vcpus) )
> +        return;
> +
> +    for ( id = find_first_bit(vcpus, d->max_vcpus);
> +          id < d->max_vcpus;
> +          id = find_next_bit(vcpus, d->max_vcpus, id + 1) )
> +    {
> +        if ( d->vcpu[id] != current )
> +            vcpu_pause(d->vcpu[id]);
> +        sync_pir_to_irr(d->vcpu[id]);
> +        if ( d->vcpu[id] != current )
> +            vcpu_unpause(d->vcpu[id]);
> +    }
> +}
> +
>  static int vlapic_find_highest_irr(struct vlapic *vlapic)
>  {
>      sync_pir_to_irr(vlapic_vcpu(vlapic));
> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
> index 6597d9f719..66891d7d20 100644
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -121,29 +121,6 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
>      vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
>  }
>  
> -/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
> -{
> -    int dest_vcpu_id = -1, w = 0;
> -    struct vcpu *v;
> -    
> -    if ( d->max_vcpus == 1 )
> -        return 0;
> - 
> -    for_each_vcpu ( d, v )
> -    {
> -        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) 
> -        {
> -            w++;
> -            dest_vcpu_id = v->vcpu_id;
> -        }
> -    }
> -    if ( w > 1 )
> -        return -1;
> -
> -    return dest_vcpu_id;
> -}
> -
>  /* MSI-X mask bit hypervisor interception */
>  struct msixtbl_entry
>  {
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index b292e79382..5289e89bc1 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -219,62 +219,6 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
>      xfree(dpci);
>  }
>  
> -/*
> - * This routine handles lowest-priority interrupts using vector-hashing
> - * mechanism. As an example, modern Intel CPUs use this method to handle
> - * lowest-priority interrupts.
> - *
> - * Here is the details about the vector-hashing mechanism:
> - * 1. For lowest-priority interrupts, store all the possible destination
> - *    vCPUs in an array.
> - * 2. Use "gvec % max number of destination vCPUs" to find the right
> - *    destination vCPU in the array for the lowest-priority interrupt.
> - */
> -static struct vcpu *vector_hashing_dest(const struct domain *d,
> -                                        uint32_t dest_id,
> -                                        bool dest_mode,
> -                                        uint8_t gvec)
> -
> -{
> -    unsigned long *dest_vcpu_bitmap;
> -    unsigned int dest_vcpus = 0;
> -    struct vcpu *v, *dest = NULL;
> -    unsigned int i;
> -
> -    dest_vcpu_bitmap = xzalloc_array(unsigned long,
> -                                     BITS_TO_LONGS(d->max_vcpus));
> -    if ( !dest_vcpu_bitmap )
> -        return NULL;
> -
> -    for_each_vcpu ( d, v )
> -    {
> -        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
> -                                dest_id, dest_mode) )
> -            continue;
> -
> -        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
> -        dest_vcpus++;
> -    }
> -
> -    if ( dest_vcpus != 0 )
> -    {
> -        unsigned int mod = gvec % dest_vcpus;
> -        unsigned int idx = 0;
> -
> -        for ( i = 0; i <= mod; i++ )
> -        {
> -            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
> -            BUG_ON(idx > d->max_vcpus);
> -        }
> -
> -        dest = d->vcpu[idx - 1];
> -    }
> -
> -    xfree(dest_vcpu_bitmap);
> -
> -    return dest;
> -}
> -
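
For readers wondering what behaviour is dropped: the helper removed
above picked the (gvec % N + 1)-th matching vCPU, N being the number of
destinations. E.g. with matching vCPUs {1, 2, 5} and gvec 0xb3 (179):
179 % 3 == 2, so the third set bit, vCPU 5, got picked. After this
patch, lowest-priority interrupts with multiple possible destinations
are instead resolved at injection time via vlapic_lowest_prio().
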
>  int pt_irq_create_bind(
>      struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
>  {
> @@ -345,6 +289,8 @@ int pt_irq_create_bind(
>          const struct vcpu *vcpu;
>          uint32_t gflags = pt_irq_bind->u.msi.gflags &
>                            ~XEN_DOMCTL_VMSI_X86_UNMASKED;
> +        DECLARE_BITMAP(dest_vcpus, MAX_VIRT_CPUS) = { };
> +        DECLARE_BITMAP(prev_vcpus, MAX_VIRT_CPUS) = { };
>  
>          if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          {
> @@ -411,6 +357,24 @@ int pt_irq_create_bind(
>  
>                  pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
>                  pirq_dpci->gmsi.gflags = gflags;
> +                if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
> +                    __set_bit(pirq_dpci->gmsi.dest_vcpu_id, prev_vcpus);
> +                else
> +                {
> +                    /*
> +                     * If the previous configuration has multiple possible
> +                     * destinations, record them in order to sync the PIR to
> +                     * IRR afterwards.
> +                     */
> +                    dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> +                    dest_mode = pirq_dpci->gmsi.gflags &
> +                                XEN_DOMCTL_VMSI_X86_DM_MASK;
> +                    delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> +                    hvm_intr_get_dests(d, dest, dest_mode, delivery_mode,
> +                                       prev_vcpus);
> +                }
>              }
>          }
>          /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> @@ -420,20 +384,16 @@ int pt_irq_create_bind(
>          delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
>                                    XEN_DOMCTL_VMSI_X86_DELIV_MASK);
>  
> -        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> +        hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, dest_vcpus);
> +        dest_vcpu_id = bitmap_weight(dest_vcpus, d->max_vcpus) != 1 ?
> +            -1 : find_first_bit(dest_vcpus, d->max_vcpus);
>          pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
>          spin_unlock(&d->event_lock);
>  
>          pirq_dpci->gmsi.posted = false;
>          vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> -        if ( iommu_intpost )
> -        {
> -            if ( delivery_mode == dest_LowestPrio )
> -                vcpu = vector_hashing_dest(d, dest, dest_mode,
> -                                           pirq_dpci->gmsi.gvec);
> -            if ( vcpu )
> -                pirq_dpci->gmsi.posted = true;
> -        }
> +        if ( vcpu && iommu_intpost )
> +            pirq_dpci->gmsi.posted = true;
>          if ( vcpu && is_iommu_enabled(d) )
>              hvm_migrate_pirq(pirq_dpci, vcpu);
>  
> @@ -442,6 +402,9 @@ int pt_irq_create_bind(
>              pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
>                             info, pirq_dpci->gmsi.gvec);
>  
> +        if ( hvm_funcs.deliver_posted_intr )
> +            domain_sync_vlapic_pir(d, prev_vcpus);
> +
>          if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
>          {
>              unsigned long flags;
> @@ -731,6 +694,31 @@ int pt_irq_destroy_bind(
>      else if ( pirq_dpci && pirq_dpci->gmsi.posted )
>          pi_update_irte(NULL, pirq, 0);
>  
> +    if ( hvm_funcs.deliver_posted_intr )
> +    {
> +        DECLARE_BITMAP(vcpus, MAX_VIRT_CPUS) = { };
> +
> +        if ( pirq_dpci->gmsi.dest_vcpu_id != -1 )
> +            __set_bit(pirq_dpci->gmsi.dest_vcpu_id, vcpus);
> +        else
> +        {
> +            /*
> +             * If the previous configuration has multiple possible
> +             * destinations, record them in order to sync the PIR to IRR.
> +             */
> +            uint8_t dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> +            uint8_t dest_mode = pirq_dpci->gmsi.gflags &
> +                                XEN_DOMCTL_VMSI_X86_DM_MASK;
> +            uint8_t delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
> +                                              XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> +
> +            hvm_intr_get_dests(d, dest, dest_mode, delivery_mode, vcpus);
> +        }
> +
> +        domain_sync_vlapic_pir(d, vcpus);
> +    }
> +
>      if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
>           list_empty(&pirq_dpci->digl_list) )
>      {
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index f86af09898..899665fed8 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -266,7 +266,6 @@ int vmsi_deliver(
>      uint8_t delivery_mode, uint8_t trig_mode);
>  struct hvm_pirq_dpci;
>  void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *);
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
>  
>  enum hvm_intblk
>  hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
> @@ -336,6 +335,10 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
>  bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
>                          void *ctxt);
>  
> +/* Get all the possible destination vCPUs of an interrupt. */
> +int hvm_intr_get_dests(struct domain *d, uint8_t dest, uint8_t dest_mode,
> +                       uint8_t delivery_mode, unsigned long *vcpus);
> +
>  #ifdef CONFIG_HVM
>  
>  #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index dde66b4f0f..2bc2ebadf0 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -150,4 +150,7 @@ bool_t vlapic_match_dest(
>      const struct vlapic *target, const struct vlapic *source,
>      int short_hand, uint32_t dest, bool_t dest_mode);
>  
> +/* Sync the PIR to IRR of all vlapics in the vcpus bitmap. */
> +void domain_sync_vlapic_pir(struct domain *d, unsigned long *vcpus);
> +
>  #endif /* __ASM_X86_HVM_VLAPIC_H__ */
> 

