
Re: [Xen-devel] [RFC PATCH] hvm/vpt: fix inconsistent views of vIOAPIC in vmx_intr_assist()



PING...

Could you please comment? I think this bug should be fixed in Xen 4.9.

On Fri, Apr 07, 2017 at 11:24:16AM +0800, Chao Gao wrote:
>When injecting a periodic timer interrupt in vmx_intr_assist(), the
>vIOAPIC is read several times during a single event delivery, which can
>lead to inconsistent views of it. For example, if a periodic timer
>interrupt comes from the PIT, the corresponding RTE is read in
>pt_update_irq() when the matching bit is set in vIRR. After that
>function returns, the RTE is read again to obtain the vector that was
>set in vIRR. Between the two reads, the content of the RTE may have
>been changed by another CPU, since no locking protects it. This can
>trigger the assertion failure in vmx_intr_assist(). Also, after a
>periodic timer interrupt is injected successfully, pt_irq_posted()
>should decrement the pending count (pending_intr_nr); but if the
>corresponding RTE has been changed in the meantime, pt_irq_posted()
>will not decrement it, resulting in one extra timer interrupt.
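>
>Roughly, the interleaving looks like this. The snippet below is a
>standalone, single-threaded model of the two unsynchronized RTE reads;
>the names are made up, it is not the actual Xen code, and it only
>illustrates the pattern:
>
>    /* race_sketch.c -- model of the interleaving, not Xen code */
>    #include <assert.h>
>    #include <stdint.h>
>    #include <stdio.h>
>
>    static uint8_t rte_vector = 0x30;   /* vIOAPIC RTE vector field */
>    static uint64_t virr[4];            /* 256-bit vIRR             */
>
>    static void virr_set(uint8_t vec)
>    {
>        virr[vec / 64] |= 1ULL << (vec % 64);
>    }
>
>    static int virr_test(uint8_t vec)
>    {
>        return !!(virr[vec / 64] & (1ULL << (vec % 64)));
>    }
>
>    int main(void)
>    {
>        /* vCPU A, pt_update_irq(): read the RTE, set that vector in vIRR */
>        uint8_t vec = rte_vector;
>        virr_set(vec);
>
>        /* vCPU B, meanwhile: the guest reprograms the RTE */
>        rte_vector = 0x40;
>
>        /* vCPU A, back in vmx_intr_assist(): second, unprotected read */
>        uint8_t pt_vector = rte_vector;
>        printf("first read %#x, second read %#x\n", vec, pt_vector);
>        assert(virr_test(pt_vector));   /* fires: 0x40 was never set */
>        return 0;
>    }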
>
>More discussion and history can be found in
>1.https://lists.xenproject.org/archives/html/xen-devel/2017-03/msg00906.html
>2.https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01019.html
>
>This patch simply takes irq_lock around the affected paths to avoid
>these inconsistent views of the vIOAPIC. To avoid deadlocking on that
>lock, locked variants of several functions are provided.
>
>Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
>---
> xen/arch/x86/hvm/irq.c      | 22 ++++++++++++++++------
> xen/arch/x86/hvm/svm/intr.c | 21 +++++++++++++++------
> xen/arch/x86/hvm/vmx/intr.c |  9 +++++++++
> xen/arch/x86/hvm/vpic.c     | 20 ++++++++++++++------
> xen/arch/x86/hvm/vpt.c      |  4 ++--
> xen/include/xen/hvm/irq.h   |  4 ++++
> 6 files changed, 60 insertions(+), 20 deletions(-)
>
>diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
>index 8625584..a1af61e 100644
>--- a/xen/arch/x86/hvm/irq.c
>+++ b/xen/arch/x86/hvm/irq.c
>@@ -126,37 +126,47 @@ void hvm_pci_intx_deassert(
>     spin_unlock(&d->arch.hvm_domain.irq_lock);
> }
> 
>-void hvm_isa_irq_assert(
>+void hvm_isa_irq_assert_locked(
>     struct domain *d, unsigned int isa_irq)
> {
>     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
> 
>     ASSERT(isa_irq <= 15);
>-
>-    spin_lock(&d->arch.hvm_domain.irq_lock);
>+    ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
> 
>     if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
>          (hvm_irq->gsi_assert_count[gsi]++ == 0) )
>         assert_irq(d, gsi, isa_irq);
>+}
> 
>+void hvm_isa_irq_assert(
>+    struct domain *d, unsigned int isa_irq)
>+{
>+    spin_lock(&d->arch.hvm_domain.irq_lock);
>+    hvm_isa_irq_assert_locked(d, isa_irq);
>     spin_unlock(&d->arch.hvm_domain.irq_lock);
> }
> 
>-void hvm_isa_irq_deassert(
>+void hvm_isa_irq_deassert_locked(
>     struct domain *d, unsigned int isa_irq)
> {
>     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
> 
>     ASSERT(isa_irq <= 15);
>-
>-    spin_lock(&d->arch.hvm_domain.irq_lock);
>+    ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
> 
>     if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) &&
>          (--hvm_irq->gsi_assert_count[gsi] == 0) )
>         deassert_irq(d, isa_irq);
>+}
> 
>+void hvm_isa_irq_deassert(
>+    struct domain *d, unsigned int isa_irq)
>+{
>+    spin_lock(&d->arch.hvm_domain.irq_lock);
>+    hvm_isa_irq_deassert_locked(d, isa_irq);
>     spin_unlock(&d->arch.hvm_domain.irq_lock);
> }
> 
>diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c
>index 8511ff0..d2a318c 100644
>--- a/xen/arch/x86/hvm/svm/intr.c
>+++ b/xen/arch/x86/hvm/svm/intr.c
>@@ -137,18 +137,25 @@ void svm_intr_assist(void)
>     struct hvm_intack intack;
>     enum hvm_intblk intblk;
> 
>+    /*
>+     * Avoid the vIOAPIC RTE being changed by another vCPU.
>+     * Otherwise, pt_update_irq() may return a wrong vector which is not in
>+     * vIRR and pt_irq_posted() may not recognize a timer interrupt has been
>+     * injected.
>+     */
>+    spin_lock(&v->domain->arch.hvm_domain.irq_lock);
>     /* Crank the handle on interrupt state. */
>     pt_update_irq(v);
> 
>     do {
>         intack = hvm_vcpu_has_pending_irq(v);
>         if ( likely(intack.source == hvm_intsrc_none) )
>-            return;
>+            goto out;
> 
>         intblk = hvm_interrupt_blocked(v, intack);
>         if ( intblk == hvm_intblk_svm_gif ) {
>             ASSERT(nestedhvm_enabled(v->domain));
>-            return;
>+            goto out;
>         }
> 
>         /* Interrupts for the nested guest are already
>@@ -165,13 +172,13 @@ void svm_intr_assist(void)
>             switch (rc) {
>             case NSVM_INTR_NOTINTERCEPTED:
>                 /* Inject interrupt into 2nd level guest directly. */
>-                break;        
>+                goto out;
>             case NSVM_INTR_NOTHANDLED:
>             case NSVM_INTR_FORCEVMEXIT:
>-                return;
>+                goto out;
>             case NSVM_INTR_MASKED:
>                 /* Guest already enabled an interrupt window. */
>-                return;
>+                goto out;
>             default:
>                 panic("%s: nestedsvm_vcpu_interrupt can't handle value %#x",
>                     __func__, rc);
>@@ -195,7 +202,7 @@ void svm_intr_assist(void)
>         if ( unlikely(vmcb->eventinj.fields.v) || intblk )
>         {
>             svm_enable_intr_window(v, intack);
>-            return;
>+            goto out;
>         }
> 
>         intack = hvm_vcpu_ack_pending_irq(v, intack);
>@@ -216,6 +223,8 @@ void svm_intr_assist(void)
>     intack = hvm_vcpu_has_pending_irq(v);
>     if ( unlikely(intack.source != hvm_intsrc_none) )
>         svm_enable_intr_window(v, intack);
>+ out:
>+    spin_unlock(&v->domain->arch.hvm_domain.irq_lock);
> }
> 
> /*
>diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
>index 1eb9f38..8f3d1a7 100644
>--- a/xen/arch/x86/hvm/vmx/intr.c
>+++ b/xen/arch/x86/hvm/vmx/intr.c
>@@ -234,6 +234,14 @@ void vmx_intr_assist(void)
>         return;
>     }
> 
>+    /*
>+     * Avoid the vIOAPIC RTE being changed by another vCPU.
>+     * Otherwise, pt_update_irq() may return a wrong vector which is not in
>+     * vIRR and pt_irq_posted() may not recognize a timer interrupt has been
>+     * injected.
>+     */
>+    spin_lock(&v->domain->arch.hvm_domain.irq_lock);
>+
>     /* Crank the handle on interrupt state. */
>     if ( is_hvm_vcpu(v) )
>         pt_vector = pt_update_irq(v);
>@@ -396,6 +404,7 @@ void vmx_intr_assist(void)
>     }
> 
>  out:
>+    spin_unlock(&v->domain->arch.hvm_domain.irq_lock);
>     if ( !nestedhvm_vcpu_in_guestmode(v) &&
>          !cpu_has_vmx_virtual_intr_delivery &&
>          cpu_has_vmx_tpr_shadow )
>diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
>index e160bbd..8d02e52 100644
>--- a/xen/arch/x86/hvm/vpic.c
>+++ b/xen/arch/x86/hvm/vpic.c
>@@ -153,14 +153,13 @@ static void __vpic_intack(struct hvm_hw_vpic *vpic, int irq)
>     vpic_update_int_output(vpic);
> }
> 
>-static int vpic_intack(struct hvm_hw_vpic *vpic)
>+static int vpic_intack_locked(struct hvm_hw_vpic *vpic)
> {
>     int irq = -1;
> 
>-    vpic_lock(vpic);
>-
>+    ASSERT(vpic_is_locked(vpic));
>     if ( !vpic->int_output )
>-        goto out;
>+        return irq;
> 
>     irq = vpic_get_highest_priority_irq(vpic);
>     BUG_ON(irq < 0);
>@@ -175,7 +174,15 @@ static int vpic_intack(struct hvm_hw_vpic *vpic)
>         irq += 8;
>     }
> 
>- out:
>+    return irq;
>+}
>+
>+static int vpic_intack(struct hvm_hw_vpic *vpic)
>+{
>+    int irq;
>+
>+    vpic_lock(vpic);
>+    irq = vpic_intack_locked(vpic);
>     vpic_unlock(vpic);
>     return irq;
> }
>@@ -487,13 +494,14 @@ int vpic_ack_pending_irq(struct vcpu *v)
>     struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
> 
>     ASSERT(has_vpic(v->domain));
>+    ASSERT(vpic_is_locked(vpic));
> 
>     TRACE_2D(TRC_HVM_EMUL_PIC_PEND_IRQ_CALL, vlapic_accept_pic_intr(v),
>              vpic->int_output);
>     if ( !vlapic_accept_pic_intr(v) || !vpic->int_output )
>         return -1;
> 
>-    irq = vpic_intack(vpic);
>+    irq = vpic_intack_locked(vpic);
>     if ( irq == -1 )
>         return -1;
> 
>diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
>index e3f2039..d048f26 100644
>--- a/xen/arch/x86/hvm/vpt.c
>+++ b/xen/arch/x86/hvm/vpt.c
>@@ -296,8 +296,8 @@ int pt_update_irq(struct vcpu *v)
>         vlapic_set_irq(vcpu_vlapic(v), irq, 0);
>     else
>     {
>-        hvm_isa_irq_deassert(v->domain, irq);
>-        hvm_isa_irq_assert(v->domain, irq);
>+        hvm_isa_irq_deassert_locked(v->domain, irq);
>+        hvm_isa_irq_assert_locked(v->domain, irq);
>     }
> 
>     /*
>diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
>index f041252..a8f36f8 100644
>--- a/xen/include/xen/hvm/irq.h
>+++ b/xen/include/xen/hvm/irq.h
>@@ -119,8 +119,12 @@ void hvm_pci_intx_deassert(
> /* Modify state of an ISA device's IRQ wire. */
> void hvm_isa_irq_assert(
>     struct domain *d, unsigned int isa_irq);
>+void hvm_isa_irq_assert_locked(
>+    struct domain *d, unsigned int isa_irq);
> void hvm_isa_irq_deassert(
>     struct domain *d, unsigned int isa_irq);
>+void hvm_isa_irq_deassert_locked(
>+    struct domain *d, unsigned int isa_irq);
> 
> int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
> 
>-- 
>1.8.3.1
>
