[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] Fwd: [v3 14/15] Update Posted-Interrupts Descriptor during vCPU scheduling




> -----Original Message-----
> From: Dario Faggioli [mailto:dario.faggioli@xxxxxxxxxx]
> Sent: Wednesday, July 15, 2015 12:03 AM
> To: Wu, Feng
> Cc: Jan Beulich; Tian, Kevin; keir@xxxxxxx; George Dunlap;
> andrew.cooper3@xxxxxxxxxx; xen-devel; Zhang, Yang Z
> Subject: Re: [Xen-devel] Fwd: [v3 14/15] Update Posted-Interrupts Descriptor
> during vCPU scheduling
> 

Hi Dario,

I finished the new patch with arch hooks, but it seems something is wrong:
after assigning the NIC to the guest, I ping another host from the guest and
the latency is too high. So far I have not found the reason, despite debugging
the code for some time. I post the patch here to see if you can find any
obvious logic errors in it. Could you also have a look at it to double-check
whether this patch does exactly the same thing as
[v3 14/15] Update Posted-Interrupts Descriptor during vCPU scheduling,
which works well. A thorough review is highly appreciated! Thank you
very much!

    vmx: Add some scheduler hooks for VT-d posted interrupts

    This patch adds the following arch hooks in the scheduler:
    - vmx_pre_ctx_switch_pi():
    It is called in vmx_ctxt_switch_from(); we update the posted
    interrupt descriptor when the vCPU is preempted, goes to sleep,
    or is blocked.

    - vmx_post_ctx_switch_pi()
    It is called in vmx_ctxt_switch_to(); we update the posted
    interrupt descriptor when the vCPU is about to run.

    - arch_vcpu_wake()
    It will be called in vcpu_wake() in a later patch; we update
    the posted interrupt descriptor when the vCPU is unblocked.

    Suggested-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
    Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 6d25a32..82d797f 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -168,6 +168,7 @@ static int vmx_vcpu_initialise(struct vcpu *v)

     INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);

+    v->arch.hvm_vmx.pi_block_cpu = -1;
     return 0;
 }

@@ -725,6 +726,139 @@ static void vmx_fpu_leave(struct vcpu *v)
     }
 }

+/*
+ * Hook called from vcpu_wake(): restore the PI descriptor's notification
+ * vector and take the vCPU off the per-pCPU blocked list when it leaves
+ * the blocked state.
+ *
+ * NOTE(review): the condition below is true for any vCPU that is runnable
+ * or simply not _VPF_blocked, not only for a blocked->runnable transition
+ * -- confirm that running this body for already-runnable vCPUs is intended.
+ */
+void arch_vcpu_wake(struct vcpu *v)
+{
+    /* Nothing to do without VT-d posted interrupts, or for PV vCPUs. */
+    if ( !iommu_intpost || !is_hvm_vcpu(v) )
+        return;
+
+    if ( likely(vcpu_runnable(v)) ||
+         !test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+        unsigned long flags;
+
+        /*
+         * blocked -> runnable/offline
+         * If the state is transferred from RUNSTATE_blocked,
+         * we should set the 'NV' field back to posted_intr_vector,
+         * so the Posted-Interrupts can be delivered to the vCPU
+         * by VT-d HW after it is scheduled to run.
+         */
+        write_atomic((uint8_t*)&pi_desc->nv, posted_intr_vector);
+
+        /*
+         * Delete the vCPU from the related blocked list if we are
+         * resuming from the blocked state.
+         *
+         * NOTE(review): pi_block_cpu is read here without holding the
+         * per-CPU lock it selects; confirm no other path can change it
+         * concurrently (e.g. the wakeup interrupt handler) -- otherwise
+         * the wrong lock could be taken.
+         */
+        if (v->arch.hvm_vmx.pi_block_cpu != -1)
+        {
+            spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+                              v->arch.hvm_vmx.pi_block_cpu), flags);
+            list_del_init(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+            spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+                                   v->arch.hvm_vmx.pi_block_cpu), flags);
+            /* -1 == "not on any per-pCPU blocked list". */
+            v->arch.hvm_vmx.pi_block_cpu = -1;
+        }
+    }
+}
+
+/*
+ * Called from vmx_ctxt_switch_from(): update the PI descriptor when the
+ * vCPU is being descheduled.  Two cases:
+ *  - preempted / going to sleep: just set SN (suppress notifications);
+ *  - blocking: enqueue on the per-pCPU blocked list and redirect wakeup
+ *    notifications (NDST/NV) to pi_wakeup_vector on that pCPU.
+ */
+static void vmx_pre_ctx_switch_pi(struct vcpu *v)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+    struct pi_desc old, new;
+    unsigned long flags;
+
+    if ( !iommu_intpost || !is_hvm_vcpu(v) )
+        return;
+
+    if ( vcpu_runnable(v) || !test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        /*
+         * The vCPU has been preempted or went to sleep. We don't need to send
+         * notification event to a non-running vcpu, the interrupt information
+         * will be delivered to it before VM-ENTRY when the vcpu is scheduled
+         * to run next time.
+         */
+        pi_set_sn(pi_desc);
+
+    }
+    else if ( test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        /* Must not already be on a blocked list. */
+        ASSERT(v->arch.hvm_vmx.pi_block_cpu == -1);
+
+        /*
+         * The vCPU is blocking; we need to add it to one of the per-pCPU
+         * lists.  We save v->processor to v->arch.hvm_vmx.pi_block_cpu and
+         * use it for the per-CPU list; we also save it to the posted-interrupt
+         * descriptor and make it the destination of the wake-up notification
+         * event.
+         */
+        v->arch.hvm_vmx.pi_block_cpu = v->processor;
+        spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+                          v->arch.hvm_vmx.pi_block_cpu), flags);
+        list_add_tail(&v->arch.hvm_vmx.pi_blocked_vcpu_list,
+                      &per_cpu(pi_blocked_vcpu, v->arch.hvm_vmx.pi_block_cpu));
+        spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+                           v->arch.hvm_vmx.pi_block_cpu), flags);
+
+        /*
+         * NOTE(review): the vCPU is put on the blocked list *before* the
+         * cmpxchg below redirects NV to pi_wakeup_vector.  An interrupt
+         * posted in that window sets ON but raises posted_intr_vector, not
+         * the wakeup vector -- the 'old.on' check below handles the case
+         * where ON was already set, but confirm no notification can be
+         * lost in between.
+         */
+        do {
+            old.control = new.control = pi_desc->control;
+
+            /* Should not block the vCPU if an interrupt was posted for it */
+
+            if ( old.on )
+            {
+                /* Undo the enqueue above and wake the vCPU instead. */
+                ASSERT(v->arch.hvm_vmx.pi_block_cpu != -1);
+
+                spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+                                  v->arch.hvm_vmx.pi_block_cpu), flags);
+                list_del_init(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+                spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+                                       v->arch.hvm_vmx.pi_block_cpu), flags);
+                v->arch.hvm_vmx.pi_block_cpu = -1;
+
+                tasklet_schedule(&v->arch.hvm_vmx.pi_vcpu_wakeup_tasklet);
+
+                return;
+            }
+
+            /*
+             * Change the 'NDST' field to v->arch.hvm_vmx.pi_block_cpu,
+             * so when external interrupts from assigned devices happen,
+             * the wakeup notification event will go to
+             * v->arch.hvm_vmx.pi_block_cpu, then in pi_wakeup_interrupt()
+             * we can find the vCPU in the right list to wake up.
+             */
+            if ( x2apic_enabled )
+                new.ndst = cpu_physical_id(v->arch.hvm_vmx.pi_block_cpu);
+            else
+                new.ndst = MASK_INSR(cpu_physical_id(
+                                 v->arch.hvm_vmx.pi_block_cpu),
+                                 PI_xAPIC_NDST_MASK);
+            /*
+             * NOTE(review): SN is cleared here on the blocked path, while
+             * vmx_post_ctx_switch_pi() ASSERTs SN == 1 on the next switch
+             * in.  Confirm a woken-then-rescheduled vCPU always goes back
+             * through the pi_set_sn() branch first, or the ASSERT can fire.
+             */
+            new.sn = 0;
+            new.nv = pi_wakeup_vector;
+        } while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                  != old.control );
+    }
+}
+
+/*
+ * Called from vmx_ctxt_switch_to(): point the PI descriptor's NDST at the
+ * pCPU the vCPU is about to run on, then clear SN so notifications are
+ * delivered directly again.
+ */
+static void vmx_post_ctx_switch_pi(struct vcpu *v)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+
+    if ( !iommu_intpost || !is_hvm_vcpu(v) )
+        return;
+
+    /*
+     * NOTE(review): this relies on SN having been set on the switch-out
+     * path; the blocked branch of vmx_pre_ctx_switch_pi() clears SN, so
+     * confirm this cannot be reached directly after a blocked->wake
+     * transition.
+     */
+    ASSERT(pi_desc->sn == 1);
+
+    /* NDST format differs between x2APIC and xAPIC destination modes. */
+    if ( x2apic_enabled )
+        write_atomic(&pi_desc->ndst, cpu_physical_id(v->processor));
+    else
+        write_atomic(&pi_desc->ndst,
+                     MASK_INSR(cpu_physical_id(v->processor),
+                     PI_xAPIC_NDST_MASK));
+
+    pi_clear_sn(pi_desc);
+}
+
 static void vmx_ctxt_switch_from(struct vcpu *v)
 {
     /*
@@ -739,6 +873,7 @@ static void vmx_ctxt_switch_from(struct vcpu *v)
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
+    vmx_pre_ctx_switch_pi(v);
 }

 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -763,6 +898,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v)

     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vmx_post_ctx_switch_pi(v);
 }


diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index e621c30..db546eb 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -510,6 +510,7 @@ bool_t nhvm_vmcx_hap_enabled(struct vcpu *v);
 /* interrupt */
 enum hvm_intblk nhvm_interrupt_blocked(struct vcpu *v);

+void arch_vcpu_wake(struct vcpu *v);
 #ifndef NDEBUG
 /* Permit use of the Forced Emulation Prefix in HVM guests */
 extern bool_t opt_hvm_fep;
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index b6b34d1..ea8fbe5 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -167,6 +167,13 @@ struct arch_vmx_struct {
     struct tasklet       pi_vcpu_wakeup_tasklet;

     struct list_head     pi_blocked_vcpu_list;
+
+    /*
+     * Before vCPU is blocked, it is added to the global per-cpu list
+     * of 'pi_block_cpu', then VT-d engine can send wakeup notification
+     * event to 'pi_block_cpu' and wakeup the related vCPU.
+     */
+    int                  pi_block_cpu;
 };

 int vmx_create_vmcs(struct vcpu *v);

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 6b02f98..bb18c8c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -393,6 +393,8 @@ void vcpu_wake(struct vcpu *v)
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
     }

+    arch_vcpu_wake(v);
+
     vcpu_schedule_unlock_irqrestore(lock, flags, v);

     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);

Thanks,
Feng

> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.