[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets


  • To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
  • Date: Wed, 14 Apr 2010 10:04:21 +0200
  • Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • Delivery-date: Wed, 14 Apr 2010 01:05:27 -0700
  • Domainkey-signature: s=s1536a; d=ts.fujitsu.com; c=nofws; q=dns; h=X-SBRSScore:X-IronPort-AV:Received:X-IronPort-AV: Received:Received:Message-ID:Date:From:Organization: User-Agent:MIME-Version:To:CC:Subject:References: In-Reply-To:X-Enigmail-Version:Content-Type; b=I7jk6UtWkjV7Ffqq/y1cUC402Oche9VdQaGy1mCXNqywvStMK1rL6wOi r+1YgifKI22fym4Vbwuapaw0ezS3gBGEvSb1xRPk+ZMy2i2atj8ATI0In vCK48thIP9N7UQ2I3+F8zwXUg61mZAxR7ARbLJKZ2P8kCzQpsXZwVXyTH rUA3IShLrSCs3O0meiJGBXz+TxKP3eIo6b5fzFz3KTmgtTkBGj34F7nMV /OCqJjLalczGPySKPWJ1DRUHguDjm;
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Keir Fraser wrote:
> On 14/04/2010 08:25, "Juergen Gross" <juergen.gross@xxxxxxxxxxxxxx> wrote:
> 
>>> A tasklet also takes an arbitrary ulong parameter, which you can cast to a
>>> pointer to your informational structure. The parameter is specified via
>>> tasklet_init(). That should suffice.
>> I'm already using this. The problem is to find the original calling vcpu in
>> case of a nested call of continue_hypercall_on_cpu() while not conflicting
>> with concurrent calls from other vcpus which happen to address the same pcpu.
> 
> There can be only one nested invocation on any given pcpu, since a running
> invocation is never preempted. Hence on entry to c_h_o_c() you can check a
> per-cpu variable to see whether this invocation is nesting, or not. And if
> it is, that variable can be a pointer to an info structure which includes a
> pointer to the invoking vcpu.

Okay, attached is the modified patch again.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@xxxxxxxxxxxxxx
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html
Signed-off-by: juergen.gross@xxxxxxxxxxxxxx

# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxx
# Date 1271232093 -7200
# Node ID 4f01d2bf496cbd95649775deb004e26e74008227
# Parent  c02cc832cb2d88c383d33c1ba50c381fae703308
rework of continue_hypercall_on_cpu to use tasklets, make architecture 
independant

diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/domain.c     Wed Apr 14 10:01:33 2010 +0200
@@ -1517,82 +1517,6 @@
     flush_tlb_mask(&v->vcpu_dirty_cpumask);
 }
 
-struct migrate_info {
-    long (*func)(void *data);
-    void *data;
-    void (*saved_schedule_tail)(struct vcpu *);
-    cpumask_t saved_affinity;
-    unsigned int nest;
-};
-
-static void continue_hypercall_on_cpu_helper(struct vcpu *v)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    struct migrate_info *info = v->arch.continue_info;
-    cpumask_t mask = info->saved_affinity;
-    void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
-
-    regs->eax = info->func(info->data);
-
-    if ( info->nest-- == 0 )
-    {
-        xfree(info);
-        v->arch.schedule_tail = saved_schedule_tail;
-        v->arch.continue_info = NULL;
-        vcpu_unlock_affinity(v, &mask);
-    }
-
-    (*saved_schedule_tail)(v);
-}
-
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
-{
-    struct vcpu *v = current;
-    struct migrate_info *info;
-    cpumask_t mask = cpumask_of_cpu(cpu);
-    int rc;
-
-    if ( cpu == smp_processor_id() )
-        return func(data);
-
-    info = v->arch.continue_info;
-    if ( info == NULL )
-    {
-        info = xmalloc(struct migrate_info);
-        if ( info == NULL )
-            return -ENOMEM;
-
-        rc = vcpu_lock_affinity(v, &mask);
-        if ( rc )
-        {
-            xfree(info);
-            return rc;
-        }
-
-        info->saved_schedule_tail = v->arch.schedule_tail;
-        info->saved_affinity = mask;
-        info->nest = 0;
-
-        v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
-        v->arch.continue_info = info;
-    }
-    else
-    {
-        BUG_ON(info->nest != 0);
-        rc = vcpu_locked_change_affinity(v, &mask);
-        if ( rc )
-            return rc;
-        info->nest++;
-    }
-
-    info->func = func;
-    info->data = data;
-
-    /* Dummy return value will be overwritten by new schedule_tail. */
-    BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
-    return 0;
-}
-
 #define next_arg(fmt, args) ({                                              \
     unsigned long __arg;                                                    \
     switch ( *(fmt)++ )                                                     \
diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/microcode.c  Wed Apr 14 10:01:33 2010 +0200
@@ -28,6 +28,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/spinlock.h>
+#include <xen/domain.h>
 #include <xen/guest_access.h>
 
 #include <asm/current.h>
diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/sysctl.c     Wed Apr 14 10:01:33 2010 +0200
@@ -14,6 +14,7 @@
 #include <public/sysctl.h>
 #include <xen/sched.h>
 #include <xen/event.h>
+#include <xen/domain.h>
 #include <xen/domain_page.h>
 #include <asm/msr.h>
 #include <xen/trace.h>
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/domain.c
--- a/xen/common/domain.c       Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/domain.c       Wed Apr 14 10:01:33 2010 +0200
@@ -898,6 +898,81 @@
     return -ENOSYS;
 }
 
+struct migrate_info {
+    long (*func)(void *data);
+    void *data;
+    volatile int nest;
+    struct cpu_user_regs volatile *regs;
+    struct vcpu *v;
+};
+
+static DEFINE_PER_CPU(struct migrate_info *, continue_info);
+
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    struct vcpu *v = info->v;
+
+    while ( get_return_reg(info->regs) )
+        cpu_relax();
+
+    per_cpu(continue_info, smp_processor_id()) = info;
+
+    /* vcpu is paused now, return value can be set */
+    set_return_reg(info->regs, info->func(info->data));
+
+    per_cpu(continue_info, smp_processor_id()) = NULL;
+
+    if ( info->nest-- == 0 )
+    {
+        xfree(info);
+        vcpu_unpause(v);
+    }
+
+    return;
+}
+
+int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
+{
+    struct vcpu *v = current;
+    struct migrate_info *info = per_cpu(continue_info, smp_processor_id());
+
+    if ( cpu == smp_processor_id() )
+        return func(data);
+
+    if ( info == NULL )
+    {
+        info = xmalloc(struct migrate_info);
+        if ( info == NULL )
+            return -ENOMEM;
+
+        info->nest = 0;
+        info->v = v;
+        info->regs = guest_cpu_user_regs();
+        tasklet_init(&v->cont_tasklet,
+                     (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+                     (unsigned long)info);
+
+        vcpu_pause_nosync(v);
+    }
+    else
+    {
+        v = info->v;
+        BUG_ON(info->nest != 0);
+        info->nest++;
+    }
+
+    info->func = func;
+    info->data = data;
+
+    set_return_reg(info->regs, -EBUSY);
+
+    tasklet_schedule_cpu(&v->cont_tasklet, cpu);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* Dummy return value will be overwritten by tasklet. */
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/schedule.c
--- a/xen/common/schedule.c     Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/schedule.c     Wed Apr 14 10:01:33 2010 +0200
@@ -408,26 +408,18 @@
     }
 }
 
-static int __vcpu_set_affinity(
-    struct vcpu *v, cpumask_t *affinity,
-    bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
     cpumask_t online_affinity, old_affinity;
+
+    if ( v->domain->is_pinned )
+        return -EINVAL;
 
     cpus_and(online_affinity, *affinity, cpu_online_map);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     vcpu_schedule_lock_irq(v);
-
-    if ( v->affinity_locked != old_lock_status )
-    {
-        BUG_ON(!v->affinity_locked);
-        vcpu_schedule_unlock_irq(v);
-        return -EBUSY;
-    }
-
-    v->affinity_locked = new_lock_status;
 
     old_affinity = v->cpu_affinity;
     v->cpu_affinity = *affinity;
@@ -444,36 +436,6 @@
     }
 
     return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    if ( v->domain->is_pinned )
-        return -EINVAL;
-    return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    cpumask_t online_affinity;
-
-    /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
-    if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
-
-    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
-        BUG();
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/softirq.c
--- a/xen/common/softirq.c      Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/softirq.c      Wed Apr 14 10:01:33 2010 +0200
@@ -88,9 +88,11 @@
 }
 
 static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
 static DEFINE_SPINLOCK(tasklet_lock);
 
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+    int cpu)
 {
     unsigned long flags;
 
@@ -101,28 +103,47 @@
         if ( !t->is_scheduled && !t->is_running )
         {
             BUG_ON(!list_empty(&t->list));
-            list_add_tail(&t->list, &tasklet_list);
+            list_add_tail(&t->list, tlist);
+            t->scheduled_on = NR_CPUS;
         }
         t->is_scheduled = 1;
-        raise_softirq(TASKLET_SOFTIRQ);
+        if ( cpu == smp_processor_id() )
+            raise_softirq(TASKLET_SOFTIRQ);
+        else if ( !t->is_running )
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        else
+            t->scheduled_on = cpu;
     }
 
     spin_unlock_irqrestore(&tasklet_lock, flags);
 }
 
+void tasklet_schedule(struct tasklet *t)
+{
+    tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+    tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
 static void tasklet_action(void)
 {
+    struct list_head *tlist;
     struct tasklet *t;
 
     spin_lock_irq(&tasklet_lock);
 
-    if ( list_empty(&tasklet_list) )
+    tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+        &this_cpu(tasklet_list_pcpu);
+    if ( list_empty(tlist) )
     {
         spin_unlock_irq(&tasklet_lock);
         return;
     }
 
-    t = list_entry(tasklet_list.next, struct tasklet, list);
+    t = list_entry(tlist->next, struct tasklet, list);
     list_del_init(&t->list);
 
     BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +159,23 @@
     if ( t->is_scheduled )
     {
         BUG_ON(t->is_dead || !list_empty(&t->list));
-        list_add_tail(&t->list, &tasklet_list);
+        if ( t->scheduled_on >= NR_CPUS )
+            list_add_tail(&t->list, tlist);
+        else
+        {
+            unsigned int cpu = t->scheduled_on;
+
+            list_add_tail(&t->list, &per_cpu(tasklet_list_pcpu, cpu));
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        }
     }
 
     /*
      * If there is more work to do then reschedule. We don't grab more work
      * immediately as we want to allow other softirq work to happen first.
      */
-    if ( !list_empty(&tasklet_list) )
+    if ( !list_empty(&tasklet_list) ||
+        !list_empty(&this_cpu(tasklet_list_pcpu)) )
         raise_softirq(TASKLET_SOFTIRQ);
 
     spin_unlock_irq(&tasklet_lock);
@@ -186,6 +216,12 @@
 
 void __init softirq_init(void)
 {
+    int i;
+
+    for_each_possible_cpu ( i )
+    {
+        INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+    }
     open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 }
 
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-ia64/regs.h
--- a/xen/include/asm-ia64/regs.h       Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-ia64/regs.h       Wed Apr 14 10:01:33 2010 +0200
@@ -1,1 +1,4 @@
 #include <asm/ptrace.h>
+
+#define get_return_reg(r)      r->r8
+#define set_return_reg(r, val) r->r8 = val
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/domain.h      Wed Apr 14 10:01:33 2010 +0200
@@ -451,9 +451,6 @@
 #define hvm_vmx         hvm_vcpu.u.vmx
 #define hvm_svm         hvm_vcpu.u.svm
 
-/* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
-
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
 
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-x86/regs.h
--- a/xen/include/asm-x86/regs.h        Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/regs.h        Wed Apr 14 10:01:33 2010 +0200
@@ -19,4 +19,7 @@
     (diff == 0);                                                              \
 })
 
+#define get_return_reg(r)      r->eax
+#define set_return_reg(r, val) r->eax = val
+
 #endif /* __X86_REGS_H__ */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/domain.h  Wed Apr 14 10:01:33 2010 +0200
@@ -62,6 +62,9 @@
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
 
+/* Continue the current hypercall via func(data) on specified cpu. */
+int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
+
 extern unsigned int xen_processor_pmbits;
 
 #endif /* __XEN_DOMAIN_H__ */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/sched.h   Wed Apr 14 10:01:33 2010 +0200
@@ -132,8 +132,6 @@
     bool_t           defer_shutdown;
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool_t           paused_for_shutdown;
-    /* VCPU affinity is temporarily locked from controller changes? */
-    bool_t           affinity_locked;
 
     /*
      * > 0: a single port is being polled;
@@ -156,6 +154,9 @@
 
     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
+
+    /* tasklet for continue_hypercall_on_cpu() */
+    struct tasklet   cont_tasklet;
 
     struct arch_vcpu arch;
 };
@@ -581,9 +582,6 @@
 void vcpu_force_reschedule(struct vcpu *v);
 void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/softirq.h Wed Apr 14 10:01:33 2010 +0200
@@ -50,14 +50,17 @@
     bool_t is_scheduled;
     bool_t is_running;
     bool_t is_dead;
+    unsigned int scheduled_on;
     void (*func)(unsigned long);
     unsigned long data;
 };
 
 #define DECLARE_TASKLET(name, func, data) \
-    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, NR_CPUS, \
+                            func, data }
 
 void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
 void tasklet_kill(struct tasklet *t);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.