
[Xen-changelog] Reduce locked critical region in __enter_scheduler(),
changing the context switch interface yet again.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 0aff653824db5ce6474ba1865794a285ae9f3bbc
# Parent  d92a68e6faa95c2241e147c759ecbcc3946b74ac
Reduce locked critical region in __enter_scheduler(),
changing the context switch interface yet again.

domain_runnable() renamed to vcpu_runnable().

Fix a stupid bug that left vcpu_dirty_cpumask with a
bogus value, causing sync_vcpu_execstate() to fail
sometimes.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
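
The net effect on the control flow, condensed from the x86 and common
hunks below, is roughly the following. This is a non-compilable sketch:
locking details, accounting, tracing and the lazy/idle paths are elided,
and all identifiers are the ones used in the patch.

    /* __enter_scheduler(): the lock is now dropped before switching. */
    static void __enter_scheduler(void)
    {
        /* ... choose 'next' while holding schedule_data[cpu].schedule_lock ... */

        set_bit(_VCPUF_running, &next->vcpu_flags);

        /*
         * Unlock *before* the context switch, so the locked critical
         * region no longer spans the register-state save/restore.
         */
        spin_unlock_irq(&schedule_data[cpu].schedule_lock);

        context_switch(prev, next);
    }

    /* x86 context_switch(): signal completion via context_saved(). */
    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        /* ... save @prev's state and load @next's, with IRQs disabled ... */

        /*
         * Once @prev's context is committed to memory (or recoverable via
         * sync_vcpu_execstate()), clear @prev's _VCPUF_running flag.
         * vcpu_sleep_sync() spins on this flag and on the schedule_lock.
         */
        context_saved(prev);

        schedule_tail(next);    /* does not return on x86 */
    }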

diff -r d92a68e6faa9 -r 0aff653824db xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c       Sat Jan  7 01:31:04 2006
+++ b/xen/arch/ia64/xen/process.c       Sat Jan  7 15:53:25 2006
@@ -65,24 +65,16 @@
 
 extern struct schedule_data schedule_data[NR_CPUS];
 
-void schedule_tail(struct vcpu *next)
-{
-       unsigned long rr7;
-       //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
-       //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
-    // This is necessary because when a new domain is started, our
-    // implementation of context_switch() does not return (switch_to() has
-    // special and peculiar behaviour in this case).
-    context_switch_done();
-
-       /* rr7 will be postponed to last point when resuming back to guest */
-    if(VMX_DOMAIN(current)){
-       vmx_load_all_rr(current);
-    }else{
-           load_region_regs(current);
-            vcpu_load_kernel_regs(current);
-    }
+void schedule_tail(struct vcpu *prev)
+{
+       context_saved(prev);
+
+       if (VMX_DOMAIN(current)) {
+               vmx_load_all_rr(current);
+       } else {
+               load_region_regs(current);
+               vcpu_load_kernel_regs(current);
+       }
 }
 
 void tdpfoo(void) { }
diff -r d92a68e6faa9 -r 0aff653824db xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Sat Jan  7 01:31:04 2006
+++ b/xen/arch/ia64/xen/xenmisc.c       Sat Jan  7 15:53:25 2006
@@ -327,6 +327,8 @@
        }
            if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
     }
+
+    context_saved(prev);
 }
 
 void continue_running(struct vcpu *same)
diff -r d92a68e6faa9 -r 0aff653824db xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sat Jan  7 01:31:04 2006
+++ b/xen/arch/x86/domain.c     Sat Jan  7 15:53:25 2006
@@ -739,7 +739,7 @@
 
     if ( p->domain != n->domain )
         cpu_clear(cpu, p->domain->domain_dirty_cpumask);
-    cpu_clear(cpu, n->vcpu_dirty_cpumask);
+    cpu_clear(cpu, p->vcpu_dirty_cpumask);
 
     percpu_ctxt[cpu].curr_vcpu = n;
 }
@@ -748,18 +748,15 @@
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
     unsigned int cpu = smp_processor_id();
-
-    ASSERT(!local_irq_is_enabled());
 
     set_current(next);
 
     if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
          !is_idle_domain(next->domain) )
     {
+        local_irq_disable();
         __context_switch();
-
-        context_switch_done();
-        ASSERT(local_irq_is_enabled());
+        local_irq_enable();
 
         if ( VMX_DOMAIN(next) )
         {
@@ -772,10 +769,8 @@
             vmx_load_msrs(next);
         }
     }
-    else
-    {
-        context_switch_done();
-    }
+
+    context_saved(prev);
 
     schedule_tail(next);
     BUG();
diff -r d92a68e6faa9 -r 0aff653824db xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Sat Jan  7 01:31:04 2006
+++ b/xen/common/sched_bvt.c    Sat Jan  7 15:53:25 2006
@@ -277,7 +277,7 @@
 
 static void bvt_sleep(struct vcpu *v)
 {
-    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+    if ( schedule_data[v->processor].curr == v )
         cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
     else  if ( __task_on_runqueue(v) )
         __del_from_runqueue(v);
@@ -409,7 +409,7 @@
         
         __del_from_runqueue(prev);
         
-        if ( domain_runnable(prev) )
+        if ( vcpu_runnable(prev) )
             __add_to_runqueue_tail(prev);
     }
 
diff -r d92a68e6faa9 -r 0aff653824db xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Sat Jan  7 01:31:04 2006
+++ b/xen/common/sched_sedf.c   Sat Jan  7 15:53:25 2006
@@ -782,8 +782,8 @@
  
     /* create local state of the status of the domain, in order to avoid
        inconsistent state during scheduling decisions, because data for
-       domain_runnable is not protected by the scheduling lock!*/
-    if ( !domain_runnable(current) )
+       vcpu_runnable is not protected by the scheduling lock!*/
+    if ( !vcpu_runnable(current) )
         inf->status |= SEDF_ASLEEP;
  
     if ( inf->status & SEDF_ASLEEP )
@@ -879,7 +879,7 @@
 
     EDOM_INFO(d)->status |= SEDF_ASLEEP;
  
-    if ( test_bit(_VCPUF_running, &d->vcpu_flags) )
+    if ( schedule_data[d->processor].curr == d )
     {
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
     }
diff -r d92a68e6faa9 -r 0aff653824db xen/common/schedule.c
--- a/xen/common/schedule.c     Sat Jan  7 01:31:04 2006
+++ b/xen/common/schedule.c     Sat Jan  7 15:53:25 2006
@@ -168,7 +168,7 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(!domain_runnable(v)) )
+    if ( likely(!vcpu_runnable(v)) )
         SCHED_OP(sleep, v);
     spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
 
@@ -184,7 +184,7 @@
      * flag is cleared and the scheduler lock is released. We also check that
      * the domain continues to be unrunnable, in case someone else wakes it.
      */
-    while ( !domain_runnable(v) &&
+    while ( !vcpu_runnable(v) &&
             (test_bit(_VCPUF_running, &v->vcpu_flags) ||
              spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
         cpu_relax();
@@ -197,7 +197,7 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(domain_runnable(v)) )
+    if ( likely(vcpu_runnable(v)) )
     {
         SCHED_OP(wake, v);
         v->wokenup = NOW();
@@ -387,20 +387,18 @@
 {
     struct vcpu        *prev = current, *next = NULL;
     int                 cpu = smp_processor_id();
-    s_time_t            now;
+    s_time_t            now = NOW();
     struct task_slice   next_slice;
     s32                 r_time;     /* time for new dom to run */
 
+    ASSERT(!in_irq());
+
     perfc_incrc(sched_run);
-    
+
     spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
-    now = NOW();
 
     rem_ac_timer(&schedule_data[cpu].s_timer);
     
-    ASSERT(!in_irq());
-
     prev->cpu_time += now - prev->lastschd;
 
     /* get policy-specific decision on scheduling... */
@@ -408,7 +406,7 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -425,11 +423,6 @@
              prev->domain->domain_id, now - prev->lastschd);
     TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
              next->domain->domain_id, now - next->wokenup, r_time);
-
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
-    set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    perfc_incrc(sched_ctx);
 
     /*
      * Logic of wokenup field in domain struct:
@@ -439,7 +432,7 @@
      * also set here then a preempted runnable domain will get a screwed up
      * "waiting time" value next time it is scheduled.
      */
-    prev->wokenup = NOW();
+    prev->wokenup = now;
 
 #if defined(WAKE_HISTO)
     if ( !is_idle_domain(next->domain) && next->wokenup )
@@ -460,6 +453,12 @@
     }
 #endif
 
+    set_bit(_VCPUF_running, &next->vcpu_flags);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    perfc_incrc(sched_ctx);
+
     prev->sleep_tick = schedule_data[cpu].tick;
 
     /* Ensure that the domain has an up-to-date time base. */
@@ -474,25 +473,7 @@
              prev->domain->domain_id, prev->vcpu_id,
              next->domain->domain_id, next->vcpu_id);
 
-    schedule_data[cpu].context_switch_in_progress = 1;
     context_switch(prev, next);
-    if ( schedule_data[cpu].context_switch_in_progress )
-        context_switch_done();
-}
-
-void context_switch_done(void)
-{
-    unsigned int cpu = smp_processor_id();
-    ASSERT(schedule_data[cpu].context_switch_in_progress);
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-    schedule_data[cpu].context_switch_in_progress = 0;
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
-    struct vcpu *p = schedule_data[cpu].curr;
-    return p == idle_domain[cpu];
 }
 
 
diff -r d92a68e6faa9 -r 0aff653824db xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Sat Jan  7 01:31:04 2006
+++ b/xen/include/xen/sched-if.h        Sat Jan  7 15:53:25 2006
@@ -18,7 +18,6 @@
     void               *sched_priv;
     struct ac_timer     s_timer;        /* scheduling timer                */
     unsigned long       tick;           /* current periodic 'tick'         */
-    int                 context_switch_in_progress;
 #ifdef BUCKETS
     u32                 hist[BUCKETS];  /* for scheduler latency histogram */
 #endif
diff -r d92a68e6faa9 -r 0aff653824db xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Sat Jan  7 01:31:04 2006
+++ b/xen/include/xen/sched.h   Sat Jan  7 15:53:25 2006
@@ -271,40 +271,27 @@
 extern void sync_vcpu_execstate(struct vcpu *v);
 
 /*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- * 
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
  */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
 /*
- * If context_switch() does not return to the caller, or you need to perform
- * some aspects of state restoration with interrupts enabled, then you must
- * call context_switch_done() at a suitable safe point.
- * 
- * As when returning from context_switch(), the caller must ensure that the
- * local CPU is no longer running in the previous VCPU's context, and that the
- * context is saved to memory. Alternatively, if implementing lazy context
- * switching, ensure that invoking sync_vcpu_execstate() will switch and
- * commit the previous VCPU's state.
- */
-extern void context_switch_done(void);
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
+ */
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
 
 /* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
-
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
 
 void startup_cpu_idle_loop(void);
 
@@ -400,7 +387,7 @@
 #define DOMF_debugging         (1UL<<_DOMF_debugging)
 
 
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
              !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
