[Xen-devel] [PATCH RFC 47/49] xen/sched: support core scheduling in continue_running()
For core scheduling a transition from an offline vcpu to a running one
must be special cased: the vcpu might be in guest idle but the context
has to be loaded as if a context switch is to be done. For that purpose
add a flag to the vcpu structure which indicates that condition. That
flag is tested in continue_running() and if set the context is loaded
if required.

Carve out some context loading functionality from __context_switch()
into a new function as we need it in continue_running() now, too.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/domain.c     | 114 +++++++++++++++++++++++++++++++++++++++-------
 xen/arch/x86/hvm/hvm.c    |   2 +
 xen/arch/x86/hvm/vlapic.c |   1 +
 xen/common/domain.c       |   2 +
 xen/common/schedule.c     |  19 +++---
 xen/include/xen/sched.h   |   3 ++
 6 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9acf2e9792..7a51064de0 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1171,7 +1171,10 @@ int arch_set_info_guest(
 
  out:
     if ( flags & VGCF_online )
+    {
+        v->reload_context = true;
         clear_bit(_VPF_down, &v->pause_flags);
+    }
     else
         set_bit(_VPF_down, &v->pause_flags);
     return 0;
@@ -1663,6 +1666,24 @@ static inline void load_default_gdt(seg_desc_t *gdt, unsigned int cpu)
     per_cpu(full_gdt_loaded, cpu) = false;
 }
 
+static void inline csw_load_regs(struct vcpu *v,
+                                 struct cpu_user_regs *stack_regs)
+{
+    memcpy(stack_regs, &v->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
+    if ( cpu_has_xsave )
+    {
+        u64 xcr0 = v->arch.xcr0 ?: XSTATE_FP_SSE;
+
+        if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
+            BUG();
+
+        if ( cpu_has_xsaves && is_hvm_vcpu(v) )
+            set_msr_xss(v->arch.hvm.msr_xss);
+    }
+    vcpu_restore_fpu_nonlazy(v, false);
+    v->domain->arch.ctxt_switch->to(v);
+}
+
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
@@ -1676,7 +1697,7 @@ static void __context_switch(void)
     ASSERT(p != n);
     ASSERT(!vcpu_cpu_dirty(n));
 
-    if ( !is_idle_domain(pd) )
+    if ( !is_idle_domain(pd) && is_vcpu_online(p) && !p->reload_context )
     {
         memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
         vcpu_save_fpu(p);
@@ -1692,22 +1713,8 @@
     cpumask_set_cpu(cpu, nd->dirty_cpumask);
     write_atomic(&n->dirty_cpu, cpu);
 
-    if ( !is_idle_domain(nd) )
-    {
-        memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
-        if ( cpu_has_xsave )
-        {
-            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
-
-            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
-                BUG();
-
-            if ( cpu_has_xsaves && is_hvm_vcpu(n) )
-                set_msr_xss(n->arch.hvm.msr_xss);
-        }
-        vcpu_restore_fpu_nonlazy(n, false);
-        nd->arch.ctxt_switch->to(n);
-    }
+    if ( !is_idle_domain(nd) && is_vcpu_online(n) )
+        csw_load_regs(n, stack_regs);
 
     psr_ctxt_switch_to(nd);
 
@@ -1775,6 +1782,72 @@ static void context_wait_rendezvous_out(struct sched_item *item,
         context_saved(prev);
 }
 
+static void __continue_running(struct vcpu *same)
+{
+    struct domain *d = same->domain;
+    seg_desc_t *gdt;
+    bool full_gdt = need_full_gdt(d);
+    unsigned int cpu = smp_processor_id();
+
+    gdt = !is_pv_32bit_domain(d) ? per_cpu(gdt_table, cpu) :
+                                   per_cpu(compat_gdt_table, cpu);
+
+    if ( same->reload_context )
+    {
+        struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
+
+        get_cpu_info()->use_pv_cr3 = false;
+        get_cpu_info()->xen_cr3 = 0;
+
+        local_irq_disable();
+
+        csw_load_regs(same, stack_regs);
+
+        psr_ctxt_switch_to(d);
+
+        if ( full_gdt )
+            write_full_gdt_ptes(gdt, same);
+
+        write_ptbase(same);
+
+#if defined(CONFIG_PV) && defined(CONFIG_HVM)
+        /* Prefetch the VMCB if we expect to use it later in context switch */
+        if ( cpu_has_svm && is_pv_domain(d) && !is_pv_32bit_domain(d) &&
+             !(read_cr4() & X86_CR4_FSGSBASE) )
+            svm_load_segs(0, 0, 0, 0, 0, 0, 0);
+#endif
+
+        if ( full_gdt )
+            load_full_gdt(same, cpu);
+
+        local_irq_enable();
+
+        if ( is_pv_domain(d) )
+            load_segments(same);
+
+        same->reload_context = false;
+
+        _update_runstate_area(same);
+
+        update_vcpu_system_time(same);
+    }
+    else if ( !is_idle_vcpu(same) && full_gdt != per_cpu(full_gdt_loaded, cpu) )
+    {
+        local_irq_disable();
+
+        if ( full_gdt )
+        {
+            write_full_gdt_ptes(gdt, same);
+            write_ptbase(same);
+            load_full_gdt(same, cpu);
+        }
+        else
+            load_default_gdt(gdt, cpu);
+
+        local_irq_enable();
+    }
+}
+
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
     unsigned int cpu = smp_processor_id();
@@ -1811,6 +1884,9 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
          (is_idle_domain(nextd) && cpu_online(cpu)) )
     {
         local_irq_enable();
+
+        if ( !is_idle_domain(nextd) )
+            __continue_running(next);
     }
     else
     {
@@ -1822,6 +1898,8 @@
         if ( is_pv_domain(nextd) )
             load_segments(next);
 
+        next->reload_context = false;
+
         ctxt_switch_levelling(next);
 
         if ( opt_ibpb && !is_idle_domain(nextd) )
@@ -1886,6 +1964,8 @@ void continue_running(struct vcpu *same)
     if ( !vcpu_runnable(same) )
         sched_vcpu_idle(same);
 
+    __continue_running(same);
+
     /* See the comment above. */
     same->domain->arch.ctxt_switch->tail(same);
     BUG();
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 6668df9f3b..12a6d62dc8 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1133,6 +1133,7 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
 
     /* Auxiliary processors should be woken immediately. */
     v->is_initialised = 1;
+    v->reload_context = true;
     clear_bit(_VPF_down, &v->pause_flags);
     vcpu_wake(v);
 
@@ -3913,6 +3914,7 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
     v->arch.flags |= TF_kernel_mode;
 
     v->is_initialised = 1;
+    v->reload_context = true;
     clear_bit(_VPF_down, &v->pause_flags);
 
  out:
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index a1a43cd792..41f8050c02 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -367,6 +367,7 @@ static void vlapic_accept_irq(struct vcpu *v, uint32_t icr_low)
         domain_lock(v->domain);
         if ( v->is_initialised )
             wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+        v->reload_context = wake;
         domain_unlock(v->domain);
         if ( wake )
             vcpu_wake(v);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index d338a2204c..b467197f05 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1383,6 +1383,8 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             rc = -EINVAL;
         else
             wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+        if ( wake )
+            v->reload_context = true;
         domain_unlock(d);
         if ( wake )
             vcpu_wake(v);
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index f43d00b59f..7b30a153df 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1775,17 +1775,22 @@ static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
     {
         next = do_schedule(prev, now);
         atomic_set(&next->rendezvous_out_cnt, sched_granularity + 1);
-        return next;
     }
-
-    while ( prev->rendezvous_in_cnt )
+    else
     {
-        pcpu_schedule_unlock_irq(lock, cpu);
-        cpu_relax();
-        pcpu_schedule_lock_irq(cpu);
+        while ( prev->rendezvous_in_cnt )
+        {
+            pcpu_schedule_unlock_irq(lock, cpu);
+            cpu_relax();
+            pcpu_schedule_lock_irq(cpu);
+        }
+        next = prev->next_task;
     }
 
-    return prev->next_task;
+    if ( unlikely(prev == next) )
+        vcpu_runstate_helper(current, RUNSTATE_running, now);
+
+    return next;
 }
 
 static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 51b8b6a44f..13085ddf90 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -206,6 +206,9 @@ struct vcpu
     bool             hcall_compat;
 #endif
 
+    /* VCPU was down before (context might need to be reloaded). */
+    bool             reload_context;
+
     /* The CPU, if any, which is holding onto this VCPU's state. */
 #define VCPU_CPU_CLEAN (~0u)
     unsigned int     dirty_cpu;
--
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
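
To make the intended use of the new flag easier to follow, below is a minimal,
self-contained C sketch of the reload_context life cycle described in the
commit message. It is illustrative only: the sketch_* type and helpers are
simplified stand-ins, not Xen structures or APIs, and the work done on a
reload (csw_load_regs(), GDT and page-table loading, runstate updates) is
reduced to a print statement.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct vcpu; only the new flag is modelled. */
struct sketch_vcpu {
    bool reload_context;
};

/*
 * Every path that brings a vcpu online sets the flag, mirroring the hooks
 * the patch adds in arch_set_info_guest(), hvm_load_cpu_ctxt(),
 * hvm_vcpu_reset_state(), vlapic_accept_irq() and VCPUOP_up.
 */
static void sketch_vcpu_online(struct sketch_vcpu *v)
{
    v->reload_context = true;
}

/*
 * continue_running() -> __continue_running() consumes the flag: when set,
 * the context is loaded as if a full context switch had happened and the
 * flag is cleared; otherwise at most a GDT fixup is needed.
 */
static void sketch_continue_running(struct sketch_vcpu *v)
{
    if ( v->reload_context )
    {
        printf("reload: registers, GDT, page tables, runstate, system time\n");
        v->reload_context = false;
    }
    else
        printf("no reload: at most switch between full and default GDT\n");
}

int main(void)
{
    struct sketch_vcpu v = { .reload_context = false };

    sketch_vcpu_online(&v);       /* vcpu transitions from offline to online */
    sketch_continue_running(&v);  /* first run after onlining reloads state  */
    sketch_continue_running(&v);  /* later runs skip the reload              */
    return 0;
}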