VMX: fix VMCS race on context-switch paths When __context_switch() is being bypassed during original context switch handling, the vCPU "owning" the VMCS partially loses control of it: It will appear non-running to remote CPUs, and hence their attempt to pause the owning vCPU will have no effect on it (as it already looks to be paused). At the same time the "owning" CPU will re-enable interrupts eventually (at the latest when entering the idle loop) and hence becomes subject to IPIs from other CPUs requesting access to the VMCS. As a result, when __context_switch() finally gets run, the CPU may no longer have the VMCS loaded, and hence any accesses to it would fail. Hence we may need to re-load the VMCS in vmx_ctxt_switch_from(). Similarly, when __context_switch() is being bypassed also on the second (switch-in) path, VMCS ownership may have been lost and hence needs re-establishing. Since there's no existing hook to put this in, add a new one. Reported-by: Kevin Mayer Reported-by: Anshul Makkar Signed-off-by: Jan Beulich --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -2098,11 +2098,14 @@ void context_switch(struct vcpu *prev, s set_current(next); - if ( (per_cpu(curr_vcpu, cpu) == next) || - (is_idle_domain(nextd) && cpu_online(cpu)) ) + if ( (per_cpu(curr_vcpu, cpu) == next) ) { + if ( next->arch.ctxt_switch_same ) + next->arch.ctxt_switch_same(next); local_irq_enable(); } + else if ( is_idle_domain(nextd) && cpu_online(cpu) ) + local_irq_enable(); else { __context_switch(); --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -552,6 +552,27 @@ static void vmx_load_vmcs(struct vcpu *v local_irq_restore(flags); } +void vmx_vmcs_reload(struct vcpu *v) +{ + /* + * As we're running with interrupts disabled, we can't acquire + * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled + * the VMCS can't be taken away from us anymore if we still own it. 
+ */ + ASSERT(!local_irq_is_enabled()); + if ( v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs) ) + return; + ASSERT(!this_cpu(current_vmcs)); + + /* + * Wait for the remote side to be done with the VMCS before loading + * it here. + */ + while ( v->arch.hvm_vmx.active_cpu != -1 ) + cpu_relax(); + vmx_load_vmcs(v); +} + int vmx_cpu_up_prepare(unsigned int cpu) { /* --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -298,6 +298,7 @@ static int vmx_vcpu_initialise(struct vc v->arch.schedule_tail = vmx_do_resume; v->arch.ctxt_switch_from = vmx_ctxt_switch_from; v->arch.ctxt_switch_to = vmx_ctxt_switch_to; + v->arch.ctxt_switch_same = vmx_vmcs_reload; if ( (rc = vmx_create_vmcs(v)) != 0 ) { @@ -936,6 +937,18 @@ static void vmx_ctxt_switch_from(struct if ( unlikely(!this_cpu(vmxon)) ) return; + if ( !v->is_running ) + { + /* + * When this vCPU isn't marked as running anymore, a remote pCPU's + * attempt to pause us (from vmx_vmcs_enter()) won't have a reason + * to spin in vcpu_sleep_sync(), and hence that pCPU might have taken + * away the VMCS from us. As we're running with interrupts disabled, + * we also can't call vmx_vmcs_enter(). + */ + vmx_vmcs_reload(v); + } + vmx_fpu_leave(v); vmx_save_guest_msrs(v); vmx_restore_host_msrs(); --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -514,6 +514,7 @@ struct arch_vcpu void (*ctxt_switch_from) (struct vcpu *); void (*ctxt_switch_to) (struct vcpu *); + void (*ctxt_switch_same) (struct vcpu *); struct vpmu_struct vpmu; --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -174,6 +174,7 @@ void vmx_destroy_vmcs(struct vcpu *v); void vmx_vmcs_enter(struct vcpu *v); bool_t __must_check vmx_vmcs_try_enter(struct vcpu *v); void vmx_vmcs_exit(struct vcpu *v); +void vmx_vmcs_reload(struct vcpu *v); #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 #define CPU_BASED_USE_TSC_OFFSETING 0x00000008