[Xen-devel] [PATCH V6] x86/hvm: fix domain crash when CR3 has the noflush bit set
In hardware, when PCID support is enabled and the NOFLUSH bit is set
when writing a CR3 value, the hardware will clear that bit and change
the CR3 without flushing the TLB. hvm_set_cr3(), however, was ignoring
this bit; the result was that post-vm_event checks detected an invalid
CR3 value and crashed the domain. Handle NOFLUSH in hvm_set_cr3() by:

1. Clearing the bit.

2. Passing a "noflush" flag to the lower-level CR3-setting functions to
   indicate that a flush should not be performed.

Also clear X86_CR3_NOFLUSH when reporting monitored CR3 writes. This
allows introspection to be used on VMs whose operating system uses the
NOFLUSH bit.

Signed-off-by: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx>
Reported-by: Bitweasil <bitweasil@xxxxxxxxxxxxxx>
Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>

---
Changes since V5:
 - Updated the commit message.
---
 xen/arch/x86/hvm/hvm.c            | 13 ++++++++++---
 xen/arch/x86/hvm/monitor.c        |  3 +++
 xen/arch/x86/hvm/svm/svm.c        | 15 +++++++++------
 xen/arch/x86/hvm/vmx/vmx.c        | 18 +++++++++++-------
 xen/arch/x86/mm.c                 |  2 +-
 xen/arch/x86/mm/hap/hap.c         |  6 +++---
 xen/arch/x86/mm/shadow/common.c   |  2 +-
 xen/arch/x86/mm/shadow/multi.c    |  6 +++---
 xen/arch/x86/mm/shadow/none.c     |  2 +-
 xen/include/asm-x86/hvm/hvm.h     | 15 +++++++++++++--
 xen/include/asm-x86/hvm/svm/svm.h |  2 +-
 xen/include/asm-x86/paging.h      |  7 ++++---
 xen/include/asm-x86/x86-defns.h   |  5 +++++
 13 files changed, 65 insertions(+), 31 deletions(-)
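For context, the guest-side behaviour that triggers this path is
sketched below. The fragment is purely illustrative (it is not part of
the patch, and the CR3_NOFLUSH and write_cr3_noflush names are made up
for the example): with CR4.PCIDE set, an OS may set bit 63 of the value
it writes to CR3 to request a page-table switch that preserves TLB
entries tagged with the target PCID. Under HVM that write is
intercepted and lands in hvm_set_cr3(), which previously did not expect
the extra bit.

/* Illustrative guest-side fragment; hypothetical names, not from any
 * real kernel.  With CR4.PCIDE enabled, bit 63 of the CR3 value asks
 * the CPU to skip the TLB flush on the page-table switch. */
#include <stdint.h>

#define CR3_NOFLUSH (UINT64_C(1) << 63)   /* same bit as X86_CR3_NOFLUSH */

static inline void write_cr3_noflush(uint64_t pgd_phys, uint64_t pcid)
{
    uint64_t val = pgd_phys | (pcid & 0xfff) | CR3_NOFLUSH;

    /* Under HVM this privileged write is intercepted and reaches
     * hvm_set_cr3() below. */
    asm volatile ( "mov %0, %%cr3" :: "r" (val) : "memory" );
}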
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 0539551..10ae922 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     struct vcpu *v = current;
     struct page_info *page;
     unsigned long old = v->arch.hvm_vcpu.guest_cr[3];
+    bool noflush = false;
 
     if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
                                monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
         }
     }
 
+    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
+    {
+        noflush = value & X86_CR3_NOFLUSH;
+        value &= ~X86_CR3_NOFLUSH;
+    }
+
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
@@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     }
 
     v->arch.hvm_vcpu.guest_cr[3] = value;
-    paging_update_cr3(v);
+    paging_update_cr3(v, noflush);
     return X86EMUL_OKAY;
 
  bad_cr3:
@@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void)
 
     /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
     for_each_vcpu ( d, v )
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
 
     /* Flush all dirty TLBs. */
     flush_tlb_mask(d->dirty_cpumask);
@@ -4193,7 +4200,7 @@ static int hvmop_set_param(
         domain_pause(d);
         d->arch.hvm_domain.params[a.index] = a.value;
         for_each_vcpu ( d, v )
-            paging_update_cr3(v);
+            paging_update_cr3(v, false);
         domain_unpause(d);
 
         domctl_lock_release();
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 160c032..2a41ccc 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long value, unsigned long old)
     struct arch_domain *ad = &curr->domain->arch;
     unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
 
+    if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
+        value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
+
     if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
          (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
           value != old) &&
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index f2fbe07..c34f5b5 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -325,9 +325,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    svm_update_guest_cr(v, 0);
-    svm_update_guest_cr(v, 2);
-    svm_update_guest_cr(v, 4);
+    svm_update_guest_cr(v, 0, 0);
+    svm_update_guest_cr(v, 2, 0);
+    svm_update_guest_cr(v, 4, 0);
 
     /* Load sysenter MSRs into both VMCB save area and VCPU fields. */
     vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
@@ -543,7 +543,7 @@ static int svm_guest_x86_mode(struct vcpu *v)
     return likely(vmcb->cs.db) ? 4 : 2;
 }
 
-void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
+void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     uint64_t value;
@@ -583,10 +583,13 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
     case 3:
         vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]);
         if ( !nestedhvm_enabled(v->domain) )
-            hvm_asid_flush_vcpu(v);
+        {
+            if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+                hvm_asid_flush_vcpu(v);
+        }
         else if ( nestedhvm_vmswitch_in_progress(v) )
             ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */
-        else
+        else if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
             hvm_asid_flush_vcpu_asid(
                 nestedhvm_vcpu_in_guestmode(v)
                 ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid);
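As a reading aid, the path the new "noflush" information takes from the
CR3 write emulation down to the vendor-specific handlers above and
below can be summarised as follows (assembled from the hunks in this
patch, not additional code):

/*
 * hvm_set_cr3(value, may_defer)                          // hvm.c
 *   -> paging_update_cr3(v, noflush)                     // paging.h
 *     -> paging mode ->update_cr3(v, 1, noflush)
 *       -> hap_update_cr3() or sh_update_cr3()
 *         -> hvm_update_guest_cr3(v, noflush)            // hvm.h
 *           -> hvm_funcs.update_guest_cr(v, 3,
 *                  noflush ? HVM_UPDATE_GUEST_CR3_NOFLUSH : 0)
 *             -> svm_update_guest_cr() / vmx_update_guest_cr(),
 *                which skip the ASID flush when the flag is set.
 */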
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index aa05050..ff9e997 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -68,7 +68,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
 static int  vmx_alloc_vlapic_mapping(struct domain *d);
 static void vmx_free_vlapic_mapping(struct domain *d);
 static void vmx_install_vlapic_mapping(struct vcpu *v);
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags);
 static void vmx_update_guest_efer(struct vcpu *v);
 static void vmx_wbinvd_intercept(void);
 static void vmx_fpu_dirty_intercept(void);
@@ -836,9 +837,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    vmx_update_guest_cr(v, 0);
-    vmx_update_guest_cr(v, 2);
-    vmx_update_guest_cr(v, 4);
+    vmx_update_guest_cr(v, 0, 0);
+    vmx_update_guest_cr(v, 2, 0);
+    vmx_update_guest_cr(v, 4, 0);
 
     v->arch.hvm_vcpu.guest_efer = c->msr_efer;
     vmx_update_guest_efer(v);
@@ -1548,7 +1549,8 @@ void vmx_update_debug_state(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags)
 {
     vmx_vmcs_enter(v);
 
@@ -1730,7 +1732,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
         }
 
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
-        hvm_asid_flush_vcpu(v);
+
+        if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+            hvm_asid_flush_vcpu(v);
         break;
 
     default:
@@ -2682,7 +2686,7 @@ static int vmx_cr_access(unsigned long exit_qualification)
          */
         hvm_monitor_crX(CR0, value, old);
        curr->arch.hvm_vcpu.guest_cr[0] = value;
-        vmx_update_guest_cr(curr, 0);
+        vmx_update_guest_cr(curr, 0, 0);
         HVMTRACE_0D(CLTS);
         break;
     }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e1f089b..9d26a9d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v)
 
     if ( paging_mode_enabled(v->domain) )
     {
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
         return;
     }
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 003c2d8..b76e6b8 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
     return 1;
 }
 
-static void hap_update_cr3(struct vcpu *v, int do_locking)
+static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
-    hvm_update_guest_cr(v, 3);
+    hvm_update_guest_cr3(v, noflush);
 }
 
 const struct paging_mode *
@@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v)
     }
 
     /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
-    hap_update_cr3(v, 0);
+    hap_update_cr3(v, 0, false);
 
     paging_unlock(d);
     put_gfn(d, cr3_gfn);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index c240953..20ded3e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v)
     }
 #endif /* OOS */
 
-    v->arch.paging.mode->update_cr3(v, 0);
+    v->arch.paging.mode->update_cr3(v, 0, false);
 }
 
 void shadow_update_paging_modes(struct vcpu *v)
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index a6372e3..fcc4fa3 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v,
          * In any case, in the PAE case, the ASSERT is not true; it can
          * happen because of actions the guest is taking. */
 #if GUEST_PAGING_LEVELS == 3
-        v->arch.paging.mode->update_cr3(v, 0);
+        v->arch.paging.mode->update_cr3(v, 0, false);
 #else
         ASSERT(d->is_shutting_down);
 #endif
@@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
 
 
 static void
-sh_update_cr3(struct vcpu *v, int do_locking)
+sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 /* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
@@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
         v->arch.hvm_vcpu.hw_cr[3] =
             pagetable_get_paddr(v->arch.shadow_table[0]);
 #endif
-        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr3(v, noflush);
     }
 
     /* Fix up the linear pagetable mappings */
diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
index 9e6ad23..a8c9604 100644
--- a/xen/arch/x86/mm/shadow/none.c
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
     return gfn_x(INVALID_GFN);
 }
 
-static void _update_cr3(struct vcpu *v, int do_locking)
+static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     ASSERT_UNREACHABLE();
 }
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dd3dd5f..ca3a334 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -80,6 +80,9 @@ enum hvm_intblk {
 #define HVM_EVENT_VECTOR_UNSET    (-1)
 #define HVM_EVENT_VECTOR_UPDATING (-2)
 
+/* update_guest_cr() flags. */
+#define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -132,7 +135,8 @@ struct hvm_function_table {
     /*
      * Called to inform HVM layer that a guest CRn or EFER has changed.
      */
-    void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
+    void (*update_guest_cr)(struct vcpu *v, unsigned int cr,
+                            unsigned int flags);
     void (*update_guest_efer)(struct vcpu *v);
 
     void (*cpuid_policy_changed)(struct vcpu *v);
@@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v)
 
 static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
 {
-    hvm_funcs.update_guest_cr(v, cr);
+    hvm_funcs.update_guest_cr(v, cr, 0);
+}
+
+static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush)
+{
+    unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NOFLUSH : 0;
+
+    hvm_funcs.update_guest_cr(v, 3, flags);
 }
 
 static inline void hvm_update_guest_efer(struct vcpu *v)
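As a usage sketch of the new hvm_update_guest_cr3() wrapper, a
hypothetical caller (mirroring what hap_update_cr3() and
sh_update_cr3() do after this patch; the function name is made up)
would look like:

/* Hypothetical caller, for illustration only: commit the recomputed
 * CR3 to the VMCB/VMCS, flushing the ASID only when noflush is clear. */
static void example_commit_cr3(struct vcpu *v, bool noflush)
{
    v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
    hvm_update_guest_cr3(v, noflush);
}

Keeping update_guest_cr()'s extra parameter a flags word rather than a
bool leaves room for further per-CR update modifiers without another
interface change.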
diff --git a/xen/include/asm-x86/hvm/svm/svm.h b/xen/include/asm-x86/hvm/svm/svm.h
index 593546f..4e5e142 100644
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, uint32_t asid)
 unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
 
-void svm_update_guest_cr(struct vcpu *, unsigned int cr);
+void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
 
 extern u32 svm_feature_flags;
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 5607ab4..dd3e31f 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -122,7 +122,8 @@ struct paging_mode {
                                             unsigned long cr3,
                                             paddr_t ga, uint32_t *pfec,
                                             unsigned int *page_order);
-    void          (*update_cr3            )(struct vcpu *v, int do_locking);
+    void          (*update_cr3            )(struct vcpu *v, int do_locking,
+                                            bool noflush);
     void          (*update_paging_modes   )(struct vcpu *v);
     void          (*write_p2m_entry       )(struct domain *d, unsigned long gfn,
                                             l1_pgentry_t *p, l1_pgentry_t new,
@@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
 /* Update all the things that are derived from the guest's CR3.
  * Called when the guest changes CR3; the caller can then use v->arch.cr3
  * as the value to load into the host CR3 to schedule this vcpu */
-static inline void paging_update_cr3(struct vcpu *v)
+static inline void paging_update_cr3(struct vcpu *v, bool noflush)
 {
-    paging_get_hostmode(v)->update_cr3(v, 1);
+    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
 }
 
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 70453e8..8598ade 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -43,6 +43,11 @@
 #define X86_CR0_PG              0x80000000 /* Paging (RW) */
 
 /*
+ * Intel CPU flags in CR3
+ */
+#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+
+/*
  * Intel CPU features in CR4
  */
 #define X86_CR4_VME        0x00000001 /* enable vm86 extensions */
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel