[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH V4] x86/hvm: fix domain crash when CR3 has the noflush bit set
On Fri, Feb 9, 2018 at 4:01 AM, Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx> wrote: > The emulation layers of Xen lack PCID support, and as we only offer > PCID to HAP guests, all writes to CR3 are handled by hardware, > except when introspection is involved. Consequently, trying to set > CR3 when the noflush bit is set in hvm_set_cr3() leads to domain > crashes. The workaround is to clear the noflush bit in > hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized. > Additionally, a bool parameter now propagates to > {svm,vmx}_update_guest_cr(), so that no flushes occur when > the bit was set. > > Signed-off-by: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx> > Reported-by: Bitweasil <bitweasil@xxxxxxxxxxxxxx> > Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Acked-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx> > > --- > Changes since V3: > - Removed unnecessary !! from "noflush = !!(value & X86_CR3_NOFLUSH);" > - Moved the X86_CR3_NOFLUSH #define to x86-defns.h. > - Removed X86_CR3_NOFLUSH_DISABLE_MASK (using ~X86_CR3_NOFLUSH). > - Reverted changes to hvm_update_guest_cr() and callers, and added > hvm_update_guest_cr3() instead. > - Added HVM_UPDATE_GUEST_CR3_NO_FLUSH as a potential flag to pass > to update_guest_cr(), instead of a simple bool which did not > apply to all CRs. > --- > xen/arch/x86/hvm/hvm.c | 13 ++++++++++--- > xen/arch/x86/hvm/monitor.c | 3 +++ > xen/arch/x86/hvm/svm/svm.c | 22 ++++++++++++++-------- > xen/arch/x86/hvm/vmx/vmx.c | 18 +++++++++++------- > xen/arch/x86/mm.c | 2 +- > xen/arch/x86/mm/hap/hap.c | 6 +++--- > xen/arch/x86/mm/shadow/common.c | 2 +- > xen/arch/x86/mm/shadow/multi.c | 6 +++--- > xen/arch/x86/mm/shadow/none.c | 2 +- > xen/include/asm-x86/hvm/hvm.h | 15 +++++++++++++-- > xen/include/asm-x86/hvm/svm/svm.h | 2 +- > xen/include/asm-x86/paging.h | 7 ++++--- > xen/include/asm-x86/x86-defns.h | 5 +++++ > 13 files changed, 70 insertions(+), 33 deletions(-) > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index 18d721d..f17163e 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer) > struct vcpu *v = current; > struct page_info *page; > unsigned long old = v->arch.hvm_vcpu.guest_cr[3]; > + bool noflush = false; > > if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled > & > monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) > @@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer) > } > } > > + if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */ > + { > + noflush = value & X86_CR3_NOFLUSH; > + value &= ~X86_CR3_NOFLUSH; > + } > + > if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) && > (value != v->arch.hvm_vcpu.guest_cr[3]) ) > { > @@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer) > } > > v->arch.hvm_vcpu.guest_cr[3] = value; > - paging_update_cr3(v); > + paging_update_cr3(v, noflush); > return X86EMUL_OKAY; > > bad_cr3: > @@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void) > > /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */ > for_each_vcpu ( d, v ) > - paging_update_cr3(v); > + paging_update_cr3(v, false); > > /* Flush all dirty TLBs. */ > flush_tlb_mask(d->dirty_cpumask); > @@ -4193,7 +4200,7 @@ static int hvmop_set_param( > domain_pause(d); > d->arch.hvm_domain.params[a.index] = a.value; > for_each_vcpu ( d, v ) > - paging_update_cr3(v); > + paging_update_cr3(v, false); > domain_unpause(d); > > domctl_lock_release(); > diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c > index 131b852..87e7e31 100644 > --- a/xen/arch/x86/hvm/monitor.c > +++ b/xen/arch/x86/hvm/monitor.c > @@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long > value, unsigned long old) > struct arch_domain *ad = &curr->domain->arch; > unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index); > > + if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) ) > + value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */ > + > if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) && > (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) || > value != old) && > diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c > index 81cf5b8..4e74f62 100644 > --- a/xen/arch/x86/hvm/svm/svm.c > +++ b/xen/arch/x86/hvm/svm/svm.c > @@ -315,9 +315,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct > hvm_hw_cpu *c) > v->arch.hvm_vcpu.guest_cr[2] = c->cr2; > v->arch.hvm_vcpu.guest_cr[3] = c->cr3; > v->arch.hvm_vcpu.guest_cr[4] = c->cr4; > - svm_update_guest_cr(v, 0); > - svm_update_guest_cr(v, 2); > - svm_update_guest_cr(v, 4); > + svm_update_guest_cr(v, 0, 0); > + svm_update_guest_cr(v, 2, 0); > + svm_update_guest_cr(v, 4, 0); > > /* Load sysenter MSRs into both VMCB save area and VCPU fields. */ > vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs; > @@ -533,7 +533,7 @@ static int svm_guest_x86_mode(struct vcpu *v) > return likely(vmcb->cs.db) ? 4 : 2; > } > > -void svm_update_guest_cr(struct vcpu *v, unsigned int cr) > +void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags) > { > struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; > uint64_t value; > @@ -563,13 +563,19 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int > cr) > case 3: > vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]); > if ( !nestedhvm_enabled(v->domain) ) > - hvm_asid_flush_vcpu(v); > + { > + if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) ) > + hvm_asid_flush_vcpu(v); > + } > else if ( nestedhvm_vmswitch_in_progress(v) ) > ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */ > else > - hvm_asid_flush_vcpu_asid( > - nestedhvm_vcpu_in_guestmode(v) > - ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid); > + { > + if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) ) > + hvm_asid_flush_vcpu_asid( > + nestedhvm_vcpu_in_guestmode(v) > + ? &vcpu_nestedhvm(v).nv_n2asid : > &v->arch.hvm_vcpu.n1asid); > + } > break; > case 4: > value = HVM_CR4_HOST_MASK; > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index 3dc6a6d..4106d4c 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -70,7 +70,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v); > static int vmx_alloc_vlapic_mapping(struct domain *d); > static void vmx_free_vlapic_mapping(struct domain *d); > static void vmx_install_vlapic_mapping(struct vcpu *v); > -static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); > +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr, > + unsigned int flags); > static void vmx_update_guest_efer(struct vcpu *v); > static void vmx_wbinvd_intercept(void); > static void vmx_fpu_dirty_intercept(void); > @@ -840,9 +841,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct > hvm_hw_cpu *c) > > v->arch.hvm_vcpu.guest_cr[2] = c->cr2; > v->arch.hvm_vcpu.guest_cr[4] = c->cr4; > - vmx_update_guest_cr(v, 0); > - vmx_update_guest_cr(v, 2); > - vmx_update_guest_cr(v, 4); > + vmx_update_guest_cr(v, 0, 0); > + vmx_update_guest_cr(v, 2, 0); > + vmx_update_guest_cr(v, 4, 0); > > v->arch.hvm_vcpu.guest_efer = c->msr_efer; > vmx_update_guest_efer(v); > @@ -1552,7 +1553,8 @@ void vmx_update_debug_state(struct vcpu *v) > vmx_vmcs_exit(v); > } > > -static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr, > + unsigned int flags) > { > vmx_vmcs_enter(v); > > @@ -1704,7 +1706,9 @@ static void vmx_update_guest_cr(struct vcpu *v, > unsigned int cr) > } > > __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]); > - hvm_asid_flush_vcpu(v); > + > + if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) ) > + hvm_asid_flush_vcpu(v); > break; > > default: > @@ -2656,7 +2660,7 @@ static int vmx_cr_access(unsigned long > exit_qualification) > */ > hvm_monitor_crX(CR0, value, old); > curr->arch.hvm_vcpu.guest_cr[0] = value; > - vmx_update_guest_cr(curr, 0); > + vmx_update_guest_cr(curr, 0, 0); > HVMTRACE_0D(CLTS); > break; > } > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c > index 66ea822..1ef3635 100644 > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v) > > if ( paging_mode_enabled(v->domain) ) > { > - paging_update_cr3(v); > + paging_update_cr3(v, false); > return; > } > > diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c > index 003c2d8..b76e6b8 100644 > --- a/xen/arch/x86/mm/hap/hap.c > +++ b/xen/arch/x86/mm/hap/hap.c > @@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long > va) > return 1; > } > > -static void hap_update_cr3(struct vcpu *v, int do_locking) > +static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush) > { > v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3]; > - hvm_update_guest_cr(v, 3); > + hvm_update_guest_cr3(v, noflush); > } > > const struct paging_mode * > @@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v) > } > > /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */ > - hap_update_cr3(v, 0); > + hap_update_cr3(v, 0, false); > > paging_unlock(d); > put_gfn(d, cr3_gfn); > diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c > index c240953..20ded3e 100644 > --- a/xen/arch/x86/mm/shadow/common.c > +++ b/xen/arch/x86/mm/shadow/common.c > @@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v) > } > #endif /* OOS */ > > - v->arch.paging.mode->update_cr3(v, 0); > + v->arch.paging.mode->update_cr3(v, 0, false); > } > > void shadow_update_paging_modes(struct vcpu *v) > diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c > index a6372e3..fcc4fa3 100644 > --- a/xen/arch/x86/mm/shadow/multi.c > +++ b/xen/arch/x86/mm/shadow/multi.c > @@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v, > * In any case, in the PAE case, the ASSERT is not true; it can > * happen because of actions the guest is taking. */ > #if GUEST_PAGING_LEVELS == 3 > - v->arch.paging.mode->update_cr3(v, 0); > + v->arch.paging.mode->update_cr3(v, 0, false); > #else > ASSERT(d->is_shutting_down); > #endif > @@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v, > > > static void > -sh_update_cr3(struct vcpu *v, int do_locking) > +sh_update_cr3(struct vcpu *v, int do_locking, bool noflush) > /* Updates vcpu->arch.cr3 after the guest has changed CR3. > * Paravirtual guests should set v->arch.guest_table (and guest_table_user, > * if appropriate). > @@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking) > v->arch.hvm_vcpu.hw_cr[3] = > pagetable_get_paddr(v->arch.shadow_table[0]); > #endif > - hvm_update_guest_cr(v, 3); > + hvm_update_guest_cr3(v, noflush); > } > > /* Fix up the linear pagetable mappings */ > diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c > index 9e6ad23..a8c9604 100644 > --- a/xen/arch/x86/mm/shadow/none.c > +++ b/xen/arch/x86/mm/shadow/none.c > @@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct > p2m_domain *p2m, > return gfn_x(INVALID_GFN); > } > > -static void _update_cr3(struct vcpu *v, int do_locking) > +static void _update_cr3(struct vcpu *v, int do_locking, bool noflush) > { > ASSERT_UNREACHABLE(); > } > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h > index dd3dd5f..1b4080b 100644 > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -80,6 +80,9 @@ enum hvm_intblk { > #define HVM_EVENT_VECTOR_UNSET (-1) > #define HVM_EVENT_VECTOR_UPDATING (-2) > > +/* update_guest_cr() flags. */ > +#define HVM_UPDATE_GUEST_CR3_NO_FLUSH 0x00000001 > + > /* > * The hardware virtual machine (HVM) interface abstracts away from the > * x86/x86_64 CPU virtualization assist specifics. Currently this interface > @@ -132,7 +135,8 @@ struct hvm_function_table { > /* > * Called to inform HVM layer that a guest CRn or EFER has changed. > */ > - void (*update_guest_cr)(struct vcpu *v, unsigned int cr); > + void (*update_guest_cr)(struct vcpu *v, unsigned int cr, > + unsigned int flags); > void (*update_guest_efer)(struct vcpu *v); > > void (*cpuid_policy_changed)(struct vcpu *v); > @@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v) > > static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr) > { > - hvm_funcs.update_guest_cr(v, cr); > + hvm_funcs.update_guest_cr(v, cr, 0); > +} > + > +static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush) > +{ > + unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NO_FLUSH : 0; > + > + hvm_funcs.update_guest_cr(v, 3, flags); > } > > static inline void hvm_update_guest_efer(struct vcpu *v) > diff --git a/xen/include/asm-x86/hvm/svm/svm.h > b/xen/include/asm-x86/hvm/svm/svm.h > index 462cb89..6c050f5 100644 > --- a/xen/include/asm-x86/hvm/svm/svm.h > +++ b/xen/include/asm-x86/hvm/svm/svm.h > @@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, > uint32_t asid) > > unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr); > void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len); > -void svm_update_guest_cr(struct vcpu *, unsigned int cr); > +void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags); > > extern u32 svm_feature_flags; > > diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h > index 5607ab4..dd3e31f 100644 > --- a/xen/include/asm-x86/paging.h > +++ b/xen/include/asm-x86/paging.h > @@ -122,7 +122,8 @@ struct paging_mode { > unsigned long cr3, > paddr_t ga, uint32_t *pfec, > unsigned int *page_order); > - void (*update_cr3 )(struct vcpu *v, int do_locking); > + void (*update_cr3 )(struct vcpu *v, int do_locking, > + bool noflush); > void (*update_paging_modes )(struct vcpu *v); > void (*write_p2m_entry )(struct domain *d, unsigned long > gfn, > l1_pgentry_t *p, l1_pgentry_t > new, > @@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct > vcpu *v, > /* Update all the things that are derived from the guest's CR3. > * Called when the guest changes CR3; the caller can then use v->arch.cr3 > * as the value to load into the host CR3 to schedule this vcpu */ > -static inline void paging_update_cr3(struct vcpu *v) > +static inline void paging_update_cr3(struct vcpu *v, bool noflush) > { > - paging_get_hostmode(v)->update_cr3(v, 1); > + paging_get_hostmode(v)->update_cr3(v, 1, noflush); > } > > /* Update all the things that are derived from the guest's CR0/CR3/CR4. > diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h > index 70453e8..7bb6918 100644 > --- a/xen/include/asm-x86/x86-defns.h > +++ b/xen/include/asm-x86/x86-defns.h > @@ -43,6 +43,11 @@ > #define X86_CR0_PG 0x80000000 /* Paging (RW) > */ > > /* > + * Intel CPU flags in CR3 > + */ > +#define X86_CR3_NOFLUSH 0x8000000000000000 > + > +/* > * Intel CPU features in CR4 > */ > #define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ > -- > 2.7.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |