|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH V6] x86/hvm: fix domain crash when CR3 has the noflush bit set
In hardware, when PCID support is enabled and the NOFLUSH bit is set
when writing a CR3 value, the hardware will clear that that bit and
change the CR3 without flushing the TLB. hvm_set_cr3(), however, was
ignoring this bit; the result was that post-vm_event checks detected
an invalid CR3 value and crashed the domain.
Handle NOFLUSH in hvm_set_cr3() by:
1. Clearing the bit
2. Passing a "noflush" flag to lower-level cr3 setting functions to
indicate that a flush should not be performed.
Also clear X86_CR3_NOFLUSH when reporting CR3 monitored CR3 writes.
This allows introspection to be used on VMs whose operating system uses
the NOFLUSH bit.
Signed-off-by: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx>
Reported-by: Bitweasil <bitweasil@xxxxxxxxxxxxxx>
Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>
---
Changes since V5:
- Updated the commit message.
---
xen/arch/x86/hvm/hvm.c | 13 ++++++++++---
xen/arch/x86/hvm/monitor.c | 3 +++
xen/arch/x86/hvm/svm/svm.c | 15 +++++++++------
xen/arch/x86/hvm/vmx/vmx.c | 18 +++++++++++-------
xen/arch/x86/mm.c | 2 +-
xen/arch/x86/mm/hap/hap.c | 6 +++---
xen/arch/x86/mm/shadow/common.c | 2 +-
xen/arch/x86/mm/shadow/multi.c | 6 +++---
xen/arch/x86/mm/shadow/none.c | 2 +-
xen/include/asm-x86/hvm/hvm.h | 15 +++++++++++++--
xen/include/asm-x86/hvm/svm/svm.h | 2 +-
xen/include/asm-x86/paging.h | 7 ++++---
xen/include/asm-x86/x86-defns.h | 5 +++++
13 files changed, 65 insertions(+), 31 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 0539551..10ae922 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
struct vcpu *v = current;
struct page_info *page;
unsigned long old = v->arch.hvm_vcpu.guest_cr[3];
+ bool noflush = false;
if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
}
}
+ if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
+ {
+ noflush = value & X86_CR3_NOFLUSH;
+ value &= ~X86_CR3_NOFLUSH;
+ }
+
if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
(value != v->arch.hvm_vcpu.guest_cr[3]) )
{
@@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
}
v->arch.hvm_vcpu.guest_cr[3] = value;
- paging_update_cr3(v);
+ paging_update_cr3(v, noflush);
return X86EMUL_OKAY;
bad_cr3:
@@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void)
/* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
for_each_vcpu ( d, v )
- paging_update_cr3(v);
+ paging_update_cr3(v, false);
/* Flush all dirty TLBs. */
flush_tlb_mask(d->dirty_cpumask);
@@ -4193,7 +4200,7 @@ static int hvmop_set_param(
domain_pause(d);
d->arch.hvm_domain.params[a.index] = a.value;
for_each_vcpu ( d, v )
- paging_update_cr3(v);
+ paging_update_cr3(v, false);
domain_unpause(d);
domctl_lock_release();
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 160c032..2a41ccc 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long value,
unsigned long old)
struct arch_domain *ad = &curr->domain->arch;
unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
+ if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
+ value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
+
if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
(!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
value != old) &&
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index f2fbe07..c34f5b5 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -325,9 +325,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct
hvm_hw_cpu *c)
v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
- svm_update_guest_cr(v, 0);
- svm_update_guest_cr(v, 2);
- svm_update_guest_cr(v, 4);
+ svm_update_guest_cr(v, 0, 0);
+ svm_update_guest_cr(v, 2, 0);
+ svm_update_guest_cr(v, 4, 0);
/* Load sysenter MSRs into both VMCB save area and VCPU fields. */
vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
@@ -543,7 +543,7 @@ static int svm_guest_x86_mode(struct vcpu *v)
return likely(vmcb->cs.db) ? 4 : 2;
}
-void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
+void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
uint64_t value;
@@ -583,10 +583,13 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
case 3:
vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]);
if ( !nestedhvm_enabled(v->domain) )
- hvm_asid_flush_vcpu(v);
+ {
+ if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+ hvm_asid_flush_vcpu(v);
+ }
else if ( nestedhvm_vmswitch_in_progress(v) )
; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */
- else
+ else if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
hvm_asid_flush_vcpu_asid(
nestedhvm_vcpu_in_guestmode(v)
? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index aa05050..ff9e997 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -68,7 +68,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
static int vmx_alloc_vlapic_mapping(struct domain *d);
static void vmx_free_vlapic_mapping(struct domain *d);
static void vmx_install_vlapic_mapping(struct vcpu *v);
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+ unsigned int flags);
static void vmx_update_guest_efer(struct vcpu *v);
static void vmx_wbinvd_intercept(void);
static void vmx_fpu_dirty_intercept(void);
@@ -836,9 +837,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct
hvm_hw_cpu *c)
v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
- vmx_update_guest_cr(v, 0);
- vmx_update_guest_cr(v, 2);
- vmx_update_guest_cr(v, 4);
+ vmx_update_guest_cr(v, 0, 0);
+ vmx_update_guest_cr(v, 2, 0);
+ vmx_update_guest_cr(v, 4, 0);
v->arch.hvm_vcpu.guest_efer = c->msr_efer;
vmx_update_guest_efer(v);
@@ -1548,7 +1549,8 @@ void vmx_update_debug_state(struct vcpu *v)
vmx_vmcs_exit(v);
}
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+ unsigned int flags)
{
vmx_vmcs_enter(v);
@@ -1730,7 +1732,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned
int cr)
}
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
- hvm_asid_flush_vcpu(v);
+
+ if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+ hvm_asid_flush_vcpu(v);
break;
default:
@@ -2682,7 +2686,7 @@ static int vmx_cr_access(unsigned long exit_qualification)
*/
hvm_monitor_crX(CR0, value, old);
curr->arch.hvm_vcpu.guest_cr[0] = value;
- vmx_update_guest_cr(curr, 0);
+ vmx_update_guest_cr(curr, 0, 0);
HVMTRACE_0D(CLTS);
break;
}
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e1f089b..9d26a9d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v)
if ( paging_mode_enabled(v->domain) )
{
- paging_update_cr3(v);
+ paging_update_cr3(v, false);
return;
}
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 003c2d8..b76e6b8 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
return 1;
}
-static void hap_update_cr3(struct vcpu *v, int do_locking)
+static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
{
v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
- hvm_update_guest_cr(v, 3);
+ hvm_update_guest_cr3(v, noflush);
}
const struct paging_mode *
@@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v)
}
/* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
- hap_update_cr3(v, 0);
+ hap_update_cr3(v, 0, false);
paging_unlock(d);
put_gfn(d, cr3_gfn);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index c240953..20ded3e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v)
}
#endif /* OOS */
- v->arch.paging.mode->update_cr3(v, 0);
+ v->arch.paging.mode->update_cr3(v, 0, false);
}
void shadow_update_paging_modes(struct vcpu *v)
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index a6372e3..fcc4fa3 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v,
* In any case, in the PAE case, the ASSERT is not true; it can
* happen because of actions the guest is taking. */
#if GUEST_PAGING_LEVELS == 3
- v->arch.paging.mode->update_cr3(v, 0);
+ v->arch.paging.mode->update_cr3(v, 0, false);
#else
ASSERT(d->is_shutting_down);
#endif
@@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
static void
-sh_update_cr3(struct vcpu *v, int do_locking)
+sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
/* Updates vcpu->arch.cr3 after the guest has changed CR3.
* Paravirtual guests should set v->arch.guest_table (and guest_table_user,
* if appropriate).
@@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
v->arch.hvm_vcpu.hw_cr[3] =
pagetable_get_paddr(v->arch.shadow_table[0]);
#endif
- hvm_update_guest_cr(v, 3);
+ hvm_update_guest_cr3(v, noflush);
}
/* Fix up the linear pagetable mappings */
diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
index 9e6ad23..a8c9604 100644
--- a/xen/arch/x86/mm/shadow/none.c
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct
p2m_domain *p2m,
return gfn_x(INVALID_GFN);
}
-static void _update_cr3(struct vcpu *v, int do_locking)
+static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
{
ASSERT_UNREACHABLE();
}
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dd3dd5f..ca3a334 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -80,6 +80,9 @@ enum hvm_intblk {
#define HVM_EVENT_VECTOR_UNSET (-1)
#define HVM_EVENT_VECTOR_UPDATING (-2)
+/* update_guest_cr() flags. */
+#define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
+
/*
* The hardware virtual machine (HVM) interface abstracts away from the
* x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -132,7 +135,8 @@ struct hvm_function_table {
/*
* Called to inform HVM layer that a guest CRn or EFER has changed.
*/
- void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
+ void (*update_guest_cr)(struct vcpu *v, unsigned int cr,
+ unsigned int flags);
void (*update_guest_efer)(struct vcpu *v);
void (*cpuid_policy_changed)(struct vcpu *v);
@@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v)
static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
{
- hvm_funcs.update_guest_cr(v, cr);
+ hvm_funcs.update_guest_cr(v, cr, 0);
+}
+
+static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush)
+{
+ unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NOFLUSH : 0;
+
+ hvm_funcs.update_guest_cr(v, 3, flags);
}
static inline void hvm_update_guest_efer(struct vcpu *v)
diff --git a/xen/include/asm-x86/hvm/svm/svm.h
b/xen/include/asm-x86/hvm/svm/svm.h
index 593546f..4e5e142 100644
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, uint32_t
asid)
unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
-void svm_update_guest_cr(struct vcpu *, unsigned int cr);
+void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
extern u32 svm_feature_flags;
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 5607ab4..dd3e31f 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -122,7 +122,8 @@ struct paging_mode {
unsigned long cr3,
paddr_t ga, uint32_t *pfec,
unsigned int *page_order);
- void (*update_cr3 )(struct vcpu *v, int do_locking);
+ void (*update_cr3 )(struct vcpu *v, int do_locking,
+ bool noflush);
void (*update_paging_modes )(struct vcpu *v);
void (*write_p2m_entry )(struct domain *d, unsigned long
gfn,
l1_pgentry_t *p, l1_pgentry_t new,
@@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct
vcpu *v,
/* Update all the things that are derived from the guest's CR3.
* Called when the guest changes CR3; the caller can then use v->arch.cr3
* as the value to load into the host CR3 to schedule this vcpu */
-static inline void paging_update_cr3(struct vcpu *v)
+static inline void paging_update_cr3(struct vcpu *v, bool noflush)
{
- paging_get_hostmode(v)->update_cr3(v, 1);
+ paging_get_hostmode(v)->update_cr3(v, 1, noflush);
}
/* Update all the things that are derived from the guest's CR0/CR3/CR4.
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 70453e8..8598ade 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -43,6 +43,11 @@
#define X86_CR0_PG 0x80000000 /* Paging (RW) */
/*
+ * Intel CPU flags in CR3
+ */
+#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+
+/*
* Intel CPU features in CR4
*/
#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
--
2.7.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |