[Xen-devel] [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active
Instead of flushing global pages from the TLB when switching address
spaces with XPTI being active, just disable global pages completely
via %cr4 while a domain subject to XPTI is active. This avoids the
need for extra TLB flushes, as loading %cr3 will then remove all TLB
entries.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/cpu/mtrr/generic.c | 32 +++++++++++++++++++++-----------
 xen/arch/x86/flushtlb.c         | 39 +++++++++++++++++++++++++--------------
 xen/arch/x86/x86_64/entry.S     | 10 ----------
 xen/include/asm-x86/domain.h    |  3 ++-
 4 files changed, 48 insertions(+), 36 deletions(-)
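As an aside, the flush rule this relies on can be shown as a stand-alone
C sketch (not part of the patch): the fake_cr* variables, the load_cr3()
stub and the main() driver are invented purely for the demonstration;
only the body of do_flush_tlb() mirrors the hunk added to
xen/arch/x86/flushtlb.c below.

    #include <stdio.h>

    #define X86_CR4_PGE (1UL << 7)            /* global-pages enable bit */

    static unsigned long fake_cr3 = 0x1000, fake_cr4 = X86_CR4_PGE;

    static unsigned long read_cr3(void) { return fake_cr3; }
    static unsigned long read_cr4(void) { return fake_cr4; }

    /* Stand-in for "mov %0, %%cr3": flushes all non-global TLB entries. */
    static void load_cr3(unsigned long val)
    {
        fake_cr3 = val;
        puts("mov %cr3        -> non-global TLB entries flushed");
    }

    /* Stand-in for write_cr4(): toggling PGE flushes the complete TLB. */
    static void write_cr4(unsigned long val)
    {
        if ( (fake_cr4 ^ val) & X86_CR4_PGE )
            puts("CR4.PGE toggled -> complete TLB flushed");
        fake_cr4 = val;
    }

    static void do_flush_tlb(unsigned long cr3)
    {
        unsigned long cr4 = read_cr4();

        if ( cr4 & X86_CR4_PGE )
        {
            /* Global pages enabled: only a PGE toggle removes them;
             * with no new %cr3 value the toggle alone is the flush. */
            write_cr4(cr4 & ~X86_CR4_PGE);
            if ( cr3 )
                load_cr3(cr3);
            write_cr4(cr4);
        }
        else
            /* XPTI domain active, no global pages: a CR3 load suffices. */
            load_cr3(cr3 ? cr3 : read_cr3());
    }

    int main(void)
    {
        do_flush_tlb(0);           /* full flush with PGE still enabled */
        fake_cr4 &= ~X86_CR4_PGE;  /* as if an XPTI domain were scheduled */
        do_flush_tlb(0x2000);      /* the address-space switch is the flush */
        return 0;
    }

The point the sketch makes: while CR4.PGE is kept clear for an XPTI
domain, every %cr3 load already flushes the whole TLB, so the costly
%cr4 read-modify-write on each address-space switch can go away.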
diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index e9c0e5e059..d705138100 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -400,8 +400,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
  * has been called.
  */
 
-static void prepare_set(void)
+static bool prepare_set(void)
 {
+	unsigned long cr4;
+
 	/* Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots */
@@ -412,18 +414,22 @@ static void prepare_set(void)
 	write_cr0(read_cr0() | X86_CR0_CD);
 	wbinvd();
 
-	/* TLB flushing here relies on Xen always using CR4.PGE. */
-	BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE));
-	write_cr4(read_cr4() & ~X86_CR4_PGE);
+	cr4 = read_cr4();
+	if (cr4 & X86_CR4_PGE)
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	/* Save MTRR state */
 	rdmsrl(MSR_MTRRdefType, deftype);
 
 	/* Disable MTRRs, and set the default type to uncached */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);
+
+	return !!(cr4 & X86_CR4_PGE);
 }
 
-static void post_set(void)
+static void post_set(bool pge)
 {
 	/* Intel (P6) standard MTRRs */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype);
@@ -432,7 +438,10 @@ static void post_set(void)
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Reenable CR4.PGE (also flushes the TLB) */
-	write_cr4(read_cr4() | X86_CR4_PGE);
+	if (pge)
+		write_cr4(read_cr4() | X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	spin_unlock(&set_atomicity_lock);
 }
@@ -441,14 +450,15 @@ static void generic_set_all(void)
 {
 	unsigned long mask, count;
 	unsigned long flags;
+	bool pge;
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	/* Actually set the state */
 	mask = set_mtrr_state();
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 
 	/* Use the atomic bitops to update the global mask */
@@ -457,7 +467,6 @@ static void generic_set_all(void)
 		set_bit(count, &smp_changes_mask);
 		mask >>= 1;
 	}
-
 }
 
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
@@ -474,11 +483,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 {
 	unsigned long flags;
 	struct mtrr_var_range *vr;
+	bool pge;
 
 	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	if (size == 0) {
 		/* The invalid bit is kept in the mask, so we simply clear the
@@ -499,7 +509,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 		mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
 	}
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 }
 
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index e4ea4f3297..186d9099f6 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -72,20 +72,39 @@ static void post_flush(u32 t)
     this_cpu(tlbflush_time) = t;
 }
 
+static void do_flush_tlb(unsigned long cr3)
+{
+    unsigned long cr4;
+
+    cr4 = read_cr4();
+    if ( cr4 & X86_CR4_PGE )
+    {
+        write_cr4(cr4 & ~X86_CR4_PGE);
+        if ( cr3 )
+            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        else
+            barrier();
+        write_cr4(cr4);
+    }
+    else
+    {
+        if ( !cr3 )
+            cr3 = read_cr3();
+        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+    }
+}
+
 void write_cr3(unsigned long cr3)
 {
-    unsigned long flags, cr4;
+    unsigned long flags;
     u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
    local_irq_save(flags);
 
     t = pre_flush();
 
-    cr4 = read_cr4();
-    write_cr4(cr4 & ~X86_CR4_PGE);
-    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-    write_cr4(cr4);
+    do_flush_tlb(cr3);
 
     post_flush(t);
 
@@ -123,22 +142,14 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
         u32 t = pre_flush();
 
         if ( !cpu_has_invpcid )
-        {
-            unsigned long cr4 = read_cr4();
-
-            write_cr4(cr4 & ~X86_CR4_PGE);
-            barrier();
-            write_cr4(cr4);
-        }
+            do_flush_tlb(0);
         else
-        {
             /*
              * Using invpcid to flush all mappings works
              * regardless of whether PCID is enabled or not.
              * It is faster than read-modify-write CR4.
              */
             invpcid_flush_all();
-        }
 
         post_flush(t);
     }
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index cdcdc2c40a..a8d38e7eb2 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -73,13 +73,8 @@ restore_all_guest:
                   ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
 .Lrag_copy_done:
-        mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 .Lrag_cr3_end:
         ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
 
@@ -136,12 +131,7 @@ restore_all_xen:
          * so "g" will have to do.
          */
         UNLIKELY_START(g, exit_cr3)
-        mov   %cr4, %rdi
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
         UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0cc37dea05..316418a6fe 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -622,7 +622,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
                       X86_CR4_SMAP | X86_CR4_OSXSAVE |  \
                       X86_CR4_FSGSBASE))                \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))     \
-     & ~X86_CR4_DE)
+     & ~(X86_CR4_DE |                                   \
+        ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
 #define real_cr4_to_pv_guest_cr4(c)                     \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |  \
              X86_CR4_OSXSAVE | X86_CR4_SMEP |           \
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel