[Xen-devel] [PATCH] x86/xpti: avoid copying L4 page table contents when possible
For mitigation of Meltdown the current L4 page table is copied to the
cpu-local root page table each time a 64-bit PV guest is entered.

The copy can be avoided in cases where the guest L4 page table hasn't been
modified while running the hypervisor, e.g. when handling interrupts or any
hypercall not modifying the L4 page table or %cr3.

So add a per-cpu flag indicating whether the copy should be performed and
set that flag only when loading a new %cr3 or modifying the L4 page table.
The flag must also be set when the cpu-local root page table has to be
synchronized because of an L4 modification done on another cpu, so add a
special flush flag for that case.

A simple performance check (compiling the hypervisor via "make -j 4") in
dom0 with 4 vcpus shows a significant improvement:

- real time drops from 112 seconds to 103 seconds
- system time drops from 142 seconds to 131 seconds

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
To be applied on top of Jan's "Meltdown band-aid overhead reduction" series
---
 xen/arch/x86/mm.c                 | 32 +++++++++++++++++++-------------
 xen/arch/x86/pv/domain.c          |  1 +
 xen/arch/x86/smp.c                |  2 ++
 xen/arch/x86/x86_64/asm-offsets.c |  1 +
 xen/arch/x86/x86_64/entry.S       |  8 ++++++--
 xen/include/asm-x86/current.h     |  8 ++++++++
 xen/include/asm-x86/flushtlb.h    |  2 ++
 7 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 4c9340ca30..5340a49f00 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -510,6 +510,8 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
 void write_ptbase(struct vcpu *v)
 {
     write_cr3(v->arch.cr3);
+    /* Setting copy_l4 unconditionally does no harm. */
+    get_cpu_info()->copy_l4 = true;
 }
 
 /*
@@ -3704,18 +3706,22 @@ long do_mmu_update(
                     break;
                 rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                   cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                /*
-                 * No need to sync if all uses of the page can be accounted
-                 * to the page lock we hold, its pinned status, and uses on
-                 * this (v)CPU.
-                 */
-                if ( !rc && !cpu_has_no_xpti &&
-                     ((page->u.inuse.type_info & PGT_count_mask) >
-                      (1 + !!(page->u.inuse.type_info & PGT_pinned) +
-                       (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
-                       (pagetable_get_pfn(curr->arch.guest_table_user) ==
-                        mfn))) )
-                    sync_guest = true;
+                if ( !rc && !cpu_has_no_xpti )
+                {
+                    get_cpu_info()->copy_l4 = true;
+                    /*
+                     * No need to sync if all uses of the page can be
+                     * accounted to the page lock we hold, its pinned
+                     * status, and uses on this (v)CPU.
+                     */
+                    if ( (page->u.inuse.type_info & PGT_count_mask) >
+                         (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+                          (pagetable_get_pfn(curr->arch.guest_table) ==
+                           mfn) +
+                          (pagetable_get_pfn(curr->arch.guest_table_user) ==
+                           mfn)) )
+                        sync_guest = true;
+                }
                 break;
 
             case PGT_writable_page:
@@ -3830,7 +3836,7 @@ long do_mmu_update(
 
             cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
             if ( !cpumask_empty(mask) )
-                flush_mask(mask, FLUSH_TLB_GLOBAL);
+                flush_mask(mask, FLUSH_TLB_GLOBAL | XPTI_L4_UPDATE);
         }
 
         perfc_add(num_page_updates, i);
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 868a23fd7e..4e6d29e76f 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -238,6 +238,7 @@ static void _toggle_guest_pt(struct vcpu *v, bool force_cr3)
 
     /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
     asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+    get_cpu_info()->copy_l4 = true;
 
     if ( !(v->arch.flags & TF_kernel_mode) )
         return;
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 033dd05958..6e3c8904fb 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -207,6 +207,8 @@ void invalidate_interrupt(struct cpu_user_regs *regs)
     unsigned int flags = flush_flags;
     ack_APIC_irq();
     perfc_incr(ipis);
+    if ( flags & XPTI_L4_UPDATE )
+        get_cpu_info()->copy_l4 = true;
     if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
         flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL);
     if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index e925e6589c..362fd1cbd6 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -144,6 +144,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
     OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
+    OFFSET(CPUINFO_copy_l4, struct cpu_info, copy_l4);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b955cbd43d..764bf792cb 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -46,10 +46,13 @@ restore_all_guest:
 .Lrag_cr3_start:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
-        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
+        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rax
+        cmpb  $0, STACK_CPUINFO_FIELD(copy_l4)(%rdx)
+        je    .Lrag_copyend
+        movb  $0, STACK_CPUINFO_FIELD(copy_l4)(%rdx)
         movabs $PADDR_MASK & PAGE_MASK, %rsi
         movabs $DIRECTMAP_VIRT_START, %rcx
-        mov   %rdi, %rax
+        mov   %rax, %rdi
         and   %rsi, %rdi
         and   %r9, %rsi
         add   %rcx, %rdi
@@ -65,6 +68,7 @@ restore_all_guest:
         sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
+.Lrag_copyend:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         mov   %rdi, %rsi
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 4678a0fcf5..23957206c3 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -59,6 +59,14 @@ struct cpu_info {
     bool         use_shadow_spec_ctrl;
     uint8_t      bti_ist_info;
 
+    /*
+     * The following field controls copying of the L4 page table of 64-bit
+     * PV guests to the per-cpu root page table on entering the guest context.
+     * If set the L4 page table is being copied to the root page table and
+     * the field will be reset.
+     */
+    bool         copy_l4;
+
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
index 2cade9cbfb..bd0493fba0 100644
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -103,6 +103,8 @@ void write_cr3(unsigned long cr3);
 #define FLUSH_VA_VALID   0x800
  /* Flush CPU state */
 #define FLUSH_VCPU_STATE 0x1000
+ /* Update XPTI root page table */
+#define XPTI_L4_UPDATE   0x2000
 
 /* Flush local TLBs/caches. */
 unsigned int flush_area_local(const void *va, unsigned int flags);
-- 
2.13.6
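
To illustrate the idea behind the patch outside of the hypervisor sources,
here is a small self-contained C sketch of the copy_l4 gating. All names in
it (cpu_local, mark_l4_dirty, enter_guest, GUEST_SLOTS) are made up for the
illustration and are not Xen symbols, and the contiguous slot range is a
simplification; in the patch itself the copy is done over the non-Xen L4
slots by restore_all_guest in entry.S, and the flag is set from
write_ptbase(), the mod_l4_entry handling in do_mmu_update, and the
XPTI_L4_UPDATE flush IPI.

/*
 * Simplified model of the copy_l4 gating (illustration only, not Xen code).
 * Build e.g. with: gcc -std=c99 -Wall copy_l4_sketch.c
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define L4_ENTRIES   512
#define GUEST_SLOTS  256   /* pretend the first 256 slots belong to the guest */

struct cpu_local {
    uint64_t root_pgt[L4_ENTRIES]; /* per-cpu root page table (XPTI copy) */
    bool copy_l4;                  /* copy needed on next guest entry? */
};

/* Called whenever a new %cr3 is loaded, an L4 entry is changed locally,
 * or a remote cpu signals an L4 update via the flush IPI. */
static void mark_l4_dirty(struct cpu_local *cpu)
{
    cpu->copy_l4 = true;
}

/* Guest entry path: copy the guest part of the L4 only when needed.
 * Before the patch this copy happened unconditionally on every entry. */
static void enter_guest(struct cpu_local *cpu, const uint64_t *guest_l4)
{
    if ( cpu->copy_l4 )
    {
        memcpy(cpu->root_pgt, guest_l4, GUEST_SLOTS * sizeof(uint64_t));
        cpu->copy_l4 = false;      /* consumed; next entry may skip the copy */
    }
    /* ... switch to the per-cpu root page table and return to the guest ... */
}

int main(void)
{
    static uint64_t guest_l4[L4_ENTRIES];
    static struct cpu_local cpu = { .copy_l4 = true }; /* first entry must copy */

    guest_l4[3] = 0x1234;
    enter_guest(&cpu, guest_l4);   /* copies and clears the flag */
    enter_guest(&cpu, guest_l4);   /* flag clear: copy is skipped */

    guest_l4[5] = 0x5678;          /* guest L4 gets modified ... */
    mark_l4_dirty(&cpu);           /* ... so the flag is set again */
    enter_guest(&cpu, guest_l4);   /* copies again */

    printf("slot 5 = %#lx\n", (unsigned long)cpu.root_pgt[5]);
    return 0;
}

Setting the flag redundantly is deliberately cheap (see the "does no harm"
comment added to write_ptbase() above), which keeps the fast path in the
entry code down to a single byte compare and conditional branch.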