[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen stable-4.10] x86: slightly reduce Meltdown band-aid overhead



commit 489cfbc1b915bc1462579901f5af90b9f779b8b2
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Mar 20 14:20:29 2018 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Mar 20 14:20:29 2018 +0100

    x86: slightly reduce Meltdown band-aid overhead
    
    I'm not sure why I didn't do this right away: By avoiding the use of
    global PTEs in the cloned directmap, there's no need to fiddle with
    CR4.PGE on any of the entry paths. Only the exit paths need to flush
    global mappings.
    
    The reduced flushing, however, requires that we now have interrupts off
    on all entry paths until after the page table switch, so that flush IPIs
    can't be serviced while on the restricted pagetables, leaving a window
    where a potentially stale guest global mapping can be brought into the
    TLB. Along those lines the "sync" IPI after L4 entry updates now needs
    to become a real (and global) flush IPI, so that inside Xen we'll also
    pick up such changes.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Tested-by: Juergen Gross <jgross@xxxxxxxx>
    Reviewed-by: Juergen Gross <jgross@xxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    
    x86: correct EFLAGS.IF in SYSENTER frame
    
    Commit 9d1d31ad94 ("x86: slightly reduce Meltdown band-aid overhead")
    moved the STI past the PUSHF. While this isn't an active problem (as we
    force EFLAGS.IF to 1 before exiting to guest context), let's not risk
    internal confusion by finding a PV guest frame with interrupts
    apparently off.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    master commit: 9d1d31ad9498e6ceb285d5774e34fed5f648c273
    master date: 2018-03-06 16:48:44 +0100
    master commit: c4dd58f0cf23cdf119bbccedfb8c24435fc6f3ab
    master date: 2018-03-16 17:27:36 +0100
---
 xen/arch/x86/mm.c                  |  8 ++------
 xen/arch/x86/smpboot.c             | 10 ++++++++++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++++---
 xen/arch/x86/x86_64/entry.S        | 23 ++++++++++++++---------
 4 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 7958d00e81..e37c40b954 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3792,18 +3792,14 @@ long do_mmu_update(
     {
         /*
          * Force other vCPU-s of the affected guest to pick up L4 entry
-         * changes (if any). Issue a flush IPI with empty operation mask to
-         * facilitate this (including ourselves waiting for the IPI to
-         * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
-         * meaningless without FLUSH_CACHE, but will allow to pass the no-op
-         * check in flush_area_mask().
+         * changes (if any).
          */
         unsigned int cpu = smp_processor_id();
         cpumask_t *mask = per_cpu(scratch_cpumask, cpu);
 
         cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu));
         if ( !cpumask_empty(mask) )
-            flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID);
+            flush_mask(mask, FLUSH_TLB_GLOBAL);
     }
 
     perfc_add(num_page_updates, i);
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 4e17c5a05c..aa0b632320 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -741,6 +741,7 @@ static int clone_mapping(const void *ptr, root_pgentry_t 
*rpt)
     }
 
     pl1e += l1_table_offset(linear);
+    flags &= ~_PAGE_GLOBAL;
 
     if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
     {
@@ -1050,8 +1051,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
     if ( per_cpu(root_pgt, 0) )
+    {
         get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
 
+        /*
+         * All entry points which may need to switch page tables have to start
+         * with interrupts off. Re-write what pv_trap_init() has put there.
+         */
+        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
+                  &int80_direct_trap);
+    }
+
     set_nr_sockets();
 
     socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
diff --git a/xen/arch/x86/x86_64/compat/entry.S 
b/xen/arch/x86/x86_64/compat/entry.S
index 5b13b24e16..34c83c9c52 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -202,7 +202,7 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -222,11 +222,12 @@ ENTRY(cstar_enter)
         jz    .Lcstar_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
+        sti
 
-        GET_CURRENT(bx)
+        __GET_CURRENT(bx)
         movq  VCPU_domain(%rbx),%rcx
         cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
         je    switch_to_kernel
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 82438a02e7..5944cf6a01 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -150,7 +150,7 @@ UNLIKELY_END(exit_cr3)
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -169,9 +169,10 @@ ENTRY(lstar_enter)
         jz    .Llstar_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
+        sti
 
         __GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
@@ -254,7 +255,7 @@ process_trap:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -270,14 +271,17 @@ GLOBAL(sysenter_eflags_saved)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_END(bx)
+        /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */
+        orl   $X86_EFLAGS_IF, UREGS_eflags(%rsp)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lsyse_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
+        sti
 
         __GET_CURRENT(bx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -324,9 +328,10 @@ ENTRY(int80_direct_trap)
         jz    .Lint80_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
+        sti
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -510,7 +515,7 @@ ENTRY(common_interrupt)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lintr_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -552,7 +557,7 @@ GLOBAL(handle_exception)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lxcpt_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -748,7 +753,7 @@ ENTRY(double_fault)
         jns   .Ldblf_cr3_load
         neg   %rbx
 .Ldblf_cr3_load:
-        write_cr3 rbx, rdi, rsi
+        mov   %rbx, %cr3
 .Ldblf_cr3_okay:
 
         movq  %rsp,%rdi
@@ -783,7 +788,7 @@ handle_ist_exception:
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .List_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
 .List_cr3_okay:
 
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.10

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.