[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86/xpti: Don't leak TSS-adjacent percpu data via Meltdown



commit 7888440625617693487495a7842e6a991ead2647
Author:     Jan Beulich <JBeulich@xxxxxxxx>
AuthorDate: Fri Aug 9 14:16:06 2019 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Mon Aug 12 14:10:09 2019 +0100

    x86/xpti: Don't leak TSS-adjacent percpu data via Meltdown
    
    The XPTI work restricted the visibility of most of memory, but missed a few
    aspects when it came to the TSS.
    
    Given that the TSS is just an object in percpu data, the 4k mapping for it
    created in setup_cpu_root_pgt() maps adjacent percpu data, making it all
    leakable via Meltdown, even when XPTI is in use.
    
    Furthermore, no care is taken to check that the TSS doesn't cross a page
    boundary.  As it turns out, struct tss_struct is aligned on its size which
    does prevent it straddling a page boundary.
    
    Rework the TSS types while making this change.  Rename tss_struct to tss64, 
to
    mirror the existing tss32 structure we have in HVM's Tast Switch logic.  
Drop
    tss64's alignment and __cacheline_filler[] field.
    
    Introduce tss_page which contains a single tss64 and keeps the rest of the
    page clear, so no adjacent data can be leaked.  Move the definition from
    setup.c to traps.c, which is a more appropriate place for it to live.
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/cpu/common.c       | 20 ++++++++------------
 xen/arch/x86/hvm/vmx/vmcs.c     |  2 +-
 xen/arch/x86/setup.c            |  3 ---
 xen/arch/x86/smpboot.c          |  6 +++++-
 xen/arch/x86/traps.c            | 18 ++++++++++--------
 xen/include/asm-x86/processor.h |  9 +++++----
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 3e76afbc6c..a074176c00 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -707,7 +707,7 @@ void load_system_tables(void)
        unsigned long stack_bottom = get_stack_bottom(),
                stack_top = stack_bottom & ~(STACK_SIZE - 1);
 
-       struct tss_struct *tss = &this_cpu(init_tss);
+       struct tss64 *tss = &this_cpu(tss_page).tss;
        seg_desc_t *gdt =
                this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;
        seg_desc_t *compat_gdt =
@@ -722,7 +722,7 @@ void load_system_tables(void)
                .limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
        };
 
-       *tss = (struct tss_struct){
+       *tss = (struct tss64){
                /* Main stack for interrupts/exceptions. */
                .rsp0 = stack_bottom,
 
@@ -747,16 +747,12 @@ void load_system_tables(void)
                .bitmap = IOBMP_INVALID_OFFSET,
        };
 
-       _set_tssldt_desc(
-               gdt + TSS_ENTRY,
-               (unsigned long)tss,
-               offsetof(struct tss_struct, __cacheline_filler) - 1,
-               SYS_DESC_tss_avail);
-       _set_tssldt_desc(
-               compat_gdt + TSS_ENTRY,
-               (unsigned long)tss,
-               offsetof(struct tss_struct, __cacheline_filler) - 1,
-               SYS_DESC_tss_busy);
+       BUILD_BUG_ON(sizeof(*tss) <= 0x67); /* Mandated by the architecture. */
+
+       _set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
+                        sizeof(*tss) - 1, SYS_DESC_tss_avail);
+       _set_tssldt_desc(compat_gdt + TSS_ENTRY, (unsigned long)tss,
+                        sizeof(*tss) - 1, SYS_DESC_tss_busy);
 
        per_cpu(full_gdt_loaded, cpu) = false;
        lgdt(&gdtr);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 6b16fe1e3c..16f14abe8f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -796,7 +796,7 @@ static void vmx_set_host_env(struct vcpu *v)
               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
     __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
 
-    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu));
+    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
 
     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index d2011910fa..f9d38155d3 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -16,7 +16,6 @@
 #include <xen/domain_page.h>
 #include <xen/version.h>
 #include <xen/gdbstub.h>
-#include <xen/percpu.h>
 #include <xen/hypercall.h>
 #include <xen/keyhandler.h>
 #include <xen/numa.h>
@@ -100,8 +99,6 @@ unsigned long __read_mostly xen_phys_start;
 
 unsigned long __read_mostly xen_virt_end;
 
-DEFINE_PER_CPU(struct tss_struct, init_tss);
-
 char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
     cpu0_stack[STACK_SIZE];
 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 8d5fef0012..5057109a77 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -825,7 +825,11 @@ static int setup_cpu_root_pgt(unsigned int cpu)
     if ( !rc )
         rc = clone_mapping(idt_tables[cpu], rpt);
     if ( !rc )
-        rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+    {
+        BUILD_BUG_ON(sizeof(this_cpu(tss_page)) != PAGE_SIZE);
+
+        rc = clone_mapping(&per_cpu(tss_page, cpu).tss, rpt);
+    }
     if ( !rc )
         rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index dd5f1af044..23069e25ec 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -108,6 +108,12 @@ idt_entry_t __section(".bss.page_aligned") 
__aligned(PAGE_SIZE)
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
 
+/*
+ * The TSS is smaller than a page, but we give it a full page to avoid
+ * adjacent per-cpu data leaking via Meltdown when XPTI is in use.
+ */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
+
 bool (*ioemul_handle_quirk)(
     u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
 
@@ -559,7 +565,7 @@ void show_stack_overflow(unsigned int cpu, const struct 
cpu_user_regs *regs)
 
     printk("Valid stack range: %p-%p, sp=%p, tss.rsp0=%p\n",
            (void *)esp_top, (void *)esp_bottom, (void *)esp,
-           (void *)per_cpu(init_tss, cpu).rsp0);
+           (void *)per_cpu(tss_page, cpu).tss.rsp0);
 
     /*
      * Trigger overflow trace if %esp is anywhere within the guard page, or
@@ -1897,7 +1903,7 @@ static void __init set_intr_gate(unsigned int n, void 
*addr)
 
 void load_TR(void)
 {
-    struct tss_struct *tss = &this_cpu(init_tss);
+    struct tss64 *tss = &this_cpu(tss_page).tss;
     struct desc_ptr old_gdt, tss_gdt = {
         .base = (long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY),
         .limit = LAST_RESERVED_GDT_BYTE
@@ -1905,14 +1911,10 @@ void load_TR(void)
 
     _set_tssldt_desc(
         this_cpu(gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_avail);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_avail);
     _set_tssldt_desc(
         this_cpu(compat_gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_busy);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);
 
     /* Switch to non-compat GDT (which has B bit clear) to execute LTR. */
     asm volatile (
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 2862321eee..3660238ca8 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -411,7 +411,7 @@ static always_inline void __mwait(unsigned long eax, 
unsigned long ecx)
 #define IOBMP_BYTES             8192
 #define IOBMP_INVALID_OFFSET    0x8000
 
-struct __packed __cacheline_aligned tss_struct {
+struct __packed tss64 {
     uint32_t :32;
     uint64_t rsp0, rsp1, rsp2;
     uint64_t :64;
@@ -422,9 +422,11 @@ struct __packed __cacheline_aligned tss_struct {
     uint64_t ist[7];
     uint64_t :64;
     uint16_t :16, bitmap;
-    /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
-    uint8_t __cacheline_filler[24];
 };
+struct tss_page {
+    struct tss64 __aligned(PAGE_SIZE) tss;
+};
+DECLARE_PER_CPU(struct tss_page, tss_page);
 
 #define IST_NONE 0UL
 #define IST_DF   1UL
@@ -463,7 +465,6 @@ static inline void disable_each_ist(idt_entry_t *idt)
 extern idt_entry_t idt_table[];
 extern idt_entry_t *idt_tables[];
 
-DECLARE_PER_CPU(struct tss_struct, init_tss);
 DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
 
 extern void write_ptbase(struct vcpu *v);
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.