
[Xen-devel] [PATCH v5] x86/xpti: Don't leak TSS-adjacent percpu data via Meltdown



From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

The XPTI work restricted the visibility of most of memory, but missed a few
aspects when it came to the TSS.

Given that the TSS is just an object in percpu data, the 4k mapping for it
created in setup_cpu_root_pgt() maps adjacent percpu data, making it all
leakable via Meltdown, even when XPTI is in use.
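
(Illustrative sketch only, not part of the patch: the XPTI root page table is
populated at 4k granularity, so cloning the mapping for the TSS necessarily
exposes the whole page it lives in.  The helper names below are made up for
the example.)

  #include <stdint.h>

  #define PAGE_SIZE 4096UL
  #define PAGE_MASK (~(PAGE_SIZE - 1))

  /* map_page() stands in for the real clone_mapping() machinery. */
  static void clone_tss_mapping(const void *tss,
                                void (*map_page)(unsigned long va))
  {
      unsigned long va = (uintptr_t)tss & PAGE_MASK; /* round down to 4k */

      /*
       * Everything in [va, va + PAGE_SIZE) becomes readable via Meltdown,
       * not just the sizeof(struct tss_struct) bytes of the TSS itself.
       */
      map_page(va);
  }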

Furthermore, no care is taken to check that the TSS doesn't cross a page
boundary.  As it turns out, struct tss_struct is aligned on its size, which
does prevent it from straddling a page boundary.
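
(Again purely illustrative: because the structure's size is a power of two no
larger than a page, aligning it on its size means its offset within a page is
a multiple of that size, so it cannot straddle a page boundary.  The 0x80-byte
size below assumes the old cacheline-padded layout.)

  #include <assert.h>
  #include <stdint.h>

  #define PAGE_SZ 4096UL
  #define TSS_SZ   128UL  /* old sizeof(struct tss_struct), filler included */

  static int straddles_page(uintptr_t base, unsigned long size)
  {
      return (base / PAGE_SZ) != ((base + size - 1) / PAGE_SZ);
  }

  int main(void)
  {
      uintptr_t base;

      /* Every size-aligned placement keeps the object inside one page. */
      for ( base = 0; base < PAGE_SZ; base += TSS_SZ )
          assert(!straddles_page(base, TSS_SZ));

      return 0;
  }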

Move the TSS into the page aligned percpu area, so no adjacent data can be
leaked.  Move the definition from setup.c to traps.c, which is a more
appropriate place for it to live.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Introduce / use struct tss_page. Drop __cacheline_filler field.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

The _struct suffix on tss_struct is quite redundant.  Rename it to tss64 to
mirror the existing tss32 structure we have in HVM's Task Switch logic.

The per-cpu name having an init_ prefix is also wrong.  There is exactly one
TSS for each CPU, which is used for the lifetime of the system.  Drop the
redirection and update all callers to use tss_page properly.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
TBD: Especially with how the previous patch now works, I'm unconvinced of
     the utility of the linker script alignment check.  In particular, it
     doesn't check the property we're after in this patch, i.e. that there
     is nothing else in the same page.
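
     (For illustration only, assuming a C11 toolchain: it is the combination
     of page-sized padding and page alignment, not an alignment check alone,
     that guarantees nothing else shares the page.)

       #define PAGE_SIZE 4096

       /* Stand-in for the real structure; only size/alignment matter here. */
       struct tss_page {
           unsigned char tss[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
       };

       /* Mirrors the BUILD_BUG_ON() added in setup_cpu_root_pgt()... */
       _Static_assert(sizeof(struct tss_page) == PAGE_SIZE,
                      "tss_page must be exactly one page");
       /*
        * ...while DEFINE_PER_CPU_PAGE_ALIGNED() provides the alignment half;
        * a linker script alignment check would only cover the latter.
        */
       _Static_assert(_Alignof(struct tss_page) == PAGE_SIZE,
                      "tss_page must be page-aligned");
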
NB: Sadly get_per_cpu_var() can't also be used on the "left" side of a
    #define.
---
v5:
 * Replace link time assertion with BUILD_BUG_ON(). Rename types and
   variables. Drop __cacheline_filler field.

v4:
 * Introduce / use struct tss_page.

v3:
 * Drop the remark about CET.  It is no longer accurate in the latest version
   of the CET spec.

v2:
 * Rebase over changes to include __aligned() within
   DEFINE_PER_CPU_PAGE_ALIGNED()
 * Drop now-unused xen/percpu.h from setup.c

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -707,7 +707,7 @@ void load_system_tables(void)
        unsigned long stack_bottom = get_stack_bottom(),
                stack_top = stack_bottom & ~(STACK_SIZE - 1);
-       struct tss_struct *tss = &this_cpu(init_tss);
+       struct tss64 *tss = &this_cpu(tss_page).tss;
        seg_desc_t *gdt =
                this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;
        seg_desc_t *compat_gdt =
@@ -722,7 +722,7 @@ void load_system_tables(void)
                .limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
        };
-       *tss = (struct tss_struct){
+       *tss = (struct tss64){
                /* Main stack for interrupts/exceptions. */
                .rsp0 = stack_bottom,
@@ -747,16 +747,10 @@ void load_system_tables(void)
                .bitmap = IOBMP_INVALID_OFFSET,
        };
-       _set_tssldt_desc(
-               gdt + TSS_ENTRY,
-               (unsigned long)tss,
-               offsetof(struct tss_struct, __cacheline_filler) - 1,
-               SYS_DESC_tss_avail);
-       _set_tssldt_desc(
-               compat_gdt + TSS_ENTRY,
-               (unsigned long)tss,
-               offsetof(struct tss_struct, __cacheline_filler) - 1,
-               SYS_DESC_tss_busy);
+       _set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
+                        sizeof(*tss) - 1, SYS_DESC_tss_avail);
+       _set_tssldt_desc(compat_gdt + TSS_ENTRY, (unsigned long)tss,
+                        sizeof(*tss) - 1, SYS_DESC_tss_busy);
per_cpu(full_gdt_loaded, cpu) = false;
        lgdt(&gdtr);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -796,7 +796,7 @@ static void vmx_set_host_env(struct vcpu
               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
     __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
-    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu));
+    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());

--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -16,7 +16,6 @@
 #include <xen/domain_page.h>
 #include <xen/version.h>
 #include <xen/gdbstub.h>
-#include <xen/percpu.h>
 #include <xen/hypercall.h>
 #include <xen/keyhandler.h>
 #include <xen/numa.h>
@@ -100,8 +99,6 @@ unsigned long __read_mostly xen_phys_sta
 unsigned long __read_mostly xen_virt_end;
 
-DEFINE_PER_CPU(struct tss_struct, init_tss);
-
 char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
     cpu0_stack[STACK_SIZE];
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -825,7 +825,11 @@ static int setup_cpu_root_pgt(unsigned i
     if ( !rc )
         rc = clone_mapping(idt_tables[cpu], rpt);
     if ( !rc )
-        rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+    {
+        BUILD_BUG_ON(sizeof(this_cpu(tss_page)) != PAGE_SIZE);
+
+        rc = clone_mapping(&per_cpu(tss_page, cpu), rpt);
+    }
     if ( !rc )
         rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -108,6 +108,12 @@ idt_entry_t __section(".bss.page_aligned
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
+/*
+ * The TSS is smaller than a page, but we give it a full page to avoid
+ * adjacent per-cpu data leaking via Meltdown when XPTI is in use.
+ */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
+
 bool (*ioemul_handle_quirk)(
     u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
@@ -559,7 +565,7 @@ void show_stack_overflow(unsigned int cp
     printk("Valid stack range: %p-%p, sp=%p, tss.rsp0=%p\n",
            (void *)esp_top, (void *)esp_bottom, (void *)esp,
-           (void *)per_cpu(init_tss, cpu).rsp0);
+           (void *)per_cpu(tss_page, cpu).tss.rsp0);
/*
      * Trigger overflow trace if %esp is anywhere within the guard page, or
@@ -1897,7 +1903,7 @@ static void __init set_intr_gate(unsigne
void load_TR(void)
 {
-    struct tss_struct *tss = &this_cpu(init_tss);
+    struct tss64 *tss = &this_cpu(tss_page).tss;
     struct desc_ptr old_gdt, tss_gdt = {
         .base = (long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY),
         .limit = LAST_RESERVED_GDT_BYTE
@@ -1905,14 +1911,10 @@ void load_TR(void)
     _set_tssldt_desc(
         this_cpu(gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_avail);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_avail);
     _set_tssldt_desc(
         this_cpu(compat_gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_busy);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);
/* Switch to non-compat GDT (which has B bit clear) to execute LTR. */
     asm volatile (
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -411,7 +411,7 @@ static always_inline void __mwait(unsign
 #define IOBMP_BYTES             8192
 #define IOBMP_INVALID_OFFSET    0x8000
-struct __packed __cacheline_aligned tss_struct {
+struct __packed tss64 {
     uint32_t :32;
     uint64_t rsp0, rsp1, rsp2;
     uint64_t :64;
@@ -422,9 +422,11 @@ struct __packed __cacheline_aligned tss_
     uint64_t ist[7];
     uint64_t :64;
     uint16_t :16, bitmap;
-    /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
-    uint8_t __cacheline_filler[24];
 };
+struct tss_page {
+    struct tss64 __aligned(PAGE_SIZE) tss;
+};
+DECLARE_PER_CPU(struct tss_page, tss_page);
#define IST_NONE 0UL
 #define IST_DF   1UL
@@ -463,7 +465,6 @@ static inline void disable_each_ist(idt_
 extern idt_entry_t idt_table[];
 extern idt_entry_t *idt_tables[];
-DECLARE_PER_CPU(struct tss_struct, init_tss);
 DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
extern void write_ptbase(struct vcpu *v);
