[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 1/4] x86: use PDEP/PEXT for maddr/direct-map-offset conversion when available



This allows folding 6 instructions into a single one, reducing code size
quite a bit, especially when not considering the fallback functions
(which won't ever need to be brought into iCache or their mappings into
iTLB on systems supporting BMI2).

Make use of gcc's new V operand modifier, even if that results in a
slightly odd dependency in the sources (but I also didn't want to
introduce yet another manifest constant). This assumes that support for
this relatively new modifier has only been backported to toolchains whose
binutils (gas) version is not too old. If this turns out to be a false
assumption, we'll have to add HAVE_AS_BMI2 as a qualifier.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v4: "Bodge" alternative_io() indentation here so that it'll come out
    right without re-indenting after patch 3. Add comment to
    LINKONCE_{PRO,EPI}LOGUE().
v3: Move infrastructure pieces here from "x86: use PDEP for PTE flags
    insertion when available".
v2: Avoid quoted symbols; use gcc's new V operand modifier instead.
    Re-base.

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -243,6 +243,12 @@ void init_or_livepatch apply_alternative
         /* 0xe8/0xe9 are relative branches; fix the offset. */
         if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
             *(int32_t *)(buf + 1) += repl - orig;
+        /* RIP-relative addressing is easy to check for in VEX-encoded insns. */
+        else if ( a->repl_len >= 8 &&
+                  (*buf & ~1) == 0xc4 &&
+                  a->repl_len >= 9 - (*buf & 1) &&
+                  (buf[4 - (*buf & 1)] & ~0x38) == 0x05 )
+            *(int32_t *)(buf + 5 - (*buf & 1)) += repl - orig;
 
         add_nops(buf + a->repl_len, total_len - a->repl_len);
         text_poke(orig, buf, total_len);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -390,6 +390,25 @@ void __init arch_init_memory(void)
 #endif
 }
 
+paddr_t __read_mostly ma_real_mask = ~0UL;
+
+#ifndef CONFIG_INDIRECT_THUNK /* V modifier unavailable? */
+
+/* Conversion between machine address and direct map offset. */
+paddr_t do2ma(unsigned long off)
+{
+    return (off & ma_va_bottom_mask) |
+           ((off << pfn_pdx_hole_shift) & ma_top_mask);
+}
+
+unsigned long ma2do(paddr_t ma)
+{
+    return (ma & ma_va_bottom_mask) |
+           ((ma & ma_top_mask) >> pfn_pdx_hole_shift);
+}
+
+#endif
+
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
 {
     uint64_t maddr = pfn_to_paddr(mfn);
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -450,6 +450,8 @@ void __init srat_parse_regions(u64 addr)
        }
 
        pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
+
+       ma_real_mask = ma_top_mask | ma_va_bottom_mask;
 }
 
 /* Use the information discovered above to actually set up the nodes. */
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -66,6 +66,7 @@ SECTIONS
         _stext = .;            /* Text and read-only data */
        *(.text)
        *(.text.__x86_indirect_thunk_*)
+       *(.gnu.linkonce.t.*)
        *(.text.page_aligned)
 
        . = ALIGN(PAGE_SIZE);
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -186,6 +186,24 @@ void ret_from_intr(void);
         UNLIKELY_END_SECTION "\n"          \
         ".Llikely." #tag ".%=:"
 
+/*
+ * For both of the below, sym() must be a macro which takes an optional
+ * symbol name prefix.
+ */
+#define LINKONCE_PROLOGUE(sym)                    \
+        ".ifndef " sym() "\n\t"                   \
+        ".pushsection " sym(.gnu.linkonce.t.) "," \
+                      "\"ax\",@progbits\n\t"      \
+        ".p2align 4\n"                            \
+        sym() ":"
+
+#define LINKONCE_EPILOGUE(sym)                    \
+        ".weak " sym() "\n\t"                     \
+        ".type " sym() ", @function\n\t"          \
+        ".size " sym() ", . - " sym() "\n\t"      \
+        ".popsection\n\t"                         \
+        ".endif"
+
 #endif
 
 /* "Raw" instruction opcodes */
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -42,11 +42,18 @@ static inline unsigned long canonicalise
         return addr & ~CANONICAL_MASK;
 }
 
+#include <asm/alternative.h>
+#include <asm/asm_defns.h>
+#include <asm/cpufeature.h>
 #include <asm/types.h>
 
 #include <xen/pdx.h>
 
 extern unsigned long xen_virt_end;
+extern paddr_t ma_real_mask;
+
+paddr_t do2ma(unsigned long);
+unsigned long ma2do(paddr_t);
 
 /*
  * Note: These are solely for the use by page_{get,set}_owner(), and
@@ -57,8 +64,10 @@ extern unsigned long xen_virt_end;
 #define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \
                                    ((unsigned long)(pdx) << PAGE_SHIFT)))
 
-static inline unsigned long __virt_to_maddr(unsigned long va)
+static always_inline paddr_t __virt_to_maddr(unsigned long va)
 {
+    paddr_t ma;
+
     ASSERT(va < DIRECTMAP_VIRT_END);
     if ( va >= DIRECTMAP_VIRT_START )
         va -= DIRECTMAP_VIRT_START;
@@ -71,16 +80,77 @@ static inline unsigned long __virt_to_ma
 
         va += xen_phys_start - XEN_VIRT_START;
     }
-    return (va & ma_va_bottom_mask) |
-           ((va << pfn_pdx_hole_shift) & ma_top_mask);
+
+#ifdef CONFIG_INDIRECT_THUNK /* V modifier available? */
+#define SYMNAME(pfx...) #pfx "do2ma_%V[ma]_%V[off]"
+    alternative_io("call " SYMNAME() "\n\t"
+                     LINKONCE_PROLOGUE(SYMNAME) "\n\t"
+                     "mov %[shift], %%ecx\n\t"
+                     "mov %[off], %[ma]\n\t"
+                     "and %[bmask], %[ma]\n\t"
+                     "shl %%cl, %[off]\n\t"
+                     "and %[tmask], %[off]\n\t"
+                     "or %[off], %[ma]\n\t"
+                     "ret\n\t"
+                     LINKONCE_EPILOGUE(SYMNAME),
+                     "pdep %[mask], %[off], %[ma]", X86_FEATURE_BMI2,
+                     ASM_OUTPUT2([ma] "=&r" (ma), [off] "+r" (va)),
+                     [mask] "m" (ma_real_mask),
+                     [shift] "m" (pfn_pdx_hole_shift),
+                     [bmask] "m" (ma_va_bottom_mask),
+                     [tmask] "m" (ma_top_mask)
+                     : "ecx");
+#undef SYMNAME
+#else
+    alternative_io("call do2ma",
+                     /* pdep ma_real_mask(%rip), %rdi, %rax */
+                     ".byte 0xc4, 0xe2, 0xc3, 0xf5, 0x05\n\t"
+                     ".long ma_real_mask - 4 - .",
+                     X86_FEATURE_BMI2,
+                     ASM_OUTPUT2("=a" (ma), "+D" (va)), "m" (ma_real_mask)
+                     : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11");
+#endif
+
+    return ma;
 }
 
-static inline void *__maddr_to_virt(unsigned long ma)
+static always_inline void *__maddr_to_virt(paddr_t ma)
 {
+    unsigned long off;
+
     ASSERT(pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >> PAGE_SHIFT));
-    return (void *)(DIRECTMAP_VIRT_START +
-                    ((ma & ma_va_bottom_mask) |
-                     ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
+
+#ifdef CONFIG_INDIRECT_THUNK /* V modifier available? */
+#define SYMNAME(pfx...) #pfx "ma2do_%V[off]_%V[ma]"
+    alternative_io("call " SYMNAME() "\n\t"
+                     LINKONCE_PROLOGUE(SYMNAME) "\n\t"
+                     "mov %[tmask], %[off]\n\t"
+                     "mov %[shift], %%ecx\n\t"
+                     "and %[ma], %[off]\n\t"
+                     "and %[bmask], %[ma]\n\t"
+                     "shr %%cl, %[off]\n\t"
+                     "or %[ma], %[off]\n\t"
+                     "ret\n\t"
+                     LINKONCE_EPILOGUE(SYMNAME),
+                     "pext %[mask], %[ma], %[off]", X86_FEATURE_BMI2,
+                     ASM_OUTPUT2([off] "=&r" (off), [ma] "+r" (ma)),
+                     [mask] "m" (ma_real_mask),
+                     [shift] "m" (pfn_pdx_hole_shift),
+                     [bmask] "m" (ma_va_bottom_mask),
+                     [tmask] "m" (ma_top_mask)
+                     : "ecx");
+#undef SYMNAME
+#else
+    alternative_io("call ma2do",
+                     /* pext ma_real_mask(%rip), %rdi, %rax */
+                     ".byte 0xc4, 0xe2, 0xc2, 0xf5, 0x05\n\t"
+                     ".long ma_real_mask - 4 - .",
+                     X86_FEATURE_BMI2,
+                     ASM_OUTPUT2("=a" (off), "+D" (ma)), "m" (ma_real_mask)
+                     : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11");
+#endif
+
+    return (void *)DIRECTMAP_VIRT_START + off;
 }
 
 /* read access (should only be used for debug printk's) */




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.