[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 3/6] x86: use PDEP for PTE flags insertion when available



This allows folding 5 instructions into a single one, reducing code size
quite a bit, especially when not considering the fallback functions
(which won't ever need to be brought into iCache or their mappings into
iTLB on systems supporting BMI2).

Make use of gcc's new V operand modifier, even if that results in a
slightly odd dependency in the sources (but I also didn't want to
introduce yet another manifest constant).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Avoid quoted symbols; use gcc's new V operand modifier instead.
    Re-base.
---
TBD: Also change get_pte_flags() (after having introduced test_pte_flags())?

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -224,6 +224,12 @@ void init_or_livepatch apply_alternative
         /* 0xe8/0xe9 are relative branches; fix the offset. */
         if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
             *(int32_t *)(buf + 1) += repl - orig;
+        /* RIP-relative addressing is easy to check for in VEX-encoded insns. */
+        else if ( a->repl_len >= 8 &&
+                  (*buf & ~1) == 0xc4 &&
+                  a->repl_len >= 9 - (*buf & 1) &&
+                  (buf[4 - (*buf & 1)] & ~0x38) == 0x05 )
+            *(int32_t *)(buf + 5 - (*buf & 1)) += repl - orig;
 
         add_nops(buf + a->repl_len, total_len - a->repl_len);
         text_poke(orig, buf, total_len);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -391,6 +391,15 @@ void __init arch_init_memory(void)
 #endif
 }
 
+const intpte_t pte_flags_mask = ~(PADDR_MASK & PAGE_MASK);
+
+#ifndef CONFIG_INDIRECT_THUNK /* V modifier unavailable? */
+intpte_t put_pte_flags_v(unsigned int flags)
+{
+    return put_pte_flags_c(flags);
+}
+#endif
+
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
 {
     uint64_t maddr = pfn_to_paddr(mfn);
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -66,6 +66,7 @@ SECTIONS
         _stext = .;            /* Text and read-only data */
        *(.text)
        *(.text.__x86_indirect_thunk_*)
+       *(.gnu.linkonce.t.*)
        *(.text.page_aligned)
 
        . = ALIGN(PAGE_SIZE);
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -34,6 +34,9 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/alternative.h>
+#include <asm/asm_defns.h>
+#include <asm/cpufeature.h>
 #include <asm/types.h>
 
 #include <xen/pdx.h>
@@ -123,15 +126,53 @@ typedef l4_pgentry_t root_pgentry_t;
 
 /* Extract flags into 24-bit integer, or turn 24-bit flags into a pte mask. */
 #ifndef __ASSEMBLY__
+extern const intpte_t pte_flags_mask;
+intpte_t __attribute_const__ put_pte_flags_v(unsigned int x);
+
 static inline unsigned int get_pte_flags(intpte_t x)
 {
     return ((x >> 40) & ~0xfff) | (x & 0xfff);
 }
 
-static inline intpte_t put_pte_flags(unsigned int x)
+static inline intpte_t put_pte_flags_c(unsigned int x)
 {
     return (((intpte_t)x & ~0xfff) << 40) | (x & 0xfff);
 }
+
+static always_inline intpte_t put_pte_flags(unsigned int x)
+{
+    intpte_t pte;
+
+    if ( __builtin_constant_p(x) )
+        return put_pte_flags_c(x);
+
+#ifdef CONFIG_INDIRECT_THUNK /* V modifier available? */
+#define SYMNAME(pfx...) #pfx "put_pte_flags_%V[pte]_%V[flags]"
+    alternative_io("call " SYMNAME() "\n\t"
+                   LINKONCE_PROLOGUE(SYMNAME) "\n\t"
+                   "mov %[flags], %k[pte]\n\t"
+                   "and $0xfff000, %[flags]\n\t"
+                   "and $0x000fff, %k[pte]\n\t"
+                   "shl $40, %q[flags]\n\t"
+                   "or %q[flags], %[pte]\n\t"
+                   "ret\n\t"
+                   LINKONCE_EPILOGUE(SYMNAME),
+                   "pdep %[mask], %q[flags], %[pte]", X86_FEATURE_BMI2,
+                   ASM_OUTPUT2([pte] "=&r" (pte), [flags] "+r" (x)),
+                   [mask] "m" (pte_flags_mask));
+#undef SYMNAME
+#else
+    alternative_io("call put_pte_flags_v",
+                   /* pdep pte_flags_mask(%rip), %rdi, %rax */
+                   ".byte 0xc4, 0xe2, 0xc3, 0xf5, 0x05\n\t"
+                   ".long pte_flags_mask - 4 - .",
+                   X86_FEATURE_BMI2,
+                   ASM_OUTPUT2("=a" (pte), "+D" (x)), "m" (pte_flags_mask)
+                   : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11");
+#endif
+
+    return pte;
+}
 #endif
 
 /*
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -187,6 +187,20 @@ void ret_from_intr(void);
         UNLIKELY_END_SECTION "\n"          \
         ".Llikely." #tag ".%=:"
 
+#define LINKONCE_PROLOGUE(sym)                    \
+        ".ifndef " sym() "\n\t"                   \
+        ".pushsection " sym(.gnu.linkonce.t.) "," \
+                      "\"ax\",@progbits\n\t"      \
+        ".p2align 4\n"                            \
+        sym() ":"
+
+#define LINKONCE_EPILOGUE(sym)                    \
+        ".weak " sym() "\n\t"                     \
+        ".type " sym() ", @function\n\t"          \
+        ".size " sym() ", . - " sym() "\n\t"      \
+        ".popsection\n\t"                         \
+        ".endif"
+
 #endif
 
 /* "Raw" instruction opcodes */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.