[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 5/7] x86/alt: Support for automatic padding calculations



The correct amount of padding in an origin patch site can be calculated
automatically, based on the relative lengths of the replacements.

This requires a bit of trickery to calculate correctly, especially in the
ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
calculation is further complicated because GAS's idea of true is -1 rather
than 1, which is why the extra negations are required.

Additionally, have apply_alternatives() attempt to optimise the padding nops.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>

v2: Fix build with Clang.
---
 xen/arch/x86/Rules.mk                 |  4 +++
 xen/arch/x86/alternative.c            | 32 ++++++++++++++++---
 xen/include/asm-x86/alternative-asm.h | 60 +++++++++++++++++++++++++++++------
 xen/include/asm-x86/alternative.h     | 46 +++++++++++++++++++++------
 4 files changed, 120 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index 9897dea..e169d67 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -24,6 +24,10 @@ $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
                      -U__OBJECT_LABEL__ -DHAVE_GAS_QUOTED_SYM \
                      '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
 
+# GAS's idea of true is -1.  Clang's idea is 1
+$(call as-option-add,CFLAGS,CC,\
+    ".if ((1 > 0) < 0); .error \"\";.endif",,-DHAVE_AS_NEGATIVE_TRUE)
+
 CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables
 
 # Xen doesn't use SSE interally.  If the compiler supports it, also skip the
diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index 51ca53e..e24db84 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct 
alt_instr *start,
         uint8_t *orig = ALT_ORIG_PTR(a);
         uint8_t *repl = ALT_REPL_PTR(a);
         uint8_t buf[MAX_PATCH_LEN];
+        unsigned int total_len = a->orig_len + a->pad_len;
 
-        BUG_ON(a->repl_len > a->orig_len);
-        BUG_ON(a->orig_len > sizeof(buf));
+        BUG_ON(a->repl_len > total_len);
+        BUG_ON(total_len > sizeof(buf));
         BUG_ON(a->cpuid >= NCAPINTS * 32);
 
+        /* No replacement to make, but try to optimise any padding. */
         if ( !boot_cpu_has(a->cpuid) )
+        {
+            unsigned int i;
+
+            if ( a->pad_len <= 1 )
+                continue;
+
+            /* Search the padding area for any byte which isn't a nop. */
+            for ( i = a->orig_len; i < total_len; ++i )
+                if ( orig[i] != 0x90 )
+                    break;
+
+            /*
+             * Only make any changes if all padding bytes are unoptimised
+             * nops.  With multiple alternatives over the same origin site, we
+             * may have already made a replacement, or optimised the nops.
+             */
+            if ( i != total_len )
+                continue;
+
+            add_nops(buf, a->pad_len);
+            text_poke(orig + a->orig_len, buf, a->pad_len);
             continue;
+        }
 
         memcpy(buf, repl, a->repl_len);
 
@@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct 
alt_instr *start,
         if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
             *(int32_t *)(buf + 1) += repl - orig;
 
-        add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
-        text_poke(orig, buf, a->orig_len);
+        add_nops(buf + a->repl_len, total_len - a->repl_len);
+        text_poke(orig, buf, total_len);
     }
 }
 
diff --git a/xen/include/asm-x86/alternative-asm.h 
b/xen/include/asm-x86/alternative-asm.h
index 150bd1a..25f79fe 100644
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -9,30 +9,55 @@
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig repl feature orig_len repl_len
+.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
     .long \orig - .
     .long \repl - .
     .word \feature
     .byte \orig_len
     .byte \repl_len
+    .byte \pad_len
 .endm
 
 #define orig_len               (.L\@_orig_e       -     .L\@_orig_s)
+#define pad_len                (.L\@_orig_p       -     .L\@_orig_e)
+#define total_len              (.L\@_orig_p       -     .L\@_orig_s)
 #define repl_len(nr)           (.L\@_repl_e\()nr  -     .L\@_repl_s\()nr)
 #define decl_repl(insn, nr)     .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
 
+/* GAS's idea of true is -1, while Clang's idea is 1. */
+#ifdef HAVE_AS_NEGATIVE_TRUE
+# define as_true(x) (-(x))
+#else
+# define as_true(x) (x)
+#endif
+
+#define as_max(a, b)           ((a) ^ (((a) ^ (b)) & -as_true((a) < (b))))
+
 .macro ALTERNATIVE oldinstr, newinstr, feature
 .L\@_orig_s:
     \oldinstr
 .L\@_orig_e:
+    /*
+     * Calculate the difference in size between the replacement and original
+     * instructions, to derive how much padding to introduce.
+     */
+    .L\@_diff = repl_len(1) - orig_len
+
+    .skip as_true(.L\@_diff > 0) * .L\@_diff, 0x90
+.L\@_orig_p:
 
     .pushsection .altinstructions, "a", @progbits
     altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
-        orig_len, repl_len(1)
+        orig_len, repl_len(1), pad_len
 
     .section .discard, "a", @progbits
-    /* Assembler-time check that \newinstr isn't longer than \oldinstr. */
-    .byte 0xff + repl_len(1) - orig_len
+    /*
+     * Assembler-time checks:
+     *   - total_len <= 255
+     *   - \newinstr <= total_len
+     */
+    .byte total_len
+    .byte 0xff + repl_len(1) - total_len
 
     .section .altinstr_replacement, "ax", @progbits
 
@@ -45,18 +70,31 @@
 .L\@_orig_s:
     \oldinstr
 .L\@_orig_e:
+    /*
+     * Calculate the difference in size between the largest replacement and
+     * the original instructions, to derive how much padding to introduce.
+     */
+    .L\@_diff = as_max(repl_len(1), repl_len(2)) - orig_len
+
+     .skip as_true(.L\@_diff > 0) * .L\@_diff, 0x90
+.L\@_orig_p:
 
     .pushsection .altinstructions, "a", @progbits
 
     altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature1, \
-        orig_len, repl_len(1)
+        orig_len, repl_len(1), pad_len
     altinstruction_entry .L\@_orig_s, .L\@_repl_s2, \feature2, \
-        orig_len, repl_len(2)
+        orig_len, repl_len(2), pad_len
 
     .section .discard, "a", @progbits
-    /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. 
*/
-    .byte 0xff + repl_len(1) - orig_len
-    .byte 0xff + repl_len(2) - orig_len
+    /*
+     * Assembler-time checks:
+     *   - total_len <= 255
+     *   - \newinstr* <= total_len
+     */
+    .byte total_len
+    .byte 0xff + repl_len(1) - total_len
+    .byte 0xff + repl_len(2) - total_len
 
     .section .altinstr_replacement, "ax", @progbits
 
@@ -66,8 +104,12 @@
     .popsection
 .endm
 
+#undef as_max
+#undef as_true
 #undef decl_repl
 #undef repl_len
+#undef total_len
+#undef pad_len
 #undef orig_len
 
 #endif /* __ASSEMBLY__ */
diff --git a/xen/include/asm-x86/alternative.h 
b/xen/include/asm-x86/alternative.h
index bcad3ee..d53cea0 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -8,12 +8,13 @@
 #include <xen/stringify.h>
 #include <xen/types.h>
 
-struct alt_instr {
+struct __packed alt_instr {
     int32_t  orig_offset;   /* original instruction */
     int32_t  repl_offset;   /* offset to replacement instruction */
     uint16_t cpuid;         /* cpuid bit set for replacement */
     uint8_t  orig_len;      /* length of original instruction */
-    uint8_t  repl_len;      /* length of new instruction, <= instrlen */
+    uint8_t  repl_len;      /* length of new instruction */
+    uint8_t  pad_len;       /* length of build-time padding */
 };
 
 #define __ALT_PTR(a,f)      ((uint8_t *)((void *)&(a)->f + (a)->f))
@@ -26,43 +27,70 @@ extern void apply_alternatives(const struct alt_instr 
*start,
                                const struct alt_instr *end);
 extern void alternative_instructions(void);
 
-#define OLDINSTR(oldinstr) ".LXEN%=_orig_s:\n\t" oldinstr "\n.LXEN%=_orig_e:\n"
-
 #define alt_orig_len       "(.LXEN%=_orig_e - .LXEN%=_orig_s)"
+#define alt_pad_len        "(.LXEN%=_orig_p - .LXEN%=_orig_e)"
+#define alt_total_len      "(.LXEN%=_orig_p - .LXEN%=_orig_s)"
 #define alt_repl_s(num)    ".LXEN%=_repl_s"#num
 #define alt_repl_e(num)    ".LXEN%=_repl_e"#num
 #define alt_repl_len(num)  "(" alt_repl_e(num) " - " alt_repl_s(num) ")"
 
+/* GAS's idea of true is -1, while Clang's idea is 1. */
+#ifdef HAVE_AS_NEGATIVE_TRUE
+# define AS_TRUE "-"
+#else
+# define AS_TRUE ""
+#endif
+
+#define as_max(a, b) "(("a") ^ ((("a") ^ ("b")) & -("AS_TRUE"(("a") < 
("b")))))"
+
+#define OLDINSTR_1(oldinstr, n1)                                 \
+    ".LXEN%=_orig_s:\n\t" oldinstr "\n .LXEN%=_orig_e:\n\t"      \
+    ".LXEN%=_diff = "alt_repl_len(n1)"-"alt_orig_len"\n\t"       \
+    ".skip "AS_TRUE"(.LXEN%=_diff > 0) * .LXEN%=_diff, 0x90\n\t" \
+    ".LXEN%=_orig_p:\n\t"
+
+#define ALT_PADDING_LEN(n1, n2) /* max(repl lens) - orig len */ \
+    as_max(alt_repl_len(n1), alt_repl_len(n2))"-"alt_orig_len
+
+#define OLDINSTR_2(oldinstr, n1, n2)                             \
+    ".LXEN%=_orig_s:\n\t" oldinstr "\n .LXEN%=_orig_e:\n\t"      \
+    ".LXEN%=_diff = "ALT_PADDING_LEN(n1, n2)"\n\t"               \
+    ".skip "AS_TRUE"(.LXEN%=_diff > 0) * .LXEN%=_diff, 0x90\n\t" \
+    ".LXEN%=_orig_p:\n\t"
+
 #define ALTINSTR_ENTRY(feature, num)                                    \
         " .long .LXEN%=_orig_s - .\n"             /* label           */ \
         " .long " alt_repl_s(num)" - .\n"         /* new instruction */ \
         " .word " __stringify(feature) "\n"       /* feature bit     */ \
         " .byte " alt_orig_len "\n"               /* source len      */ \
-        " .byte " alt_repl_len(num) "\n"          /* replacement len */
+        " .byte " alt_repl_len(num) "\n"          /* replacement len */ \
+        " .byte " alt_pad_len "\n"                /* padding len     */
 
-#define DISCARD_ENTRY(num)                        /* repl <= orig */    \
-        " .byte 0xff + (" alt_repl_len(num) ") - (" alt_orig_len ")\n"
+#define DISCARD_ENTRY(num)                        /* repl <= total */   \
+        " .byte 0xff + (" alt_repl_len(num) ") - (" alt_total_len ")\n"
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)       /* replacement */     \
         alt_repl_s(num)":\n\t" newinstr "\n" alt_repl_e(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)                        \
-        OLDINSTR(oldinstr)                                              \
+        OLDINSTR_1(oldinstr, 1)                                         \
         ".pushsection .altinstructions, \"a\", @progbits\n"             \
         ALTINSTR_ENTRY(feature, 1)                                      \
         ".section .discard, \"a\", @progbits\n"                         \
+        ".byte " alt_total_len "\n" /* total_len <= 255 */              \
         DISCARD_ENTRY(1)                                                \
         ".section .altinstr_replacement, \"ax\", @progbits\n"           \
         ALTINSTR_REPLACEMENT(newinstr, 1)                               \
         ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
-        OLDINSTR(oldinstr)                                              \
+        OLDINSTR_2(oldinstr, 1, 2)                                      \
         ".pushsection .altinstructions, \"a\", @progbits\n"             \
         ALTINSTR_ENTRY(feature1, 1)                                     \
         ALTINSTR_ENTRY(feature2, 2)                                     \
         ".section .discard, \"a\", @progbits\n"                         \
+        ".byte " alt_total_len "\n" /* total_len <= 255 */              \
         DISCARD_ENTRY(1)                                                \
         DISCARD_ENTRY(2)                                                \
         ".section .altinstr_replacement, \"ax\", @progbits\n"           \
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.