[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/3] x86/alternatives: Rework get_ideal_nops()



The {k8,p6}_nops[] arrays are both 80-byte structures indexing 45-byte
structures.  Furthermore, perhaps unusually for C, the source layout is an
obvious hint about the trangular nature of the structure.

Therefore, we can replace the pointer chase with some simple arithmatic.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>

The implemenation of get_ideal_nops() changes from:

    mov    0x19bc41(%rip),%rax        # <ideal_nops>
    mov    %edi,%edi
    mov    (%rax,%rdi,8),%rax
    jmp    <__x86_return_thunk>

to:

    lea    -0x1(%rdi),%eax
    imul   %edi,%eax
    shr    %eax
    add    0x67fc1(%rip),%rax        # <ideal_nops>
    jmp    <__x86_return_thunk>

The imul has a latency of 3 cycles on all CPUs back to the K8 and Nehalem.
It's better than an extra deference on all CPUs, even the older ones.
---
 xen/arch/x86/alternative.c | 40 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index cc2d0c89aca3..ff7d83c0ddbd 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -20,7 +20,7 @@
 #define MAX_PATCH_LEN (255-1)
 
 #ifdef K8_NOP1
-static const unsigned char k8nops[] init_or_livepatch_const = {
+static const unsigned char k8_nops[] init_or_livepatch_const = {
     K8_NOP1,
     K8_NOP2,
     K8_NOP3,
@@ -31,22 +31,10 @@ static const unsigned char k8nops[] init_or_livepatch_const 
= {
     K8_NOP8,
     K8_NOP9,
 };
-static const unsigned char * const k8_nops[ASM_NOP_MAX+1] 
init_or_livepatch_constrel = {
-    NULL,
-    k8nops,
-    k8nops + 1,
-    k8nops + 1 + 2,
-    k8nops + 1 + 2 + 3,
-    k8nops + 1 + 2 + 3 + 4,
-    k8nops + 1 + 2 + 3 + 4 + 5,
-    k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
 #endif
 
 #ifdef P6_NOP1
-static const unsigned char p6nops[] init_or_livepatch_const = {
+static const unsigned char p6_nops[] init_or_livepatch_const = {
     P6_NOP1,
     P6_NOP2,
     P6_NOP3,
@@ -57,21 +45,9 @@ static const unsigned char p6nops[] init_or_livepatch_const 
= {
     P6_NOP8,
     P6_NOP9,
 };
-static const unsigned char * const p6_nops[ASM_NOP_MAX+1] 
init_or_livepatch_constrel = {
-    NULL,
-    p6nops,
-    p6nops + 1,
-    p6nops + 1 + 2,
-    p6nops + 1 + 2 + 3,
-    p6nops + 1 + 2 + 3 + 4,
-    p6nops + 1 + 2 + 3 + 4 + 5,
-    p6nops + 1 + 2 + 3 + 4 + 5 + 6,
-    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
 #endif
 
-static const unsigned char * const *ideal_nops init_or_livepatch_data = 
p6_nops;
+static const unsigned char *ideal_nops init_or_livepatch_data = p6_nops;
 
 #ifdef HAVE_AS_NOPS_DIRECTIVE
 
@@ -86,9 +62,17 @@ static bool init_or_livepatch_read_mostly 
toolchain_nops_are_ideal;
 # define toolchain_nops_are_ideal false
 #endif
 
+/*
+ * Both k8_nops[] and p6_nops[] are flattened triangular data structures,
+ * making the offsets easy to calculate.
+ *
+ * To get the start of NOP $N, we want to calculate T($N - 1)
+ */
 static const unsigned char *get_ideal_nops(unsigned int noplen)
 {
-    return ideal_nops[noplen];
+    unsigned int offset = ((noplen - 1) * noplen) / 2;
+
+    return &ideal_nops[offset];
 }
 
 static void __init arch_init_ideal_nops(void)
-- 
2.39.5




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.