[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC] x86: slightly reduce RSB overwrite overhead



Avoiding the PAUSE/LFENCE pair on the first of the two unrolled
iterations not only reduces code size (the NOP padding reserved for the
sequence shrinks from 40 to 35 bytes), but also improves performance
according to my (limited) measurements (by 15-20 cycles on my Westmere;
granted, I can't really explain why that is). Now that the two
iterations are sufficiently different, remove the use of .irp; otherwise
an .if would need to be added to its body.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC because the measured improvement presumably wants verification on
other CPU models.

--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -70,7 +70,7 @@
 #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
 #define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
 #define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
-#define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
+#define ASM_NOP35 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP3
 
 #define ASM_NOP_MAX 9
 
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -101,16 +101,16 @@
 
 .L\@_fill_rsb_loop:
 
-    .irp n, 1, 2                    /* Unrolled twice. */
-    call .L\@_insert_rsb_entry_\n   /* Create an RSB entry. */
+    call .L\@_insert_rsb_entry1     /* Create an RSB entry. */
+    jmp .L\@_capture_speculation    /* Capture rogue speculation. */
+.L\@_insert_rsb_entry1:
 
-.L\@_capture_speculation_\n:
+    call .L\@_insert_rsb_entry2     /* Create another RSB entry. */
+.L\@_capture_speculation:
     pause
     lfence
-    jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */
-
-.L\@_insert_rsb_entry_\n:
-    .endr
+    jmp .L\@_capture_speculation    /* Capture rogue speculation. */
+.L\@_insert_rsb_entry2:
 
     sub $1, %ecx
     jnz .L\@_fill_rsb_loop
@@ -216,7 +216,7 @@
 
 /* Use after a VMEXIT from an HVM guest. */
 #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
-    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+    ALTERNATIVE __stringify(ASM_NOP35),                                 \
         DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
         __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
@@ -228,7 +228,7 @@
 
 /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
 #define SPEC_CTRL_ENTRY_FROM_PV                                         \
-    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+    ALTERNATIVE __stringify(ASM_NOP35),                                 \
         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP21),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
@@ -239,7 +239,7 @@
 
 /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
 #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
-    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+    ALTERNATIVE __stringify(ASM_NOP35),                                 \
         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP29),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.