
[PATCH RFC 18/43] x86/percpu: Use PC-relative addressing for percpu variable references



For a PIE binary, all symbol references are PC-relative, including
references to percpu variables. So to stay compatible with PIE, add the
%rip suffix in the percpu assembly macros when PIE is enabled. Call
sites that applied a field offset outside the macro, e.g.
PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask, are changed
to pass the whole expression to the macro so that the offset ends up
inside the parentheses, ahead of the (%rip) suffix. However, relocation
of percpu variable references is still broken for PIE; it will be fixed
by a later patch.
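
As an illustration (a rough expansion sketch, not output copied from an
actual build), a reference such as

	movq	PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx

expands to the absolute form

	movq	%gs:(pcpu_hot + X86_current_task), %rcx

without PIE, and to the RIP-relative form

	movq	%gs:(pcpu_hot + X86_current_task)(%rip), %rcx

when CONFIG_X86_PIE is enabled.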

Suggested-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Cc: Thomas Garnier <thgarnie@xxxxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
---
 arch/x86/entry/calling.h             | 17 ++++++++++++----
 arch/x86/include/asm/nospec-branch.h | 10 +++++-----
 arch/x86/include/asm/percpu.h        | 29 +++++++++++++++++++++++++---
 arch/x86/kernel/head_64.S            |  2 +-
 arch/x86/kernel/kvm.c                | 21 ++++++++++++++++----
 arch/x86/lib/cmpxchg16b_emu.S        |  8 ++++----
 arch/x86/xen/xen-asm.S               | 10 +++++-----
 7 files changed, 71 insertions(+), 26 deletions(-)
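
Note on the __percpu_stable_arg change in percpu.h below (the asm lines
are an illustrative sketch of the expected compiler output, not taken
from a real build): with GCC, the 'P' operand modifier prints the bare
symbol and keeps the reference absolute, e.g.

	movq	%gs:pcpu_hot, %rax

while the 'a' modifier prints the operand as an address, which becomes
RIP-relative under PIE:

	movq	%gs:pcpu_hot(%rip), %rax

Clang does not support the 'a' modifier, but its 'P' modifier already
allows RIP-relative addressing, so the 'a' form is only used for GCC
builds with CONFIG_X86_PIE.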

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f6907627172b..11328578741d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -173,7 +173,7 @@ For 32-bit we have the following conventions - kernel is built with
 .endm
 
 #define THIS_CPU_user_pcid_flush_mask   \
-       PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+       PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
 
 .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
@@ -370,8 +370,8 @@ For 32-bit we have the following conventions - kernel is built with
 .endm
 
 .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+       GET_PERCPU_BASE \scratch_reg \save_reg
        rdgsbase \save_reg
-       GET_PERCPU_BASE \scratch_reg
        wrgsbase \scratch_reg
 .endm
 
@@ -407,15 +407,24 @@ For 32-bit we have the following conventions - kernel is built with
  * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
  * while running KVM's run loop.
  */
-.macro GET_PERCPU_BASE reg:req
+#ifdef CONFIG_X86_PIE
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
+       LOAD_CPU_AND_NODE_SEG_LIMIT \reg
+       andq    $VDSO_CPUNODE_MASK, \reg
+       leaq    __per_cpu_offset(%rip), \scratch_reg
+       movq    (\scratch_reg, \reg, 8), \reg
+.endm
+#else
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
        LOAD_CPU_AND_NODE_SEG_LIMIT \reg
        andq    $VDSO_CPUNODE_MASK, \reg
        movq    __per_cpu_offset(, \reg, 8), \reg
 .endm
+#endif /* CONFIG_X86_PIE */
 
 #else
 
-.macro GET_PERCPU_BASE reg:req
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
        movq    pcpu_unit_offsets(%rip), \reg
 .endm
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index edb2b0cb8efe..d8fd935e0697 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,13 +59,13 @@
 
 #ifdef CONFIG_CALL_THUNKS_DEBUG
 # define CALL_THUNKS_DEBUG_INC_CALLS                           \
-       incq    %gs:__x86_call_count;
+       incq    %gs:(__x86_call_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_RETS                            \
-       incq    %gs:__x86_ret_count;
+       incq    %gs:(__x86_ret_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_STUFFS                          \
-       incq    %gs:__x86_stuffs_count;
+       incq    %gs:(__x86_stuffs_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_CTXSW                           \
-       incq    %gs:__x86_ctxsw_count;
+       incq    %gs:(__x86_ctxsw_count)__percpu_rel;
 #else
 # define CALL_THUNKS_DEBUG_INC_CALLS
 # define CALL_THUNKS_DEBUG_INC_RETS
@@ -95,7 +95,7 @@
        CALL_THUNKS_DEBUG_INC_CALLS
 
 #define INCREMENT_CALL_DEPTH                                   \
-       sarq    $5, %gs:pcpu_hot + X86_call_depth;              \
+       sarq    $5, %gs:(pcpu_hot + X86_call_depth)__percpu_rel;\
        CALL_THUNKS_DEBUG_INC_CALLS
 
 #define ASM_INCREMENT_CALL_DEPTH                               \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63ed55e..a627a073c6ea 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,16 +4,26 @@
 
 #ifdef CONFIG_X86_64
 #define __percpu_seg           gs
+#ifdef CONFIG_X86_PIE
+#define __percpu_rel           (%rip)
+#else
+#define __percpu_rel
+#endif /* CONFIG_X86_PIE */
 #else
 #define __percpu_seg           fs
+#define __percpu_rel
 #endif
 
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_SMP
-#define PER_CPU_VAR(var)       %__percpu_seg:var
+/* Compatible with Position Independent Code */
+#define PER_CPU_VAR(var)       %__percpu_seg:(var)##__percpu_rel
+/* Rare absolute reference */
+#define PER_CPU_VAR_ABS(var)   %__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU_VAR(var)       var
+#define PER_CPU_VAR(var)       (var)##__percpu_rel
+#define PER_CPU_VAR_ABS(var)   var
 #endif /* SMP */
 
 #ifdef CONFIG_X86_64_SMP
@@ -148,10 +158,23 @@ do {                                                                    \
        (typeof(_var))(unsigned long) pfo_val__;                        \
 })
 
+/*
+ * Position Independent code uses relative addresses only.
+ * The 'P' modifier prevents RIP-relative addressing in GCC,
+ * so use the 'a' modifier instead. However, Clang's 'P'
+ * modifier allows RIP-relative addressing, and Clang doesn't
+ * support the 'a' modifier.
+ */
+#if defined(CONFIG_X86_PIE) && defined(CONFIG_CC_IS_GCC)
+#define __percpu_stable_arg    __percpu_arg(a[var])
+#else
+#define __percpu_stable_arg    __percpu_arg(P[var])
+#endif
+
 #define percpu_stable_op(size, op, _var)                               \
 ({                                                                     \
        __pcpu_type_##size pfo_val__;                                   \
-       asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")       \
+       asm(__pcpu_op2_##size(op, __percpu_stable_arg, "%[val]")        \
            : [val] __pcpu_reg_##size("=", pfo_val__)                   \
            : [var] "p" (&(_var)));                                     \
        (typeof(_var))(unsigned long) pfo_val__;                        \
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 61f1873d0ff7..1eed50b7d1ac 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -396,7 +396,7 @@ SYM_CODE_START(start_cpu0)
        UNWIND_HINT_END_OF_STACK
 
        /* Find the idle task stack */
-       movq    PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+       movq    PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
        movq    TASK_threadsp(%rcx), %rsp
 
        jmp     .Ljump_to_C_code
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1cceac5984da..32d7b201f4f0 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -794,14 +794,27 @@ PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
 
 extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
 
+#ifndef CONFIG_X86_PIE
+#define KVM_CHECK_VCPU_PREEMPTED                       \
+       "movq   __per_cpu_offset(,%rdi,8), %rax;"       \
+       "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+#else
+#define KVM_CHECK_VCPU_PREEMPTED                       \
+       "pushq  %rdi;"                                  \
+       "leaq   __per_cpu_offset(%rip), %rax;"          \
+       "movq   (%rax,%rdi,8), %rax;"                   \
+       "leaq   steal_time(%rip), %rdi;"                \
+       "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "(%rax, %rdi, 1);" \
+       "popq   %rdi;"
+#endif
+
 /*
  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
  * restoring to/from the stack.
  */
-#define PV_VCPU_PREEMPTED_ASM                                               \
- "movq   __per_cpu_offset(,%rdi,8), %rax\n\t"                               \
- "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
- "setne  %al\n\t"
+#define PV_VCPU_PREEMPTED_ASM          \
+       KVM_CHECK_VCPU_PREEMPTED        \
+       "setne  %al\n\t"
 
 DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
                    PV_VCPU_PREEMPTED_ASM, .text);
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0160ea..891c5e9fd868 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -27,13 +27,13 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
        pushfq
        cli
 
-       cmpq PER_CPU_VAR((%rsi)), %rax
+       cmpq PER_CPU_VAR_ABS((%rsi)), %rax
        jne .Lnot_same
-       cmpq PER_CPU_VAR(8(%rsi)), %rdx
+       cmpq PER_CPU_VAR_ABS(8(%rsi)), %rdx
        jne .Lnot_same
 
-       movq %rbx, PER_CPU_VAR((%rsi))
-       movq %rcx, PER_CPU_VAR(8(%rsi))
+       movq %rbx, PER_CPU_VAR_ABS((%rsi))
+       movq %rcx, PER_CPU_VAR_ABS(8(%rsi))
 
        popfq
        mov $1, %al
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..448958ddbaf8 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -28,7 +28,7 @@
  * non-zero.
  */
 SYM_FUNC_START(xen_irq_disable_direct)
-       movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+       movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
        RET
 SYM_FUNC_END(xen_irq_disable_direct)
 
@@ -69,7 +69,7 @@ SYM_FUNC_END(check_events)
 SYM_FUNC_START(xen_irq_enable_direct)
        FRAME_BEGIN
        /* Unmask events */
-       movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+       movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 
        /*
         * Preempt here doesn't matter because that will deal with any
@@ -78,7 +78,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
         */
 
        /* Test for pending */
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+       testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
        jz 1f
 
        call check_events
@@ -97,7 +97,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
  * x86 use opposite senses (mask vs enable).
  */
 SYM_FUNC_START(xen_save_fl_direct)
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+       testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
        setz %ah
        addb %ah, %ah
        RET
@@ -113,7 +113,7 @@ SYM_FUNC_END(xen_read_cr2);
 
 SYM_FUNC_START(xen_read_cr2_direct)
        FRAME_BEGIN
-       _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+       _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
        FRAME_END
        RET
 SYM_FUNC_END(xen_read_cr2_direct);
-- 
2.31.1