
RE: [PATCH RESEND v9 33/36] KVM: VMX: Add VMX_DO_FRED_EVENT_IRQOFF for IRQ/NMI handling



> > Jumping way back to providing a wrapper for FRED, if we do that, then
> > there's no need to include calling.h, and the weird wrinkle about the
> > ERET target kinda goes away too.  E.g. provide this in
> > arch/x86/entry/entry_64_fred.S
> >
> >     .section .text, "ax"
> >
> > /* Note, this is instrumentation safe, and returns via RET, not ERETS! */
> > #if IS_ENABLED(CONFIG_KVM_INTEL)
> > SYM_CODE_START(fred_irq_entry_from_kvm)
> >     FRED_ENTER
> >     call external_interrupt
> >     FRED_EXIT
> >     RET
> > SYM_CODE_END(fred_irq_entry_from_kvm)
> > EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
> > #endif
> >
> > and then the KVM side for this particular chunk is more simply:
> >
> >     lea 1f(%rip), %rax
> >     push %rax
> >     push $0         /* FRED error code, 0 for NMI and external interrupts */
> >
> >     \branch_insn \branch_target

The call instruction here inserts a return address between the FRED
stack frame just created and the GP regs subsequently pushed in the FRED
wrapper, so the frame no longer matches the pt_regs layout and this
approach doesn't work.  I only realized it after looking at the stack
layout in the Intel Simics emulator.
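
For reference, a rough sketch of the resulting layout (top of stack at the
bottom; this is from reading the code, not copied from the emulator dump):

        +------------------------------------------------------------+
        | FRED stack frame built on the KVM side (SS ... RIP)        |
        | Error code (0), also pushed on the KVM side                |
        | Return address pushed implicitly by the CALL               |
        | GP regs pushed by PUSH_AND_CLEAR_REGS in the wrapper       |
        +------------------------------------------------------------+

i.e. everything above the GP regs is shifted by 8 bytes from where
external_interrupt() expects to find it in pt_regs.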

> > 1:
> >     VMX_DO_EVENT_FRAME_END
> >     RET
>
> > Alternatively, the whole thing could be shoved into
> > arch/x86/entry/entry_64_fred.S, but at a glance I don't think that
> > would be a net positive due to the need to handle IRQs vs. NMIs.

But following your idea of "the whole thing could be shoved into ...",
plus the patch you posted, I ended up with the patch below, which works
and gets rid of the two "with modified stack frame" warnings.

After calling external_interrupt() and POP_REGS, we can also use ERETS,
which unifies the stack adjustment: the synthesized FRED stack frame is
popped the same way a real FRED event frame would be.

diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index de9e2dc70e40..2b638914c6ed 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -4,12 +4,87 @@
  */
 
 #include <asm/asm.h>
+#include <asm/export.h>
 #include <asm/fred.h>
+#include <asm/segment.h>
 
 #include "calling.h"
 
        .code64
 
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+.macro FRED_ENTRY_FROM_KVM event_func nmi=0
+       ENDBR
+#ifdef CONFIG_X86_FRED
+       push %rbp
+       mov %rsp, %rbp
+
+       /*
+        * Don't check the FRED stack level, the call stack leading to this
+        * helper is effectively constant and shallow (relatively speaking).
+        *
+        * Emulate the FRED-defined redzone and stack alignment.
+        */
+       sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+       and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+       /*
+        * Start to push a FRED stack frame, which is always 64 bytes:
+        *
+        * +--------+-----------------+
+        * | Bytes  | Usage           |
+        * +--------+-----------------+
+        * | 63:56  | Reserved        |
+        * | 55:48  | Event Data      |
+        * | 47:40  | SS + Event Info |
+        * | 39:32  | RSP             |
+        * | 31:24  | RFLAGS          |
+        * | 23:16  | CS + Aux Info   |
+        * |  15:8  | RIP             |
+        * |   7:0  | Error Code      |
+        * +--------+-----------------+
+        */
+       push $0                                 /* Reserved, must be 0 */
+       push $0                                 /* Event data, 0 for IRQ/NMI */
+
+       shl $32, %rdi                           /* Event type and vector */
+       .if \nmi
+       bts $FRED_SSX_NMI_BIT, %rdi
+       .endif
+       bts $FRED_SSX_64_BIT_MODE_BIT, %rdi
+       or $__KERNEL_DS, %rdi
+       push %rdi
+       push %rbp
+       pushf
+       mov $__KERNEL_CS, %rax
+       push %rax
+
+       /*
+        * Unlike the IDT event delivery, FRED _always_ pushes an error code
+        * after pushing the return RIP, thus the CALL instruction CANNOT be
+        * used here to push the return RIP, otherwise there is no chance to
+        * push an error code before invoking the IRQ/NMI handler.
+        *
+        * Use LEA to get the return RIP and push it, then push an error code.
+        */
+       lea 1f(%rip), %rax
+       push %rax                               /* Return RIP */
+       push $0                                 /* Error code, 0 for IRQ/NMI */
+
+       PUSH_AND_CLEAR_REGS
+       movq %rsp, %rdi                         /* %rdi -> pt_regs */
+       call \event_func
+       POP_REGS
+       ERETS
+1:
+       pop %rbp
+       RET
+#else /* CONFIG_X86_FRED */
+       ud2
+#endif /* CONFIG_X86_FRED */
+.endm
+#endif /* CONFIG_KVM_INTEL */
+
 .macro FRED_ENTER
        UNWIND_HINT_END_OF_STACK
        ENDBR
@@ -22,6 +97,16 @@
        POP_REGS
 .endm
 
+       .section .text, "ax"
+
+/* Note, this is instrumentation safe, and returns via RET */
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_CODE_START(fred_irq_entry_from_kvm)
+       FRED_ENTRY_FROM_KVM external_interrupt
+SYM_CODE_END(fred_irq_entry_from_kvm)
+EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
+#endif
+
        .section .noinstr.text, "ax"
 
 /*
@@ -55,3 +140,10 @@ SYM_CODE_START_NOALIGN(fred_entrypoint_kernel)
        FRED_EXIT
        ERETS
 SYM_CODE_END(fred_entrypoint_kernel)
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_CODE_START(fred_nmi_entry_from_kvm)
+       FRED_ENTRY_FROM_KVM fred_exc_nmi nmi=1
+SYM_CODE_END(fred_nmi_entry_from_kvm)
+EXPORT_SYMBOL_GPL(fred_nmi_entry_from_kvm);
+#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 3c91f0eae62e..c7288213de14 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -122,6 +122,10 @@ DECLARE_FRED_HANDLER(fred_exc_double_fault);
 extern asmlinkage __visible void fred_entrypoint_user(void);
 extern asmlinkage __visible void fred_entrypoint_kernel(void);
 
+/* For KVM VMX to handle IRQs/NMIs in IRQ/NMI induced VM exits */
+extern asmlinkage __visible void fred_irq_entry_from_kvm(unsigned int event_info);
+extern asmlinkage __visible void fred_nmi_entry_from_kvm(unsigned int event_info);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_X86_FRED */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index be275a0410a8..f6f557c6329e 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -40,6 +40,20 @@
        push %_ASM_BP
        mov %_ASM_SP, %_ASM_BP
 
+       /*
+        * Start to push an IDT IRQ/NMI stack frame, which is 40 bytes on
+        * x86_64 and 24 bytes on x86_32:
+        *
+        * +-------+-------------------+
+        * | Bytes | Usage             |
+        * +-------+-------------------+
+        * | 39:32 | SS (x86_64 only)  |
+        * | 31:24 | RSP (x86_64 only) |
+        * | 23:16 | RFLAGS            |
+        * |  15:8 | CS                |
+        * |   7:0 | RIP               |
+        * +-------+-------------------+
+        */
 #ifdef CONFIG_X86_64
        /*
         * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index df461f387e20..83c89239fcff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6961,14 +6961,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
        u32 intr_info = vmx_get_intr_info(vcpu);
        unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-       gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
        if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
            "unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
 
        kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-       vmx_do_interrupt_irqoff(gate_offset(desc));
+       if (cpu_feature_enabled(X86_FEATURE_FRED))
+               fred_irq_entry_from_kvm((EVENT_TYPE_HWINT << 16) | vector);
+       else
+               vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base 
+ vector));
        kvm_after_interrupt(vcpu);
 
        vcpu->arch.at_instruction_boundary = true;
@@ -7254,7 +7256,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
        if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
            is_nmi(vmx_get_intr_info(vcpu))) {
                kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-               vmx_do_nmi_irqoff();
+               if (cpu_feature_enabled(X86_FEATURE_FRED))
+                       fred_nmi_entry_from_kvm((EVENT_TYPE_NMI << 16) | NMI_VECTOR);
+               else
+                       vmx_do_nmi_irqoff();
                kvm_after_interrupt(vcpu);
        }
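
For completeness, here is roughly how the single event_info argument of the
two new helpers is consumed (based on my reading of the FRED spec, so the
exact bit positions are worth double-checking): KVM passes
(EVENT_TYPE_HWINT << 16) | vector, or (EVENT_TYPE_NMI << 16) | NMI_VECTOR,
and FRED_ENTRY_FROM_KVM shifts it left by 32 before ORing in __KERNEL_DS and
setting the NMI/64-bit mode bits, so within the "SS + Event Info" qword of
the synthesized FRED stack frame:

        +--------+---------------------------------+
        |  Bits  | Contents                        |
        +--------+---------------------------------+
        |  51:48 | Event type (from event_info)    |
        |  39:32 | Event vector (from event_info)  |
        |   15:0 | SS, i.e. __KERNEL_DS            |
        +--------+---------------------------------+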
