|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 4/8] tools/hvmloader: Wake APs with hypercalls and not with INIT+SIPI+SIPI
On 08/05/2024 1:39 pm, Alejandro Vallejo wrote:
> Removes a needless assembly entry point and simplifies the codebase by
> allowing
> hvmloader to wake APs it doesn't know the APIC ID of.
>
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
This is basically independent of the rest of the series, and it would be
good to pull it in separately. A few notes.
The commit message ought to note that this has a side effect of removing
an LMSW instruction which has fastpath at all on AMD CPUs, and requires
full emulation, and it gets rid of 13 vLAPIC emulations when 3
hypercalls would do.
The point being that this is borderline backport material, although it
does depend on the 32 vCPU bugfix.
> ---
> v2:
> * New patch. Replaces adding cpu policy to hvmloader in v1.
> ---
> tools/firmware/hvmloader/smp.c | 111 +++++++++++++--------------------
> 1 file changed, 44 insertions(+), 67 deletions(-)
>
> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
> index 082b17f13818..a668f15d7e1f 100644
> --- a/tools/firmware/hvmloader/smp.c
> +++ b/tools/firmware/hvmloader/smp.c
> @@ -22,88 +22,68 @@
> #include "util.h"
> #include "config.h"
> #include "apic_regs.h"
> +#include "hypercall.h"
>
> -#define AP_BOOT_EIP 0x1000
> -extern char ap_boot_start[], ap_boot_end[];
> +#include <xen/asm/x86-defns.h>
> +#include <xen/hvm/hvm_vcpu.h>
> +
> +#include <xen/vcpu.h>
>
> static int ap_callin, ap_cpuid;
>
> -asm (
> - " .text \n"
> - " .code16 \n"
> - "ap_boot_start: .code16 \n"
> - " mov %cs,%ax \n"
> - " mov %ax,%ds \n"
> - " lgdt gdt_desr-ap_boot_start\n"
> - " xor %ax, %ax \n"
> - " inc %ax \n"
> - " lmsw %ax \n"
> - " ljmpl $0x08,$1f \n"
> - "gdt_desr: \n"
> - " .word gdt_end - gdt - 1 \n"
> - " .long gdt \n"
> - "ap_boot_end: .code32 \n"
> - "1: mov $0x10,%eax \n"
> - " mov %eax,%ds \n"
> - " mov %eax,%es \n"
> - " mov %eax,%ss \n"
> - " movl $stack_top,%esp \n"
> - " movl %esp,%ebp \n"
> - " call ap_start \n"
> - "1: hlt \n"
> - " jmp 1b \n"
> - " \n"
> - " .align 8 \n"
> - "gdt: \n"
> - " .quad 0x0000000000000000 \n"
> - " .quad 0x00cf9a000000ffff \n" /* 0x08: Flat code segment */
> - " .quad 0x00cf92000000ffff \n" /* 0x10: Flat data segment */
> - "gdt_end: \n"
> - " \n"
> - " .bss \n"
> - " .align 8 \n"
> - "stack: \n"
> - " .skip 0x4000 \n"
> - "stack_top: \n"
> - " .text \n"
> - );
> -
> -void ap_start(void); /* non-static avoids unused-function compiler warning */
> -/*static*/ void ap_start(void)
> +static void ap_start(void)
> {
> printf(" - CPU%d ... ", ap_cpuid);
> cacheattr_init();
> printf("done.\n");
> +
> + if ( !ap_cpuid )
> + return;
> +
> wmb();
> ap_callin = 1;
/* After this point, the BSP will shut us down. */
> -}
>
> -static void lapic_wait_ready(void)
> -{
> - while ( lapic_read(APIC_ICR) & APIC_ICR_BUSY )
> - cpu_relax();
> + while ( 1 )
For this, we tend to favour "for ( ;; )".
> + asm volatile ( "hlt" );
> }
>
> static void boot_cpu(unsigned int cpu)
> {
> - unsigned int icr2 = SET_APIC_DEST_FIELD(LAPIC_ID(cpu));
> + static uint8_t ap_stack[4 * PAGE_SIZE] __attribute__ ((aligned (16)));
I know you're just copying what was there, but 4 pages is stupidly large
for something that needs about 4 stack slots.
4K is absolutely plenty.
> + static struct vcpu_hvm_context ap;
>
> /* Initialise shared variables. */
> ap_cpuid = cpu;
> - ap_callin = 0;
> wmb();
>
> - /* Wake up the secondary processor: INIT-SIPI-SIPI... */
> - lapic_wait_ready();
> - lapic_write(APIC_ICR2, icr2);
> - lapic_write(APIC_ICR, APIC_DM_INIT);
> - lapic_wait_ready();
> - lapic_write(APIC_ICR2, icr2);
> - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12));
> - lapic_wait_ready();
> - lapic_write(APIC_ICR2, icr2);
> - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12));
> - lapic_wait_ready();
> + /* Wake up the secondary processor */
> + ap = (struct vcpu_hvm_context) {
> + .mode = VCPU_HVM_MODE_32B,
> + .cpu_regs.x86_32 = {
> + .eip = (uint32_t)ap_start,
> + .esp = (uint32_t)ap_stack + ARRAY_SIZE(ap_stack),
Not that it really matters, but these want to be unsigned long casts.
> +
> + /* Protected mode with MMU off */
> + .cr0 = X86_CR0_PE,
> +
> + /* Prepopulate the GDT */
/* 32bit Flat Mode */
This isn't the GDT; it's the segment registers, but "Flat Mode" is the
more meaningful term to use.
I'm happy to fix all on commit.
~Andrew
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |