[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC v2 12/12] x86: activate per-vcpu stacks in case of xpti



>>> On 22.01.18 at 13:32, <jgross@xxxxxxxx> wrote:
> When scheduling a vcpu subject to xpti activate the per-vcpu stacks
> by loading the vcpu specific gdt and tss. When de-scheduling such a
> vcpu switch back to the per physical cpu gdt and tss.
> 
> Accessing the user registers on the stack is done via helpers as
> depending on XPTI active or not the registers are located either on
> the per-vcpu stack or on the default stack.
> 
> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
> ---
>  xen/arch/x86/domain.c              | 76 +++++++++++++++++++++++++++++++++++---
>  xen/arch/x86/pv/domain.c           | 34 +++++++++++++++--
>  xen/include/asm-x86/desc.h         |  5 +++
>  xen/include/asm-x86/regs.h         |  2 +
>  4 files changed, 107 insertions(+), 10 deletions(-)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index da1bf1a97b..d75234ca35 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1585,9 +1585,28 @@ static inline bool need_full_gdt(const struct domain *d)
>      return is_pv_domain(d) && !is_idle_domain(d);
>  }
>  
> +static void copy_user_regs_from_stack(struct vcpu *v)
> +{
> +    struct cpu_user_regs *stack_regs;

const

> +    stack_regs = (is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti)
> +                 ? v->arch.pv_vcpu.stack_regs
> +                 : &get_cpu_info()->guest_cpu_user_regs;

Ugly open coding of what previously was guest_cpu_user_regs().

> +    memcpy(&v->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
> +}
> +
> +static void copy_user_regs_to_stack(struct vcpu *v)

const

> @@ -1635,7 +1654,7 @@ static void __context_switch(void)
>  
>      gdt = !is_pv_32bit_domain(nd) ? per_cpu(gdt_table, cpu) :
>                                      per_cpu(compat_gdt_table, cpu);
> -    if ( need_full_gdt(nd) )
> +    if ( need_full_gdt(nd) && !nd->arch.pv_domain.xpti )
>      {
>          unsigned long mfn = virt_to_mfn(gdt);
>          l1_pgentry_t *pl1e = pv_gdt_ptes(n);
> @@ -1647,23 +1666,68 @@ static void __context_switch(void)
>      }
>  
>      if ( need_full_gdt(pd) &&
> -         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
> +         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd) ||
> +          pd->arch.pv_domain.xpti) )
>      {
>          gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
>          gdt_desc.base  = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
>  
> +        if ( pd->arch.pv_domain.xpti )
> +            _set_tssldt_type(gdt + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
> +                             SYS_DESC_tss_avail);

Why is this not done in the if() after lgdt()?

>          lgdt(&gdt_desc);
> +
> +        if ( pd->arch.pv_domain.xpti )
> +        {
> +            unsigned long stub_va = this_cpu(stubs.addr);
> +
> +            ltr(TSS_ENTRY << 3);
> +            get_cpu_info()->flags &= ~VCPUSTACK_ACTIVE;
> +            wrmsrl(MSR_LSTAR, stub_va);
> +            wrmsrl(MSR_CSTAR, stub_va + STUB_TRAMPOLINE_SIZE_PERCPU);
> +            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
> +                 boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
> +                wrmsrl(MSR_IA32_SYSENTER_ESP,
> +                       (unsigned long)&get_cpu_info()->guest_cpu_user_regs.es);

Why is this not - like below - &guest_cpu_user_regs()->es?

> +        }
>      }
>  
>      write_ptbase(n);
>  
>      if ( need_full_gdt(nd) &&
> -         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd)) )
> +         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd) ||
> +          nd->arch.pv_domain.xpti) )
>      {
>          gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
>          gdt_desc.base = GDT_VIRT_START(n);
>  
> +        if ( nd->arch.pv_domain.xpti )
> +        {
> +            struct cpu_info *info;
> +
> +            gdt = (struct desc_struct *)GDT_VIRT_START(n);
> +            gdt[PER_CPU_GDT_ENTRY].a = cpu;
> +            _set_tssldt_type(gdt + TSS_ENTRY, SYS_DESC_tss_avail);
> +            info = (struct cpu_info *)(XPTI_START(n) + STACK_SIZE) - 1;
> +            info->stack_bottom_cpu = (unsigned long)guest_cpu_user_regs();
> +        }
> +
>          lgdt(&gdt_desc);
> +
> +        if ( nd->arch.pv_domain.xpti )
> +        {
> +            unsigned long stub_va = XPTI_TRAMPOLINE(n);
> +
> +            ltr(TSS_ENTRY << 3);
> +            get_cpu_info()->flags |= VCPUSTACK_ACTIVE;
> +            wrmsrl(MSR_LSTAR, stub_va);
> +            wrmsrl(MSR_CSTAR, stub_va + STUB_TRAMPOLINE_SIZE_PERVCPU);
> +            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
> +                 boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
> +                wrmsrl(MSR_IA32_SYSENTER_ESP,
> +                       (unsigned long)&guest_cpu_user_regs()->es);
> +        }

So on a switch from PV to PV you add two LTR and 6 WRMSR. Quite
a lot, and I'm not at all convinced that this double writing is all really
needed in such a case.

> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -133,10 +133,36 @@ int switch_compat(struct domain *d)
>  
>  static int pv_create_gdt_ldt_l1tab(struct vcpu *v)
>  {
> -    return create_perdomain_mapping(v->domain, GDT_VIRT_START(v),
> -                                    1U << GDT_LDT_VCPU_SHIFT,
> -                                    v->domain->arch.pv_domain.gdt_ldt_l1tab,
> -                                    NULL);
> +    int rc;
> +
> +    rc = create_perdomain_mapping(v->domain, GDT_VIRT_START(v),
> +                                  1U << GDT_LDT_VCPU_SHIFT,
> +                                  v->domain->arch.pv_domain.gdt_ldt_l1tab,
> +                                  NULL);
> +    if ( !rc && v->domain->arch.pv_domain.xpti )
> +    {
> +        struct desc_struct *gdt;
> +        struct page_info *gdt_pg;
> +
> +        BUILD_BUG_ON(NR_RESERVED_GDT_PAGES > 1);
> +        gdt = (struct desc_struct *)GDT_VIRT_START(v) +
> +              FIRST_RESERVED_GDT_ENTRY;
> +        rc = create_perdomain_mapping(v->domain, (unsigned long)gdt,
> +                                      NR_RESERVED_GDT_PAGES,
> +                                      NULL, &gdt_pg);
> +        if ( !rc )
> +        {
> +            gdt = __map_domain_page(gdt_pg);
> +            memcpy(gdt, boot_cpu_gdt_table, NR_RESERVED_GDT_BYTES);
> +            _set_tssldt_desc(gdt + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
> +                         XPTI_TSS(v),
> +                         offsetof(struct tss_struct, __cacheline_filler) - 1,
> +                         SYS_DESC_tss_avail);
> +            unmap_domain_page(gdt);
> +        }
> +    }
> +
> +    return rc;
>  }

Since you fiddle with the GDT anyway during context switch - do
you really need to allocate another page here, rather than simply
mapping the pCPU's GDT page into the vCPU's per-domain area?
That would also eliminate a concern regarding changes being made
to the GDT after a domain was created.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.