[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] VMX: use a single, global APIC access page


  • To: Jan Beulich <jbeulich@xxxxxxxx>
  • From: Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Date: Thu, 11 Feb 2021 09:45:31 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=citrix.com; dmarc=pass action=none header.from=citrix.com; dkim=pass header.d=citrix.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=EWLfxRiPzPQUxaYqEvh2+Vz1QyXxG6AASbN+xoP5nJ0=; b=EkmxQGiPGBZLcCg73zsg2AeLb3ipn9UcHmQvo9BeL3Ti/pnhqvbkChKJPfawmn5hh4RN4kD1mtg8zNk1/tMuMJHBmhlQfgvPJYsOjNg7ZrJmLir3uUJjbdcLcfIHPY5VJAO1fQ0h5A7pblVQmHbCicjOi08sxfbOZC9czFtzijy++bDMb7MACt+RWmPy21Tk14d5+qoai5fsGbAxAUF+LLh7QCBUbhFovL5bkxyfsFY7jHPMr7jCXMP69E81HQOF5Sz+lrdo8wGk1gJw7USRmBuDBlXFfLerr5bplkb2knVMNd/RJaQfD3YR9Qxu9KjHt3eMwm72ydlGY4J227Ot4Q==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=JL5JQDOVG6gPi0UhS1EJk2FQUPYpotEq/3xo0PSRgvixtRsBduB4tYEqSIq/TQ9+EjBWbkp2Ynw+iuHY3ry9Cag3FuH6IZBIXnwLcEl47HYx9wNYqk6MeqJhWzospDeT+cGJyCjjoPprZoG8JglPbyo5ZLVHcuxWoWrFF2YpzvzjQ3WQvx8B23R7ZNR0XehUCQ2uMXDAc6fVgzCbq2KFX/FvxpuvqRfDzR4xJ+jEix21g4U5qM5XjDIpSJdMhsGrqYZUzBK86ts+7P1xqOB11Kv/R31w3pyjFC1KG7DGow2GYB8lbazTKhW+ouoQ1/hCXsXaZ8FtCzkFPFkpooUA0g==
  • Authentication-results: esa4.hc3370-68.iphmx.com; dkim=pass (signature verified) header.i=@citrix.onmicrosoft.com
  • Cc: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>, "Kevin Tian" <kevin.tian@xxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>
  • Delivery-date: Thu, 11 Feb 2021 08:45:51 +0000
  • Ironport-sdr: luGaybCio+dwxHAEdlfbG/A99XLAeIBkTvPdOPBdx9MHbKxoQ9L1sP318SwapRkgTa5RD4DHoJ neYl2nqV6xFNK+d+MAh+cOUtWcejlB7wrq004tu4JYHKsZ7M5iEHQqlRmOpYMskCgmjT8/ZglY D5NHxn8gANvBP18NcBxQB5niVWAaY4BGftgpSGV1puFnSdXf3wmMqJTIbl8fi31xEna3VZNLmB MOhFXMhgplV6b5h1JdALr35oqnNDPX+zOaXyMjDJjk/D7EA/jxrG5f1yxYCQUqHJ6H9NA2V2W9 xzA=
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On Wed, Feb 10, 2021 at 05:48:26PM +0100, Jan Beulich wrote:
> I did further consider not allocating any real page at all, but just
> using the address of some unpopulated space (which would require
> announcing this page as reserved to Dom0, so it wouldn't put any PCI
> MMIO BARs there). But I thought this would be too controversial, because
> of the possible risks associated with this.

No, Xen is not capable of allocating a suitable unpopulated page IMO,
so let's not go down that route. Wasting one RAM page seems perfectly
fine to me.

> Perhaps the change to p2m_get_iommu_flags() should be in a separate
> patch.

Maybe, I'm still not fully convinced that's a change we want to do
TBH.

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1007,6 +1007,8 @@ int arch_domain_soft_reset(struct domain
>  
>  void arch_domain_creation_finished(struct domain *d)
>  {
> +    if ( is_hvm_domain(d) )
> +        hvm_domain_creation_finished(d);
>  }
>  
>  #define xen_vcpu_guest_context vcpu_guest_context
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -66,8 +66,7 @@ boolean_param("force-ept", opt_force_ept
>  static void vmx_ctxt_switch_from(struct vcpu *v);
>  static void vmx_ctxt_switch_to(struct vcpu *v);
>  
> -static int  vmx_alloc_vlapic_mapping(struct domain *d);
> -static void vmx_free_vlapic_mapping(struct domain *d);
> +static int alloc_vlapic_mapping(void);
>  static void vmx_install_vlapic_mapping(struct vcpu *v);
>  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
>                                  unsigned int flags);
> @@ -78,6 +77,8 @@ static int vmx_msr_read_intercept(unsign
>  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
>  static void vmx_invlpg(struct vcpu *v, unsigned long linear);
>  
> +static mfn_t __read_mostly apic_access_mfn;
> +
>  /* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
>  #define PI_CSW_FROM (1u << 0)
>  #define PI_CSW_TO   (1u << 1)
> @@ -401,7 +402,6 @@ static int vmx_domain_initialise(struct
>          .to   = vmx_ctxt_switch_to,
>          .tail = vmx_do_resume,
>      };
> -    int rc;
>  
>      d->arch.ctxt_switch = &csw;
>  
> @@ -411,21 +411,16 @@ static int vmx_domain_initialise(struct
>       */
>      d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
>  
> -    if ( !has_vlapic(d) )
> -        return 0;
> -
> -    if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
> -        return rc;
> -
>      return 0;
>  }
>  
> -static void vmx_domain_relinquish_resources(struct domain *d)
> +static void domain_creation_finished(struct domain *d)
>  {
> -    if ( !has_vlapic(d) )
> -        return;
>  
> -    vmx_free_vlapic_mapping(d);
> +    if ( !mfn_eq(apic_access_mfn, _mfn(0)) &&
> +         set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE),
> +                            apic_access_mfn, PAGE_ORDER_4K) )
> +        domain_crash(d);
>  }
>  
>  static void vmx_init_ipt(struct vcpu *v)
> @@ -2407,7 +2402,7 @@ static struct hvm_function_table __initd
>      .cpu_up_prepare       = vmx_cpu_up_prepare,
>      .cpu_dead             = vmx_cpu_dead,
>      .domain_initialise    = vmx_domain_initialise,
> -    .domain_relinquish_resources = vmx_domain_relinquish_resources,
> +    .domain_creation_finished = domain_creation_finished,
>      .vcpu_initialise      = vmx_vcpu_initialise,
>      .vcpu_destroy         = vmx_vcpu_destroy,
>      .save_cpu_ctxt        = vmx_save_vmcs_ctxt,
> @@ -2653,7 +2648,7 @@ const struct hvm_function_table * __init
>  {
>      set_in_cr4(X86_CR4_VMXE);
>  
> -    if ( vmx_vmcs_init() )
> +    if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
>      {
>          printk("VMX: failed to initialise.\n");
>          return NULL;
> @@ -3208,7 +3203,7 @@ gp_fault:
>      return X86EMUL_EXCEPTION;
>  }
>  
> -static int vmx_alloc_vlapic_mapping(struct domain *d)
> +static int __init alloc_vlapic_mapping(void)
>  {
>      struct page_info *pg;
>      mfn_t mfn;
> @@ -3216,53 +3211,28 @@ static int vmx_alloc_vlapic_mapping(stru
>      if ( !cpu_has_vmx_virtualize_apic_accesses )
>          return 0;
>  
> -    pg = alloc_domheap_page(d, MEMF_no_refcount);
> +    pg = alloc_domheap_page(NULL, 0);
>      if ( !pg )
>          return -ENOMEM;
>  
> -    if ( !get_page_and_type(pg, d, PGT_writable_page) )
> -    {
> -        /*
> -         * The domain can't possibly know about this page yet, so failure
> -         * here is a clear indication of something fishy going on.
> -         */
> -        domain_crash(d);
> -        return -ENODATA;
> -    }
> -
>      mfn = page_to_mfn(pg);
>      clear_domain_page(mfn);
> -    d->arch.hvm.vmx.apic_access_mfn = mfn;
> +    apic_access_mfn = mfn;
>  
> -    return set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE), mfn,
> -                              PAGE_ORDER_4K);
> -}
> -
> -static void vmx_free_vlapic_mapping(struct domain *d)
> -{
> -    mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
> -
> -    d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
> -    if ( !mfn_eq(mfn, _mfn(0)) )
> -    {
> -        struct page_info *pg = mfn_to_page(mfn);
> -
> -        put_page_alloc_ref(pg);
> -        put_page_and_type(pg);
> -    }
> +    return 0;
>  }
>  
>  static void vmx_install_vlapic_mapping(struct vcpu *v)
>  {
>      paddr_t virt_page_ma, apic_page_ma;
>  
> -    if ( mfn_eq(v->domain->arch.hvm.vmx.apic_access_mfn, _mfn(0)) )
> +    if ( mfn_eq(apic_access_mfn, _mfn(0)) )

You should check if the domain has a vlapic I think, since now
apic_access_mfn is global and will be set regardless of whether the
domain has vlapic enabled or not.

Previously apic_access_mfn was only allocated if the domain had vlapic
enabled.

>          return;
>  
>      ASSERT(cpu_has_vmx_virtualize_apic_accesses);
>  
>      virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
> -    apic_page_ma = mfn_to_maddr(v->domain->arch.hvm.vmx.apic_access_mfn);
> +    apic_page_ma = mfn_to_maddr(apic_access_mfn);
>  
>      vmx_vmcs_enter(v);
>      __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);

I would consider setting up the vmcs and adding the page to the p2m in
the same function, and likely call it from vlapic_init. We could have
a domain_setup_apic in hvm_function_table that takes care of all this.

> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -106,6 +106,7 @@ struct hvm_function_table {
>       * Initialise/destroy HVM domain/vcpu resources
>       */
>      int  (*domain_initialise)(struct domain *d);
> +    void (*domain_creation_finished)(struct domain *d);
>      void (*domain_relinquish_resources)(struct domain *d);
>      void (*domain_destroy)(struct domain *d);
>      int  (*vcpu_initialise)(struct vcpu *v);
> @@ -390,6 +391,12 @@ static inline bool hvm_has_set_descripto
>      return hvm_funcs.set_descriptor_access_exiting;
>  }
>  
> +static inline void hvm_domain_creation_finished(struct domain *d)
> +{
> +    if ( hvm_funcs.domain_creation_finished )
> +        alternative_vcall(hvm_funcs.domain_creation_finished, d);
> +}
> +
>  static inline int
>  hvm_guest_x86_mode(struct vcpu *v)
>  {
> @@ -765,6 +772,11 @@ static inline void hvm_invlpg(const stru
>  {
>      ASSERT_UNREACHABLE();
>  }
> +
> +static inline void hvm_domain_creation_finished(struct domain *d)
> +{
> +    ASSERT_UNREACHABLE();
> +}
>  
>  /*
>   * Shadow code needs further cleanup to eliminate some HVM-only paths. For
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -58,7 +58,6 @@ struct ept_data {
>  #define _VMX_DOMAIN_PML_ENABLED    0
>  #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
>  struct vmx_domain {
> -    mfn_t apic_access_mfn;
>      /* VMX_DOMAIN_* */
>      unsigned int status;
>  
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -935,6 +935,9 @@ static inline unsigned int p2m_get_iommu
>          flags = IOMMUF_readable;
>          if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
>              flags |= IOMMUF_writable;
> +        /* VMX'es APIC access page is global and hence has no owner. */
> +        if ( mfn_valid(mfn) && !page_get_owner(mfn_to_page(mfn)) )
> +            flags = 0;

Is it fine to have this page accessible to devices if the page tables
are shared between the CPU and the IOMMU?

Is it possible for devices to write to it?

I still think both the CPU and the IOMMU page tables should be the
same unless there's a technical reason for them not to be, rather than
us just wanting to hide things from devices.

Thanks, Roger.



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.