[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing


  • To: "Beulich, Jan" <JBeulich@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
  • Date: Fri, 18 Feb 2022 05:34:46 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=/KRAENq2ZD/H5LC/wAS4dS2+EEEGfv6Q3p1mUQPMjZs=; b=kHgIJEarRyiIe3cuYtJoyCDd+QK0oIb1AowgRFA/mJFJkmSxWzTKQ4ZwCdnwBEjrigwqe2TJu9yQWz3Vs06saVhTiSCpUy7U4v2zCduwlP+LNDCXLKtqEWQNT/mXVKiNOrfDHOCc6gbXOBfQp73Vuk5YZJS8Xeq46hhcN4CkWkW0JeTlPeCgY1QovIE9LQGP3Uro42vtgfo7aVrO9AEEa0OETcvr5qvntNSOIeUhP3zsbUeHS0yV6CVhM4kOKxg+qEo0uc25ODL+syzen4T9ylE9H6eRWKIqGCHrvf1mhBahKpokrLry/KGoEXXlGyUxo1u0l8ZSAOrvEaqE5sgbVQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=XGegvpJIQIkiH9y6SMpDlJLiz6pJvRMd4a+nsYWuueojjtCYNJ9tgkRLQZSnB2Fh03k++cHP+czpctj4xZ67LjgP0myR9EFrJlGQBBd+HkWTJO/EBc0AyZ9yquU/LEFIZE3mo7EyKtMLXhtFMPdBEaSC95ciMpVS16VLgtpCz30e7bGLovIkBZ0uGoLR3gK8WMwHN0yo2ofLPTLMV3yG2el/F27rPS5FjsuIHanR44TGYpZnCXhMgf7bifYp2vtPwA719a2urH8yADfPRO4i955AW8n57oqp+xnE5zfTpmGXJ0b6REExM7PgJd++wugpWzqT+GT3KjhqCMQiZ+s5bw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=intel.com;
  • Cc: Paul Durrant <paul@xxxxxxx>, "Cooper, Andrew" <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Pau Monné, Roger <roger.pau@xxxxxxxxxx>
  • Delivery-date: Fri, 18 Feb 2022 05:35:01 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHYE4z9cNIIU9JepUSjhIuqcb/UzqyY6wbA
  • Thread-topic: [PATCH v2 2/4] VT-d / x86: re-arrange cache syncing

> From: Jan Beulich <jbeulich@xxxxxxxx>
> Sent: Thursday, January 27, 2022 10:48 PM
> 
> The actual function should always have lived in core x86 code; move it
> there, replacing get_cache_line_size() by readily available (except very
> early during boot; see the code comment) data. Also rename the function.
> 
> Drop the respective IOMMU hook, (re)introducing a respective boolean
> instead. Replace a true and an almost open-coding instance of
> iommu_sync_cache().
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>

> ---
> Placing the function next to flush_area_local() exposes a curious
> asymmetry between the SFENCE placements: sync_cache() has it after the
> flush, while flush_area_local() has it before it. I think the latter one
> is misplaced.
> ---
> v2: Rename sync_cache() to cache_writeback().
> 
> --- a/xen/arch/x86/flushtlb.c
> +++ b/xen/arch/x86/flushtlb.c
> @@ -11,6 +11,7 @@
>  #include <xen/sched.h>
>  #include <xen/smp.h>
>  #include <xen/softirq.h>
> +#include <asm/cache.h>
>  #include <asm/flushtlb.h>
>  #include <asm/invpcid.h>
>  #include <asm/nops.h>
> @@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
>      return flags;
>  }
> 
> +void cache_writeback(const void *addr, unsigned int size)
> +{
> +    /*
> +     * This function may be called before current_cpu_data is established.
> +     * Hence a fallback is needed to prevent the loop below becoming infinite.
> +     */
> +    unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
> +    const void *end = addr + size;
> +
> +    addr -= (unsigned long)addr & (clflush_size - 1);
> +    for ( ; addr < end; addr += clflush_size )
> +    {
> +/*
> + * The arguments to a macro must not include preprocessor directives. Doing so
> + * results in undefined behavior, so we have to create some defines here in
> + * order to avoid it.
> + */
> +#if defined(HAVE_AS_CLWB)
> +# define CLWB_ENCODING "clwb %[p]"
> +#elif defined(HAVE_AS_XSAVEOPT)
> +# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
> +#else
> +# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
> +#endif
> +
> +#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
> +#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
> +# define INPUT BASE_INPUT
> +#else
> +# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
> +#endif
> +        /*
> +         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
> +         * + prefix than a clflush + nop, and hence the prefix is added instead
> +         * of letting the alternative framework fill the gap by appending nops.
> +         */
> +        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
> +                         "data16 clflush %[p]", /* clflushopt */
> +                         X86_FEATURE_CLFLUSHOPT,
> +                         CLWB_ENCODING,
> +                         X86_FEATURE_CLWB, /* no outputs */,
> +                         INPUT(addr));
> +#undef INPUT
> +#undef BASE_INPUT
> +#undef CLWB_ENCODING
> +    }
> +
> +    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
> +                      "sfence", X86_FEATURE_CLWB);
> +}
> +
>  unsigned int guest_flush_tlb_flags(const struct domain *d)
>  {
>      bool shadow = paging_mode_shadow(d);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -240,54 +240,6 @@ domid_t did_to_domain_id(const struct vt
>      return iommu->domid_map[did];
>  }
> 
> -static void sync_cache(const void *addr, unsigned int size)
> -{
> -    static unsigned long clflush_size = 0;
> -    const void *end = addr + size;
> -
> -    if ( clflush_size == 0 )
> -        clflush_size = get_cache_line_size();
> -
> -    addr -= (unsigned long)addr & (clflush_size - 1);
> -    for ( ; addr < end; addr += clflush_size )
> -/*
> - * The arguments to a macro must not include preprocessor directives. Doing so
> - * results in undefined behavior, so we have to create some defines here in
> - * order to avoid it.
> - */
> -#if defined(HAVE_AS_CLWB)
> -# define CLWB_ENCODING "clwb %[p]"
> -#elif defined(HAVE_AS_XSAVEOPT)
> -# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
> -#else
> -# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
> -#endif
> -
> -#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
> -#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
> -# define INPUT BASE_INPUT
> -#else
> -# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
> -#endif
> -        /*
> -         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
> -         * + prefix than a clflush + nop, and hence the prefix is added instead
> -         * of letting the alternative framework fill the gap by appending nops.
> -         */
> -        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
> -                         "data16 clflush %[p]", /* clflushopt */
> -                         X86_FEATURE_CLFLUSHOPT,
> -                         CLWB_ENCODING,
> -                         X86_FEATURE_CLWB, /* no outputs */,
> -                         INPUT(addr));
> -#undef INPUT
> -#undef BASE_INPUT
> -#undef CLWB_ENCODING
> -
> -    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
> -                      "sfence", X86_FEATURE_CLWB);
> -}
> -
>  /* Allocate page table, return its machine address */
>  uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
>  {
> @@ -306,8 +258,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
> 
>          clear_page(vaddr);
> 
> -        if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
> -            sync_cache(vaddr, PAGE_SIZE);
> +        iommu_sync_cache(vaddr, PAGE_SIZE);
>          unmap_domain_page(vaddr);
>          cur_pg++;
>      }
> @@ -1327,7 +1278,7 @@ int __init iommu_alloc(struct acpi_drhd_
>      iommu->nr_pt_levels = agaw_to_level(agaw);
> 
>      if ( !ecap_coherent(iommu->ecap) )
> -        vtd_ops.sync_cache = sync_cache;
> +        iommu_non_coherent = true;
> 
>      nr_dom = cap_ndoms(iommu->cap);
> 
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -28,6 +28,7 @@
> 
>  const struct iommu_init_ops *__initdata iommu_init_ops;
>  struct iommu_ops __read_mostly iommu_ops;
> +bool __read_mostly iommu_non_coherent;
> 
>  enum iommu_intremap __read_mostly iommu_intremap = iommu_intremap_full;
> 
> @@ -438,8 +439,7 @@ struct page_info *iommu_alloc_pgtable(st
>      p = __map_domain_page(pg);
>      clear_page(p);
> 
> -    if ( hd->platform_ops->sync_cache )
> -        iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
> +    iommu_sync_cache(p, PAGE_SIZE);
> 
>      unmap_domain_page(p);
> 
> --- a/xen/arch/x86/include/asm/cache.h
> +++ b/xen/arch/x86/include/asm/cache.h
> @@ -11,4 +11,10 @@
> 
>  #define __read_mostly __section(".data.read_mostly")
> 
> +#ifndef __ASSEMBLY__
> +
> +void cache_writeback(const void *addr, unsigned int size);
> +
> +#endif
> +
>  #endif
> --- a/xen/arch/x86/include/asm/iommu.h
> +++ b/xen/arch/x86/include/asm/iommu.h
> @@ -19,6 +19,7 @@
>  #include <xen/mem_access.h>
>  #include <xen/spinlock.h>
>  #include <asm/apicdef.h>
> +#include <asm/cache.h>
>  #include <asm/processor.h>
>  #include <asm/hvm/vmx/vmcs.h>
> 
> @@ -134,12 +135,13 @@ extern bool untrusted_msi;
>  int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
>                     const uint8_t gvec);
> 
> -#define iommu_sync_cache(addr, size) ({                 \
> -    const struct iommu_ops *ops = iommu_get_ops();      \
> -                                                        \
> -    if ( ops->sync_cache )                              \
> -        iommu_vcall(ops, sync_cache, addr, size);       \
> -})
> +extern bool iommu_non_coherent;
> +
> +static inline void iommu_sync_cache(const void *addr, unsigned int size)
> +{
> +    if ( iommu_non_coherent )
> +        cache_writeback(addr, size);
> +}
> 
>  int __must_check iommu_free_pgtables(struct domain *d);
>  struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -268,7 +268,6 @@ struct iommu_ops {
>      int (*setup_hpet_msi)(struct msi_desc *);
> 
>      int (*adjust_irq_affinities)(void);
> -    void (*sync_cache)(const void *addr, unsigned int size);
>      void (*clear_root_pgtable)(struct domain *d);
>      int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
>  #endif /* CONFIG_X86 */
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -78,7 +78,6 @@ int __must_check qinval_device_iotlb_syn
>                                            struct pci_dev *pdev,
>                                            u16 did, u16 size, u64 addr);
> 
> -unsigned int get_cache_line_size(void);
>  void flush_all_cache(void);
> 
>  uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
> --- a/xen/drivers/passthrough/vtd/x86/vtd.c
> +++ b/xen/drivers/passthrough/vtd/x86/vtd.c
> @@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *v
>      unmap_domain_page(va);
>  }
> 
> -unsigned int get_cache_line_size(void)
> -{
> -    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
> -}
> -
>  void flush_all_cache()
>  {
>      wbinvd();


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.