[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH v4 03/14] x86/iommu: convert VT-d code to use new page table allocator


  • To: Paul Durrant <paul@xxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
  • Date: Fri, 14 Aug 2020 06:41:44 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=intel.com; dmarc=pass action=none header.from=intel.com; dkim=pass header.d=intel.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=TgSnu0/dylO4EoEKJWALXEUC6XZorYbEzd6UN+xkTvc=; b=DMXx3bktamX7seoj29/jZeQZRYBA537skwsJQg4XMupf3quhRgyK2SHLIgGZVrgSSec3ilFhShd97ncgdiS82p4FSoKHLVmUF27mikB936FrcDYAM+K0h4xE7Q2tj/KcMwUkOVTguHTKP6Rh9/5/O+urTYcW3ck049rxf0KrkZnk8G9ZBSkVXXlcROmrQDS41pVN9/MZjFt8UO2tS6FX8bf4Kfr3eqvVqN2+fNWlqFkzQnByZxVZL363wKOp1oa7cAjOGavDII1SSW4nEGR++AwVEXJs3Itj4uNqBlC860pWA2ifWVc+fwFc2hpJW4dx3I/gPV12tCEf8QAyoaTX+Q==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=LEGyjRwy+VXLNE7rUlaPJEQyLSZgbQJYDqNj2fbX4Ta7FdyArHxV0Ay+FG9T5jhIKbRwDn9EGBFcls//s+nzkF+f6Mzn2fShgp8PuAq9TyZmVR9cdD5EnRA/Gxx3dP6oEHMZuQHRpGkD37xVSVhfOWHuikXLd66RBphXbkpTCJtJQw7UqHj0DeRgxgaReJ+vXKzJf53LlXXnOfBBTo0gtnSvralQZk2Y9/yGcohBNuVnQzEl+tfjwWC7JOTl9nKvp2V69aU+hXQcD/D4dZqFgcH7X7SxFLzYp8vqGHJkyibwb+PGovsNoZ116moYEbnX7u2JsnSNSzEYWJAHbgz0qQ==
  • Authentication-results: xen.org; dkim=none (message not signed) header.d=none;xen.org; dmarc=none action=none header.from=intel.com;
  • Cc: Paul Durrant <pdurrant@xxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>
  • Delivery-date: Fri, 14 Aug 2020 06:42:03 +0000
  • Dlp-product: dlpe-windows
  • Dlp-reaction: no-action
  • Dlp-version: 11.5.1.3
  • Ironport-sdr: SP0mOtqMko3KAwGcbL65q6VLckWWE+KIfxine0a0N3giYKXhwA12RW1gX4xje4x398lBiuKFQA RiC2+dfvb18A==
  • Ironport-sdr: o8FjgIenACIX+Hr4C5SAQWYOtEuEjQcJpHEd6m5odePtlEhts8MG+cPwXbd5u9gEIVmcSJp0Tm mtzsl2WXXYFw==
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHWamUacjhbO8W3fkGXDLamh0t/Jqk3NaCQ
  • Thread-topic: [PATCH v4 03/14] x86/iommu: convert VT-d code to use new page table allocator

> From: Paul Durrant <paul@xxxxxxx>
> Sent: Tuesday, August 4, 2020 9:42 PM
> 
> From: Paul Durrant <pdurrant@xxxxxxxxxx>
> 
> This patch converts the VT-d code to use the new IOMMU page table
> allocator
> function. This allows all the free-ing code to be removed (since it is now
> handled by the general x86 code) which reduces TLB and cache thrashing as
> well
> as shortening the code.
> 
> The scope of the mapping_lock in intel_iommu_quarantine_init() has also
> been
> increased slightly; it should have always covered accesses to
> 'arch.vtd.pgd_maddr'.
> 
> NOTE: The common IOMMU needs a slight modification to avoid scheduling
> the
>       cleanup tasklet if the free_page_table() method is not present (since
>       the tasklet will unconditionally call it).
> 
> Signed-off-by: Paul Durrant <pdurrant@xxxxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> 
> v2:
>  - New in v2 (split from "add common page-table allocator")
> ---
>  xen/drivers/passthrough/iommu.c     |   6 +-
>  xen/drivers/passthrough/vtd/iommu.c | 101 ++++++++++------------------
>  2 files changed, 39 insertions(+), 68 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/iommu.c
> b/xen/drivers/passthrough/iommu.c
> index 1d644844ab..2b1db8022c 100644
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -225,8 +225,10 @@ static void iommu_teardown(struct domain *d)
>  {
>      struct domain_iommu *hd = dom_iommu(d);
> 
> -    hd->platform_ops->teardown(d);
> -    tasklet_schedule(&iommu_pt_cleanup_tasklet);
> +    iommu_vcall(hd->platform_ops, teardown, d);
> +
> +    if ( hd->platform_ops->free_page_table )
> +        tasklet_schedule(&iommu_pt_cleanup_tasklet);
>  }
> 
>  void iommu_domain_destroy(struct domain *d)
> diff --git a/xen/drivers/passthrough/vtd/iommu.c
> b/xen/drivers/passthrough/vtd/iommu.c
> index 94e0455a4d..607e8b5e65 100644
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -265,10 +265,15 @@ static u64 addr_to_dma_page_maddr(struct
> domain *domain, u64 addr, int alloc)
> 
>      addr &= (((u64)1) << addr_width) - 1;
>      ASSERT(spin_is_locked(&hd->arch.mapping_lock));
> -    if ( !hd->arch.vtd.pgd_maddr &&
> -         (!alloc ||
> -          ((hd->arch.vtd.pgd_maddr = alloc_pgtable_maddr(1, hd->node)) ==
> 0)) )
> -        goto out;
> +    if ( !hd->arch.vtd.pgd_maddr )
> +    {
> +        struct page_info *pg;
> +
> +        if ( !alloc || !(pg = iommu_alloc_pgtable(domain)) )
> +            goto out;
> +
> +        hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
> +    }
> 
>      parent = (struct dma_pte *)map_vtd_domain_page(hd-
> >arch.vtd.pgd_maddr);
>      while ( level > 1 )
> @@ -279,13 +284,16 @@ static u64 addr_to_dma_page_maddr(struct
> domain *domain, u64 addr, int alloc)
>          pte_maddr = dma_pte_addr(*pte);
>          if ( !pte_maddr )
>          {
> +            struct page_info *pg;
> +
>              if ( !alloc )
>                  break;
> 
> -            pte_maddr = alloc_pgtable_maddr(1, hd->node);
> -            if ( !pte_maddr )
> +            pg = iommu_alloc_pgtable(domain);
> +            if ( !pg )
>                  break;
> 
> +            pte_maddr = page_to_maddr(pg);
>              dma_set_pte_addr(*pte, pte_maddr);
> 
>              /*
> @@ -675,45 +683,6 @@ static void dma_pte_clear_one(struct domain
> *domain, uint64_t addr,
>      unmap_vtd_domain_page(page);
>  }
> 
> -static void iommu_free_pagetable(u64 pt_maddr, int level)
> -{
> -    struct page_info *pg = maddr_to_page(pt_maddr);
> -
> -    if ( pt_maddr == 0 )
> -        return;
> -
> -    PFN_ORDER(pg) = level;
> -    spin_lock(&iommu_pt_cleanup_lock);
> -    page_list_add_tail(pg, &iommu_pt_cleanup_list);
> -    spin_unlock(&iommu_pt_cleanup_lock);
> -}
> -
> -static void iommu_free_page_table(struct page_info *pg)
> -{
> -    unsigned int i, next_level = PFN_ORDER(pg) - 1;
> -    u64 pt_maddr = page_to_maddr(pg);
> -    struct dma_pte *pt_vaddr, *pte;
> -
> -    PFN_ORDER(pg) = 0;
> -    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
> -
> -    for ( i = 0; i < PTE_NUM; i++ )
> -    {
> -        pte = &pt_vaddr[i];
> -        if ( !dma_pte_present(*pte) )
> -            continue;
> -
> -        if ( next_level >= 1 )
> -            iommu_free_pagetable(dma_pte_addr(*pte), next_level);
> -
> -        dma_clear_pte(*pte);
> -        iommu_sync_cache(pte, sizeof(struct dma_pte));

I didn't see sync_cache in the new iommu_free_pgtables. Is it intended
(i.e. original flush is meaningless) or overlooked?

Thanks
Kevin

> -    }
> -
> -    unmap_vtd_domain_page(pt_vaddr);
> -    free_pgtable_maddr(pt_maddr);
> -}
> -
>  static int iommu_set_root_entry(struct vtd_iommu *iommu)
>  {
>      u32 sts;
> @@ -1748,16 +1717,7 @@ static void iommu_domain_teardown(struct
> domain *d)
>          xfree(mrmrr);
>      }
> 
> -    ASSERT(is_iommu_enabled(d));
> -
> -    if ( iommu_use_hap_pt(d) )
> -        return;
> -
> -    spin_lock(&hd->arch.mapping_lock);
> -    iommu_free_pagetable(hd->arch.vtd.pgd_maddr,
> -                         agaw_to_level(hd->arch.vtd.agaw));
>      hd->arch.vtd.pgd_maddr = 0;
> -    spin_unlock(&hd->arch.mapping_lock);
>  }
> 
>  static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn,
> @@ -2669,23 +2629,28 @@ static void vtd_dump_p2m_table(struct domain
> *d)
>  static int __init intel_iommu_quarantine_init(struct domain *d)
>  {
>      struct domain_iommu *hd = dom_iommu(d);
> +    struct page_info *pg;
>      struct dma_pte *parent;
>      unsigned int agaw =
> width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
>      unsigned int level = agaw_to_level(agaw);
> -    int rc;
> +    int rc = 0;
> +
> +    spin_lock(&hd->arch.mapping_lock);
> 
>      if ( hd->arch.vtd.pgd_maddr )
>      {
>          ASSERT_UNREACHABLE();
> -        return 0;
> +        goto out;
>      }
> 
> -    spin_lock(&hd->arch.mapping_lock);
> +    pg = iommu_alloc_pgtable(d);
> 
> -    hd->arch.vtd.pgd_maddr = alloc_pgtable_maddr(1, hd->node);
> -    if ( !hd->arch.vtd.pgd_maddr )
> +    rc = -ENOMEM;
> +    if ( !pg )
>          goto out;
> 
> +    hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
> +
>      parent = map_vtd_domain_page(hd->arch.vtd.pgd_maddr);
>      while ( level )
>      {
> @@ -2697,10 +2662,12 @@ static int __init
> intel_iommu_quarantine_init(struct domain *d)
>           * page table pages, and the resulting allocations are always
>           * zeroed.
>           */
> -        maddr = alloc_pgtable_maddr(1, hd->node);
> -        if ( !maddr )
> -            break;
> +        pg = iommu_alloc_pgtable(d);
> +
> +        if ( !pg )
> +            goto out;
> 
> +        maddr = page_to_maddr(pg);
>          for ( offset = 0; offset < PTE_NUM; offset++ )
>          {
>              struct dma_pte *pte = &parent[offset];
> @@ -2716,13 +2683,16 @@ static int __init
> intel_iommu_quarantine_init(struct domain *d)
>      }
>      unmap_vtd_domain_page(parent);
> 
> +    rc = 0;
> +
>   out:
>      spin_unlock(&hd->arch.mapping_lock);
> 
> -    rc = iommu_flush_iotlb_all(d);
> +    if ( !rc )
> +        rc = iommu_flush_iotlb_all(d);
> 
> -    /* Pages leaked in failure case */
> -    return level ? -ENOMEM : rc;
> +    /* Pages may be leaked in failure case */
> +    return rc;
>  }
> 
>  static struct iommu_ops __initdata vtd_ops = {
> @@ -2737,7 +2707,6 @@ static struct iommu_ops __initdata vtd_ops = {
>      .map_page = intel_iommu_map_page,
>      .unmap_page = intel_iommu_unmap_page,
>      .lookup_page = intel_iommu_lookup_page,
> -    .free_page_table = iommu_free_page_table,
>      .reassign_device = reassign_device_ownership,
>      .get_device_group_id = intel_iommu_group_id,
>      .enable_x2apic = intel_iommu_enable_eim,
> --
> 2.20.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.