
Re: [Xen-devel] [RFC PATCH v1 6/7] iommu/arm: ipmmu-vmsa: Deallocate page table asynchronously





On 26/07/17 16:10, Oleksandr Tyshchenko wrote:
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>

This is a PoC showing how to optimize the page table deallocation sequence
by splitting it into separate chunks.
Use iommu_pt_cleanup_list to queue pages that need to be handled and
freed later. Use the free_page_table platform callback to dequeue
pages.

The page allocation/deallocation definitely needs to be split into chunks and allow voluntary preemption. Otherwise you may end up hitting RCU sched stalls on the toolstack domain.
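
FWIW, the common code drains iommu_pt_cleanup_list from a tasklet and
already yields between pages. From memory it looks roughly like the sketch
below (see xen/drivers/passthrough/iommu.c; details may differ). Note that
free_page_table is invoked once per dequeued page, so each callback
invocation should only ever free a bounded amount of memory:

static void iommu_free_pagetables(unsigned long unused)
{
    do {
        struct page_info *pg;

        /* Dequeue one queued page table page at a time. */
        spin_lock(&iommu_pt_cleanup_lock);
        pg = page_list_remove_head(&iommu_pt_cleanup_list);
        spin_unlock(&iommu_pt_cleanup_lock);
        if ( !pg )
            return;

        /* For this series: ends up in ipmmu_vmsa_free_page_table(). */
        iommu_get_ops()->free_page_table(pg);
    } while ( !softirq_pending(smp_processor_id()) );

    /* Softirqs are pending: reschedule ourselves instead of hogging the CPU. */
    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
}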


Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
CC: Julien Grall <julien.grall@xxxxxxx>
CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
---
 xen/drivers/passthrough/arm/io-pgtable-arm.c | 94 +++++++++++++++++++++++++---
 xen/drivers/passthrough/arm/io-pgtable.c     |  5 +-
 xen/drivers/passthrough/arm/io-pgtable.h     |  4 +-
 xen/drivers/passthrough/arm/ipmmu-vmsa.c     | 33 ++++++++--
 4 files changed, 119 insertions(+), 17 deletions(-)

diff --git a/xen/drivers/passthrough/arm/io-pgtable-arm.c b/xen/drivers/passthrough/arm/io-pgtable-arm.c
index c98caa3..7673fda 100644
--- a/xen/drivers/passthrough/arm/io-pgtable-arm.c
+++ b/xen/drivers/passthrough/arm/io-pgtable-arm.c
@@ -254,6 +254,10 @@ struct arm_lpae_io_pgtable {

        /* Xen: We deal with domain pages. */
        struct page_info        *pgd;
+       /* Xen: To indicate that deallocation sequence is in progress. */
+       bool_t                          cleanup;
+       /* Xen: To count allocated domain pages. */
+       unsigned int            page_count;
 };

 typedef u64 arm_lpae_iopte;
@@ -329,7 +333,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 #endif

 static struct page_info *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
-                                   struct io_pgtable_cfg *cfg)
+                                   struct arm_lpae_io_pgtable *data)
 {
        struct page_info *pages;
        unsigned int order = get_order_from_bytes(size);
@@ -342,15 +346,21 @@ static struct page_info *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
        for (i = 0; i < (1 << order); i ++)
                clear_and_clean_page(pages + i);

+       data->page_count += (1<<order);
+
        return pages;
 }

 static void __arm_lpae_free_pages(struct page_info *pages, size_t size,
-                                 struct io_pgtable_cfg *cfg)
+                                 struct arm_lpae_io_pgtable *data)
 {
        unsigned int order = get_order_from_bytes(size);

+       BUG_ON((int)data->page_count <= 0);
+
        free_domheap_pages(pages, order);
+
+       data->page_count -= (1<<order);
 }

 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
@@ -434,7 +444,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
        pte = *ptep;
        if (!pte) {
                page = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
-                                              GFP_ATOMIC, cfg);
+                                              GFP_ATOMIC, data);
                if (!page)
                        return -ENOMEM;

@@ -526,6 +536,46 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
        return ret;
 }

+static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
+                                   struct page_info *page);
+
+/*
+ * TODO: We have reused the currently unused "page->pad" field to store the
+ * "data" pointer we need during the deallocation sequence. The current
+ * free_page_table platform callback carries only a single "page" argument.
+ * To perform the required calculations with the current (generic) allocator
+ * implementation we are highly interested in the following fields:
+ * - data->levels
+ * - data->pg_shift
+ * - data->pgd_size
+ * But this necessity could be avoided if we integrated the allocator code
+ * with the IPMMU-VMSA driver; these variables would then turn into the
+ * corresponding #define-s.
+ */
+static void __arm_lpae_free_next_pgtable(struct arm_lpae_io_pgtable *data,
+                                   int lvl, struct page_info *page)
+{
+       if (!data->cleanup) {
+               /*
+                * We are here during normal page table maintenance. Just call
+                * __arm_lpae_free_pgtable(), which is what we would have
+                * called anyway.
+                */
+               __arm_lpae_free_pgtable(data, lvl, page);
+       } else {
+               /*
+                * The page table deallocation sequence is in progress. Use some
+                * fields in struct page_info to pass the arguments we will need
+                * when this page is handled later. Queue the page to the list.
+                */
+               PFN_ORDER(page) = lvl;
+               page->pad = (u64)&data->iop.ops;
+
+               spin_lock(&iommu_pt_cleanup_lock);
+               page_list_add_tail(page, &iommu_pt_cleanup_list);
+               spin_unlock(&iommu_pt_cleanup_lock);
+       }
+}
+
 /* Xen: We deal with domain pages. */
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                                    struct page_info *page)
@@ -553,19 +603,41 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                if (!pte || iopte_leaf(pte, lvl))
                        continue;

-               __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
+               __arm_lpae_free_next_pgtable(data, lvl + 1, iopte_deref(pte, data));
        }

        unmap_domain_page(start);
-       __arm_lpae_free_pages(page, table_size, &data->iop.cfg);
+       __arm_lpae_free_pages(page, table_size, data);
 }

-static void arm_lpae_free_pgtable(struct io_pgtable *iop)
+/*
+ * We added an extra "page" argument since we need to know which page is
+ * currently being processed and should be freed.
+ */
+static void arm_lpae_free_pgtable(struct io_pgtable *iop, struct page_info *page)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
+       int lvl;

-       __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd);
-       kfree(data);
+       if (!data->cleanup) {
+               /* Start page table deallocation sequence from the first level. */
+               data->cleanup = true;
+               lvl = ARM_LPAE_START_LVL(data);
+       } else {
+               /* Retrieve the level to continue deallocation sequence from. */
+               lvl = PFN_ORDER(page);
+               PFN_ORDER(page) = 0;
+               page->pad = 0;
+       }
+
+       __arm_lpae_free_pgtable(data, lvl, page);
+
+       /*
+        * It seems we have already deallocated all pages, so it is time
+        * to release the remaining resources.
+        */
+       if (!data->page_count)
+               kfree(data);
 }

 /* Xen: We deal with domain pages. */
@@ -889,8 +961,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        cfg->arm_lpae_s1_cfg.mair[0] = reg;
        cfg->arm_lpae_s1_cfg.mair[1] = 0;

+       /* Just to be sure */
+       data->cleanup = false;
+       data->page_count = 0;
+
        /* Looking good; allocate a pgd */
-       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, data);
        if (!data->pgd)
                goto out_free_data;

diff --git a/xen/drivers/passthrough/arm/io-pgtable.c b/xen/drivers/passthrough/arm/io-pgtable.c
index bfc7020..e25d731 100644
--- a/xen/drivers/passthrough/arm/io-pgtable.c
+++ b/xen/drivers/passthrough/arm/io-pgtable.c
@@ -77,7 +77,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
  * It is the IOMMU driver's responsibility to ensure that the page table
  * is no longer accessible to the walker by this point.
  */
-void free_io_pgtable_ops(struct io_pgtable_ops *ops)
+void free_io_pgtable_ops(struct io_pgtable_ops *ops, struct page_info *page)
 {
        struct io_pgtable *iop;

@@ -86,5 +86,6 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)

        iop = container_of(ops, struct io_pgtable, ops);
        io_pgtable_tlb_flush_all(iop);
-       io_pgtable_init_table[iop->fmt]->free(iop);
+       iop->cookie = NULL;
+       io_pgtable_init_table[iop->fmt]->free(iop, page);
 }
diff --git a/xen/drivers/passthrough/arm/io-pgtable.h b/xen/drivers/passthrough/arm/io-pgtable.h
index fb81fcf..df0e21b 100644
--- a/xen/drivers/passthrough/arm/io-pgtable.h
+++ b/xen/drivers/passthrough/arm/io-pgtable.h
@@ -144,7 +144,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
  *
  * @ops: The ops returned from alloc_io_pgtable_ops.
  */
-void free_io_pgtable_ops(struct io_pgtable_ops *ops);
+void free_io_pgtable_ops(struct io_pgtable_ops *ops, struct page_info *page);


 /*
@@ -201,7 +201,7 @@ static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
  */
 struct io_pgtable_init_fns {
        struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
-       void (*free)(struct io_pgtable *iop);
+       void (*free)(struct io_pgtable *iop, struct page_info *page);
 };

 extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
diff --git a/xen/drivers/passthrough/arm/ipmmu-vmsa.c b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
index e54b507..2a04800 100644
--- a/xen/drivers/passthrough/arm/ipmmu-vmsa.c
+++ b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
@@ -708,8 +708,8 @@ static void ipmmu_tlb_flush_all(void *cookie)
 {
        struct ipmmu_vmsa_domain *domain = cookie;

-       /* Xen: Just return if context_id has non-existent value */
-       if (domain->context_id >= domain->root->num_ctx)
+       /* Xen: Just return if the context is absent or context_id has a non-existent value */
+       if (!domain || domain->context_id >= domain->root->num_ctx)
                return;

        ipmmu_tlb_invalidate(domain);
@@ -796,7 +796,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
         */
        ret = ipmmu_domain_allocate_context(domain->root, domain);
        if (ret < 0) {
-               free_io_pgtable_ops(domain->iop);
+               /* Pass the root page table for this domain as an argument. */
+               free_io_pgtable_ops(domain->iop,
+                               maddr_to_page(domain->cfg.arm_lpae_s1_cfg.ttbr[0]));
                return ret;
        }

@@ -2193,7 +2195,12 @@ static void ipmmu_vmsa_destroy_domain(struct iommu_domain *io_domain)
                 * been detached.
                 */
                ipmmu_domain_destroy_context(domain);
-               free_io_pgtable_ops(domain->iop);
+               /*
+                * Pass the root page table for this domain as an argument.
+                * This call will start the deallocation sequence.
+                */
+               free_io_pgtable_ops(domain->iop,
+                               maddr_to_page(domain->cfg.arm_lpae_s1_cfg.ttbr[0]));
        }

        kfree(domain);
@@ -2383,6 +2390,17 @@ static int ipmmu_vmsa_domain_init(struct domain *d, bool use_iommu)
        return 0;
 }

+/*
+ * There is one more page we need to process, so retrieve the ops
+ * pointer and continue the deallocation sequence.
+ */
+static void ipmmu_vmsa_free_page_table(struct page_info *page)
+{
+       struct io_pgtable_ops *ops = (struct io_pgtable_ops *)page->pad;
+
+       free_io_pgtable_ops(ops, page);
+}
+
 static void __hwdom_init ipmmu_vmsa_hwdom_init(struct domain *d)
 {
 }
@@ -2404,6 +2422,12 @@ static void ipmmu_vmsa_domain_teardown(struct domain *d)
        ASSERT(list_empty(&xen_domain->contexts));
        xfree(xen_domain);
        dom_iommu(d)->arch.priv = NULL;
+       /*
+        * At this point all domain resources have been deallocated, except
+        * the page table, which we will deallocate asynchronously. The IOMMU
+        * code provides us with iommu_pt_cleanup_list and the free_page_table
+        * platform callback, which is exactly what we are going to use.
+        */
 }

 static int __must_check ipmmu_vmsa_map_pages(struct domain *d,
@@ -2462,6 +2486,7 @@ static void ipmmu_vmsa_dump_p2m_table(struct domain *d)
 static const struct iommu_ops ipmmu_vmsa_iommu_ops = {
        .init = ipmmu_vmsa_domain_init,
        .hwdom_init = ipmmu_vmsa_hwdom_init,
+       .free_page_table = ipmmu_vmsa_free_page_table,
        .teardown = ipmmu_vmsa_domain_teardown,
        .iotlb_flush = ipmmu_vmsa_iotlb_flush,
        .assign_device = ipmmu_vmsa_assign_dev,


--
Julien Grall
