
Re: [Xen-devel] [PATCH 2/5] IOMMU: make page table deallocation preemptible



On 10/12/2013 15:46, Jan Beulich wrote:
This too can take an arbitrary amount of time.

In fact, the bulk of the work is being moved to a tasklet, as handling
the necessary preemption logic in line seems close to impossible given
that the teardown may also be invoked on error paths.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

There is a lot of duplicated code here.  I would suggest making most of this infrastructure common, and having a new iommu_op for "void free_io_page_table(struct page_info *);".

~Andrew
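
Roughly, what I have in mind is something like the sketch below, living in
xen/drivers/passthrough/iommu.c.  This is only an illustration, not a drop-in
replacement: the free_io_page_table hook, the iommu_get_ops() accessor and the
iommu_queue_free_pgtable() name are assumed/illustrative here; the queueing and
tasklet logic is simply the code from the patch, hoisted out of the two vendor
drivers.

/* Common deferred-free plumbing, shared by the AMD and VT-d drivers. */
static DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
static PAGE_LIST_HEAD(iommu_pt_cleanup_list);
static struct tasklet iommu_pt_cleanup_tasklet;

/* Vendor code calls this instead of freeing a page table page in place. */
void iommu_queue_free_pgtable(struct page_info *pg, unsigned int level)
{
    spin_lock(&iommu_pt_cleanup_lock);
    PFN_ORDER(pg) = level;                     /* stash the level, as the patch does */
    page_list_add_tail(pg, &iommu_pt_cleanup_list);
    spin_unlock(&iommu_pt_cleanup_lock);
}

/* Single tasklet body; the vendor-specific walk hides behind the new op. */
static void iommu_free_pgtables(unsigned long unused)
{
    do {
        struct page_info *pg;

        spin_lock(&iommu_pt_cleanup_lock);
        pg = page_list_remove_head(&iommu_pt_cleanup_list);
        spin_unlock(&iommu_pt_cleanup_lock);
        if ( !pg )
            return;
        /* Assumed new member of struct iommu_ops:
         *     void (*free_io_page_table)(struct page_info *pg);
         */
        iommu_get_ops()->free_io_page_table(pg);
    } while ( !softirq_pending(smp_processor_id()) );

    /* Yield, but keep draining the list on another online CPU. */
    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
}

The tasklet would then be set up once from common initialisation code
(tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pgtables, 0)) rather than
per vendor, and each driver would keep only its level-by-level walk inside the
new hook, re-queueing lower-level tables via iommu_queue_free_pgtable().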


--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -195,6 +195,8 @@ static int __init amd_iommu_setup_dom0_d
     return 0;
 }
 
+static void deallocate_page_tables(unsigned long unused);
+
 int __init amd_iov_detect(void)
 {
     INIT_LIST_HEAD(&amd_iommu_head);
@@ -215,6 +217,7 @@ int __init amd_iov_detect(void)
         return -ENODEV;
     }
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, deallocate_page_tables, 0);
     init_done = 1;
 
     if ( !amd_iommu_perdev_intremap )
@@ -405,11 +408,21 @@ static int amd_iommu_assign_device(struc
     return reassign_device(dom0, d, devfn, pdev);
 }
 
-static void deallocate_next_page_table(struct page_info* pg, int level)
+static void deallocate_next_page_table(struct page_info *pg, int level)
+{
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void deallocate_page_table(struct page_info *pg)
 {
     void *table_vaddr, *pde;
     u64 next_table_maddr;
-    int index, next_level;
+    unsigned int index, level = PFN_ORDER(pg), next_level;
+
+    PFN_ORDER(pg) = 0;
 
     if ( level <= 1 )
     {
@@ -439,6 +452,23 @@ static void deallocate_next_page_table(s
     free_amd_iommu_pgtable(pg);
 }
 
+static void deallocate_page_tables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        deallocate_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static void deallocate_iommu_page_tables(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
@@ -451,6 +481,7 @@ static void deallocate_iommu_page_tables
     {
         deallocate_next_page_table(hd->root_table, hd->paging_mode);
         hd->root_table = NULL;
+        tasklet_schedule(&iommu_pt_cleanup_tasklet);
     }
     spin_unlock(&hd->mapping_lock);
 }
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
+PAGE_LIST_HEAD(iommu_pt_cleanup_list);
+struct tasklet iommu_pt_cleanup_tasklet;
+
 static struct keyhandler iommu_p2m_table = {
     .diagnostic = 0,
     .u.fn = iommu_dump_p2m_table,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
 {
-    int i;
-    struct dma_pte *pt_vaddr, *pte;
-    int next_level = level - 1;
+    struct page_info *pg = maddr_to_page(pt_maddr);
 
     if ( pt_maddr == 0 )
         return;
 
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void iommu_free_page_table(struct page_info *pg)
+{
+    unsigned int i, next_level = PFN_ORDER(pg) - 1;
+    u64 pt_maddr = page_to_maddr(pg);
+    struct dma_pte *pt_vaddr, *pte;
+
+    PFN_ORDER(pg) = 0;
     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
 
     for ( i = 0; i < PTE_NUM; i++ )
@@ -694,6 +705,23 @@ static void iommu_free_pagetable(u64 pt_
     free_pgtable_maddr(pt_maddr);
 }
 
+static void iommu_free_pagetables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        iommu_free_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static int iommu_set_root_entry(struct iommu *iommu)
 {
     u32 sts;
@@ -1704,6 +1732,8 @@ void iommu_domain_teardown(struct domain
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
     spin_unlock(&hd->mapping_lock);
+
+    tasklet_schedule(&iommu_pt_cleanup_tasklet);
 }
 
 static int intel_iommu_map_page(
@@ -2204,6 +2234,7 @@ int __init intel_vtd_setup(void)
     if ( ret )
         goto error;
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0);
     register_keyhandler('V', &dump_iommu_info_keyhandler);
 
     return 0;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -151,4 +151,8 @@ int adjust_vtd_irq_affinities(void);
  */
 DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+extern struct spinlock iommu_pt_cleanup_lock;
+extern struct page_list_head iommu_pt_cleanup_list;
+extern struct tasklet iommu_pt_cleanup_tasklet;
+
 #endif /* _IOMMU_H_ */




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
