[Xen-devel] [PATCH v5 4/6] xen/arm: Implement virtual-linear page table for guest p2m mapping in live migration



Implement vlpt (virtual-linear page table) for fast access to the 3rd-level PTEs of the guest p2m.

When creating a vlpt mapping, copy the 1st-level PTEs of the guest p2m into Xen's

2nd-level PTEs. The mapping then becomes the following:

    Xen's 1st-level PTE -->

    Xen's 2nd-level PTE (identical to the guest p2m's 1st-level PTE) -->

    guest p2m's 2nd-level PTE -->

    guest p2m's 3rd-level PTE (the memory that the vlpt addresses actually map)

For more info about vlpt, see:

http://www.technovelty.org/linux/virtual-linear-page-table.html
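
As a rough illustration (not part of the patch; the constants come from the
config.h hunk below, and the GPA value is hypothetical), locating the leaf PTE
of a guest physical address becomes a shift and an add instead of a walk:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT         12
    #define VIRT_LIN_P2M_START 0x08000000UL  /* from the config.h hunk below */

    int main(void)
    {
        /* hypothetical guest physical address */
        uint64_t gpa = 0x40201000ULL;

        /* One 8-byte leaf PTE per 4K guest page, laid out linearly: */
        unsigned long pte_va = (unsigned long)
            (VIRT_LIN_P2M_START + (gpa >> PAGE_SHIFT) * sizeof(uint64_t));

        printf("leaf PTE for GPA 0x%llx is at VA 0x%lx\n",
               (unsigned long long)gpa, pte_va);
        return 0;
    }

This is the same computation that get_vlpt_3lvl_pte() in the mm.h hunk below
performs via array indexing.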

 

This mechanism is used in dirty-page tracing: when a domU write fault is trapped by Xen,

Xen can immediately locate the 3rd-level PTE of the guest p2m.

The following link shows a performance comparison of dirty-page handling between

the vlpt and a typical page-table walk:

http://lists.xen.org/archives/html/xen-devel/2013-08/msg01503.html
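
As a purely hypothetical sketch of that fault path (no such handler is part of
this patch; mark_dirty_bitmap() and flush_guest_tlb_entry() are stand-in names,
and the p2m.write bitfield layout is assumed), the point is that the faulting
IPA resolves straight to its leaf PTE:

    /* Hypothetical sketch: resolve the faulting IPA to its leaf p2m PTE
     * through the vlpt, with no software page-table walk. */
    static void handle_p2m_write_fault(struct domain *d, paddr_t fault_ipa)
    {
        lpae_t *pte = get_vlpt_3lvl_pte(fault_ipa); /* from the mm.h hunk */
        lpae_t new = *pte;

        new.p2m.write = 1;    /* assumed field: make the page writable again */
        write_pte(pte, new);  /* barriered variant from the page.h hunk */

        mark_dirty_bitmap(d, fault_ipa);   /* stand-in: record the dirty page */
        flush_guest_tlb_entry(fault_ipa);  /* stand-in: TLB maintenance */
    }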

 

Changes from v4:

1. When restoring the vlpt, use the barrier-free __foo variants of write_pte and

   flush_xen_data_tlb_range_va (see the sketch after this list).

2. Support two consecutive pages for the guest's first-level page table.
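
To illustrate item 1 (a sketch of the pattern only; start, end and
new_entries[] are stand-ins), the __ variants let a batch of updates pay for
one pair of barriers instead of one pair per entry:

    dsb();                          /* one barrier before the whole batch */
    for ( i = start; i < end; ++i )
    {
        __write_pte(&xen_second[i], new_entries[i]);
        __flush_xen_data_tlb_range_va(i << SECOND_SHIFT, 1 << SECOND_SHIFT);
    }
    dsb();                          /* one barrier after the batch */
    isb();

restore_vlpt() in the mm.c hunk below follows exactly this pattern.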

 

Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>

---

xen/arch/arm/domain.c            |   5 ++

xen/arch/arm/mm.c                | 112 +++++++++++++++++++++++++++++++++++++++

xen/include/asm-arm/arm32/page.h |  41 ++++++++++----

xen/include/asm-arm/config.h     |   5 ++

xen/include/asm-arm/domain.h     |   7 +++

xen/include/asm-arm/mm.h         |  15 ++++++

6 files changed, 174 insertions(+), 11 deletions(-)

 

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c

index cb0424d..c0b5dd8 100644

--- a/xen/arch/arm/domain.c

+++ b/xen/arch/arm/domain.c

@@ -509,6 +509,11 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)

     /* Default the virtual ID to match the physical */

     d->arch.vpidr = boot_cpu_data.midr.bits;

 

+    d->arch.dirty.second_lvl_start = 0;

+    d->arch.dirty.second_lvl_end = 0;

+    d->arch.dirty.second_lvl[0] = NULL;

+    d->arch.dirty.second_lvl[1] = NULL;

+

     clear_page(d->shared_info);

     share_xen_page_with_guest(

         virt_to_page(d->shared_info), d, XENSHARE_writable);

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c

index 3801f07..bf13993 100644

--- a/xen/arch/arm/mm.c

+++ b/xen/arch/arm/mm.c

@@ -1328,6 +1328,118 @@ void get_gma_start_end(struct domain *d, paddr_t *start, paddr_t *end)

         *end = GUEST_RAM_BASE + ((paddr_t) d->max_pages << PAGE_SHIFT);

}

 

+/* flush the vlpt area */

+void flush_vlpt(struct domain *d)

+{

+    int flush_size;

+    flush_size = (d->arch.dirty.second_lvl_end -

+                  d->arch.dirty.second_lvl_start) << SECOND_SHIFT;

+    /* flushing the 3rd level mapping */

+    flush_xen_data_tlb_range_va(d->arch.dirty.second_lvl_start << SECOND_SHIFT,

+                                flush_size);

+}

+

+/* restore the xen page table for vlpt mapping for domain d */

+void restore_vlpt(struct domain *d)

+{

+    int i;

+    dsb();

+    for ( i = d->arch.dirty.second_lvl_start;

+          i < d->arch.dirty.second_lvl_end;

+          ++i )

+    {

+        int k = i % LPAE_ENTRIES;

+        int l = i / LPAE_ENTRIES;

+

+        if ( xen_second[i].bits != d->arch.dirty.second_lvl[l][k].bits )

+        {

+            __write_pte(&xen_second[i], d->arch.dirty.second_lvl[l][k]);

+            __flush_xen_data_tlb_range_va(i << SECOND_SHIFT,

+                                          1 << SECOND_SHIFT);

+        }

+    }

+    dsb();

+    isb();

+}

+

+/* setting up the xen page table for vlpt mapping for domain d */

+int prepare_vlpt(struct domain *d)

+{

+    int xen_second_linear_base;

+    int gp2m_start_index, gp2m_end_index;

+    struct p2m_domain *p2m = &d->arch.p2m;

+    struct page_info *second_lvl_page;

+    paddr_t gma_start = 0;

+    paddr_t gma_end = 0;

+    lpae_t *first[2];

+    int i;

+    uint64_t required, avail = VIRT_LIN_P2M_END - VIRT_LIN_P2M_START;

+

+    get_gma_start_end(d, &gma_start, &gma_end);

+    required = (gma_end - gma_start) >> LPAE_SHIFT;

+

+

+    if ( required > avail )

+    {

+        dprintk(XENLOG_ERR, "Available VLPT is small for domU guest"

+                            "(avail: %llx, required: %llx)\n",

+                            avail, required);

+        return -ENOMEM;

+    }

+

+    xen_second_linear_base = second_linear_offset(VIRT_LIN_P2M_START);

+

+    gp2m_start_index = gma_start >> FIRST_SHIFT;

+    gp2m_end_index = (gma_end >> FIRST_SHIFT) + 1;

+

+    if ( xen_second_linear_base + gp2m_end_index >= LPAE_ENTRIES * 2 )

+    {

+        dprintk(XENLOG_ERR, "xen second page is small for VLPT for domU");

+        return -ENOMEM;

+    }

+

+    second_lvl_page = alloc_domheap_pages(NULL, 1, 0);

+    if ( second_lvl_page == NULL )

+        return -ENOMEM;

+

+    /* First level p2m is 2 consecutive pages */

+    d->arch.dirty.second_lvl[0] = map_domain_page_global(

+                                           page_to_mfn(second_lvl_page) );

+    d->arch.dirty.second_lvl[1] = map_domain_page_global(

+                                           page_to_mfn(second_lvl_page+1) );

+

+    first[0] = __map_domain_page(p2m->first_level);

+    first[1] = __map_domain_page(p2m->first_level+1);

+    for ( i = gp2m_start_index; i < gp2m_end_index; ++i )

+    {

+        int k = i % LPAE_ENTRIES;

+        int l = i / LPAE_ENTRIES;

+        int k2 = (xen_second_linear_base + i) % LPAE_ENTRIES;

+        int l2 = (xen_second_linear_base + i) / LPAE_ENTRIES;

+

+        write_pte(&xen_second[xen_second_linear_base+i], first[l][k]);

+

+        /* We copy the mapping into the domain's structure as a reference,

+         * so it can be restored on context switch (used in restore_vlpt) */

+        d->arch.dirty.second_lvl[l2][k2] = first[l][k];

+    }

+    unmap_domain_page(first[0]);

+    unmap_domain_page(first[1]);

+

+    /* storing the start and end index */

+    d->arch.dirty.second_lvl_start = xen_second_linear_base + gp2m_start_index;

+    d->arch.dirty.second_lvl_end = xen_second_linear_base + gp2m_end_index;

+

+    flush_vlpt(d);

+    return 0;

+}

+

+void cleanup_vlpt(struct domain *d)

+{

+    /* First level p2m is 2 consecutive pages */

+    unmap_domain_page_global(d->arch.dirty.second_lvl[0]);

+    unmap_domain_page_global(d->arch.dirty.second_lvl[1]);

+}

/*

  * Local variables:

  * mode: C

diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h

index cf12a89..0a4e115 100644

--- a/xen/include/asm-arm/arm32/page.h

+++ b/xen/include/asm-arm/arm32/page.h

@@ -5,20 +5,28 @@

 

/* Write a pagetable entry.

  *

- * If the table entry is changing a text mapping, it is responsibility

- * of the caller to issue an ISB after write_pte.

+ * All necessary barriers are the responsibility of the caller.

  */

-static inline void write_pte(lpae_t *p, lpae_t pte)

+static inline void __write_pte(lpae_t *p, lpae_t pte)

{

     asm volatile (

-        /* Ensure any writes have completed with the old mappings. */

-        "dsb;"

         /* Safely write the entry (STRD is atomic on CPUs that support LPAE) */

         "strd %0, %H0, [%1];"

-        "dsb;"

         : : "r" (pte.bits), "r" (p) : "memory");

}

 

+/* Write a pagetable entry with dsb barriers.

+ *

+ * If the table entry is changing a text mapping, it is the responsibility

+ * of the caller to issue an ISB after write_pte.

+ */

+static inline void write_pte(lpae_t *p, lpae_t pte)

+{

+    dsb();

+    __write_pte(p, pte);

+    dsb();

+}

+

/* Inline ASM to flush dcache on register R (may be an inline asm operand) */

#define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC)

 

@@ -57,18 +65,28 @@ static inline void flush_xen_data_tlb(void)

}

 

/*

- * Flush a range of VA's hypervisor mappings from the data TLB. This is not

- * sufficient when changing code mappings or for self modifying code.

+ * Flush a range of VA's hypervisor mappings from the data TLB.

+ * All necessary barriers are the responsibility of the caller.

  */

-static inline void flush_xen_data_tlb_range_va(unsigned long va, unsigned long size)

+static inline void __flush_xen_data_tlb_range_va(unsigned long va, unsigned long size)

{

     unsigned long end = va + size;

-    dsb(); /* Ensure preceding are visible */

     while ( va < end ) {

         asm volatile(STORE_CP32(0, TLBIMVAH)

                      : : "r" (va) : "memory");

         va += PAGE_SIZE;

     }

+}

+

+/*

+ * Flush a range of VA's hypervisor mappings from the data TLB with barriers.

+ * This is not sufficient when changing code mappings or for

+ * self-modifying code.

+ */

+static inline void flush_xen_data_tlb_range_va(unsigned long va, unsigned long size)

+{

+    dsb(); /* Ensure preceding are visible */

+    __flush_xen_data_tlb_range_va(va, size);

     dsb(); /* Ensure completion of the TLB flush */

     isb();

}

diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h

index 5b7b1a8..15ad56d 100644

--- a/xen/include/asm-arm/config.h

+++ b/xen/include/asm-arm/config.h

@@ -87,6 +87,7 @@

  *   0  -   8M   <COMMON>

  *

  *  32M - 128M   Frametable: 24 bytes per page for 16GB of RAM

+ * 128M - 256M   Virtual-linear mapping to P2M table

  * 256M -   1G   VMAP: ioremap and early_ioremap use this virtual address

  *                    space

  *

@@ -124,7 +125,9 @@

#define CONFIG_SEPARATE_XENHEAP 1

 

#define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)

+#define VIRT_LIN_P2M_START     _AT(vaddr_t,0x08000000)

#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)

+#define VIRT_LIN_P2M_END       VMAP_VIRT_START

#define XENHEAP_VIRT_START     _AT(vaddr_t,0x40000000)

#define XENHEAP_VIRT_END       _AT(vaddr_t,0x7fffffff)

#define DOMHEAP_VIRT_START     _AT(vaddr_t,0x80000000)

@@ -157,6 +160,8 @@

 

#define HYPERVISOR_VIRT_END    DIRECTMAP_VIRT_END

 

+/* TODO (ARM_64): define VIRT_LIN_P2M_START and VIRT_LIN_P2M_END */

+

#endif

 

/* Fixmap slots */

diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h

index 67bfbbc..4f366f1 100644

--- a/xen/include/asm-arm/domain.h

+++ b/xen/include/asm-arm/domain.h

@@ -112,6 +112,13 @@ struct arch_domain

         spinlock_t                  lock;

     } vuart;

 

+    /* dirty-page tracing */

+    struct {

+        volatile int second_lvl_start;   /* for context switch */

+        volatile int second_lvl_end;

+        lpae_t *second_lvl[2];           /* copy of guest p2m's first level */

+    } dirty;

+

}  __cacheline_aligned;

 

struct arch_vcpu

diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h

index c5cb3af..a74e135 100644

--- a/xen/include/asm-arm/mm.h

+++ b/xen/include/asm-arm/mm.h

@@ -342,6 +342,21 @@ static inline void put_page_and_type(struct page_info *page)

}

 

void get_gma_start_end(struct domain *d, paddr_t *start, paddr_t *end);

+int prepare_vlpt(struct domain *d);

+void cleanup_vlpt(struct domain *d);

+void restore_vlpt(struct domain *d);

+

+/* Calculate the xen virtual address for accessing the leaf PTE of

+ * a given guest physical address (GPA) */

+static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr)

+{

+    lpae_t *table = (lpae_t *)VIRT_LIN_P2M_START;

+

+    /* Since we slotted the guest's first-level p2m page table into

+     * xen's second-level page table, a single shift is enough to

+     * calculate the index of the guest p2m table entry */

+    return &table[addr >> PAGE_SHIFT];

+}

 

#endif /*  __ARCH_ARM_MM__ */

/*

--

1.8.1.2
