
[Xen-devel] [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory



This patch implements the following restore callbacks for colo:
1. flush_memory():
        We update the memory as follows (a condensed sketch of the
        flow is given after this list):
        a. pin non-dirty L1 pagetables
        b. unpin all pagetables except the non-dirty L1s
        c. update the memory
        d. pin the page tables
        e. unpin the non-dirty L1 pagetables
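
For orientation, here is a condensed, illustrative sketch (not part of
the patch itself) of how colo_flush_memory() sequences steps a-e.  The
helper names match the functions added below; error handling and the
vcpu reset taken on non-first iterations are omitted, and
flush_memory_sketch is a hypothetical name used only here:

    static int flush_memory_sketch(struct restore_data *comm_data,
                                   struct restore_colo_data *colo_data)
    {
        if (pin_l1(comm_data, colo_data))          /* (a) pin non-dirty L1 */
            return -1;
        if (unpin_pagetable(comm_data, colo_data)) /* (b) unpin the rest */
            return -1;
        if (update_memory(comm_data, colo_data))   /* (c) copy dirty pages */
            return -1;
        if (pin_pagetable(comm_data, colo_data))   /* (d) re-pin page tables */
            return -1;
        return unpin_l1(comm_data, colo_data);     /* (e) drop step (a) pins */
    }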

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/xc_domain_restore_colo.c |  372 ++++++++++++++++++++++++++++++++++
 tools/libxc/xc_save_restore_colo.h   |    1 +
 2 files changed, 373 insertions(+), 0 deletions(-)

diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 77b63b6..50009fa 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -152,3 +152,375 @@ char* colo_get_page(struct restore_data *comm_data, void *data,
     set_bit(pfn, colo_data->dirty_pages);
     return colo_data->pagebase + pfn * PAGE_SIZE;
 }
+
+/* Step 1:
+ *
+ * Pin the non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ *  mL1: L1 pages on the master side
+ *  sL1: L1 pages on the slave side
+ */
+static int pin_l1(struct restore_data *comm_data,
+                  struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* don't pin what is already pinned */
+                continue;
+
+            if (test_bit(i, dirty_pages))
+                /* don't pin dirty */
+                continue;
+
+            /* here, it must also be an L1 on the slave side, otherwise
+             * it would be dirty. (add test code?)
+             */
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 2:
+ *
+ * Unpin all pagetables except the non-dirty L1s:
+ * sL2 + sL3 + sL4 + (dirty_pages & sL1)
+ *  sL1: L1 pages on the slave side
+ *  sL2: L2 pages on the slave side
+ *  sL3: L3 pages on the slave side
+ *  sL4: L4 pages on the slave side
+ */
+static int unpin_pagetable(struct restore_data *comm_data,
+                           struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        if ( (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1), keep it */
+                continue;
+            /* fallthrough */
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to unpin batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to unpin batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 3:
+ *
+ * We have unpinned all pagetables except the non-dirty L1s, so it is
+ * now safe to map the dirty memory and update it.
+ */
+static int update_memory(struct restore_data *comm_data,
+                         struct restore_colo_data *colo_data)
+{
+    unsigned long pfn;
+    unsigned long max_mem_pfn = colo_data->max_mem_pfn;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    unsigned long pagetype;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    struct xc_mmu *mmu = comm_data->mmu;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+    char *pagebase = colo_data->pagebase;
+    int pfn_err = 0;
+    char *region_base_slaver;
+    xen_pfn_t region_mfn_slaver;
+    unsigned long mfn;
+    char *pagebuff;
+
+    for (pfn = 0; pfn < max_mem_pfn; pfn++) {
+        if (!test_bit(pfn, dirty_pages))
+            continue;
+
+        pagetype = pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
+            /* a bogus/unmapped page: skip it */
+            continue;
+
+        mfn = comm_data->p2m[pfn];
+        region_mfn_slaver = mfn;
+        region_base_slaver = xc_map_foreign_bulk(xch, dom,
+                                                 PROT_WRITE,
+                                                 &region_mfn_slaver,
+                                                 &pfn_err, 1);
+        if (!region_base_slaver || pfn_err) {
+            PERROR("update_memory: xc_map_foreign_bulk failed");
+            return 1;
+        }
+
+        pagebuff = (char *)(pagebase + pfn * PAGE_SIZE);
+        memcpy(region_base_slaver, pagebuff, PAGE_SIZE);
+        munmap(region_base_slaver, PAGE_SIZE);
+
+        if (xc_add_mmu_update(xch, mmu, (((uint64_t)mfn) << PAGE_SHIFT)
+                              | MMU_MACHPHYS_UPDATE, pfn) )
+        {
+            PERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
+            return 1;
+        }
+    }
+
+    /*
+     * Ensure we flush all machphys updates before the page tables are
+     * re-pinned below.
+     */
+    if (xc_flush_mmu_updates(xch, mmu))
+    {
+        PERROR("Error doing flush_mmu_updates()");
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 4:
+ *
+ * Pin the master's page tables. Do this after writing to them, as Xen
+ * will otherwise barf when doing the type-checking.
+ */
+static int pin_pagetable(struct restore_data *comm_data,
+                         struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for ( i = 0; i < dinfo->p2m_size; i++ )
+    {
+        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1) (= ~dirty_pages & sL1),
+                 * already pinned in step 1
+                 */
+                continue;
+
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 5:
+ *
+ * Unpin the no-longer-needed non-dirty L1 pagetables:
+ * ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ */
+static int unpin_l1(struct restore_data *comm_data,
+                    struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) /* still needed */
+                continue;
+            if (test_bit(i, dirty_pages)) /* not pinned by step 1 */
+                continue;
+
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+int colo_flush_memory(struct restore_data *comm_data, void *data)
+{
+    struct restore_colo_data *colo_data = data;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    DECLARE_HYPERCALL;
+
+    if (!colo_data->first_time)
+    {
+        /* reset the vcpu state */
+        hypercall.op = __HYPERVISOR_reset_vcpu_op;
+        hypercall.arg[0] = (unsigned long)dom;
+        do_xen_hypercall(xch, &hypercall);
+    }
+
+    if (pin_l1(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+
+    if (update_memory(comm_data, colo_data) != 0)
+        return -1;
+
+    if (pin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_l1(comm_data, colo_data) != 0)
+        return -1;
+
+    memcpy(colo_data->pfn_type_slaver, comm_data->pfn_type,
+           comm_data->dinfo->p2m_size * sizeof(xen_pfn_t));
+
+    return 0;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 67c567c..8af75b4 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -7,5 +7,6 @@
 extern int colo_init(struct restore_data *, void **);
 extern void colo_free(struct restore_data *, void *);
 extern char *colo_get_page(struct restore_data *, void *, unsigned long);
+extern int colo_flush_memory(struct restore_data *, void *);
 
 #endif
-- 
1.7.4




 

