[Xen-devel] [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory
This patch implements restore callbacks for colo:

1. flush_memory(): We update the memory as follows:
   a. pin the non-dirty L1 pagetables
   b. unpin all pagetables except the non-dirty L1 pagetables
   c. update the memory
   d. pin the page tables
   e. unpin the non-dirty L1 pagetables

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/xc_domain_restore_colo.c | 372 ++++++++++++++++++++++++++++++++++
 tools/libxc/xc_save_restore_colo.h   |   1 +
 2 files changed, 373 insertions(+), 0 deletions(-)

diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 77b63b6..50009fa 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -152,3 +152,375 @@ char* colo_get_page(struct restore_data *comm_data, void *data,
     set_bit(pfn, colo_data->dirty_pages);
     return colo_data->pagebase + pfn * PAGE_SIZE;
 }
+
+/* Step 1:
+ *
+ * Pin the non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ * mL1: L1 pages on the master side
+ * sL1: L1 pages on the slaver side
+ */
+static int pin_l1(struct restore_data *comm_data,
+                  struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* don't pin an already pinned page */
+                continue;
+
+            if (test_bit(i, dirty_pages))
+                /* don't pin dirty pages */
+                continue;
+
+            /* Here it must also be an L1 on the slaver side, otherwise
+             * it would be dirty. (add test code?)
+             */
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush the final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
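+
+/* Ordering note: step 1 pins the non-dirty L1 tables *before* step 2
+ * unpins everything else, so these L1 pages keep a type reference while
+ * the higher-level tables pointing at them are unpinned.  The intent
+ * (as suggested by the step comments) is to spare Xen from revalidating
+ * unchanged L1 tables when the new page tables are pinned in step 4.
+ */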
+
+/* Step 2:
+ *
+ * Unpin all pagetables except the non-dirty L1 tables:
+ * sL2 + sL3 + sL4 + (dirty_pages & sL1)
+ * sL1: L1 pages on the slaver side
+ * sL2: L2 pages on the slaver side
+ * sL3: L3 pages on the slaver side
+ * sL4: L4 pages on the slaver side
+ */
+static int unpin_pagetable(struct restore_data *comm_data,
+                           struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        if ( (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1), keep it pinned */
+                continue;
+            /* fallthrough */
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to unpin batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush the final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to unpin batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
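+
+/* The MMU_MACHPHYS_UPDATE requests issued below are only queued by
+ * xc_add_mmu_update(); they take effect when xc_flush_mmu_updates()
+ * runs at the end of update_memory().
+ */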
+ */ + if (xc_flush_mmu_updates(xch, mmu)) + { + PERROR("Error doing flush_mmu_updates()"); + return 1; + } + + return 0; +} + +/* Step 4: pin master pt + * Pin page tables. Do this after writing to them as otherwise Xen + * will barf when doing the type-checking. + */ +static int pin_pagetable(struct restore_data *comm_data, + struct restore_colo_data *colo_data) +{ + unsigned int nr_pins = 0; + unsigned long i; + struct mmuext_op pin[MAX_PIN_BATCH]; + struct domain_info_context *dinfo = comm_data->dinfo; + unsigned long *pfn_type = comm_data->pfn_type; + uint32_t dom = comm_data->dom; + xc_interface *xch = comm_data->xch; + unsigned long *dirty_pages = colo_data->dirty_pages; + + for ( i = 0; i < dinfo->p2m_size; i++ ) + { + if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) + continue; + + switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + if (!test_bit(i, dirty_pages)) + /* it is in (~dirty_pages & mL1)(=~dirty_pages & sL1), + * already pined + */ + continue; + + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L2TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L3TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L4TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; + + default: + continue; + } + + pin[nr_pins].arg1.mfn = comm_data->p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if (nr_pins == MAX_PIN_BATCH) + { + if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + return 1; + } + nr_pins = 0; + } + } + + /* Flush final partial batch. */ + if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + return 1; + } + + return 0; +} + +/* Step5: + * unpin unneeded non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1) + */ +static int unpin_l1(struct restore_data *comm_data, + struct restore_colo_data *colo_data) +{ + unsigned int nr_pins = 0; + unsigned long i; + struct mmuext_op pin[MAX_PIN_BATCH]; + struct domain_info_context *dinfo = comm_data->dinfo; + unsigned long *pfn_type = comm_data->pfn_type; + uint32_t dom = comm_data->dom; + xc_interface *xch = comm_data->xch; + unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver; + unsigned long *dirty_pages = colo_data->dirty_pages; + + for (i = 0; i < dinfo->p2m_size; i++) + { + switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) // still needed + continue; + if (test_bit(i, dirty_pages)) // not pined by step 1 + continue; + + pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L2TAB: + case XEN_DOMCTL_PFINFO_L3TAB: + case XEN_DOMCTL_PFINFO_L4TAB: + default: + continue; + } + + pin[nr_pins].arg1.mfn = comm_data->p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if (nr_pins == MAX_PIN_BATCH) + { + if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) + { + PERROR("Failed to pin L1 batch of %d page tables", nr_pins); + return 1; + } + nr_pins = 0; + } + } + + /* Flush final partial batch. 
+
+/* Step 5:
+ *
+ * Unpin the no longer needed non-dirty L1 pagetables:
+ * ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ */
+static int unpin_l1(struct restore_data *comm_data,
+                    struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* still needed */
+                continue;
+            if (test_bit(i, dirty_pages))
+                /* not pinned by step 1 */
+                continue;
+
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush the final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+int colo_flush_memory(struct restore_data *comm_data, void *data)
+{
+    struct restore_colo_data *colo_data = data;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    DECLARE_HYPERCALL;
+
+    if (!colo_data->first_time)
+    {
+        /* reset the vcpus */
+        hypercall.op = __HYPERVISOR_reset_vcpu_op;
+        hypercall.arg[0] = (unsigned long)dom;
+        do_xen_hypercall(xch, &hypercall);
+    }
+
+    if (pin_l1(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+
+    if (update_memory(comm_data, colo_data) != 0)
+        return -1;
+
+    if (pin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_l1(comm_data, colo_data) != 0)
+        return -1;
+
+    memcpy(colo_data->pfn_type_slaver, comm_data->pfn_type,
+           comm_data->dinfo->p2m_size * sizeof(xen_pfn_t));
+
+    return 0;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 67c567c..8af75b4 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -7,5 +7,6 @@
 extern int colo_init(struct restore_data *, void **);
 extern void colo_free(struct restore_data *, void *);
 extern char *colo_get_page(struct restore_data *, void *, unsigned long);
+extern int colo_flush_memory(struct restore_data *, void *);
 
 #endif
--
1.7.4
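For context, here is a minimal sketch of how the restore side might drive
this callback once per checkpoint. The callback table and its registration
are introduced in other patches of this series, so the restore_callbacks
layout and the apply_checkpoint() driver below are illustrative assumptions,
not the series' actual code:

    /* Illustrative sketch only: names and struct layout are assumed. */
    #include <stddef.h>

    struct restore_data;            /* defined elsewhere in the series */

    struct restore_callbacks {
        /* buffer one received page locally (see colo_get_page() above) */
        char *(*get_page)(struct restore_data *comm_data, void *data,
                          unsigned long pfn);
        /* push all buffered dirty pages into the slaver domain */
        int (*flush_memory)(struct restore_data *comm_data, void *data);
    };

    static int apply_checkpoint(struct restore_data *comm_data,
                                struct restore_callbacks *cb, void *data)
    {
        /* ... read the stream, calling cb->get_page(comm_data, data, pfn)
         * for every page the master sent ... */

        /* then update the guest in one pass: pin/unpin the page tables,
         * copy the dirty pages, and flush the machphys updates */
        if (cb->flush_memory(comm_data, data) != 0)
            return -1;

        return 0;
    }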