[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [IA64] live migration
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 86e5d8458c08d057bacd7c578bfa84a219b3d461 # Parent e585c2dade143d171fb589e5a7a33b6c1fa137a9 [IA64] live migration Shadow mode and live migration. Virtualize Dirty bit. Signed-off-by: Tristan Gingold <tristan.gingold@xxxxxxxx> --- tools/libxc/ia64/xc_ia64_linux_restore.c | 2 tools/libxc/ia64/xc_ia64_linux_save.c | 314 ++++++++++++++++++++++----- xen/arch/ia64/asm-offsets.c | 5 xen/arch/ia64/xen/dom0_ops.c | 14 + xen/arch/ia64/xen/domain.c | 163 ++++++++++++-- xen/arch/ia64/xen/faults.c | 91 +++++++ xen/arch/ia64/xen/ivt.S | 43 +++ xen/arch/ia64/xen/mm.c | 20 + xen/arch/ia64/xen/privop.c | 3 xen/arch/ia64/xen/vhpt.c | 2 xen/include/asm-ia64/domain.h | 13 + xen/include/asm-ia64/linux-xen/asm/pgtable.h | 5 xen/include/asm-ia64/shadow.h | 18 + xen/include/asm-ia64/tlbflush.h | 4 14 files changed, 623 insertions(+), 74 deletions(-) diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Jul 26 09:02:43 2006 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Jul 26 09:36:36 2006 -0600 @@ -163,7 +163,7 @@ xc_linux_restore(int xc_handle, int io_f pfn = page_array[mfn]; - DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn); + //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn); if (read_page(xc_handle, io_fd, dom, page_array[mfn]) < 0) goto out; diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Wed Jul 26 09:02:43 2006 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Wed Jul 26 09:36:36 2006 -0600 @@ -15,8 +15,72 @@ #include "xg_private.h" +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_linux_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, and to skip. +*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + /* total number of pages used by the current guest */ static unsigned long max_pfn; + +static int xc_ia64_shadow_control(int xc_handle, + uint32_t domid, + unsigned int sop, + unsigned long *dirty_bitmap, + unsigned long pages, + xc_shadow_control_stats_t *stats) +{ + if (dirty_bitmap != NULL && pages > 0) { + int i; + unsigned char *bmap = (unsigned char *)dirty_bitmap; + unsigned long bmap_bytes = + ((pages + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1)) / 8; + unsigned int bmap_pages = (bmap_bytes + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Touch the page so that it is in the TC. + FIXME: use a more reliable method. */ + for (i = 0 ; i < bmap_pages ; i++) + bmap[i * PAGE_SIZE] = 0; + /* Because bmap is not page aligned (allocated by malloc), be sure the + last page is touched. */ + bmap[bmap_bytes - 1] = 0; + } + + return xc_shadow_control(xc_handle, domid, sop, + dirty_bitmap, pages, stats); +} static inline ssize_t write_exact(int fd, void *buf, size_t count) @@ -77,10 +141,10 @@ xc_linux_save(int xc_handle, int io_fd, xc_dominfo_t info; int rc = 1; - unsigned long N; //int live = (flags & XCFLAGS_LIVE); int debug = (flags & XCFLAGS_DEBUG); + int live = (flags & XCFLAGS_LIVE); /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; @@ -93,10 +157,38 @@ xc_linux_save(int xc_handle, int io_fd, /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; + /* Iteration number. */ + int iter; + + /* Number of pages sent in the last iteration (live only). */ + unsigned int sent_last_iter; + + /* Number of pages sent (live only). */ + unsigned int total_sent; + + /* Size of the shadow bitmap (live only). */ + unsigned int bitmap_size = 0; + + /* True if last iteration. */ + int last_iter; + + /* Bitmap of pages to be sent. */ + unsigned long *to_send = NULL; + /* Bitmap of pages not to be sent (because dirtied). */ + unsigned long *to_skip = NULL; + char *mem; if (debug) fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom); + + /* If no explicit control parameters given, use defaults */ + if (!max_iters) + max_iters = DEF_MAX_ITERS; + if (!max_factor) + max_factor = DEF_MAX_FACTOR; + + //initialize_mbit_rate(); if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { ERR("Could not get domain info"); @@ -124,24 +216,9 @@ xc_linux_save(int xc_handle, int io_fd, max_pfn = info.max_memkb >> (PAGE_SHIFT - 10); - - /* This is a non-live suspend. Issue the call back to get the - domain suspended */ - - if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { - ERR("Domain appears not to have suspended"); - goto out; - } - page_array = malloc(max_pfn * sizeof(unsigned long)); if (page_array == NULL) { ERR("Could not allocate memory"); - goto out; - } - - if (xc_ia64_get_pfn_list(xc_handle, dom, page_array, - 0, max_pfn) != max_pfn) { - ERR("Could not get the page frame list"); goto out; } @@ -156,10 +233,13 @@ xc_linux_save(int xc_handle, int io_fd, if the format change. The version is hard-coded, don't forget to change the restore code too! */ - N = 1; - if (!write_exact(io_fd, &N, sizeof(unsigned long))) { - ERR("write: version"); - goto out; + { + unsigned long version = 1; + + if (!write_exact(io_fd, &version, sizeof(unsigned long))) { + ERR("write: version"); + goto out; + } } op.cmd = DOM0_DOMAIN_SETUP; @@ -175,39 +255,165 @@ xc_linux_save(int xc_handle, int io_fd, goto out; } - /* Start writing out the saved-domain record. */ - for (N = 0; N < max_pfn; N++) { - if (page_array[N] == INVALID_MFN) - continue; - if (debug) - fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n", - page_array[N], N, max_pfn); - - if (!write_exact(io_fd, &N, sizeof(N))) { - ERR("write: max_pfn"); - goto out; - } - - mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ|PROT_WRITE, page_array[N]); - if (mem == NULL) { - ERR("cannot map page"); - goto out; - } - if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { - ERR("Error when writing to state file (5)"); - goto out; - } - munmap(mem, PAGE_SIZE); + /* Domain is still running at this point */ + if (live) { + + if (xc_ia64_shadow_control(xc_handle, dom, + DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL ) < 0) { + ERR("Couldn't enable shadow mode"); + goto out; + } + + last_iter = 0; + + bitmap_size = ((max_pfn + BITS_PER_LONG-1) & ~(BITS_PER_LONG-1)) / 8; + to_send = malloc(bitmap_size); + to_skip = malloc(bitmap_size); + + if (!to_send || !to_skip) { + ERR("Couldn't allocate bitmap array"); + goto out; + } + + /* Initially all the pages must be sent. */ + memset(to_send, 0xff, bitmap_size); + + if (mlock(to_send, bitmap_size)) { + ERR("Unable to mlock to_send"); + goto out; + } + if (mlock(to_skip, bitmap_size)) { + ERR("Unable to mlock to_skip"); + goto out; + } + + } else { + + /* This is a non-live suspend. Issue the call back to get the + domain suspended */ + + last_iter = 1; + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { + ERR("Domain appears not to have suspended"); + goto out; + } + + } + + sent_last_iter = max_pfn; + total_sent = 0; + + for (iter = 1; ; iter++) { + unsigned int sent_this_iter, skip_this_iter; + unsigned long N; + + sent_this_iter = 0; + skip_this_iter = 0; + + /* Get the pfn list, as it may change. */ + if (xc_ia64_get_pfn_list(xc_handle, dom, page_array, + 0, max_pfn) != max_pfn) { + ERR("Could not get the page frame list"); + goto out; + } + + /* Dirtied pages won't be saved. + slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + if (!last_iter) { + if (xc_ia64_shadow_control(xc_handle, dom, + DOM0_SHADOW_CONTROL_OP_PEEK, + to_skip, max_pfn, NULL) != max_pfn) { + ERR("Error peeking shadow bitmap"); + goto out; + } + } + + /* Start writing out the saved-domain record. */ + for (N = 0; N < max_pfn; N++) { + if (page_array[N] == INVALID_MFN) + continue; + if (!last_iter) { + if (test_bit(N, to_skip) && test_bit(N, to_send)) + skip_this_iter++; + if (test_bit(N, to_skip) || !test_bit(N, to_send)) + continue; + } + + if (debug) + fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n", + page_array[N], N, max_pfn); + + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, page_array[N]); + if (mem == NULL) { + /* The page may have move. + It will be remarked dirty. + FIXME: to be tracked. */ + fprintf(stderr, "cannot map page %lx: %s\n", + page_array[N], strerror (errno)); + continue; + } + + if (!write_exact(io_fd, &N, sizeof(N))) { + ERR("write: max_pfn"); + goto out; + } + + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERR("Error when writing to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); + sent_this_iter++; + total_sent++; + } + + if (last_iter) + break; + + DPRINTF(" %d: sent %d, skipped %d\n", + iter, sent_this_iter, skip_this_iter ); + + if (live) { + if ( /* ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || */ + (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) || + (total_sent > max_pfn*max_factor)) { + DPRINTF("Start last iteration\n"); + last_iter = 1; + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { + ERR("Domain appears not to have suspended"); + goto out; + } + } + + /* Pages to be sent are pages which were dirty. */ + if (xc_ia64_shadow_control(xc_handle, dom, + DOM0_SHADOW_CONTROL_OP_CLEAN, + to_send, max_pfn, NULL ) != max_pfn) { + ERR("Error flushing shadow PT"); + goto out; + } + + sent_last_iter = sent_this_iter; + + //print_stats(xc_handle, dom, sent_this_iter, &stats, 1); + } + } fprintf (stderr, "All memory is saved\n"); /* terminate */ - N = INVALID_MFN; - if (!write_exact(io_fd, &N, sizeof(N))) { - ERR("Error when writing to state file (6)"); - goto out; + { + unsigned long pfn = INVALID_MFN; + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERR("Error when writing to state file (6)"); + goto out; + } } /* Send through a list of all the PFNs that were not in map at the close */ @@ -274,8 +480,16 @@ xc_linux_save(int xc_handle, int io_fd, out: - free (page_array); - + if (live) { + if (xc_ia64_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, + NULL, 0, NULL ) < 0) { + DPRINTF("Warning - couldn't disable shadow mode"); + } + } + + free(page_array); + free(to_send); + free(to_skip); if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/asm-offsets.c Wed Jul 26 09:36:36 2006 -0600 @@ -65,6 +65,11 @@ void foo(void) DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb)); BLANK(); + + DEFINE(IA64_DOMAIN_SHADOW_BITMAP_OFFSET, offsetof (struct domain, arch.shadow_bitmap)); + + BLANK(); + DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next)); DEFINE(IA64_CPUINFO_KSOFTIRQD_OFFSET, offsetof (struct cpuinfo_ia64, ksoftirqd)); diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/dom0_ops.c Wed Jul 26 09:36:36 2006 -0600 @@ -265,6 +265,20 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ } break; + case DOM0_SHADOW_CONTROL: + { + struct domain *d; + ret = -ESRCH; + d = find_domain_by_id(op->u.shadow_control.domain); + if ( d != NULL ) + { + ret = shadow_mode_control(d, &op->u.shadow_control); + put_domain(d); + copy_to_guest(u_dom0_op, op, 1); + } + } + break; + default: printf("arch_do_dom0_op: unrecognized dom0 op: %d!!!\n",op->cmd); ret = -ENOSYS; diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/domain.c Wed Jul 26 09:36:36 2006 -0600 @@ -25,26 +25,15 @@ #include <xen/mm.h> #include <xen/iocap.h> #include <asm/asm-xsi-offsets.h> -#include <asm/ptrace.h> #include <asm/system.h> #include <asm/io.h> #include <asm/processor.h> -#include <asm/desc.h> -#include <asm/hw_irq.h> -#include <asm/setup.h> -//#include <asm/mpspec.h> -#include <xen/irq.h> #include <xen/event.h> -//#include <xen/shadow.h> #include <xen/console.h> #include <xen/compile.h> - #include <xen/elf.h> -//#include <asm/page.h> #include <asm/pgalloc.h> - #include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */ - #include <asm/vcpu.h> /* for function declarations */ #include <public/arch-ia64.h> #include <xen/domain.h> @@ -52,13 +41,12 @@ #include <asm/vmx_vcpu.h> #include <asm/vmx_vpd.h> #include <asm/vmx_phy_mode.h> -#include <asm/pal.h> #include <asm/vhpt.h> -#include <public/hvm/ioreq.h> #include <public/arch-ia64.h> #include <asm/tlbflush.h> #include <asm/regionreg.h> #include <asm/dom_fw.h> +#include <asm/shadow.h> #include <asm/privop_stat.h> #ifndef CONFIG_XEN_IA64_DOM0_VP @@ -388,8 +376,11 @@ void arch_domain_destroy(struct domain * BUG_ON(d->arch.mm.pgd != NULL); if (d->shared_info != NULL) free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT)); - - domain_flush_destroy (d); + if (d->arch.shadow_bitmap != NULL) + xfree(d->arch.shadow_bitmap); + + /* Clear vTLB for the next domain. */ + domain_flush_tlb_vhpt(d); deallocate_rid_range(d); } @@ -594,6 +585,148 @@ domain_set_shared_info_va (unsigned long return 0; } +/* Transfer and clear the shadow bitmap in 1kB chunks for L1 cache. */ +#define SHADOW_COPY_CHUNK (1024 / sizeof (unsigned long)) + +int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) +{ + unsigned int op = sc->op; + int rc = 0; + int i; + //struct vcpu *v; + + if (unlikely(d == current->domain)) { + DPRINTK("Don't try to do a shadow op on yourself!\n"); + return -EINVAL; + } + + domain_pause(d); + + switch (op) + { + case DOM0_SHADOW_CONTROL_OP_OFF: + if (shadow_mode_enabled (d)) { + u64 *bm = d->arch.shadow_bitmap; + + /* Flush vhpt and tlb to restore dirty bit usage. */ + domain_flush_tlb_vhpt(d); + + /* Free bitmap. */ + d->arch.shadow_bitmap_size = 0; + d->arch.shadow_bitmap = NULL; + xfree(bm); + } + break; + + case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST: + case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE: + rc = -EINVAL; + break; + + case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY: + if (shadow_mode_enabled(d)) { + rc = -EINVAL; + break; + } + + atomic64_set(&d->arch.shadow_fault_count, 0); + atomic64_set(&d->arch.shadow_dirty_count, 0); + + d->arch.shadow_bitmap_size = (d->max_pages + BITS_PER_LONG-1) & + ~(BITS_PER_LONG-1); + d->arch.shadow_bitmap = xmalloc_array(unsigned long, + d->arch.shadow_bitmap_size / BITS_PER_LONG); + if (d->arch.shadow_bitmap == NULL) { + d->arch.shadow_bitmap_size = 0; + rc = -ENOMEM; + } + else { + memset(d->arch.shadow_bitmap, 0, + d->arch.shadow_bitmap_size / 8); + + /* Flush vhtp and tlb to enable dirty bit + virtualization. */ + domain_flush_tlb_vhpt(d); + } + break; + + case DOM0_SHADOW_CONTROL_OP_FLUSH: + atomic64_set(&d->arch.shadow_fault_count, 0); + atomic64_set(&d->arch.shadow_dirty_count, 0); + break; + + case DOM0_SHADOW_CONTROL_OP_CLEAN: + { + int nbr_longs; + + sc->stats.fault_count = atomic64_read(&d->arch.shadow_fault_count); + sc->stats.dirty_count = atomic64_read(&d->arch.shadow_dirty_count); + + atomic64_set(&d->arch.shadow_fault_count, 0); + atomic64_set(&d->arch.shadow_dirty_count, 0); + + if (guest_handle_is_null(sc->dirty_bitmap) || + (d->arch.shadow_bitmap == NULL)) { + rc = -EINVAL; + break; + } + + if (sc->pages > d->arch.shadow_bitmap_size) + sc->pages = d->arch.shadow_bitmap_size; + + nbr_longs = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG; + + for (i = 0; i < nbr_longs; i += SHADOW_COPY_CHUNK) { + int size = (nbr_longs - i) > SHADOW_COPY_CHUNK ? + SHADOW_COPY_CHUNK : nbr_longs - i; + + if (copy_to_guest_offset(sc->dirty_bitmap, i, + d->arch.shadow_bitmap + i, + size)) { + rc = -EFAULT; + break; + } + + memset(d->arch.shadow_bitmap + i, + 0, size * sizeof(unsigned long)); + } + + break; + } + + case DOM0_SHADOW_CONTROL_OP_PEEK: + { + unsigned long size; + + sc->stats.fault_count = atomic64_read(&d->arch.shadow_fault_count); + sc->stats.dirty_count = atomic64_read(&d->arch.shadow_dirty_count); + + if (guest_handle_is_null(sc->dirty_bitmap) || + (d->arch.shadow_bitmap == NULL)) { + rc = -EINVAL; + break; + } + + if (sc->pages > d->arch.shadow_bitmap_size) + sc->pages = d->arch.shadow_bitmap_size; + + size = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG; + if (copy_to_guest(sc->dirty_bitmap, + d->arch.shadow_bitmap, size)) { + rc = -EFAULT; + break; + } + break; + } + default: + rc = -EINVAL; + break; + } + + domain_unpause(d); + + return rc; +} // remove following line if not privifying in memory //#define HAVE_PRIVIFY_MEMORY diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/faults.c --- a/xen/arch/ia64/xen/faults.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/faults.c Wed Jul 26 09:36:36 2006 -0600 @@ -1,4 +1,3 @@ - /* * Miscellaneous process/domain related routines * @@ -29,6 +28,7 @@ #include <asm/bundle.h> #include <asm/privop_stat.h> #include <asm/asm-xsi-offsets.h> +#include <asm/shadow.h> extern void die_if_kernel(char *str, struct pt_regs *regs, long err); /* FIXME: where these declarations shold be there ? */ @@ -648,3 +648,92 @@ ia64_handle_reflection (unsigned long if reflect_interruption(isr,regs,vector); } +void +ia64_shadow_fault(unsigned long ifa, unsigned long itir, + unsigned long isr, struct pt_regs *regs) +{ + struct vcpu *v = current; + struct domain *d = current->domain; + unsigned long gpfn; + unsigned long pte = 0; + struct vhpt_lf_entry *vlfe; + + /* There are 2 jobs to do: + - marking the page as dirty (the metaphysical address must be + extracted to do that). + - reflecting or not the fault (the virtual Dirty bit must be + extracted to decide). + Unfortunatly these informations are not immediatly available! + */ + + /* Extract the metaphysical address. + Try to get it from VHPT and M2P as we need the flags. */ + vlfe = (struct vhpt_lf_entry *)ia64_thash(ifa); + pte = vlfe->page_flags; + if (vlfe->ti_tag == ia64_ttag(ifa)) { + /* The VHPT entry is valid. */ + gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT); + BUG_ON(gpfn == INVALID_M2P_ENTRY); + } + else { + unsigned long itir, iha; + IA64FAULT fault; + + /* The VHPT entry is not valid. */ + vlfe = NULL; + + /* FIXME: gives a chance to tpa, as the TC was valid. */ + + fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha); + + /* Try again! */ + if (fault != IA64_NO_FAULT) { + /* This will trigger a dtlb miss. */ + ia64_ptcl(ifa, PAGE_SHIFT << 2); + return; + } + gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT); + if (pte & _PAGE_D) + pte |= _PAGE_VIRT_D; + } + + /* Set the dirty bit in the bitmap. */ + shadow_mark_page_dirty (d, gpfn); + + /* Update the local TC/VHPT and decides wether or not the fault should + be reflected. + SMP note: we almost ignore the other processors. The shadow_bitmap + has been atomically updated. If the dirty fault happen on another + processor, it will do its job. + */ + + if (pte != 0) { + /* We will know how to handle the fault. */ + + if (pte & _PAGE_VIRT_D) { + /* Rewrite VHPT entry. + There is no race here because only the + cpu VHPT owner can write page_flags. */ + if (vlfe) + vlfe->page_flags = pte | _PAGE_D; + + /* Purge the TC locally. + It will be reloaded from the VHPT iff the + VHPT entry is still valid. */ + ia64_ptcl(ifa, PAGE_SHIFT << 2); + + atomic64_inc(&d->arch.shadow_fault_count); + } + else { + /* Reflect. + In this case there is no need to purge. */ + ia64_handle_reflection(ifa, regs, isr, 0, 8); + } + } + else { + /* We don't know wether or not the fault must be + reflected. The VHPT entry is not valid. */ + /* FIXME: in metaphysical mode, we could do an ITC now. */ + ia64_ptcl(ifa, PAGE_SHIFT << 2); + } +} diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/ivt.S --- a/xen/arch/ia64/xen/ivt.S Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/ivt.S Wed Jul 26 09:36:36 2006 -0600 @@ -746,7 +746,48 @@ ENTRY(dirty_bit) ENTRY(dirty_bit) DBG_FAULT(8) #ifdef XEN - FAULT_OR_REFLECT(8) + mov r20=cr.ipsr + mov r31=pr;; + extr.u r20=r20,IA64_PSR_CPL0_BIT,2;; + mov r19=8 /* prepare to save predicates */ + cmp.eq p6,p0=r0,r20 /* cpl == 0?*/ +(p6) br.sptk.few dispatch_to_fault_handler + /* If shadow mode is not enabled, reflect the fault. */ + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET + ;; + ld8 r22=[r22] + ;; + add r22=IA64_VCPU_DOMAIN_OFFSET,r22 + ;; + /* Read domain. */ + ld8 r22=[r22] + ;; + add r22=IA64_DOMAIN_SHADOW_BITMAP_OFFSET,r22 + ;; + ld8 r22=[r22] + ;; + cmp.eq p6,p0=r0,r22 /* !shadow_bitmap ?*/ +(p6) br.dptk.many dispatch_reflection + + SAVE_MIN_WITH_COVER + alloc r14=ar.pfs,0,0,4,0 + mov out0=cr.ifa + mov out1=cr.itir + mov out2=cr.isr + adds out3=16,sp + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_shadow_fault #else /* * What we do here is to simply turn on the dirty bit in the PTE. We need to diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/mm.c Wed Jul 26 09:36:36 2006 -0600 @@ -170,6 +170,7 @@ #include <asm/pgalloc.h> #include <asm/vhpt.h> #include <asm/vcpu.h> +#include <asm/shadow.h> #include <linux/efi.h> #ifndef CONFIG_XEN_IA64_DOM0_VP @@ -470,7 +471,7 @@ u64 translate_domain_pte(u64 pteval, u64 pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits pteval2 |= (pteval & _PAGE_ED); pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) - pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; + pteval2 |= (pteval & ~_PAGE_PPN_MASK); /* * Don't let non-dom0 domains map uncached addresses. This can * happen when domU tries to touch i/o port space. Also prevents @@ -481,6 +482,18 @@ u64 translate_domain_pte(u64 pteval, u64 */ if (d != dom0 && (pteval2 & _PAGE_MA_MASK) != _PAGE_MA_NAT) pteval2 &= ~_PAGE_MA_MASK; + + /* If shadow mode is enabled, virtualize dirty bit. */ + if (shadow_mode_enabled(d) && (pteval2 & _PAGE_D)) { + u64 mp_page = mpaddr >> PAGE_SHIFT; + pteval2 |= _PAGE_VIRT_D; + + /* If the page is not already dirty, don't set the dirty bit. + This is a small optimization! */ + if (mp_page < d->arch.shadow_bitmap_size * 8 + && !test_bit(mp_page, d->arch.shadow_bitmap)) + pteval2 = (pteval2 & ~_PAGE_D); + } return pteval2; } @@ -1418,10 +1431,13 @@ guest_physmap_remove_page(struct domain //XXX sledgehammer. // flush finer range. -void +static void domain_page_flush(struct domain* d, unsigned long mpaddr, unsigned long old_mfn, unsigned long new_mfn) { + if (shadow_mode_enabled(d)) + shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT); + domain_flush_vtlb_all(); } diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/privop.c --- a/xen/arch/ia64/xen/privop.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/privop.c Wed Jul 26 09:36:36 2006 -0600 @@ -686,7 +686,8 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN (void)vcpu_increment_iip(vcpu); } if (fault == IA64_ILLOP_FAULT) - printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr); + printf("priv_emulate: priv_handle_op fails, " + "isr=0x%lx iip=%lx\n",isr, regs->cr_iip); return fault; } diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/arch/ia64/xen/vhpt.c Wed Jul 26 09:36:36 2006 -0600 @@ -236,7 +236,7 @@ static void flush_tlb_vhpt_all (struct d local_flush_tlb_all (); } -void domain_flush_destroy (struct domain *d) +void domain_flush_tlb_vhpt(struct domain *d) { /* Very heavy... */ on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/include/asm-ia64/domain.h Wed Jul 26 09:36:36 2006 -0600 @@ -48,6 +48,9 @@ extern unsigned long domain_set_shared_i If sync_only is true, only synchronize I&D caches, if false, flush and invalidate caches. */ extern void domain_cache_flush (struct domain *d, int sync_only); + +/* Control the shadow mode. */ +extern int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc); /* Cleanly crash the current domain with a message. */ extern void panic_domain(struct pt_regs *, const char *, ...) @@ -117,6 +120,16 @@ struct arch_domain { /* Address of fpswa_interface_t (placed in domain memory) */ void *fpswa_inf; + /* Bitmap of shadow dirty bits. + Set iff shadow mode is enabled. */ + u64 *shadow_bitmap; + /* Length (in bits!) of shadow bitmap. */ + unsigned long shadow_bitmap_size; + /* Number of bits set in bitmap. */ + atomic64_t shadow_dirty_count; + /* Number of faults. */ + atomic64_t shadow_fault_count; + struct last_vcpu last_vcpu[NR_CPUS]; }; #define INT_ENABLE_OFFSET(v) \ diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/linux-xen/asm/pgtable.h --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Jul 26 09:36:36 2006 -0600 @@ -62,7 +62,12 @@ #define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ #define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) #define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ +#ifdef XEN +#define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */ +#define _PAGE_PROTNONE 0 +#else #define _PAGE_PROTNONE (__IA64_UL(1) << 63) +#endif /* Valid only for a PTE with the present bit cleared: */ #define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */ diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/shadow.h --- a/xen/include/asm-ia64/shadow.h Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/include/asm-ia64/shadow.h Wed Jul 26 09:36:36 2006 -0600 @@ -45,6 +45,24 @@ void guest_physmap_remove_page(struct do void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn); #endif +static inline int +shadow_mode_enabled(struct domain *d) +{ + return d->arch.shadow_bitmap != NULL; +} + +static inline int +shadow_mark_page_dirty(struct domain *d, unsigned long gpfn) +{ + if (gpfn < d->arch.shadow_bitmap_size * 8 + && !test_and_set_bit(gpfn, d->arch.shadow_bitmap)) { + /* The page was not dirty. */ + atomic64_inc(&d->arch.shadow_dirty_count); + return 1; + } else + return 0; +} + #endif // _XEN_SHADOW_H /* diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/tlbflush.h --- a/xen/include/asm-ia64/tlbflush.h Wed Jul 26 09:02:43 2006 -0600 +++ b/xen/include/asm-ia64/tlbflush.h Wed Jul 26 09:36:36 2006 -0600 @@ -22,8 +22,8 @@ void domain_flush_vtlb_all (void); /* Global range-flush of vTLB. */ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range); -/* Final vTLB flush on every dirty cpus. */ -void domain_flush_destroy (struct domain *d); +/* Flush vhpt and mTLB on every dirty cpus. */ +void domain_flush_tlb_vhpt(struct domain *d); /* Flush v-tlb on cpus set in mask for current domain. */ void flush_tlb_mask(cpumask_t mask); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |