[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Merge with xen-ia64-unstable.hg
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Date 1176302729 -3600 # Node ID db4fcb6093832c24771764bd2cb5af9a2608bca2 # Parent 3d356a2b1c75c2fea9b8eb0643075614e9e3d4fe # Parent 0d92cd901f809ce898c7c62008cf446a0b295c1c Merge with xen-ia64-unstable.hg --- tools/libxc/xc_hvm_save.c | 755 ----- tools/libxc/xc_linux_save.c | 1414 ---------- linux-2.6-xen-sparse/drivers/xen/blkfront/block.h | 14 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 4 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 28 linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c | 2 tools/blktap/drivers/block-qcow.c | 10 tools/ioemu/hw/pc.c | 12 tools/ioemu/vl.c | 2 tools/ioemu/vl.h | 5 tools/ioemu/xenstore.c | 214 - tools/libfsimage/fat/fat.h | 14 tools/libxc/Makefile | 4 tools/libxc/ia64/xc_ia64_linux_save.c | 6 tools/libxc/xc_domain_save.c | 1609 ++++++++++++ tools/libxc/xenguest.h | 19 tools/libxc/xg_private.c | 11 tools/pygrub/src/LiloConf.py | 147 + tools/pygrub/src/pygrub | 32 tools/python/xen/xend/XendCheckpoint.py | 7 tools/python/xen/xend/server/DevController.py | 1 tools/python/xen/xend/server/netif.py | 88 tools/xcutils/xc_save.c | 9 unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 150 - unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 73 unmodified_drivers/linux-2.6/platform-pci/platform-compat.c | 3 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 34 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h | 11 28 files changed, 2150 insertions(+), 2528 deletions(-) diff -r 3d356a2b1c75 -r db4fcb609383 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Apr 11 07:30:02 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Apr 11 15:45:29 2007 +0100 @@ -55,20 +55,6 @@ #include <asm/io.h> #include <asm/atomic.h> #include <asm/uaccess.h> - -#if 1 -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_blk: " fmt, ##args) -#else -#define IPRINTK(fmt, args...) 
((void)0) -#endif - -#if 1 -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_blk: " fmt, ##args) -#else -#define WPRINTK(fmt, args...) ((void)0) -#endif #define DPRINTK(_f, _a...) pr_debug(_f, ## _a) diff -r 3d356a2b1c75 -r db4fcb609383 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Apr 11 07:30:02 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Apr 11 15:45:29 2007 +0100 @@ -128,14 +128,12 @@ xlbd_alloc_major_info(int major, int min break; } - printk("Registering block device major %i\n", ptr->major); if (register_blkdev(ptr->major, ptr->type->devname)) { - WPRINTK("can't get major %d with name %s\n", - ptr->major, ptr->type->devname); kfree(ptr); return NULL; } + printk("xen-vbd: registered block device major %i\n", ptr->major); major_info[index] = ptr; return ptr; } diff -r 3d356a2b1c75 -r db4fcb609383 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Wed Apr 11 07:30:02 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Wed Apr 11 15:45:29 2007 +0100 @@ -60,9 +60,6 @@ static DEFINE_SPINLOCK(gnttab_list_lock) static DEFINE_SPINLOCK(gnttab_list_lock); static struct grant_entry *shared; -#ifndef CONFIG_XEN -static unsigned long resume_frames; -#endif static struct gnttab_free_callback *gnttab_free_callback_list; @@ -514,6 +511,8 @@ int gnttab_suspend(void) #include <platform-pci.h> +static unsigned long resume_frames; + static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; @@ -543,23 +542,17 @@ int gnttab_resume(void) if (max_nr_gframes < nr_gframes) return -ENOSYS; - resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); + if (!resume_frames) { + resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); + shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); + if (shared == NULL) { + printk("error to ioremap gnttab share frames\n"); + return -1; + } + 
} gnttab_map(0, nr_gframes - 1); - shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); - if (shared == NULL) { - printk("error to ioremap gnttab share frames\n"); - return -1; - } - - return 0; -} - -int gnttab_suspend(void) -{ - iounmap(shared); - resume_frames = 0; return 0; } @@ -624,7 +617,6 @@ int __devinit gnttab_init(void) gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; - printk("Grant table initialized\n"); return 0; ini_nomem: diff -r 3d356a2b1c75 -r db4fcb609383 linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c Wed Apr 11 07:30:02 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c Wed Apr 11 15:45:29 2007 +0100 @@ -209,6 +209,8 @@ int __xen_suspend(int fast_suspend) if (fast_suspend) { xenbus_suspend(); err = stop_machine_run(take_machine_down, &fast_suspend, 0); + if (err < 0) + xenbus_suspend_cancel(); } else { err = take_machine_down(&fast_suspend); } diff -r 3d356a2b1c75 -r db4fcb609383 tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/blktap/drivers/block-qcow.c Wed Apr 11 15:45:29 2007 +0100 @@ -949,8 +949,14 @@ int tdqcow_open (struct disk_driver *dd, goto fail; } init_fds(dd); - s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : - (final_cluster + s->cluster_size)); + + if (!final_cluster) + s->fd_end = s->l1_table_offset + l1_table_size; + else { + s->fd_end = lseek64(fd, 0, SEEK_END); + if (s->fd_end == (off64_t)-1) + goto fail; + } return 0; diff -r 3d356a2b1c75 -r db4fcb609383 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/ioemu/hw/pc.c Wed Apr 11 15:45:29 2007 +0100 @@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size, if (pci_enabled && acpi_enabled) { piix4_pm_init(pci_bus, piix3_devfn + 3); } -#endif /* !CONFIG_DM */ #if 0 /* ??? 
Need to figure out some way for the user to @@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size, lsi_scsi_attach(scsi, bdrv, -1); } #endif +#else + if (pci_enabled) { + void *scsi; + + scsi = lsi_scsi_init(pci_bus, -1); + for (i = 0; i < MAX_SCSI_DISKS ; i++) { + if (bs_table[i + MAX_DISKS]) + lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1); + } + } +#endif /* !CONFIG_DM */ /* must be done after all PCI devices are instanciated */ /* XXX: should be done in the Bochs BIOS */ if (pci_enabled) { diff -r 3d356a2b1c75 -r db4fcb609383 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/ioemu/vl.c Wed Apr 11 15:45:29 2007 +0100 @@ -116,7 +116,7 @@ void *ioport_opaque[MAX_IOPORTS]; void *ioport_opaque[MAX_IOPORTS]; IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS]; IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS]; -BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD]; +BlockDriverState *bs_table[MAX_DISKS+MAX_SCSI_DISKS], *fd_table[MAX_FD]; int vga_ram_size; int bios_size; static DisplayState display_state; diff -r 3d356a2b1c75 -r db4fcb609383 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/ioemu/vl.h Wed Apr 11 15:45:29 2007 +0100 @@ -818,8 +818,9 @@ int vnc_start_viewer(int port); /* ide.c */ #define MAX_DISKS 4 - -extern BlockDriverState *bs_table[MAX_DISKS]; +#define MAX_SCSI_DISKS 7 + +extern BlockDriverState *bs_table[MAX_DISKS+MAX_SCSI_DISKS]; void isa_ide_init(int iobase, int iobase2, int irq, BlockDriverState *hd0, BlockDriverState *hd1); diff -r 3d356a2b1c75 -r db4fcb609383 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/ioemu/xenstore.c Wed Apr 11 15:45:29 2007 +0100 @@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c int ret = 0; if (*buf) - free(*buf); + free(*buf); va_start(ap, fmt); if (vasprintf(buf, fmt, ap) == -1) { - buf = NULL; - ret = -1; + buf = NULL; + ret = -1; } va_end(ap); return ret; @@ -45,11 +45,11 @@ 
static void insert_media(void *opaque) int i; for (i = 0; i < MAX_DISKS; i++) { - if (media_filename[i] && bs_table[i]) { - do_change(bs_table[i]->device_name, media_filename[i]); - free(media_filename[i]); - media_filename[i] = NULL; - } + if (media_filename[i] && bs_table[i]) { + do_change(bs_table[i]->device_name, media_filename[i]); + free(media_filename[i]); + media_filename[i] = NULL; + } } } @@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in { if (insert_timer == NULL) - insert_timer = qemu_new_timer(rt_clock, insert_media, NULL); + insert_timer = qemu_new_timer(rt_clock, insert_media, NULL); qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout); } @@ -82,8 +82,8 @@ void xenstore_parse_domain_config(int do char **e = NULL; char *buf = NULL, *path; char *fpath = NULL, *bpath = NULL, - *dev = NULL, *params = NULL, *type = NULL; - int i; + *dev = NULL, *params = NULL, *type = NULL; + int i, is_scsi; unsigned int len, num, hd_index; for(i = 0; i < MAX_DISKS; i++) @@ -91,8 +91,8 @@ void xenstore_parse_domain_config(int do xsh = xs_daemon_open(); if (xsh == NULL) { - fprintf(logfile, "Could not contact xenstore for domain config\n"); - return; + fprintf(logfile, "Could not contact xenstore for domain config\n"); + return; } path = xs_get_domain_path(xsh, domid); @@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do } if (pasprintf(&buf, "%s/device/vbd", path) == -1) - goto out; + goto out; e = xs_directory(xsh, XBT_NULL, buf, &num); if (e == NULL) - goto out; + goto out; for (i = 0; i < num; i++) { - /* read the backend path */ - if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1) - continue; - free(bpath); + /* read the backend path */ + if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1) + continue; + free(bpath); bpath = xs_read(xsh, XBT_NULL, buf, &len); - if (bpath == NULL) - continue; - /* read the name of the device */ - if (pasprintf(&buf, "%s/dev", bpath) == -1) - continue; - free(dev); - dev = 
xs_read(xsh, XBT_NULL, buf, &len); - if (dev == NULL) - continue; - if (strncmp(dev, "hd", 2) || strlen(dev) != 3) - continue; - hd_index = dev[2] - 'a'; - if (hd_index >= MAX_DISKS) - continue; - /* read the type of the device */ - if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1) - continue; - free(type); - type = xs_read(xsh, XBT_NULL, buf, &len); - if (pasprintf(&buf, "%s/params", bpath) == -1) - continue; - free(params); - params = xs_read(xsh, XBT_NULL, buf, &len); - if (params == NULL) - continue; + if (bpath == NULL) + continue; + /* read the name of the device */ + if (pasprintf(&buf, "%s/dev", bpath) == -1) + continue; + free(dev); + dev = xs_read(xsh, XBT_NULL, buf, &len); + if (dev == NULL) + continue; + is_scsi = !strncmp(dev, "sd", 2); + if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 ) + continue; + hd_index = dev[2] - 'a'; + if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS)) + continue; + /* read the type of the device */ + if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1) + continue; + free(type); + type = xs_read(xsh, XBT_NULL, buf, &len); + if (pasprintf(&buf, "%s/params", bpath) == -1) + continue; + free(params); + params = xs_read(xsh, XBT_NULL, buf, &len); + if (params == NULL) + continue; /* * check if device has a phantom vbd; the phantom is hooked * to the frontend device (for ease of cleanup), so lookup * the frontend device, and see if there is a phantom_vbd * if there is, we will use resolution as the filename */ - if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1) - continue; - free(fpath); + if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1) + continue; + free(fpath); fpath = xs_read(xsh, XBT_NULL, buf, &len); - if (fpath) { - if (pasprintf(&buf, "%s/dev", fpath) == -1) - continue; - free(params); + if (fpath) { + if (pasprintf(&buf, "%s/dev", fpath) == -1) + continue; + free(params); params = xs_read(xsh, XBT_NULL, buf , &len); - if 
(params) { + if (params) { /* * wait for device, on timeout silently fail because we will * fail to open below @@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do } } - bs_table[hd_index] = bdrv_new(dev); - /* check if it is a cdrom */ - if (type && !strcmp(type, "cdrom")) { - bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM); - if (pasprintf(&buf, "%s/params", bpath) != -1) - xs_watch(xsh, buf, dev); - } - /* open device now if media present */ - if (params[0]) { - if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0) + bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev); + /* check if it is a cdrom */ + if (type && !strcmp(type, "cdrom")) { + bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM); + if (pasprintf(&buf, "%s/params", bpath) != -1) + xs_watch(xsh, buf, dev); + } + /* open device now if media present */ + if (params[0]) { + if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)], + params, 0 /* snapshot */) < 0) fprintf(stderr, "qemu: could not open hard disk image '%s'\n", params); - } + } } /* Set a watch for log-dirty requests from the migration tools */ @@ -199,7 +201,7 @@ int xenstore_fd(void) int xenstore_fd(void) { if (xsh) - return xs_fileno(xsh); + return xs_fileno(xsh); return -1; } @@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque vec = xs_read_watch(xsh, &num); if (!vec) - return; + return; if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) { xenstore_process_logdirty_event(); @@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque } if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) || - strlen(vec[XS_WATCH_TOKEN]) != 3) - goto out; + strlen(vec[XS_WATCH_TOKEN]) != 3) + goto out; hd_index = vec[XS_WATCH_TOKEN][2] - 'a'; image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len); if (image == NULL || !strcmp(image, bs_table[hd_index]->filename)) - goto out; /* gone or identical */ + goto out; /* gone or identical */ do_eject(0, vec[XS_WATCH_TOKEN]); bs_table[hd_index]->filename[0] = 0; if 
(media_filename[hd_index]) { - free(media_filename[hd_index]); - media_filename[hd_index] = NULL; + free(media_filename[hd_index]); + media_filename[hd_index] = NULL; } if (image[0]) { - media_filename[hd_index] = strdup(image); - xenstore_check_new_media_present(5000); + media_filename[hd_index] = strdup(image); + xenstore_check_new_media_present(5000); } out: @@ -354,7 +356,7 @@ void xenstore_write_vncport(int display) char *portstr = NULL; if (xsh == NULL) - return; + return; path = xs_get_domain_path(xsh, domid); if (path == NULL) { @@ -363,10 +365,10 @@ void xenstore_write_vncport(int display) } if (pasprintf(&buf, "%s/console/vnc-port", path) == -1) - goto out; + goto out; if (pasprintf(&portstr, "%d", 5900 + display) == -1) - goto out; + goto out; if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0) fprintf(logfile, "xs_write() vncport failed\n"); @@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid) unsigned int i, len, rc = 0; if (xsh == NULL) { - return -1; + return -1; } path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid); - return -1; + fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid); + return -1; } pasprintf(&buf, "%s/vm", path); uuid = xs_read(xsh, XBT_NULL, buf, &len); if (uuid == NULL) { - fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf); - free(path); - return -1; + fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf); + free(path); + return -1; } pasprintf(&buf, "%s/vncpasswd", uuid); passwd = xs_read(xsh, XBT_NULL, buf, &len); if (passwd == NULL) { - fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf); - free(uuid); - free(path); - return rc; + fprintf(logfile, "xs_read(): vncpasswd get error. 
%s.\n", buf); + free(uuid); + free(path); + return rc; } for (i=0; i<len && i<63; i++) { - vncpasswd[i] = passwd[i]; - passwd[i] = '\0'; + vncpasswd[i] = passwd[i]; + passwd[i] = '\0'; } vncpasswd[len] = '\0'; pasprintf(&buf, "%s/vncpasswd", uuid); if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) { - fprintf(logfile, "xs_write() vncpasswd failed.\n"); - rc = -1; + fprintf(logfile, "xs_write() vncpasswd failed.\n"); + rc = -1; } free(passwd); @@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc goto out; if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1) - goto out; + goto out; e = xs_directory(handle, XBT_NULL, buf, num); @@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str buf = get_device_variable_path(devtype, inst, var); if (NULL == buf) - goto out; + goto out; value = xs_read(handle, XBT_NULL, buf, &len); free(buf); -out: + out: return value; } @@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char * char *buf = NULL, *path = NULL, *value = NULL; if (xsh == NULL) - goto out; + goto out; path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path(%d): error\n", domid); - goto out; + fprintf(logfile, "xs_get_domain_path(%d): error\n", domid); + goto out; } pasprintf(&buf, "%s/vm", path); free(path); path = xs_read(xsh, XBT_NULL, buf, NULL); if (path == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } pasprintf(&buf, "%s/%s", path, key); value = xs_read(xsh, XBT_NULL, buf, len); if (value == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } out: @@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k int rc = -1; if (xsh == NULL) - goto out; + goto out; path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path: error\n"); - goto out; + fprintf(logfile, 
"xs_get_domain_path: error\n"); + goto out; } pasprintf(&buf, "%s/vm", path); free(path); path = xs_read(xsh, XBT_NULL, buf, NULL); if (path == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } pasprintf(&buf, "%s/%s", path, key); rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value)); if (rc) { - fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key); - goto out; + fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key); + goto out; } out: diff -r 3d356a2b1c75 -r db4fcb609383 tools/libfsimage/fat/fat.h --- a/tools/libfsimage/fat/fat.h Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/libfsimage/fat/fat.h Wed Apr 11 15:45:29 2007 +0100 @@ -84,17 +84,17 @@ struct fat_bpb { #define FAT_DIRENTRY_LENGTH 32 #define FAT_DIRENTRY_ATTRIB(entry) \ - (*((unsigned char *) (entry+11))) + (*((__u8 *) (entry+11))) #define FAT_DIRENTRY_VALID(entry) \ - ( ((*((unsigned char *) entry)) != 0) \ - && ((*((unsigned char *) entry)) != 0xE5) \ + ( ((*((__u8 *) entry)) != 0) \ + && ((*((__u8 *) entry)) != 0xE5) \ && !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) ) #define FAT_DIRENTRY_FIRST_CLUSTER(entry) \ - ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16)) + ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16)) #define FAT_DIRENTRY_FILELENGTH(entry) \ - (*((unsigned long *) (entry+28))) + (*((__u32 *) (entry+28))) #define FAT_LONGDIR_ID(entry) \ - (*((unsigned char *) (entry))) + (*((__u8 *) (entry))) #define FAT_LONGDIR_ALIASCHECKSUM(entry) \ - (*((unsigned char *) (entry+13))) + (*((__u8 *) (entry+13))) diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/Makefile --- a/tools/libxc/Makefile Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/libxc/Makefile Wed Apr 11 15:45:29 2007 +0100 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c 
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c # symlink libelf from xen/common/libelf/ LIBELF_SRCS := libelf-tools.c libelf-loader.c diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Wed Apr 11 15:45:29 2007 +0100 @@ -134,8 +134,10 @@ retry: } int -xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int), + int hvm, void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) { DECLARE_DOMCTL; xc_dominfo_t info; diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/xc_domain_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_domain_save.c Wed Apr 11 15:45:29 2007 +0100 @@ -0,0 +1,1609 @@ +/****************************************************************************** + * xc_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> + +#include "xc_private.h" +#include "xc_dom.h" +#include "xg_private.h" +#include "xg_save_restore.h" + +#include <xen/hvm/params.h> +#include <xen/hvm/e820.h> + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_domain_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. 
+** +*/ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */ + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the current guest */ +static unsigned int pt_levels; + +/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */ +static unsigned long *qemu_bitmaps[2]; +static int qemu_active; +static int qemu_non_active; + +/* number of pfns this guest has (i.e. number of entries in the P2M) */ +static unsigned long p2m_size; + +/* Live mapping of the table mapping each PFN to its current MFN. */ +static xen_pfn_t *live_p2m = NULL; + +/* Live mapping of system MFN to PFN table. */ +static xen_pfn_t *live_m2p = NULL; +static unsigned long m2p_mfn0; + +/* grep fodder: machine_to_phys */ + +#define mfn_to_pfn(_mfn) live_m2p[(_mfn)] + +/* + * Returns TRUE if the given machine frame number has a unique mapping + * in the guest's pseudophysical map. + */ +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ + (((_mfn) < (max_mfn)) && \ + ((mfn_to_pfn(_mfn) < (p2m_size)) && \ + (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) + +/* Returns TRUE if MFN is successfully converted to a PFN. */ +#define translate_mfn_to_pfn(_pmfn) \ +({ \ + unsigned long mfn = *(_pmfn); \ + int _res = 1; \ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ + _res = 0; \ + else \ + *(_pmfn) = mfn_to_pfn(mfn); \ + _res; \ +}) + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. 
+*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define BITMAP_SIZE (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long)) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + +/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); +} + +static inline int count_bits ( int nr, volatile void *addr) +{ + int i, count = 0; + volatile unsigned long *p = (volatile unsigned long *)addr; + /* We know that the array is padded to unsigned long. */ + for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) + count += hweight32(*p); + return count; +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. 
nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ + + do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } + while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ + + return i; +} + +static uint64_t tv_to_us(struct timeval *new) +{ + return (new->tv_sec * 1000000) + new->tv_usec; +} + +static uint64_t llgettimeofday(void) +{ + struct timeval now; + gettimeofday(&now, NULL); + return tv_to_us(&now); +} + +static uint64_t tv_delta(struct timeval *new, struct timeval *old) +{ + return (((new->tv_sec - old->tv_sec)*1000000) + + (new->tv_usec - old->tv_usec)); +} + +static int noncached_write(int fd, int live, void *buffer, int len) +{ + static int write_count = 0; + + int rc = write(fd,buffer,len); + + write_count += len; + if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) ) + { + /* Time to discard cache - dont care if this fails */ + discard_file_cache(fd, 0 /* no flush */); + write_count = 0; + } + + return rc; +} + +#ifdef ADAPTIVE_SAVE + +/* +** We control the rate at which we transmit (or save) to minimize impact +** on running domains (including the target if we're doing live migrate). +*/ + +#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ +#define START_MBIT_RATE 100 /* initial transmit rate for migrate */ + +/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */ +#define RATE_TO_BTU 781250 + +/* Amount in bytes we allow ourselves to send in a burst */ +#define BURST_BUDGET (100*1024) + +/* We keep track of the current and previous transmission rate */ +static int mbit_rate, ombit_rate = 0; + +/* Have we reached the maximum transmission rate? 
*/ +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) + +static inline void initialize_mbit_rate() +{ + mbit_rate = START_MBIT_RATE; +} + +static int ratewrite(int io_fd, int live, void *buf, int n) +{ + static int budget = 0; + static int burst_time_us = -1; + static struct timeval last_put = { 0 }; + struct timeval now; + struct timespec delay; + long long delta; + + if ( START_MBIT_RATE == 0 ) + return noncached_write(io_fd, live, buf, n); + + budget -= n; + if ( budget < 0 ) + { + if ( mbit_rate != ombit_rate ) + { + burst_time_us = RATE_TO_BTU / mbit_rate; + ombit_rate = mbit_rate; + DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n", + mbit_rate, BURST_BUDGET, burst_time_us); + } + if ( last_put.tv_sec == 0 ) + { + budget += BURST_BUDGET; + gettimeofday(&last_put, NULL); + } + else + { + while ( budget < 0 ) + { + gettimeofday(&now, NULL); + delta = tv_delta(&now, &last_put); + while ( delta > burst_time_us ) + { + budget += BURST_BUDGET; + last_put.tv_usec += burst_time_us; + if ( last_put.tv_usec > 1000000 + { + last_put.tv_usec -= 1000000; + last_put.tv_sec++; + } + delta -= burst_time_us; + } + if ( budget > 0 ) + break; + delay.tv_sec = 0; + delay.tv_nsec = 1000 * (burst_time_us - delta); + while ( delay.tv_nsec > 0 ) + if ( nanosleep(&delay, &delay) == 0 ) + break; + } + } + } + return noncached_write(io_fd, live, buf, n); +} + +#else /* ! 
ADAPTIVE SAVE */ + +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) +#define initialize_mbit_rate() + +#endif + +static inline ssize_t write_exact(int fd, void *buf, size_t count) +{ + return (write(fd, buf, count) == count); +} + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + xc_shadow_op_stats_t *stats, int print) +{ + static struct timeval wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; + d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + DPRINTF("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + if ( wall_delta == 0 ) + wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if ( print ) + DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s %" PRId32 " pages\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), + (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), + stats->dirty_count); + +#ifdef ADAPTIVE_SAVE + if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate ) + { + mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) + + 50; + if ( mbit_rate > MAX_MBIT_RATE ) + mbit_rate = MAX_MBIT_RATE; + } +#endif + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + + +static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size, + unsigned long *arr, int runs) +{ + long long start, now; + xc_shadow_op_stats_t stats; + int 
j; + + start = llgettimeofday(); + + for ( j = 0; j < runs; j++ ) + { + int i; + + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + arr, p2m_size, NULL, 0, NULL); + DPRINTF("#Flush\n"); + for ( i = 0; i < 40; i++ ) + { + usleep(50000); + now = llgettimeofday(); + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, + NULL, 0, NULL, 0, &stats); + DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", + ((now-start)+500)/1000, + stats.fault_count, stats.dirty_count); + } + } + + return -1; +} + + +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info, + vcpu_guest_context_t *ctxt) +{ + int i = 0; + + if ( !(*suspend)(dom) ) + { + ERROR("Suspend request failed"); + return -1; + } + + retry: + + if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) + { + ERROR("Could not get domain info"); + return -1; + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) ) + ERROR("Could not get vcpu context"); + + + if ( info->dying ) + { + ERROR("domain is dying"); + return -1; + } + + if ( info->crashed ) + { + ERROR("domain has crashed"); + return -1; + } + + if ( info->shutdown ) + { + switch ( info->shutdown_reason ) + { + case SHUTDOWN_poweroff: + case SHUTDOWN_reboot: + ERROR("domain has shut down"); + return -1; + case SHUTDOWN_suspend: + return 0; + case SHUTDOWN_crash: + ERROR("domain has crashed"); + return -1; + } + } + + if ( info->paused ) + { + /* Try unpausing domain, wait, and retest. */ + xc_domain_unpause( xc_handle, dom ); + ERROR("Domain was paused. Wait and re-test."); + usleep(10000); /* 10ms */ + goto retry; + } + + if ( ++i < 100 ) + { + ERROR("Retry suspend domain"); + usleep(10000); /* 10ms */ + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +/* +** Map the top-level page of MFNs from the guest. 
The guest might not have +** finished resuming from a previous restore operation, so we wait a while for +** it to update the MFN to a reasonable value. +*/ +static void *map_frame_list_list(int xc_handle, uint32_t dom, + shared_info_t *shinfo) +{ + int count = 100; + void *p; + + while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) ) + usleep(10000); + + if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) + { + ERROR("Timed out waiting for frame list updated."); + return NULL; + } + + p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + shinfo->arch.pfn_to_mfn_frame_list_list); + if ( p == NULL ) + ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); + + return p; +} + +/* +** During transfer (or in the state file), all page-table pages must be +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. +** +** This function performs the appropriate conversion, taking into account +** which entries do not require canonicalization (in particular, those +** entries which map the virtual address reserved for the hypervisor). +*/ +static int canonicalize_pagetable(unsigned long type, unsigned long pfn, + const void *spage, void *dpage) +{ + + int i, pte_last, xen_start, xen_end, race = 0; + uint64_t pte; + + /* + ** We need to determine which entries in this page table hold + ** reserved hypervisor mappings. This depends on the current + ** page table type as well as the number of paging levels. + */ + xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8); + + if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); + + if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) ) + xen_start = L3_PAGETABLE_ENTRIES_PAE; + + /* + ** in PAE only the L2 mapping the top 1GB contains Xen mappings. 
+ ** We can spot this by looking for the guest linear mapping which + ** Xen always ensures is present in that L2. Guests must ensure + ** that this check will fail for other L2s. + */ + if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) + { + int hstart; + uint64_t he; + + hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) + { + /* hvirt starts with xen stuff... */ + xen_start = hstart; + } + else if ( hvirt_start != 0xf5800000 ) + { + /* old L2s from before hole was shrunk... */ + hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) + xen_start = hstart; + } + } + + if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* + ** XXX SMH: should compute these from hvirt_start (which we have) + ** and hvirt_end (which we don't) + */ + xen_start = 256; + xen_end = 272; + } + + /* Now iterate through the page table, canonicalizing each PTE */ + for (i = 0; i < pte_last; i++ ) + { + unsigned long pfn, mfn; + + if ( pt_levels == 2 ) + pte = ((const uint32_t*)spage)[i]; + else + pte = ((const uint64_t*)spage)[i]; + + if ( (i >= xen_start) && (i < xen_end) ) + pte = 0; + + if ( pte & _PAGE_PRESENT ) + { + mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) + { + /* This will happen if the type info is stale which + is quite feasible under live migration */ + pfn = 0; /* zap it - we'll retransmit this page later */ + race = 1; /* inform the caller of race; fatal if !live */ + } + else + pfn = mfn_to_pfn(mfn); + + pte &= ~MADDR_MASK_X86; + pte |= (uint64_t)pfn << PAGE_SHIFT; + + /* + * PAE guest L3Es can contain these flags when running on + * a 64bit hypervisor. We zap these here to avoid any + * surprise at restore time... 
+ */ + if ( (pt_levels == 3) && + (type == XEN_DOMCTL_PFINFO_L3TAB) && + (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) ) + pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); + } + + if ( pt_levels == 2 ) + ((uint32_t*)dpage)[i] = pte; + else + ((uint64_t*)dpage)[i] = pte; + } + + return race; +} + +static xen_pfn_t *xc_map_m2p(int xc_handle, + unsigned long max_mfn, + int prot) +{ + struct xen_machphys_mfn_list xmml; + privcmd_mmap_entry_t *entries; + unsigned long m2p_chunks, m2p_size; + xen_pfn_t *m2p; + xen_pfn_t *extent_start; + int i, rc; + + m2p_size = M2P_SIZE(max_mfn); + m2p_chunks = M2P_CHUNKS(max_mfn); + + xmml.max_extents = m2p_chunks; + if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) ) + { + ERROR("failed to allocate space for m2p mfns"); + return NULL; + } + set_xen_guest_handle(xmml.extent_start, extent_start); + + if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || + (xmml.nr_extents != m2p_chunks) ) + { + ERROR("xc_get_m2p_mfns"); + return NULL; + } + + if ( (m2p = mmap(NULL, m2p_size, prot, + MAP_SHARED, xc_handle, 0)) == MAP_FAILED ) + { + ERROR("failed to mmap m2p"); + return NULL; + } + + if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) ) + { + ERROR("failed to allocate space for mmap entries"); + return NULL; + } + + for ( i = 0; i < m2p_chunks; i++ ) + { + entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); + entries[i].mfn = extent_start[i]; + entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; + } + + if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, + entries, m2p_chunks)) < 0 ) + { + ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); + return NULL; + } + + m2p_mfn0 = entries[0].mfn; + + free(extent_start); + free(entries); + + return m2p; +} + + +static xen_pfn_t *map_and_save_p2m_table(int xc_handle, + int io_fd, + uint32_t dom, + vcpu_guest_context_t *ctxt, + unsigned long p2m_size, + shared_info_t *live_shinfo) +{ + /* Double and single indirect references to the live P2M 
table */ + xen_pfn_t *live_p2m_frame_list_list = NULL; + xen_pfn_t *live_p2m_frame_list = NULL; + + /* A copy of the pfn-to-mfn table frame list. */ + xen_pfn_t *p2m_frame_list = NULL; + + /* The mapping of the live p2m table itself */ + xen_pfn_t *p2m = NULL; + + int i, success = 0; + + live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom, + live_shinfo); + if ( !live_p2m_frame_list_list ) + goto out; + + live_p2m_frame_list = + xc_map_foreign_batch(xc_handle, dom, PROT_READ, + live_p2m_frame_list_list, + P2M_FLL_ENTRIES); + if ( !live_p2m_frame_list ) + { + ERROR("Couldn't map p2m_frame_list"); + goto out; + } + + + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. + (its not clear why it would want to change them, and we'll be OK + from a safety POV anyhow. */ + + p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, + live_p2m_frame_list, + P2M_FL_ENTRIES); + if ( !p2m ) + { + ERROR("Couldn't map p2m table"); + goto out; + } + live_p2m = p2m; /* So that translation macros will work */ + + /* Get a local copy of the live_P2M_frame_list */ + if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list array"); + goto out; + } + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); + + /* Canonicalise the pfn-to-mfn table frame-number list. */ + for ( i = 0; i < p2m_size; i += fpp ) + { + if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) ) + { + ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); + ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, + (uint64_t)p2m_frame_list[i/fpp]); + goto out; + } + } + + /* + * Write an extended-info structure to inform the restore code that + * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off + * slow paths in the restore code. 
+ */ + if ( (pt_levels == 3) && + (ctxt->vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) ) + { + unsigned long signature = ~0UL; + uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; + uint32_t chunk_sz = sizeof(struct vcpu_guest_context); + char chunk_sig[] = "vcpu"; + if ( !write_exact(io_fd, &signature, sizeof(signature)) || + !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || + !write_exact(io_fd, &chunk_sig, 4) || + !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || + !write_exact(io_fd, ctxt, sizeof(*ctxt)) ) + { + ERROR("write: extended info"); + goto out; + } + } + + if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) ) + { + ERROR("write: p2m_frame_list"); + goto out; + } + + success = 1; + + out: + + if ( !success && p2m ) + munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + if ( live_p2m_frame_list_list ) + munmap(live_p2m_frame_list_list, PAGE_SIZE); + + if ( live_p2m_frame_list ) + munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + + if ( p2m_frame_list ) + free(p2m_frame_list); + + return success ? p2m : NULL; +} + + + +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int), + int hvm, void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) +{ + xc_dominfo_t info; + + int rc = 1, i, j, last_iter, iter = 0; + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); + int race = 0, sent_last_iter, skip_this_iter; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A table containing the type of each PFN (/not/ MFN!). */ + unsigned long *pfn_type = NULL; + unsigned long *pfn_batch = NULL; + + /* A copy of one frame of guest memory. 
*/ + char page[PAGE_SIZE]; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + /* power of 2 order of p2m_size */ + int order_nr; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; + - to fixup by sending at the end if not already resent; */ + unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; + + xc_shadow_op_stats_t stats; + + unsigned long needed_to_fix = 0; + unsigned long total_sent = 0; + + uint64_t vcpumap = 1ULL; + + /* HVM: a buffer for holding HVM context */ + uint32_t hvm_buf_size = 0; + uint8_t *hvm_buf = NULL; + + /* HVM: magic frames for ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ + + /* If no explicit control parameters given, use defaults */ + max_iters = max_iters ? : DEF_MAX_ITERS; + max_factor = max_factor ? : DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if ( !get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels) ) + { + ERROR("Unable to get platform info."); + return 1; + } + + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { + ERROR("Could not get domain info"); + return 1; + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + { + ERROR("Could not get vcpu context"); + goto out; + } + shared_info_frame = info.shared_info_frame; + + /* Map the shared info frame */ + if ( !hvm ) + { + live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame); + if ( !live_shinfo ) + { + ERROR("Couldn't map live_shinfo"); + goto out; + } + } + + /* Get the size of the P2M table */ + p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); + + /* Domain is still running at this point */ + if ( live ) + { + /* Live suspend. Enable log-dirty mode. 
*/ + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { + ERROR("Couldn't enable shadow mode"); + goto out; + } + + if ( hvm ) + { + /* Get qemu-dm logging dirty pages too */ + void *seg = init_qemu_maps(dom, BITMAP_SIZE); + qemu_bitmaps[0] = seg; + qemu_bitmaps[1] = seg + BITMAP_SIZE; + qemu_active = 0; + qemu_non_active = 1; + } + } + else + { + /* This is a non-live suspend. Suspend the domain .*/ + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) + { + ERROR("Domain appears not to have suspended"); + goto out; + } + } + + last_iter = !live; + + /* pretend we sent all the pages last iteration */ + sent_last_iter = p2m_size; + + /* calculate the power of 2 order of p2m_size, e.g. + 15->4 16->4 17->5 */ + for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) + continue; + + /* Setup to_send / to_fix and to_skip bitmaps */ + to_send = malloc(BITMAP_SIZE); + to_fix = calloc(1, BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + + if ( !to_send || !to_fix || !to_skip ) + { + ERROR("Couldn't allocate to_send array"); + goto out; + } + + memset(to_send, 0xff, BITMAP_SIZE); + + if ( lock_pages(to_send, BITMAP_SIZE) ) + { + ERROR("Unable to lock to_send"); + return 1; + } + + /* (to fix is local only) */ + if ( lock_pages(to_skip, BITMAP_SIZE) ) + { + ERROR("Unable to lock to_skip"); + return 1; + } + + if ( hvm ) + { + /* Need another buffer for HVM context */ + hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); + if ( hvm_buf_size == -1 ) + { + ERROR("Couldn't get HVM context size from Xen"); + goto out; + } + hvm_buf = malloc(hvm_buf_size); + if ( !hvm_buf ) + { + ERROR("Couldn't allocate memory"); + goto out; + } + } + + analysis_phase(xc_handle, dom, p2m_size, to_skip, 0); + + /* We want zeroed memory so use calloc rather than malloc. 
*/ + pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); + pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); + if ( (pfn_type == NULL) || (pfn_batch == NULL) ) + { + ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + errno = ENOMEM; + goto out; + } + + if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) ) + { + ERROR("Unable to lock"); + goto out; + } + + /* Setup the mfn_to_pfn table mapping */ + if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) ) + { + ERROR("Failed to map live M2P table"); + goto out; + } + + /* Start writing out the saved-domain record. */ + if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + ERROR("write: p2m_size"); + goto out; + } + + if ( !hvm ) + { + int err = 0; + unsigned long mfn; + + /* Map the P2M table, and write the list of P2M frames */ + live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom, + &ctxt, p2m_size, live_shinfo); + if ( live_p2m == NULL ) + { + ERROR("Failed to map/save the p2m frame list"); + goto out; + } + + /* + * Quick belt and braces sanity check. + */ + + for ( i = 0; i < p2m_size; i++ ) + { + mfn = live_p2m[i]; + if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) ) + { + DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, + mfn, mfn_to_pfn(mfn)); + err++; + } + } + DPRINTF("Had %d unexplained entries in p2m table\n", err); + } + + print_stats(xc_handle, dom, 0, &stats, 0); + + /* Now write out each data page, canonicalising page tables as we go... 
*/ + for ( ; ; ) + { + unsigned int prev_pc, sent_this_iter, N, batch; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N = 0; + + DPRINTF("Saving memory pages: iter %d 0%%", iter); + + while ( N < p2m_size ) + { + unsigned int this_pc = (N * 100) / p2m_size; + int rc; + + if ( (this_pc - prev_pc) >= 5 ) + { + DPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if ( !last_iter ) + { + /* Slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + rc = xc_shadow_control( + xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, + p2m_size, NULL, 0, NULL); + if ( rc != p2m_size ) + { + ERROR("Error peeking shadow bitmap"); + goto out; + } + } + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + for ( batch = 0; + (batch < MAX_BATCH_SIZE) && (N < p2m_size); + N++ ) + { + int n = permute(N, p2m_size, order_nr); + + if ( debug ) + DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", + iter, (unsigned long)n, hvm ? 0 : live_p2m[n], + test_bit(n, to_send), + hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF)); + + if ( !last_iter && + test_bit(n, to_send) && + test_bit(n, to_skip) ) + skip_this_iter++; /* stats keeping */ + + if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter) || + (test_bit(n, to_fix) && last_iter)) ) + continue; + + /* Skip PFNs that aren't really there */ + if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) + && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ) + continue; + + /* + ** we get here if: + ** 1. page is marked to_send & hasn't already been re-dirtied + ** 2. (ignore to_skip in last iteration) + ** 3. 
add in pages that still need fixup (net bufs) + */ + + pfn_batch[batch] = n; + + /* Hypercall interfaces operate in PFNs for HVM guests + * and MFNs for PV guests */ + if ( hvm ) + pfn_type[batch] = n; + else + pfn_type[batch] = live_p2m[n]; + + if ( !is_mapped(pfn_type[batch]) ) + { + /* + ** not currently in psuedo-physical map -- set bit + ** in to_fix since we must send this page in last_iter + ** unless its sent sooner anyhow, or it never enters + ** pseudo-physical map (e.g. for ballooned down doms) + */ + set_bit(n, to_fix); + continue; + } + + if ( last_iter && + test_bit(n, to_fix) && + !test_bit(n, to_send) ) + { + needed_to_fix++; + DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n", + iter, n, pfn_type[batch]); + } + + clear_bit(n, to_fix); + + batch++; + } + + if ( batch == 0 ) + goto skip; /* vanishingly unlikely... */ + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_type, batch); + if ( region_base == NULL ) + { + ERROR("map batch failed"); + goto out; + } + + if ( !hvm ) + { + /* Get page types */ + for ( j = 0; j < batch; j++ ) + ((uint32_t *)pfn_type)[j] = pfn_type[j]; + if ( xc_get_pfn_type_batch(xc_handle, dom, batch, + (uint32_t *)pfn_type) ) + { + ERROR("get_pfn_type_batch failed"); + goto out; + } + for ( j = batch-1; j >= 0; j-- ) + pfn_type[j] = ((uint32_t *)pfn_type)[j]; + + for ( j = 0; j < batch; j++ ) + { + + if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) == + XEN_DOMCTL_PFINFO_XTAB ) + { + DPRINTF("type fail: page %i mfn %08lx\n", + j, pfn_type[j]); + continue; + } + + if ( debug ) + DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx" + " sum= %08lx\n", + iter, + (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | + pfn_batch[j], + pfn_type[j], + mfn_to_pfn(pfn_type[j] & + ~XEN_DOMCTL_PFINFO_LTAB_MASK), + csum_page(region_base + (PAGE_SIZE*j))); + + /* canonicalise mfn->pfn */ + pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | + pfn_batch[j]; + } + } + + if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) + { + 
ERROR("Error when writing to state file (2) (errno %d)", + errno); + goto out; + } + + if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) ) + { + ERROR("Error when writing to state file (3) (errno %d)", + errno); + goto out; + } + + /* entering this loop, pfn_type is now in pfns (Not mfns) */ + for ( j = 0; j < batch; j++ ) + { + unsigned long pfn, pagetype; + void *spage = (char *)region_base + (PAGE_SIZE*j); + + pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + /* write out pages in batch */ + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) + continue; + + pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && + (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* We have a pagetable page: need to rewrite it. */ + race = + canonicalize_pagetable(pagetype, pfn, spage, page); + + if ( race && !live ) + { + ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn, + pagetype); + goto out; + } + + if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) + { + ERROR("Error when writing to state file (4)" + " (errno %d)", errno); + goto out; + } + } + else + { + /* We have a normal page: just write it directly. 
*/ + if ( ratewrite(io_fd, live, spage, PAGE_SIZE) != + PAGE_SIZE ) + { + ERROR("Error when writing to state file (5)" + " (errno %d)", errno); + goto out; + } + } + } /* end of the write out for this batch */ + + sent_this_iter += batch; + + munmap(region_base, batch*PAGE_SIZE); + + } /* end of this while loop for this iteration */ + + skip: + + total_sent += sent_this_iter; + + DPRINTF("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + if ( last_iter ) + { + print_stats( xc_handle, dom, sent_this_iter, &stats, 1); + + DPRINTF("Total pages sent= %ld (%.2fx)\n", + total_sent, ((float)total_sent)/p2m_size ); + DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); + } + + if ( last_iter && debug ) + { + int minusone = -1; + memset(to_send, 0xff, BITMAP_SIZE); + debug = 0; + DPRINTF("Entering debug resend-all mode\n"); + + /* send "-1" to put receiver into debug mode */ + if ( !write_exact(io_fd, &minusone, sizeof(int)) ) + { + ERROR("Error when writing to state file (6) (errno %d)", + errno); + goto out; + } + + continue; + } + + if ( last_iter ) + break; + + if ( live ) + { + if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > p2m_size*max_factor) ) + { + DPRINTF("Start last iteration\n"); + last_iter = 1; + + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, + &ctxt) ) + { + ERROR("Domain appears not to have suspended"); + goto out; + } + + DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", + info.shared_info_frame, + (unsigned long)ctxt.user_regs.eip, + (unsigned long)ctxt.user_regs.edx); + } + + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + p2m_size, NULL, 0, &stats) != p2m_size ) + { + ERROR("Error flushing shadow PT"); + goto out; + } + + if ( hvm ) + { + /* Pull in the dirty bits from qemu-dm too */ + if ( !last_iter ) + { + qemu_active = qemu_non_active; + qemu_non_active = qemu_active ? 
0 : 1; + qemu_flip_buffer(dom, qemu_active); + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + { + to_send[j] |= qemu_bitmaps[qemu_non_active][j]; + qemu_bitmaps[qemu_non_active][j] = 0; + } + } + else + { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + to_send[j] |= qemu_bitmaps[qemu_active][j]; + } + } + + sent_last_iter = sent_this_iter; + + print_stats(xc_handle, dom, sent_this_iter, &stats, 1); + + } + } /* end of infinite for loop */ + + DPRINTF("All memory is saved\n"); + + { + struct { + int minustwo; + int max_vcpu_id; + uint64_t vcpumap; + } chunk = { -2, info.max_vcpu_id }; + + if ( info.max_vcpu_id >= 64 ) + { + ERROR("Too many VCPUS in guest!"); + goto out; + } + + for ( i = 1; i <= info.max_vcpu_id; i++ ) + { + xc_vcpuinfo_t vinfo; + if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && + vinfo.online ) + vcpumap |= 1ULL << i; + } + + chunk.vcpumap = vcpumap; + if ( !write_exact(io_fd, &chunk, sizeof(chunk)) ) + { + ERROR("Error when writing to state file (errno %d)", errno); + goto out; + } + } + + /* Zero terminate */ + i = 0; + if ( !write_exact(io_fd, &i, sizeof(int)) ) + { + ERROR("Error when writing to state file (6') (errno %d)", errno); + goto out; + } + + if ( hvm ) + { + uint32_t rec_size; + + /* Save magic-page locations. 
*/ + memset(magic_pfns, 0, sizeof(magic_pfns)); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, + (unsigned long *)&magic_pfns[0]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, + (unsigned long *)&magic_pfns[1]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, + (unsigned long *)&magic_pfns[2]); + if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("Error when writing to state file (7)"); + goto out; + } + + /* Save vcpu contexts */ + + for ( i = 0; i <= info.max_vcpu_id; i++ ) + { + if ( !(vcpumap & (1ULL << i)) ) + continue; + + if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + { + ERROR("HVM:Could not get vcpu context"); + goto out; + } + + DPRINTF("write vcpu %d context.\n", i); + if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { + ERROR("write vcpu context failed!\n"); + goto out; + } + } + + /* Get HVM context from Xen and save it too */ + if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, + hvm_buf_size)) == -1 ) + { + ERROR("HVM:Could not get hvm buffer"); + goto out; + } + + if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { + ERROR("error write hvm buffer size"); + goto out; + } + + if ( !write_exact(io_fd, hvm_buf, rec_size) ) + { + ERROR("write HVM info failed!\n"); + goto out; + } + + /* HVM guests are done now */ + rc = 0; + goto out; + } + + /* PV guests only from now on */ + + /* Send through a list of all the PFNs that were not in map at the close */ + { + unsigned int i,j; + unsigned long pfntab[1024]; + + for ( i = 0, j = 0; i < p2m_size; i++ ) + { + if ( !is_mapped(live_p2m[i]) ) + j++; + } + + if ( !write_exact(io_fd, &j, sizeof(unsigned int)) ) + { + ERROR("Error when writing to state file (6a) (errno %d)", errno); + goto out; + } + + for ( i = 0, j = 0; i < p2m_size; ) + { + if ( !is_mapped(live_p2m[i]) ) + pfntab[j++] = i; + + i++; + if ( (j == 1024) || (i == p2m_size) ) + { + if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) ) + { + ERROR("Error when 
writing to state file (6b) (errno %d)", + errno); + goto out; + } + j = 0; + } + } + } + + /* Canonicalise the suspend-record frame number. */ + if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) + { + ERROR("Suspend record is not in range of pseudophys map"); + goto out; + } + + for ( i = 0; i <= info.max_vcpu_id; i++ ) + { + if ( !(vcpumap & (1ULL << i)) ) + continue; + + if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + { + ERROR("No context for VCPU%d", i); + goto out; + } + + /* Canonicalise each GDT frame number. */ + for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) + { + if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) ) + { + ERROR("GDT frame is not in range of pseudophys map"); + goto out; + } + } + + /* Canonicalise the page table base pointer. */ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) + { + ERROR("PT base is not in range of pseudophys map"); + goto out; + } + ctxt.ctrlreg[3] = + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); + + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) + { + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) + { + ERROR("PT base is not in range of pseudophys map"); + goto out; + } + /* Least-significant bit means 'valid PFN'. */ + ctxt.ctrlreg[1] = 1 | + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); + } + + if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) + { + ERROR("Error when writing to state file (1) (errno %d)", errno); + goto out; + } + } + + /* + * Reset the MFN to be a known-invalid value. See map_frame_list_list(). + */ + memcpy(page, live_shinfo, PAGE_SIZE); + ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0; + if ( !write_exact(io_fd, page, PAGE_SIZE) ) + { + ERROR("Error when writing to state file (1) (errno %d)", errno); + goto out; + } + + /* Success! 
*/ + rc = 0; + + out: + + if ( live ) + { + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0 ) + DPRINTF("Warning - couldn't disable shadow mode"); + } + + /* Flush last write and discard cache for file. */ + discard_file_cache(io_fd, 1 /* flush */); + + if ( live_shinfo ) + munmap(live_shinfo, PAGE_SIZE); + + if ( live_p2m ) + munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + if ( live_m2p ) + munmap(live_m2p, M2P_SIZE(max_mfn)); + + free(pfn_type); + free(pfn_batch); + free(to_send); + free(to_fix); + free(to_skip); + + DPRINTF("Save exit rc=%d\n",rc); + + return !!rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Wed Apr 11 07:30:02 2007 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,755 +0,0 @@ -/****************************************************************************** - * xc_hvm_save.c - * - * Save the state of a running HVM guest. - * - * Copyright (c) 2003, K A Fraser. - * Copyright (c) 2006 Intel Corperation - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <inttypes.h> -#include <time.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/time.h> - -#include "xc_private.h" -#include "xg_private.h" -#include "xg_save_restore.h" - -#include <xen/hvm/e820.h> -#include <xen/hvm/params.h> - -/* -** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_hvm_save(). -** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** -*/ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ -#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ - -/* Shared-memory bitmaps for getting log-dirty bits from qemu */ -static unsigned long *qemu_bitmaps[2]; -static int qemu_active; -static int qemu_non_active; - -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. -*/ - -#define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) -#define BITMAP_SIZE (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long)) - -#define BITMAP_ENTRY(_nr,_bmap) \ - ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] - -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) - -static inline int test_bit (int nr, volatile void * addr) -{ - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; -} - -static inline void clear_bit (int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); -} - -static inline int permute( int i, int nr, int order_nr ) -{ - /* Need a simple permutation function so that we scan pages in a - pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often - contiguous ranges of pfns that have similar behaviour, and we - want to mix them up. */ - - /* e.g. 
nr->oder 15->4 16->4 17->5 */ - /* 512MB domain, 128k pages, order 17 */ - - /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA - */ - - /* - QPONMLKJIHGFEDCBA - EDCBA - QPONM - LKJIHGF - */ - - do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } - while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - - return i; -} - - -static uint64_t tv_to_us(struct timeval *new) -{ - return (new->tv_sec * 1000000) + new->tv_usec; -} - -static uint64_t llgettimeofday(void) -{ - struct timeval now; - gettimeofday(&now, NULL); - return tv_to_us(&now); -} - -static uint64_t tv_delta(struct timeval *new, struct timeval *old) -{ - return (((new->tv_sec - old->tv_sec)*1000000) + - (new->tv_usec - old->tv_usec)); -} - - -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) -#define initialize_mbit_rate() - -static inline ssize_t write_exact(int fd, void *buf, size_t count) -{ - return (write(fd, buf, count) == count); -} - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, - xc_shadow_op_stats_t *stats, int print) -{ - static struct timeval wall_last; - static long long d0_cpu_last; - static long long d1_cpu_last; - - struct timeval wall_now; - long long wall_delta; - long long d0_cpu_now, d0_cpu_delta; - long long d1_cpu_now, d1_cpu_delta; - - gettimeofday(&wall_now, NULL); - - d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; - d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) - DPRINTF("ARRHHH!!\n"); - - wall_delta = tv_delta(&wall_now,&wall_last)/1000; - if ( wall_delta == 0 ) - wall_delta = 1; - - d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; - d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; - - if ( print ) - DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " - "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, - (int)((d0_cpu_delta*100)/wall_delta), - 
(int)((d1_cpu_delta*100)/wall_delta), - (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), - (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), - stats->dirty_count); - - d0_cpu_last = d0_cpu_now; - d1_cpu_last = d1_cpu_now; - wall_last = wall_now; - - return 0; -} - -static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size, - unsigned long *arr, int runs) -{ - long long start, now; - xc_shadow_op_stats_t stats; - int j; - - start = llgettimeofday(); - - for ( j = 0; j < runs; j++ ) - { - int i; - - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, - arr, pfn_array_size, NULL, 0, NULL); - DPRINTF("#Flush\n"); - for ( i = 0; i < 40; i++ ) - { - usleep(50000); - now = llgettimeofday(); - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, - NULL, 0, NULL, 0, &stats); - DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", - ((now-start)+500)/1000, - stats.fault_count, stats.dirty_count); - } - } - - return -1; -} - -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, - int dom, xc_dominfo_t *info, - vcpu_guest_context_t *ctxt) -{ - int i = 0; - - if ( !(*suspend)(dom) ) - { - ERROR("Suspend request failed"); - return -1; - } - - retry: - - if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) - { - ERROR("Could not get domain info"); - return -1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) ) - ERROR("Could not get vcpu context"); - - if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) ) - return 0; /* success */ - - if ( info->paused ) - { - /* Try unpausing domain, wait, and retest. */ - xc_domain_unpause( xc_handle, dom ); - ERROR("Domain was paused. 
Wait and re-test."); - usleep(10000); /* 10ms */ - goto retry; - } - - if ( ++i < 100 ) - { - ERROR("Retry suspend domain."); - usleep(10000); /* 10ms */ - goto retry; - } - - ERROR("Unable to suspend domain."); - - return -1; -} - -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)) -{ - xc_dominfo_t info; - - int rc = 1, i, j, last_iter, iter = 0; - int live = !!(flags & XCFLAGS_LIVE); - int debug = !!(flags & XCFLAGS_DEBUG); - int sent_last_iter, skip_this_iter; - - /* The highest guest-physical frame number used by the current guest */ - unsigned long max_pfn; - - /* The size of an array big enough to contain all guest pfns */ - unsigned long pfn_array_size; - - /* Magic frames: ioreqs and xenstore comms. */ - uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containg the PFNs (/not/ MFN!) to map. */ - xen_pfn_t *pfn_batch = NULL; - - /* A copy of hvm domain context buffer*/ - uint32_t hvm_buf_size; - uint8_t *hvm_buf = NULL; - - /* base of the region in which domain memory is mapped */ - unsigned char *region_base = NULL; - - uint32_t rec_size, nr_vcpus; - - /* power of 2 order of pfn_array_size */ - int order_nr; - - /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); - - to skip this iteration because already dirty; */ - unsigned long *to_send = NULL, *to_skip = NULL; - - xc_shadow_op_stats_t stats; - - unsigned long total_sent = 0; - - uint64_t vcpumap = 1ULL; - - DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, " - "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags, - live, debug); - - /* If no explicit control parameters given, use defaults */ - max_iters = max_iters ? : DEF_MAX_ITERS; - max_factor = max_factor ? 
: DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) - { - ERROR("HVM: Could not get domain info"); - return 1; - } - nr_vcpus = info.nr_online_vcpus; - - if ( mlock(&ctxt, sizeof(ctxt)) ) - { - ERROR("HVM: Unable to mlock ctxt"); - return 1; - } - - /* Only have to worry about vcpu 0 even for SMP */ - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) - { - ERROR("HVM: Could not get vcpu context"); - goto out; - } - - DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n", - info.max_memkb, info.nr_pages); - - if ( live ) - { - /* Live suspend. Enable log-dirty mode. */ - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0 ) - { - ERROR("Couldn't enable shadow mode"); - goto out; - } - } - else - { - /* This is a non-live suspend. Suspend the domain .*/ - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) - { - ERROR("HVM Domain appears not to have suspended"); - goto out; - } - } - - last_iter = !live; - - max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); - - DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, " - "max_memkb=0x%lx, live=%d.\n", - max_pfn, info.max_memkb, live); - - /* Size of any array that covers 0 ... max_pfn */ - pfn_array_size = max_pfn + 1; - if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) ) - { - ERROR("Error when writing to state file (1)"); - goto out; - } - - /* pretend we sent all the pages last iteration */ - sent_last_iter = pfn_array_size; - - /* calculate the power of 2 order of pfn_array_size, e.g. 
- 15->4 16->4 17->5 */ - for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) - continue; - - /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - - if ( live ) - { - /* Get qemu-dm logging dirty pages too */ - void *seg = init_qemu_maps(dom, BITMAP_SIZE); - qemu_bitmaps[0] = seg; - qemu_bitmaps[1] = seg + BITMAP_SIZE; - qemu_active = 0; - qemu_non_active = 1; - } - - hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); - if ( hvm_buf_size == -1 ) - { - ERROR("Couldn't get HVM context size from Xen"); - goto out; - } - hvm_buf = malloc(hvm_buf_size); - - if ( !to_send || !to_skip || !hvm_buf ) - { - ERROR("Couldn't allocate memory"); - goto out; - } - - memset(to_send, 0xff, BITMAP_SIZE); - - if ( lock_pages(to_send, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_send"); - return 1; - } - - /* (to fix is local only) */ - if ( lock_pages(to_skip, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_skip"); - return 1; - } - - analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0); - - /* We want zeroed memory so use calloc rather than malloc. */ - pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - if ( pfn_batch == NULL ) - { - ERROR("failed to alloc memory for pfn_batch array"); - errno = ENOMEM; - goto out; - } - - for ( ; ; ) - { - unsigned int prev_pc, sent_this_iter, N, batch; - - iter++; - sent_this_iter = 0; - skip_this_iter = 0; - prev_pc = 0; - N=0; - - DPRINTF("Saving memory pages: iter %d 0%%", iter); - - while ( N < pfn_array_size ) - { - unsigned int this_pc = (N * 100) / pfn_array_size; - int rc; - - if ( (this_pc - prev_pc) >= 5 ) - { - DPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if ( !last_iter ) - { - /* Slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. 
*/ - rc = xc_shadow_control( - xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - pfn_array_size, NULL, 0, NULL); - if ( rc != pfn_array_size ) - { - ERROR("Error peeking shadow bitmap"); - goto out; - } - } - - /* load pfn_batch[] with the mfn of all the pages we're doing in - this batch. */ - for ( batch = 0; - (batch < MAX_BATCH_SIZE) && (N < pfn_array_size); - N++ ) - { - int n = permute(N, pfn_array_size, order_nr); - - if ( 0 && debug ) - DPRINTF("%d pfn= %08lx %d \n", - iter, (unsigned long)n, test_bit(n, to_send)); - - if ( !last_iter && - test_bit(n, to_send) && - test_bit(n, to_skip) ) - skip_this_iter++; /* stats keeping */ - - if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter)) ) - continue; - - /* Skip PFNs that aren't really there */ - if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */ - || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) && - n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ ) - continue; - - /* - ** we get here if: - ** 1. page is marked to_send & hasn't already been re-dirtied - ** 2. (ignore to_skip in last iteration) - */ - - pfn_batch[batch] = n; - - batch++; - } - - if ( batch == 0 ) - goto skip; /* vanishingly unlikely... 
*/ - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_batch, batch); - if ( region_base == 0 ) - { - ERROR("map batch failed"); - goto out; - } - - /* write num of pfns */ - if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (2)"); - goto out; - } - - /* write all the pfns */ - if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) ) - { - ERROR("Error when writing to state file (3)"); - goto out; - } - - for ( j = 0; j < batch; j++ ) - { - if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK ) - continue; - if ( ratewrite(io_fd, region_base + j*PAGE_SIZE, - PAGE_SIZE) != PAGE_SIZE ) - { - ERROR("ERROR when writing to state file (4)"); - goto out; - } - } - - sent_this_iter += batch; - - munmap(region_base, batch*PAGE_SIZE); - - } /* end of this while loop for this iteration */ - - skip: - - total_sent += sent_this_iter; - - DPRINTF("\r %d: sent %d, skipped %d, ", - iter, sent_this_iter, skip_this_iter ); - - if ( last_iter ) - { - print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", - total_sent, ((float)total_sent)/pfn_array_size ); - } - - if ( last_iter && debug ) - { - int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); - debug = 0; - DPRINTF("Entering debug resend-all mode\n"); - - /* send "-1" to put receiver into debug mode */ - if ( !write_exact(io_fd, &minusone, sizeof(int)) ) - { - ERROR("Error when writing to state file (6)"); - goto out; - } - - continue; - } - - if ( last_iter ) - break; - - if ( live ) - { - if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > pfn_array_size*max_factor) ) - { - DPRINTF("Start last iteration for HVM domain\n"); - last_iter = 1; - - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt)) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - - DPRINTF("SUSPEND eip 
%08lx edx %08lx\n", - (unsigned long)ctxt.user_regs.eip, - (unsigned long)ctxt.user_regs.edx); - } - - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - pfn_array_size, NULL, - 0, &stats) != pfn_array_size ) - { - ERROR("Error flushing shadow PT"); - goto out; - } - - /* Pull in the dirty bits from qemu too */ - if ( !last_iter ) - { - qemu_active = qemu_non_active; - qemu_non_active = qemu_active ? 0 : 1; - qemu_flip_buffer(dom, qemu_active); - for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) - { - to_send[j] |= qemu_bitmaps[qemu_non_active][j]; - qemu_bitmaps[qemu_non_active][j] = 0; - } - } - else - { - for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) - to_send[j] |= qemu_bitmaps[qemu_active][j]; - } - - sent_last_iter = sent_this_iter; - - print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - } - } /* end of while 1 */ - - - DPRINTF("All HVM memory is saved\n"); - - { - struct { - int minustwo; - int max_vcpu_id; - uint64_t vcpumap; - } chunk = { -2, info.max_vcpu_id }; - - if (info.max_vcpu_id >= 64) { - ERROR("Too many VCPUS in guest!"); - goto out; - } - - for (i = 1; i <= info.max_vcpu_id; i++) { - xc_vcpuinfo_t vinfo; - if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && - vinfo.online) - vcpumap |= 1ULL << i; - } - - chunk.vcpumap = vcpumap; - if(!write_exact(io_fd, &chunk, sizeof(chunk))) { - ERROR("Error when writing to state file (errno %d)", errno); - goto out; - } - } - - /* Zero terminate */ - i = 0; - if ( !write_exact(io_fd, &i, sizeof(int)) ) - { - ERROR("Error when writing to state file (6)"); - goto out; - } - - /* Save magic-page locations. 
*/ - memset(magic_pfns, 0, sizeof(magic_pfns)); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, - (unsigned long *)&magic_pfns[0]); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, - (unsigned long *)&magic_pfns[1]); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, - (unsigned long *)&magic_pfns[2]); - if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) - { - ERROR("Error when writing to state file (7)"); - goto out; - } - - /* save vcpu/vmcs contexts */ - for ( i = 0; i < nr_vcpus; i++ ) - { - if ( !(vcpumap & (1ULL << i)) ) - continue; - - if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) - { - ERROR("HVM:Could not get vcpu context"); - goto out; - } - - DPRINTF("write vcpu %d context.\n", i); - if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) - { - ERROR("write vcpu context failed!\n"); - goto out; - } - } - - if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, - hvm_buf_size)) == -1 ) - { - ERROR("HVM:Could not get hvm buffer"); - goto out; - } - - if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) - { - ERROR("error write hvm buffer size"); - goto out; - } - - if ( !write_exact(io_fd, hvm_buf, rec_size) ) - { - ERROR("write HVM info failed!\n"); - goto out; - } - - /* Success! */ - rc = 0; - - out: - - if ( live ) - { - if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0 ) - DPRINTF("Warning - couldn't disable shadow mode"); - } - - free(hvm_buf); - free(pfn_batch); - free(to_send); - free(to_skip); - - return !!rc; -} diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Wed Apr 11 07:30:02 2007 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1414 +0,0 @@ -/****************************************************************************** - * xc_linux_save.c - * - * Save the state of a running Linux session. - * - * Copyright (c) 2003, K A Fraser. 
- */ - -#include <inttypes.h> -#include <time.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/time.h> - -#include "xc_private.h" -#include "xc_dom.h" -#include "xg_private.h" -#include "xg_save_restore.h" - -/* -** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_linux_save(). -** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** -*/ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ -#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */ - -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - -/* number of pfns this guest has (i.e. number of entries in the P2M) */ -static unsigned long p2m_size; - -/* Live mapping of the table mapping each PFN to its current MFN. */ -static xen_pfn_t *live_p2m = NULL; - -/* Live mapping of system MFN to PFN table. */ -static xen_pfn_t *live_m2p = NULL; -static unsigned long m2p_mfn0; - -/* grep fodder: machine_to_phys */ - -#define mfn_to_pfn(_mfn) live_m2p[(_mfn)] - -/* - * Returns TRUE if the given machine frame number has a unique mapping - * in the guest's pseudophysical map. - */ -#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ - (((_mfn) < (max_mfn)) && \ - ((mfn_to_pfn(_mfn) < (p2m_size)) && \ - (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) - -/* Returns TRUE if MFN is successfully converted to a PFN. */ -#define translate_mfn_to_pfn(_pmfn) \ -({ \ - unsigned long mfn = *(_pmfn); \ - int _res = 1; \ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ - _res = 0; \ - else \ - *(_pmfn) = mfn_to_pfn(mfn); \ - _res; \ -}) - -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. 
-*/ - -#define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITMAP_SIZE ((p2m_size + BITS_PER_LONG - 1) / 8) - -#define BITMAP_ENTRY(_nr,_bmap) \ - ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] - -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) - -static inline int test_bit (int nr, volatile void * addr) -{ - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; -} - -static inline void clear_bit (int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); -} - -static inline void set_bit ( int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); -} - -/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ -static inline unsigned int hweight32(unsigned int w) -{ - unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); - res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); - return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); -} - -static inline int count_bits ( int nr, volatile void *addr) -{ - int i, count = 0; - volatile unsigned long *p = (volatile unsigned long *)addr; - /* We know that the array is padded to unsigned long. */ - for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) - count += hweight32(*p); - return count; -} - -static inline int permute( int i, int nr, int order_nr ) -{ - /* Need a simple permutation function so that we scan pages in a - pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often - contiguous ranges of pfns that have similar behaviour, and we - want to mix them up. */ - - /* e.g. 
nr->oder 15->4 16->4 17->5 */ - /* 512MB domain, 128k pages, order 17 */ - - /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA - */ - - /* - QPONMLKJIHGFEDCBA - EDCBA - QPONM - LKJIHGF - */ - - do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } - while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - - return i; -} - -static uint64_t tv_to_us(struct timeval *new) -{ - return (new->tv_sec * 1000000) + new->tv_usec; -} - -static uint64_t llgettimeofday(void) -{ - struct timeval now; - gettimeofday(&now, NULL); - return tv_to_us(&now); -} - -static uint64_t tv_delta(struct timeval *new, struct timeval *old) -{ - return (((new->tv_sec - old->tv_sec)*1000000) + - (new->tv_usec - old->tv_usec)); -} - -static int noncached_write(int fd, int live, void *buffer, int len) -{ - static int write_count = 0; - - int rc = write(fd,buffer,len); - - write_count += len; - if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) ) - { - /* Time to discard cache - dont care if this fails */ - discard_file_cache(fd, 0 /* no flush */); - write_count = 0; - } - - return rc; -} - -#ifdef ADAPTIVE_SAVE - -/* -** We control the rate at which we transmit (or save) to minimize impact -** on running domains (including the target if we're doing live migrate). -*/ - -#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ -#define START_MBIT_RATE 100 /* initial transmit rate for migrate */ - -/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */ -#define RATE_TO_BTU 781250 - -/* Amount in bytes we allow ourselves to send in a burst */ -#define BURST_BUDGET (100*1024) - -/* We keep track of the current and previous transmission rate */ -static int mbit_rate, ombit_rate = 0; - -/* Have we reached the maximum transmission rate? 
*/ -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) - -static inline void initialize_mbit_rate() -{ - mbit_rate = START_MBIT_RATE; -} - -static int ratewrite(int io_fd, int live, void *buf, int n) -{ - static int budget = 0; - static int burst_time_us = -1; - static struct timeval last_put = { 0 }; - struct timeval now; - struct timespec delay; - long long delta; - - if ( START_MBIT_RATE == 0 ) - return noncached_write(io_fd, live, buf, n); - - budget -= n; - if ( budget < 0 ) - { - if ( mbit_rate != ombit_rate ) - { - burst_time_us = RATE_TO_BTU / mbit_rate; - ombit_rate = mbit_rate; - DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n", - mbit_rate, BURST_BUDGET, burst_time_us); - } - if ( last_put.tv_sec == 0 ) - { - budget += BURST_BUDGET; - gettimeofday(&last_put, NULL); - } - else - { - while ( budget < 0 ) - { - gettimeofday(&now, NULL); - delta = tv_delta(&now, &last_put); - while ( delta > burst_time_us ) - { - budget += BURST_BUDGET; - last_put.tv_usec += burst_time_us; - if ( last_put.tv_usec > 1000000 - { - last_put.tv_usec -= 1000000; - last_put.tv_sec++; - } - delta -= burst_time_us; - } - if ( budget > 0 ) - break; - delay.tv_sec = 0; - delay.tv_nsec = 1000 * (burst_time_us - delta); - while ( delay.tv_nsec > 0 ) - if ( nanosleep(&delay, &delay) == 0 ) - break; - } - } - } - return noncached_write(io_fd, live, buf, n); -} - -#else /* ! 
ADAPTIVE SAVE */ - -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) -#define initialize_mbit_rate() - -#endif - -static inline ssize_t write_exact(int fd, void *buf, size_t count) -{ - return (write(fd, buf, count) == count); -} - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, - xc_shadow_op_stats_t *stats, int print) -{ - static struct timeval wall_last; - static long long d0_cpu_last; - static long long d1_cpu_last; - - struct timeval wall_now; - long long wall_delta; - long long d0_cpu_now, d0_cpu_delta; - long long d1_cpu_now, d1_cpu_delta; - - gettimeofday(&wall_now, NULL); - - d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; - d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) - DPRINTF("ARRHHH!!\n"); - - wall_delta = tv_delta(&wall_now,&wall_last)/1000; - if ( wall_delta == 0 ) - wall_delta = 1; - - d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; - d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; - - if ( print ) - DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " - "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, - (int)((d0_cpu_delta*100)/wall_delta), - (int)((d1_cpu_delta*100)/wall_delta), - (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), - (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), - stats->dirty_count); - -#ifdef ADAPTIVE_SAVE - if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate ) - { - mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) - + 50; - if ( mbit_rate > MAX_MBIT_RATE ) - mbit_rate = MAX_MBIT_RATE; - } -#endif - - d0_cpu_last = d0_cpu_now; - d1_cpu_last = d1_cpu_now; - wall_last = wall_now; - - return 0; -} - - -static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size, - unsigned long *arr, int runs) -{ - long long start, now; - xc_shadow_op_stats_t stats; - int 
j; - - start = llgettimeofday(); - - for ( j = 0; j < runs; j++ ) - { - int i; - - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, - arr, p2m_size, NULL, 0, NULL); - DPRINTF("#Flush\n"); - for ( i = 0; i < 40; i++ ) - { - usleep(50000); - now = llgettimeofday(); - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, - NULL, 0, NULL, 0, &stats); - DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", - ((now-start)+500)/1000, - stats.fault_count, stats.dirty_count); - } - } - - return -1; -} - - -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, - int dom, xc_dominfo_t *info, - vcpu_guest_context_t *ctxt) -{ - int i = 0; - - if ( !(*suspend)(dom) ) - { - ERROR("Suspend request failed"); - return -1; - } - - retry: - - if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) - { - ERROR("Could not get domain info"); - return -1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) ) - ERROR("Could not get vcpu context"); - - - if ( info->dying ) - { - ERROR("domain is dying"); - return -1; - } - - if ( info->crashed ) - { - ERROR("domain has crashed"); - return -1; - } - - if ( info->shutdown ) - { - switch ( info->shutdown_reason ) - { - case SHUTDOWN_poweroff: - case SHUTDOWN_reboot: - ERROR("domain has shut down"); - return -1; - case SHUTDOWN_suspend: - return 0; - case SHUTDOWN_crash: - ERROR("domain has crashed"); - return -1; - } - } - - if ( info->paused ) - { - /* Try unpausing domain, wait, and retest. */ - xc_domain_unpause( xc_handle, dom ); - ERROR("Domain was paused. Wait and re-test."); - usleep(10000); /* 10ms */ - goto retry; - } - - if ( ++i < 100 ) - { - ERROR("Retry suspend domain"); - usleep(10000); /* 10ms */ - goto retry; - } - - ERROR("Unable to suspend domain."); - - return -1; -} - -/* -** Map the top-level page of MFNs from the guest. 
The guest might not have -** finished resuming from a previous restore operation, so we wait a while for -** it to update the MFN to a reasonable value. -*/ -static void *map_frame_list_list(int xc_handle, uint32_t dom, - shared_info_t *shinfo) -{ - int count = 100; - void *p; - - while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) ) - usleep(10000); - - if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) - { - ERROR("Timed out waiting for frame list updated."); - return NULL; - } - - p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, - shinfo->arch.pfn_to_mfn_frame_list_list); - if ( p == NULL ) - ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); - - return p; -} - -/* -** During transfer (or in the state file), all page-table pages must be -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** -** This function performs the appropriate conversion, taking into account -** which entries do not require canonicalization (in particular, those -** entries which map the virtual address reserved for the hypervisor). -*/ -static int canonicalize_pagetable(unsigned long type, unsigned long pfn, - const void *spage, void *dpage) -{ - - int i, pte_last, xen_start, xen_end, race = 0; - uint64_t pte; - - /* - ** We need to determine which entries in this page table hold - ** reserved hypervisor mappings. This depends on the current - ** page table type as well as the number of paging levels. - */ - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8); - - if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); - - if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) ) - xen_start = L3_PAGETABLE_ENTRIES_PAE; - - /* - ** in PAE only the L2 mapping the top 1GB contains Xen mappings. 
- ** We can spot this by looking for the guest linear mapping which - ** Xen always ensures is present in that L2. Guests must ensure - ** that this check will fail for other L2s. - */ - if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) - { - int hstart; - uint64_t he; - - hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - he = ((const uint64_t *) spage)[hstart]; - - if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) - { - /* hvirt starts with xen stuff... */ - xen_start = hstart; - } - else if ( hvirt_start != 0xf5800000 ) - { - /* old L2s from before hole was shrunk... */ - hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - he = ((const uint64_t *) spage)[hstart]; - if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) - xen_start = hstart; - } - } - - if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* - ** XXX SMH: should compute these from hvirt_start (which we have) - ** and hvirt_end (which we don't) - */ - xen_start = 256; - xen_end = 272; - } - - /* Now iterate through the page table, canonicalizing each PTE */ - for (i = 0; i < pte_last; i++ ) - { - unsigned long pfn, mfn; - - if ( pt_levels == 2 ) - pte = ((const uint32_t*)spage)[i]; - else - pte = ((const uint64_t*)spage)[i]; - - if ( (i >= xen_start) && (i < xen_end) ) - pte = 0; - - if ( pte & _PAGE_PRESENT ) - { - mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) - { - /* This will happen if the type info is stale which - is quite feasible under live migration */ - pfn = 0; /* zap it - we'll retransmit this page later */ - race = 1; /* inform the caller of race; fatal if !live */ - } - else - pfn = mfn_to_pfn(mfn); - - pte &= ~MADDR_MASK_X86; - pte |= (uint64_t)pfn << PAGE_SHIFT; - - /* - * PAE guest L3Es can contain these flags when running on - * a 64bit hypervisor. We zap these here to avoid any - * surprise at restore time... 
- */ - if ( (pt_levels == 3) && - (type == XEN_DOMCTL_PFINFO_L3TAB) && - (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) ) - pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); - } - - if ( pt_levels == 2 ) - ((uint32_t*)dpage)[i] = pte; - else - ((uint64_t*)dpage)[i] = pte; - } - - return race; -} - -static xen_pfn_t *xc_map_m2p(int xc_handle, - unsigned long max_mfn, - int prot) -{ - struct xen_machphys_mfn_list xmml; - privcmd_mmap_entry_t *entries; - unsigned long m2p_chunks, m2p_size; - xen_pfn_t *m2p; - xen_pfn_t *extent_start; - int i, rc; - - m2p_size = M2P_SIZE(max_mfn); - m2p_chunks = M2P_CHUNKS(max_mfn); - - xmml.max_extents = m2p_chunks; - if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) ) - { - ERROR("failed to allocate space for m2p mfns"); - return NULL; - } - set_xen_guest_handle(xmml.extent_start, extent_start); - - if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || - (xmml.nr_extents != m2p_chunks) ) - { - ERROR("xc_get_m2p_mfns"); - return NULL; - } - - if ( (m2p = mmap(NULL, m2p_size, prot, - MAP_SHARED, xc_handle, 0)) == MAP_FAILED ) - { - ERROR("failed to mmap m2p"); - return NULL; - } - - if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) ) - { - ERROR("failed to allocate space for mmap entries"); - return NULL; - } - - for ( i = 0; i < m2p_chunks; i++ ) - { - entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); - entries[i].mfn = extent_start[i]; - entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; - } - - if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, - entries, m2p_chunks)) < 0 ) - { - ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); - return NULL; - } - - m2p_mfn0 = entries[0].mfn; - - free(extent_start); - free(entries); - - return m2p; -} - -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int)) -{ - xc_dominfo_t info; - - int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & 
XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); - int race = 0, sent_last_iter, skip_this_iter; - - /* The new domain's shared-info frame number. */ - unsigned long shared_info_frame; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containg the type of each PFN (/not/ MFN!). */ - unsigned long *pfn_type = NULL; - unsigned long *pfn_batch = NULL; - - /* A temporary mapping, and a copy, of one frame of guest memory. */ - char page[PAGE_SIZE]; - - /* Double and single indirect references to the live P2M table */ - xen_pfn_t *live_p2m_frame_list_list = NULL; - xen_pfn_t *live_p2m_frame_list = NULL; - - /* A copy of the pfn-to-mfn table frame list. */ - xen_pfn_t *p2m_frame_list = NULL; - - /* Live mapping of shared info structure */ - shared_info_t *live_shinfo = NULL; - - /* base of the region in which domain memory is mapped */ - unsigned char *region_base = NULL; - - /* power of 2 order of p2m_size */ - int order_nr; - - /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); - - to skip this iteration because already dirty; - - to fixup by sending at the end if not already resent; */ - unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; - - xc_shadow_op_stats_t stats; - - unsigned long needed_to_fix = 0; - unsigned long total_sent = 0; - - uint64_t vcpumap = 1ULL; - - /* If no explicit control parameters given, use defaults */ - max_iters = max_iters ? : DEF_MAX_ITERS; - max_factor = max_factor ? 
: DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if ( !get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels) ) - { - ERROR("Unable to get platform info."); - return 1; - } - - if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) - { - ERROR("Could not get domain info"); - return 1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) - { - ERROR("Could not get vcpu context"); - goto out; - } - shared_info_frame = info.shared_info_frame; - - /* Map the shared info frame */ - if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, shared_info_frame)) ) - { - ERROR("Couldn't map live_shinfo"); - goto out; - } - - p2m_size = live_shinfo->arch.max_pfn; - - live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom, - live_shinfo); - if ( !live_p2m_frame_list_list ) - goto out; - - live_p2m_frame_list = - xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list_list, - P2M_FLL_ENTRIES); - if ( !live_p2m_frame_list ) - { - ERROR("Couldn't map p2m_frame_list"); - goto out; - } - - /* Map all the frames of the pfn->mfn table. For migrate to succeed, - the guest must not change which frames are used for this purpose. - (its not clear why it would want to change them, and we'll be OK - from a safety POV anyhow. */ - - live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list, - P2M_FL_ENTRIES); - if ( !live_p2m ) - { - ERROR("Couldn't map p2m table"); - goto out; - } - - /* Setup the mfn_to_pfn table mapping */ - if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) ) - { - ERROR("Failed to map live M2P table"); - goto out; - } - - - /* Get a local copy of the live_P2M_frame_list */ - if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) - { - ERROR("Couldn't allocate p2m_frame_list array"); - goto out; - } - memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); - - /* Canonicalise the pfn-to-mfn table frame-number list. 
*/ - for ( i = 0; i < p2m_size; i += fpp ) - { - if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) ) - { - ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, - (uint64_t)p2m_frame_list[i/fpp]); - goto out; - } - } - - /* Domain is still running at this point */ - if ( live ) - { - /* Live suspend. Enable log-dirty mode. */ - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0 ) - { - ERROR("Couldn't enable shadow mode"); - goto out; - } - } - else - { - /* This is a non-live suspend. Suspend the domain .*/ - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - } - - last_iter = !live; - - /* pretend we sent all the pages last iteration */ - sent_last_iter = p2m_size; - - /* calculate the power of 2 order of p2m_size, e.g. - 15->4 16->4 17->5 */ - for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) - continue; - - /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_fix = calloc(1, BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - - if ( !to_send || !to_fix || !to_skip ) - { - ERROR("Couldn't allocate to_send array"); - goto out; - } - - memset(to_send, 0xff, BITMAP_SIZE); - - if ( lock_pages(to_send, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_send"); - return 1; - } - - /* (to fix is local only) */ - if ( lock_pages(to_skip, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_skip"); - return 1; - } - - analysis_phase(xc_handle, dom, p2m_size, to_skip, 0); - - /* We want zeroed memory so use calloc rather than malloc. 
*/ - pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); - pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - if ( (pfn_type == NULL) || (pfn_batch == NULL) ) - { - ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); - errno = ENOMEM; - goto out; - } - - if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) ) - { - ERROR("Unable to lock"); - goto out; - } - - /* - * Quick belt and braces sanity check. - */ - { - int err=0; - unsigned long mfn; - for ( i = 0; i < p2m_size; i++ ) - { - mfn = live_p2m[i]; - if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) ) - { - DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, - mfn, mfn_to_pfn(mfn)); - err++; - } - } - DPRINTF("Had %d unexplained entries in p2m table\n", err); - } - - /* Start writing out the saved-domain record. */ - if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) - { - ERROR("write: p2m_size"); - goto out; - } - - /* - * Write an extended-info structure to inform the restore code that - * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off - * slow paths in the restore code. - */ - if ( (pt_levels == 3) && - (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) ) - { - unsigned long signature = ~0UL; - uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; - uint32_t chunk_sz = sizeof(struct vcpu_guest_context); - char chunk_sig[] = "vcpu"; - if ( !write_exact(io_fd, &signature, sizeof(signature)) || - !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || - !write_exact(io_fd, &chunk_sig, 4) || - !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || - !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) - { - ERROR("write: extended info"); - goto out; - } - } - - if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) ) - { - ERROR("write: p2m_frame_list"); - goto out; - } - - print_stats(xc_handle, dom, 0, &stats, 0); - - /* Now write out each data page, canonicalising page tables as we go... 
*/ - for ( ; ; ) - { - unsigned int prev_pc, sent_this_iter, N, batch; - - iter++; - sent_this_iter = 0; - skip_this_iter = 0; - prev_pc = 0; - N = 0; - - DPRINTF("Saving memory pages: iter %d 0%%", iter); - - while ( N < p2m_size ) - { - unsigned int this_pc = (N * 100) / p2m_size; - int rc; - - if ( (this_pc - prev_pc) >= 5 ) - { - DPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if ( !last_iter ) - { - /* Slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. */ - rc = xc_shadow_control( - xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - p2m_size, NULL, 0, NULL); - if ( rc != p2m_size ) - { - ERROR("Error peeking shadow bitmap"); - goto out; - } - } - - /* load pfn_type[] with the mfn of all the pages we're doing in - this batch. */ - for ( batch = 0; - (batch < MAX_BATCH_SIZE) && (N < p2m_size); - N++ ) - { - int n = permute(N, p2m_size, order_nr); - - if ( debug ) - DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", - iter, (unsigned long)n, live_p2m[n], - test_bit(n, to_send), - mfn_to_pfn(live_p2m[n]&0xFFFFF)); - - if ( !last_iter && - test_bit(n, to_send) && - test_bit(n, to_skip) ) - skip_this_iter++; /* stats keeping */ - - if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter) || - (test_bit(n, to_fix) && last_iter)) ) - continue; - - /* - ** we get here if: - ** 1. page is marked to_send & hasn't already been re-dirtied - ** 2. (ignore to_skip in last iteration) - ** 3. add in pages that still need fixup (net bufs) - */ - - pfn_batch[batch] = n; - pfn_type[batch] = live_p2m[n]; - - if ( !is_mapped(pfn_type[batch]) ) - { - /* - ** not currently in psuedo-physical map -- set bit - ** in to_fix since we must send this page in last_iter - ** unless its sent sooner anyhow, or it never enters - ** pseudo-physical map (e.g. 
for ballooned down domains) - */ - set_bit(n, to_fix); - continue; - } - - if ( last_iter && - test_bit(n, to_fix) && - !test_bit(n, to_send) ) - { - needed_to_fix++; - DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n", - iter, n, pfn_type[batch]); - } - - clear_bit(n, to_fix); - - batch++; - } - - if ( batch == 0 ) - goto skip; /* vanishingly unlikely... */ - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_type, batch); - if ( region_base == NULL ) - { - ERROR("map batch failed"); - goto out; - } - - for ( j = 0; j < batch; j++ ) - ((uint32_t *)pfn_type)[j] = pfn_type[j]; - if ( xc_get_pfn_type_batch(xc_handle, dom, batch, - (uint32_t *)pfn_type) ) - { - ERROR("get_pfn_type_batch failed"); - goto out; - } - for ( j = batch-1; j >= 0; j-- ) - pfn_type[j] = ((uint32_t *)pfn_type)[j]; - - for ( j = 0; j < batch; j++ ) - { - - if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) == - XEN_DOMCTL_PFINFO_XTAB ) - { - DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]); - continue; - } - - if ( debug ) - DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx" - " sum= %08lx\n", - iter, - (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | - pfn_batch[j], - pfn_type[j], - mfn_to_pfn(pfn_type[j] & - ~XEN_DOMCTL_PFINFO_LTAB_MASK), - csum_page(region_base + (PAGE_SIZE*j))); - - /* canonicalise mfn->pfn */ - pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | - pfn_batch[j]; - } - - if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (2) (errno %d)", - errno); - goto out; - } - - if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) ) - { - ERROR("Error when writing to state file (3) (errno %d)", - errno); - goto out; - } - - /* entering this loop, pfn_type is now in pfns (Not mfns) */ - for ( j = 0; j < batch; j++ ) - { - unsigned long pfn, pagetype; - void *spage = (char *)region_base + (PAGE_SIZE*j); - - pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = pfn_type[j] & 
XEN_DOMCTL_PFINFO_LTAB_MASK; - - /* write out pages in batch */ - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) - continue; - - pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; - - if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && - (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* We have a pagetable page: need to rewrite it. */ - race = - canonicalize_pagetable(pagetype, pfn, spage, page); - - if ( race && !live ) - { - ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn, - pagetype); - goto out; - } - - if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) - { - ERROR("Error when writing to state file (4)" - " (errno %d)", errno); - goto out; - } - } - else - { - /* We have a normal page: just write it directly. */ - if ( ratewrite(io_fd, live, spage, PAGE_SIZE) != - PAGE_SIZE ) - { - ERROR("Error when writing to state file (5)" - " (errno %d)", errno); - goto out; - } - } - } /* end of the write out for this batch */ - - sent_this_iter += batch; - - munmap(region_base, batch*PAGE_SIZE); - - } /* end of this while loop for this iteration */ - - skip: - - total_sent += sent_this_iter; - - DPRINTF("\r %d: sent %d, skipped %d, ", - iter, sent_this_iter, skip_this_iter ); - - if ( last_iter ) - { - print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - - DPRINTF("Total pages sent= %ld (%.2fx)\n", - total_sent, ((float)total_sent)/p2m_size ); - DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); - } - - if ( last_iter && debug ) - { - int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); - debug = 0; - DPRINTF("Entering debug resend-all mode\n"); - - /* send "-1" to put receiver into debug mode */ - if ( !write_exact(io_fd, &minusone, sizeof(int)) ) - { - ERROR("Error when writing to state file (6) (errno %d)", - errno); - goto out; - } - - continue; - } - - if ( last_iter ) - break; - - if ( live ) - { - if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > 
p2m_size*max_factor) ) - { - DPRINTF("Start last iteration\n"); - last_iter = 1; - - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt) ) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - - DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", - info.shared_info_frame, - (unsigned long)ctxt.user_regs.eip, - (unsigned long)ctxt.user_regs.edx); - } - - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - p2m_size, NULL, 0, &stats) != p2m_size ) - { - ERROR("Error flushing shadow PT"); - goto out; - } - - sent_last_iter = sent_this_iter; - - print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - - } - } /* end of infinite for loop */ - - DPRINTF("All memory is saved\n"); - - { - struct { - int minustwo; - int max_vcpu_id; - uint64_t vcpumap; - } chunk = { -2, info.max_vcpu_id }; - - if ( info.max_vcpu_id >= 64 ) - { - ERROR("Too many VCPUS in guest!"); - goto out; - } - - for ( i = 1; i <= info.max_vcpu_id; i++ ) - { - xc_vcpuinfo_t vinfo; - if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && - vinfo.online ) - vcpumap |= 1ULL << i; - } - - chunk.vcpumap = vcpumap; - if ( !write_exact(io_fd, &chunk, sizeof(chunk)) ) - { - ERROR("Error when writing to state file (errno %d)", errno); - goto out; - } - } - - /* Zero terminate */ - i = 0; - if ( !write_exact(io_fd, &i, sizeof(int)) ) - { - ERROR("Error when writing to state file (6') (errno %d)", errno); - goto out; - } - - /* Send through a list of all the PFNs that were not in map at the close */ - { - unsigned int i,j; - unsigned long pfntab[1024]; - - for ( i = 0, j = 0; i < p2m_size; i++ ) - { - if ( !is_mapped(live_p2m[i]) ) - j++; - } - - if ( !write_exact(io_fd, &j, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (6a) (errno %d)", errno); - goto out; - } - - for ( i = 0, j = 0; i < p2m_size; ) - { - if ( !is_mapped(live_p2m[i]) ) - pfntab[j++] = i; - - i++; - if ( (j == 1024) || (i == p2m_size) ) - { - if ( 
!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) ) - { - ERROR("Error when writing to state file (6b) (errno %d)", - errno); - goto out; - } - j = 0; - } - } - } - - /* Canonicalise the suspend-record frame number. */ - if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) - { - ERROR("Suspend record is not in range of pseudophys map"); - goto out; - } - - for ( i = 0; i <= info.max_vcpu_id; i++ ) - { - if ( !(vcpumap & (1ULL << i)) ) - continue; - - if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) - { - ERROR("No context for VCPU%d", i); - goto out; - } - - /* Canonicalise each GDT frame number. */ - for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) - { - if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) ) - { - ERROR("GDT frame is not in range of pseudophys map"); - goto out; - } - } - - /* Canonicalise the page table base pointer. */ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) - { - ERROR("PT base is not in range of pseudophys map"); - goto out; - } - ctxt.ctrlreg[3] = - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); - - /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ - if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) - { - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) - { - ERROR("PT base is not in range of pseudophys map"); - goto out; - } - /* Least-significant bit means 'valid PFN'. */ - ctxt.ctrlreg[1] = 1 | - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); - } - - if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) - { - ERROR("Error when writing to state file (1) (errno %d)", errno); - goto out; - } - } - - /* - * Reset the MFN to be a known-invalid value. See map_frame_list_list(). - */ - memcpy(page, live_shinfo, PAGE_SIZE); - ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0; - if ( !write_exact(io_fd, page, PAGE_SIZE) ) - { - ERROR("Error when writing to state file (1) (errno %d)", errno); - goto out; - } - - /* Success! 
*/ - rc = 0; - - out: - - if ( live ) - { - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0 ) - DPRINTF("Warning - couldn't disable shadow mode"); - } - - /* Flush last write and discard cache for file. */ - discard_file_cache(io_fd, 1 /* flush */); - - if ( live_shinfo ) - munmap(live_shinfo, PAGE_SIZE); - - if ( live_p2m_frame_list_list ) - munmap(live_p2m_frame_list_list, PAGE_SIZE); - - if ( live_p2m_frame_list ) - munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); - - if ( live_p2m ) - munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); - - if ( live_m2p ) - munmap(live_m2p, M2P_SIZE(max_mfn)); - - free(pfn_type); - free(pfn_batch); - free(to_send); - free(to_fix); - free(to_skip); - - DPRINTF("Save exit rc=%d\n",rc); - - return !!rc; -} - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/libxc/xenguest.h Wed Apr 11 15:45:29 2007 +0100 @@ -16,26 +16,19 @@ /** - * This function will save a domain running Linux. + * This function will save a running domain. * * @parm xc_handle a handle to an open hypervisor interface * @parm fd the file descriptor to save a domain to * @parm dom the id of the domain * @return 0 on success, -1 on failure */ -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid)); +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, + int (*suspend)(int domid), int hvm, + void *(*init_qemu_maps)(int, unsigned), /* HVM only */ + void (*qemu_flip_buffer)(int, int)); /* HVM only */ -/** - * This function will save a hvm domain running unmodified guest. 
- * @return 0 on success, -1 on failure - */ -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)); /** * This function will restore a saved domain. diff -r 3d356a2b1c75 -r db4fcb609383 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/libxc/xg_private.c Wed Apr 11 15:45:29 2007 +0100 @@ -193,17 +193,6 @@ __attribute__((weak)) uint32_t domid, int memsize, const char *image_name) -{ - errno = ENOSYS; - return -1; -} - -__attribute__((weak)) - int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, - int (*suspend)(int domid), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)) { errno = ENOSYS; return -1; diff -r 3d356a2b1c75 -r db4fcb609383 tools/pygrub/src/LiloConf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/pygrub/src/LiloConf.py Wed Apr 11 15:45:29 2007 +0100 @@ -0,0 +1,147 @@ +# +#LiloConf.py +# + +import sys, re, os +import logging +import GrubConf + +class LiloImage(object): + def __init__(self, lines, path): + self.reset(lines, path) + + def __repr__(self): + return ("title: %s\n" + " root: %s\n" + " kernel: %s\n" + " args: %s\n" + " initrd: %s\n" %(self.title, self.root, self.kernel, + self.args, self.initrd)) + def reset(self, lines, path): + self._root = self._initrd = self._kernel = self._args = None + self.title = "" + self.lines = [] + self.path = path + map(self.set_from_line, lines) + self.root = "" # dummy + + def set_from_line(self, line, replace = None): + (com, arg) = GrubConf.grub_exact_split(line, 2) + + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$', r"\1", arg.strip()))) + else: + logging.info("Ignored image directive %s" 
%(com,)) + else: + logging.warning("Unknown image directive %s" %(com,)) + + # now put the line in the list of lines + if replace is None: + self.lines.append(line) + else: + self.lines.pop(replace) + self.lines.insert(replace, line) + + def set_kernel(self, val): + self._kernel = (None, self.path + "/" + val) + def get_kernel(self): + return self._kernel + kernel = property(get_kernel, set_kernel) + + def set_initrd(self, val): + self._initrd = (None, self.path + "/" + val) + def get_initrd(self): + return self._initrd + initrd = property(get_initrd, set_initrd) + + # set up command handlers + commands = { "label": "self.title", + "root": "self.root", + "rootnoverify": "self.root", + "image": "self.kernel", + "initrd": "self.initrd", + "append": "self.args", + "read-only": None, + "chainloader": None, + "module": None} + +class LiloConfigFile(object): + def __init__(self, fn = None): + self.filename = fn + self.images = [] + self.timeout = -1 + self._default = 0 + + if fn is not None: + self.parse() + + def parse(self, buf = None): + if buf is None: + if self.filename is None: + raise ValueError, "No config file defined to parse!" 
+ + f = open(self.filename, 'r') + lines = f.readlines() + f.close() + else: + lines = buf.split("\n") + + path = os.path.dirname(self.filename) + img = [] + for l in lines: + l = l.strip() + # skip blank lines + if len(l) == 0: + continue + # skip comments + if l.startswith('#'): + continue + # new image + if l.startswith("image"): + if len(img) > 0: + self.add_image(LiloImage(img, path)) + img = [l] + continue + + if len(img) > 0: + img.append(l) + continue + + (com, arg) = GrubConf.grub_exact_split(l, 2) + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored directive %s" %(com,)) + else: + logging.warning("Unknown directive %s" %(com,)) + + if len(img) > 0: + self.add_image(LiloImage(img, path)) + + def add_image(self, image): + self.images.append(image) + + def _get_default(self): + for i in range(0, len(self.images) - 1): + if self.images[i].title == self._default: + return i + return 0 + def _set_default(self, val): + self._default = val + default = property(_get_default, _set_default) + + commands = { "default": "self.default", + "timeout": "self.timeout", + "prompt": None, + "relocatable": None, + } + +if __name__ == "__main__": + if sys.argv < 2: + raise RuntimeError, "Need a grub.conf to read" + g = LiloConfigFile(sys.argv[1]) + for i in g.images: + print i #, i.title, i.root, i.kernel, i.args, i.initrd + print g.default diff -r 3d356a2b1c75 -r db4fcb609383 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/pygrub/src/pygrub Wed Apr 11 15:45:29 2007 +0100 @@ -16,6 +16,7 @@ import os, sys, string, struct, tempfile import os, sys, string, struct, tempfile, re import copy import logging +import platform import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt @@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p import fsimage import grub.GrubConf +import grub.LiloConf 
PYGRUB_VER = 0.5 @@ -58,6 +60,13 @@ def get_active_partition(file): # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): return buf[poff:poff+16] + + # type=0xee: GUID partition table + # XXX assume the first partition is active + if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',): + os.lseek(fd, 0x400, 0) + buf = os.read(fd, 512) + return buf[24:40] # XXX buf[32:40] # if there's not a partition marked as active, fall back to # the first partition @@ -346,7 +355,13 @@ class Grub: if not os.access(fn, os.R_OK): raise RuntimeError, "Unable to access %s" %(fn,) - self.cf = grub.GrubConf.GrubConfigFile() + if platform.machine() == 'ia64': + self.cf = grub.LiloConf.LiloConfigFile() + file_list = ("/efi/redhat/elilo.conf",) + else: + self.cf = grub.GrubConf.GrubConfigFile() + file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf") if not fs: # set the config file and parse it @@ -354,18 +369,15 @@ class Grub: self.cf.parse() return - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): + for f in file_list: if fs.file_exists(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find grub config file in the image provided." - f = fs.open_file(grubfile) + self.cf.filename = f + break + if self.cf.filename is None: + raise RuntimeError, "couldn't find bootloader config file in the image provided." 
+ f = fs.open_file(self.cf.filename) buf = f.read() del f - # then parse the grub config self.cf.parse(buf) def run(self): diff -r 3d356a2b1c75 -r db4fcb609383 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Wed Apr 11 15:45:29 2007 +0100 @@ -75,13 +75,6 @@ def save(fd, dominfo, network, live, dst image_cfg = dominfo.info.get('image', {}) hvm = dominfo.info.is_hvm() - stdvga = 0 - - if hvm: - log.info("save hvm domain") - if dominfo.info['platform'].has_key('stdvga'): - if dominfo.info['platform']['stdvga'] == 1: - stdvga = 1 # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the diff -r 3d356a2b1c75 -r db4fcb609383 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/python/xen/xend/server/DevController.py Wed Apr 11 15:45:29 2007 +0100 @@ -223,6 +223,7 @@ class DevController: xstransact.Remove(backpath) xstransact.Remove(frontpath) + self.vm._removeVm("device/%s/%d" % (self.deviceClass, devid)) def configurations(self): return map(self.configuration, self.deviceIDs()) diff -r 3d356a2b1c75 -r db4fcb609383 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/python/xen/xend/server/netif.py Wed Apr 11 15:45:29 2007 +0100 @@ -88,46 +88,6 @@ def parseRate(ratestr): return "%lu,%lu" % (bytes_per_interval, interval_usecs) -write_rate_G_re = re.compile('^([0-9]+)000000000(B/s@[0-9]+us)$') -write_rate_M_re = re.compile('^([0-9]+)000000(B/s@[0-9]+us)$') -write_rate_K_re = re.compile('^([0-9]+)000(B/s@[0-9]+us)$') -write_rate_s_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000000us$') -write_rate_m_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000us$') - -def formatRate(rate): - (bytes_per_interval, interval_usecs) = map(long, 
rate.split(',')) - - if interval_usecs != 0: - bytes_per_second = (bytes_per_interval * 1000 * 1000) / interval_usecs - else: - bytes_per_second = 0xffffffffL - - ratestr = "%uB/s@%uus" % (bytes_per_second, interval_usecs) - - # look for '000's - m = write_rate_G_re.match(ratestr) - if m: - ratestr = m.group(1) + "G" + m.group(2) - else: - m = write_rate_M_re.match(ratestr) - if m: - ratestr = m.group(1) + "M" + m.group(2) - else: - m = write_rate_K_re.match(ratestr) - if m: - ratestr = m.group(1) + "K" + m.group(2) - - m = write_rate_s_re.match(ratestr) - if m: - ratestr = m.group(1) + "s" - else: - m = write_rate_m_re.match(ratestr) - if m: - ratestr = m.group(1) + "ms" - - return ratestr - - class NetifController(DevController): """Network interface controller. Handles all network devices for a domain. """ @@ -138,8 +98,7 @@ class NetifController(DevController): def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - script = os.path.join(xoptions.network_script_dir, - config.get('script', xoptions.get_vif_script())) + script = config.get('script', xoptions.get_vif_script()) typ = config.get('type') bridge = config.get('bridge') mac = config.get('mac') @@ -149,24 +108,17 @@ class NetifController(DevController): ipaddr = config.get('ip') model = config.get('model') - devid = self.allocateDeviceID() - if not typ: typ = xoptions.netback_type - + if not mac: mac = randomMAC() + devid = self.allocateDeviceID() + back = { 'script' : script, 'mac' : mac, - 'handle' : "%i" % devid, 'type' : typ } - - if typ == 'ioemu': - front = {} - else: - front = { 'handle' : "%i" % devid, - 'mac' : mac } if ipaddr: back['ip'] = ipaddr if bridge: @@ -174,12 +126,26 @@ class NetifController(DevController): if vifname: back['vifname'] = vifname if rate: - back['rate'] = parseRate(rate) + back['rate'] = rate if uuid: back['uuid'] = uuid if model: back['model'] = model + config_path = "device/%s/%d/" % (self.deviceClass, devid) + for x in back: + 
self.vm._writeVm(config_path + x, back[x]) + + back['handle'] = "%i" % devid + back['script'] = os.path.join(xoptions.network_script_dir, script) + if rate: + back['rate'] = parseRate(rate) + + front = {} + if typ != 'ioemu': + front = { 'handle' : "%i" % devid, + 'mac' : mac } + return (devid, back, front) @@ -187,14 +153,17 @@ class NetifController(DevController): """@see DevController.configuration""" result = DevController.getDeviceConfiguration(self, devid) - devinfo = self.readBackend(devid, 'script', 'ip', 'bridge', - 'mac', 'type', 'vifname', 'rate', - 'uuid', 'model') + + config_path = "device/%s/%d/" % (self.deviceClass, devid) + devinfo = () + for x in ( 'script', 'ip', 'bridge', 'mac', + 'type', 'vifname', 'rate', 'uuid', 'model' ): + y = self.vm._readVm(config_path + x) + devinfo += (y,) (script, ip, bridge, mac, typ, vifname, rate, uuid, model) = devinfo if script: - network_script_dir = xoptions.network_script_dir + os.sep - result['script'] = script.replace(network_script_dir, "") + result['script'] = script if ip: result['ip'] = ip if bridge: @@ -206,11 +175,10 @@ class NetifController(DevController): if vifname: result['vifname'] = vifname if rate: - result['rate'] = formatRate(rate) + result['rate'] = rate if uuid: result['uuid'] = uuid if model: result['model'] = model return result - diff -r 3d356a2b1c75 -r db4fcb609383 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Wed Apr 11 07:30:02 2007 -0600 +++ b/tools/xcutils/xc_save.c Wed Apr 11 15:45:29 2007 +0100 @@ -174,12 +174,9 @@ main(int argc, char **argv) max_f = atoi(argv[4]); flags = atoi(argv[5]); - if (flags & XCFLAGS_HVM) - ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, - &suspend, &init_qemu_maps, &qemu_flip_buffer); - else - ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, - &suspend); + ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, + &suspend, !!(flags & XCFLAGS_HVM), + &init_qemu_maps, &qemu_flip_buffer); xc_interface_close(xc_fd); diff 
-r 3d356a2b1c75 -r db4fcb609383 unmodified_drivers/linux-2.6/platform-pci/evtchn.c --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Wed Apr 11 07:30:02 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Wed Apr 11 15:45:29 2007 +0100 @@ -28,8 +28,10 @@ * IN THE SOFTWARE. */ +#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/spinlock.h> #include <xen/evtchn.h> #include <xen/interface/hvm/ioreq.h> #include <xen/features.h> @@ -41,29 +43,37 @@ void *shared_info_area; -static DEFINE_MUTEX(irq_evtchn_mutex); - #define is_valid_evtchn(x) ((x) != 0) #define evtchn_from_irq(x) (irq_evtchn[irq].evtchn) static struct { + spinlock_t lock; irqreturn_t(*handler) (int, void *, struct pt_regs *); void *dev_id; int evtchn; int close:1; /* close on unbind_from_irqhandler()? */ int inuse:1; + int in_handler:1; } irq_evtchn[256]; static int evtchn_to_irq[NR_EVENT_CHANNELS] = { [0 ... NR_EVENT_CHANNELS-1] = -1 }; -static int find_unbound_irq(void) +static DEFINE_SPINLOCK(irq_alloc_lock); + +static int alloc_xen_irq(void) { static int warned; int irq; - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - if (!irq_evtchn[irq].inuse) - return irq; + spin_lock(&irq_alloc_lock); + + for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) { + if (irq_evtchn[irq].inuse) + continue; + irq_evtchn[irq].inuse = 1; + spin_unlock(&irq_alloc_lock); + return irq; + } if (!warned) { warned = 1; @@ -71,7 +81,16 @@ static int find_unbound_irq(void) "increase irq_evtchn[] size in evtchn.c.\n"); } + spin_unlock(&irq_alloc_lock); + return -ENOSPC; +} + +static void free_xen_irq(int irq) +{ + spin_lock(&irq_alloc_lock); + irq_evtchn[irq].inuse = 0; + spin_unlock(&irq_alloc_lock); } int irq_to_evtchn_port(int irq) @@ -93,8 +112,7 @@ void unmask_evtchn(int port) shared_info_t *s = shared_info_area; vcpu_info_t *vcpu_info; - preempt_disable(); - cpu = smp_processor_id(); + cpu = get_cpu(); vcpu_info = &s->vcpu_info[cpu]; /* Slow path 
(hypercall) if this is a non-local port. We only @@ -103,7 +121,7 @@ void unmask_evtchn(int port) evtchn_unmask_t op = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); - preempt_enable(); + put_cpu(); return; } @@ -121,7 +139,8 @@ void unmask_evtchn(int port) if (!vcpu_info->evtchn_upcall_mask) force_evtchn_callback(); } - preempt_enable(); + + put_cpu(); } EXPORT_SYMBOL(unmask_evtchn); @@ -135,20 +154,19 @@ int bind_listening_port_to_irqhandler( struct evtchn_alloc_unbound alloc_unbound; int err, irq; - mutex_lock(&irq_evtchn_mutex); - - irq = find_unbound_irq(); - if (irq < 0) { - mutex_unlock(&irq_evtchn_mutex); + irq = alloc_xen_irq(); + if (irq < 0) return irq; - } + + spin_lock_irq(&irq_evtchn[irq].lock); alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = remote_domain; err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); if (err) { - mutex_unlock(&irq_evtchn_mutex); + spin_unlock_irq(&irq_evtchn[irq].lock); + free_xen_irq(irq); return err; } @@ -156,13 +174,13 @@ int bind_listening_port_to_irqhandler( irq_evtchn[irq].dev_id = dev_id; irq_evtchn[irq].evtchn = alloc_unbound.port; irq_evtchn[irq].close = 1; - irq_evtchn[irq].inuse = 1; evtchn_to_irq[alloc_unbound.port] = irq; unmask_evtchn(alloc_unbound.port); - mutex_unlock(&irq_evtchn_mutex); + spin_unlock_irq(&irq_evtchn[irq].lock); + return irq; } EXPORT_SYMBOL(bind_listening_port_to_irqhandler); @@ -176,34 +194,34 @@ int bind_caller_port_to_irqhandler( { int irq; - mutex_lock(&irq_evtchn_mutex); - - irq = find_unbound_irq(); - if (irq < 0) { - mutex_unlock(&irq_evtchn_mutex); + irq = alloc_xen_irq(); + if (irq < 0) return irq; - } + + spin_lock_irq(&irq_evtchn[irq].lock); irq_evtchn[irq].handler = handler; irq_evtchn[irq].dev_id = dev_id; irq_evtchn[irq].evtchn = caller_port; irq_evtchn[irq].close = 0; - irq_evtchn[irq].inuse = 1; evtchn_to_irq[caller_port] = irq; unmask_evtchn(caller_port); - mutex_unlock(&irq_evtchn_mutex); + 
spin_unlock_irq(&irq_evtchn[irq].lock); + return irq; } EXPORT_SYMBOL(bind_caller_port_to_irqhandler); void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - int evtchn = evtchn_from_irq(irq); - - mutex_lock(&irq_evtchn_mutex); + int evtchn; + + spin_lock_irq(&irq_evtchn[irq].lock); + + evtchn = evtchn_from_irq(irq); if (is_valid_evtchn(evtchn)) { evtchn_to_irq[irq] = -1; @@ -216,21 +234,28 @@ void unbind_from_irqhandler(unsigned int irq_evtchn[irq].handler = NULL; irq_evtchn[irq].evtchn = 0; - irq_evtchn[irq].inuse = 0; - - mutex_unlock(&irq_evtchn_mutex); + + spin_unlock_irq(&irq_evtchn[irq].lock); + + while (irq_evtchn[irq].in_handler) + cpu_relax(); + + free_xen_irq(irq); } EXPORT_SYMBOL(unbind_from_irqhandler); void notify_remote_via_irq(int irq) { - int evtchn = evtchn_from_irq(irq); + int evtchn; + + evtchn = evtchn_from_irq(irq); if (is_valid_evtchn(evtchn)) notify_remote_via_evtchn(evtchn); } EXPORT_SYMBOL(notify_remote_via_irq); -irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t evtchn_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { unsigned int l1i, port; /* XXX: All events are bound to vcpu0 but irq may be redirected. 
*/ @@ -249,13 +274,30 @@ irqreturn_t evtchn_interrupt(int irq, vo while ((l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i])) { port = (l1i * BITS_PER_LONG) + __ffs(l2); synch_clear_bit(port, &s->evtchn_pending[0]); + irq = evtchn_to_irq[port]; - if ((irq >= 0) && - ((handler = irq_evtchn[irq].handler) != NULL)) - handler(irq, irq_evtchn[irq].dev_id, regs); - else - printk(KERN_WARNING "unexpected event channel " - "upcall on port %d!\n", port); + if (irq < 0) + continue; + + spin_lock(&irq_evtchn[irq].lock); + handler = irq_evtchn[irq].handler; + dev_id = irq_evtchn[irq].dev_id; + if (unlikely(handler == NULL)) { + printk("Xen IRQ%d (port %d) has no handler!\n", + irq, port); + spin_unlock(&irq_evtchn[irq].lock); + continue; + } + irq_evtchn[irq].in_handler = 1; + spin_unlock(&irq_evtchn[irq].lock); + + local_irq_enable(); + handler(irq, irq_evtchn[irq].dev_id, regs); + local_irq_disable(); + + spin_lock(&irq_evtchn[irq].lock); + irq_evtchn[irq].in_handler = 0; + spin_unlock(&irq_evtchn[irq].lock); } } @@ -267,16 +309,6 @@ void force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } EXPORT_SYMBOL(force_evtchn_callback); - -void irq_suspend(void) -{ - mutex_lock(&irq_evtchn_mutex); -} - -void irq_suspend_cancel(void) -{ - mutex_unlock(&irq_evtchn_mutex); -} void irq_resume(void) { @@ -289,6 +321,16 @@ void irq_resume(void) for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) irq_evtchn[irq].evtchn = 0; - - mutex_unlock(&irq_evtchn_mutex); -} +} + +int xen_irq_init(struct pci_dev *pdev) +{ + int irq; + + for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) + spin_lock_init(&irq_evtchn[irq].lock); + + return request_irq(pdev->irq, evtchn_interrupt, + SA_SHIRQ | SA_SAMPLE_RANDOM | SA_INTERRUPT, + "xen-platform-pci", pdev); +} diff -r 3d356a2b1c75 -r db4fcb609383 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Wed Apr 11 07:30:02 2007 -0600 +++ 
b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Wed Apr 11 15:45:29 2007 +0100 @@ -1,24 +1,81 @@ #include <linux/config.h> +#include <linux/stop_machine.h> +#include <xen/evtchn.h> +#include <xen/gnttab.h> #include <xen/xenbus.h> #include "platform-pci.h" #include <asm/hypervisor.h> -int __xen_suspend(int fast_suspend) +/* + * Spinning prevents, for example, APs touching grant table entries while + * the shared grant table is not mapped into the address space immediately + * after resume. + */ +static void ap_suspend(void *_ap_spin) +{ + int *ap_spin = _ap_spin; + + BUG_ON(!irqs_disabled()); + + while (*ap_spin) { + cpu_relax(); + HYPERVISOR_yield(); + } +} + +static int bp_suspend(void) { int suspend_cancelled; - xenbus_suspend(); - platform_pci_suspend(); + BUG_ON(!irqs_disabled()); suspend_cancelled = HYPERVISOR_shutdown(SHUTDOWN_suspend); - if (suspend_cancelled) { - platform_pci_suspend_cancel(); + if (!suspend_cancelled) { + platform_pci_resume(); + gnttab_resume(); + irq_resume(); + } + + return suspend_cancelled; +} + +int __xen_suspend(int fast_suspend) +{ + int err, suspend_cancelled, ap_spin; + + xenbus_suspend(); + + preempt_disable(); + + /* Prevent any races with evtchn_interrupt() handler. 
*/ + disable_irq(xen_platform_pdev->irq); + + ap_spin = 1; + smp_mb(); + + err = smp_call_function(ap_suspend, &ap_spin, 0, 0); + if (err < 0) { + preempt_enable(); xenbus_suspend_cancel(); - } else { - platform_pci_resume(); + return err; + } + + local_irq_disable(); + suspend_cancelled = bp_suspend(); + local_irq_enable(); + + smp_mb(); + ap_spin = 0; + + enable_irq(xen_platform_pdev->irq); + + preempt_enable(); + + if (!suspend_cancelled) xenbus_resume(); - } + else + xenbus_suspend_cancel(); return 0; } diff -r 3d356a2b1c75 -r db4fcb609383 unmodified_drivers/linux-2.6/platform-pci/platform-compat.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Wed Apr 11 07:30:02 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Wed Apr 11 15:45:29 2007 +0100 @@ -12,11 +12,10 @@ EXPORT_SYMBOL(system_state); EXPORT_SYMBOL(system_state); #endif -static inline void ctrl_alt_del(void) +void ctrl_alt_del(void) { kill_proc(1, SIGINT, 1); /* interrupt init */ } -EXPORT_SYMBOL(ctrl_alt_del); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) size_t strcspn(const char *s, const char *reject) diff -r 3d356a2b1c75 -r db4fcb609383 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Wed Apr 11 07:30:02 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Wed Apr 11 15:45:29 2007 +0100 @@ -40,7 +40,6 @@ #include <xen/interface/hvm/params.h> #include <xen/features.h> #include <xen/evtchn.h> -#include <xen/gnttab.h> #ifdef __ia64__ #include <asm/xen/xencomm.h> #endif @@ -61,6 +60,8 @@ MODULE_AUTHOR("ssmith@xxxxxxxxxxxxx"); MODULE_AUTHOR("ssmith@xxxxxxxxxxxxx"); MODULE_DESCRIPTION("Xen platform PCI device"); MODULE_LICENSE("GPL"); + +struct pci_dev *xen_platform_pdev; static unsigned long shared_info_frame; static uint64_t callback_via; @@ -88,8 +89,6 @@ static int __devinit init_xen_info(void) ioremap(shared_info_frame << PAGE_SHIFT, PAGE_SIZE); if 
(shared_info_area == NULL) panic("can't map shared info\n"); - - gnttab_init(); return 0; } @@ -199,8 +198,10 @@ static int set_callback_via(uint64_t via return HYPERVISOR_hvm_op(HVMOP_set_param, &a); } +int xen_irq_init(struct pci_dev *pdev); int xenbus_init(void); int xen_reboot_init(void); +int gnttab_init(void); static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -208,6 +209,10 @@ static int __devinit platform_pci_init(s int i, ret; long ioaddr, iolen; long mmio_addr, mmio_len; + + if (xen_platform_pdev) + return -EBUSY; + xen_platform_pdev = pdev; i = pci_enable_device(pdev); if (i) @@ -249,9 +254,10 @@ static int __devinit platform_pci_init(s if ((ret = init_xen_info())) goto out; - if ((ret = request_irq(pdev->irq, evtchn_interrupt, - SA_SHIRQ | SA_SAMPLE_RANDOM, - "xen-platform-pci", pdev))) + if ((ret = gnttab_init())) + goto out; + + if ((ret = xen_irq_init(pdev))) goto out; if ((ret = set_callback_via(callback_via))) @@ -291,18 +297,6 @@ static struct pci_driver platform_driver }; static int pci_device_registered; - -void platform_pci_suspend(void) -{ - gnttab_suspend(); - irq_suspend(); -} - -void platform_pci_suspend_cancel(void) -{ - irq_suspend_cancel(); - gnttab_resume(); -} void platform_pci_resume(void) { @@ -319,12 +313,8 @@ void platform_pci_resume(void) if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - irq_resume(); - if (set_callback_via(callback_via)) printk("platform_pci_resume failure!\n"); - - gnttab_resume(); } static int __init platform_pci_module_init(void) diff -r 3d356a2b1c75 -r db4fcb609383 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Wed Apr 11 07:30:02 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Wed Apr 11 15:45:29 2007 +0100 @@ -22,16 +22,11 @@ #ifndef _XEN_PLATFORM_PCI_H #define _XEN_PLATFORM_PCI_H -#include <linux/interrupt.h> +#include <linux/pci.h> unsigned long 
alloc_xen_mmio(unsigned long len); -int gnttab_init(void); -irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs); -void irq_suspend(void); -void irq_suspend_cancel(void); - -void platform_pci_suspend(void); -void platform_pci_suspend_cancel(void); void platform_pci_resume(void); +extern struct pci_dev *xen_platform_pdev; + #endif /* _XEN_PLATFORM_PCI_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |