[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-ia64-devel] [PATCH 7/8] foreign p2m: rewrite save/restore with foreign p2m



This patch doesn't handle live migration with the balloon driver.
Sorry for the inconvenience. I'll post the updated patch tomorrow.

On Tue, Aug 28, 2007 at 03:57:32PM +0900, Isaku Yamahata wrote:
> # HG changeset patch
> # User yamahata@xxxxxxxxxxxxx
> # Date 1188274506 -32400
> # Node ID b2b904e37c102327393049c795fa5cdc8b0e3c10
> # Parent  82ebc14bec8f14281401faa2eefe4ebc6e4d77bc
> rewrite ia64 domain save/restore with foreign p2m exposure
> PATCHNAME: rewrite_ia64_domain_save_restore_foreign_p2m
> 
> Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
> 
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_linux_restore.c
> --- a/tools/libxc/ia64/xc_ia64_linux_restore.c        Tue Aug 28 13:10:18 2007 +0900
> +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c        Tue Aug 28 13:15:06 2007 +0900
> @@ -5,12 +5,17 @@
>   *
>   * Copyright (c) 2003, K A Fraser.
>   *  Rewritten for ia64 by Tristan Gingold <tristan.gingold@xxxxxxxx>
> + *
> + * Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
> + *   Use foreign p2m exposure.
>   */
>  
>  #include <stdlib.h>
>  #include <unistd.h>
>  
>  #include "xg_private.h"
> +#include "xc_ia64_save_restore.h"
> +#include "xc_ia64.h"
>  
>  #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
>  
> @@ -40,6 +45,16 @@ read_exact(int fd, void *buf, size_t cou
>  }
>  
>  static int
> +populate_page_if_necessary(int xc_handle, uint32_t dom, unsigned long gmfn,
> +                           struct xen_ia64_p2m_table *p2m_table)
> +{
> +    if (xc_ia64_p2m_present(p2m_table, gmfn))
> +        return 0;
> +
> +    return xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &gmfn);
> +}
> +
> +static int
>  read_page(int xc_handle, int io_fd, uint32_t dom, unsigned long pfn)
>  {
>      void *mem;
> @@ -78,10 +93,11 @@ xc_domain_restore(int xc_handle, int io_
>      /* A copy of the CPU context of the guest. */
>      vcpu_guest_context_t ctxt;
>  
> -    unsigned long *page_array = NULL;
> -
>      /* A temporary mapping of the guest's start_info page. */
>      start_info_t *start_info;
> +
> +    struct xen_ia64_p2m_table p2m_table;
> +    xc_ia64_p2m_init(&p2m_table);
>  
>      if (hvm) {
>          ERROR("HVM Restore is unsupported");
> @@ -102,7 +118,7 @@ xc_domain_restore(int xc_handle, int io_
>          ERROR("Error when reading version");
>          goto out;
>      }
> -    if (ver != 1) {
> +    if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) {
>          ERROR("version of save doesn't match");
>          goto out;
>      }
> @@ -112,25 +128,6 @@ xc_domain_restore(int xc_handle, int io_
>          ERROR("Unable to lock_pages ctxt");
>          return 1;
>      }
> -
> -    /* Get pages.  */
> -    page_array = malloc(p2m_size * sizeof(unsigned long));
> -    if (page_array == NULL) {
> -        ERROR("Could not allocate memory");
> -        goto out;
> -    }
> -
> -    for ( i = 0; i < p2m_size; i++ )
> -        page_array[i] = i;
> -
> -    if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
> -                                           0, 0, page_array) )
> -    {
> -        ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
> -              PFN_TO_KB(p2m_size), dom);
> -        goto out;
> -    }
> -    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
>  
>      if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) {
>          ERROR("read: domain setup");
> @@ -155,6 +152,34 @@ xc_domain_restore(int xc_handle, int io_
>      }
>      shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
>  
> +    if (ver == XC_IA64_SR_FORMAT_VER_TWO) {
> +        unsigned int memmap_info_num_pages;
> +        unsigned long memmap_size;
> +        xen_ia64_memmap_info_t *memmap_info;
> +
> +        if (!read_exact(io_fd, &memmap_info_num_pages,
> +                        sizeof(memmap_info_num_pages))) {
> +            ERROR("read: memmap_info_num_pages");
> +            goto out;
> +        }
> +        memmap_size = memmap_info_num_pages * PAGE_SIZE;
> +        memmap_info = malloc(memmap_size);
> +        if (memmap_info == NULL) {
> +            ERROR("Could not allocate memory for memmap_info");
> +            goto out;
> +        }
> +        if (!read_exact(io_fd, memmap_info, memmap_size)) {
> +            ERROR("read: memmap_info");
> +            goto out;
> +        }
> +        if (xc_ia64_p2m_map(&p2m_table, xc_handle,
> +                            dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
> +            ERROR("p2m mapping");
> +            goto out;
> +        }
> +        free(memmap_info);
> +    }
> +
>      DPRINTF("Reloading memory pages:   0%%\n");
>  
>      while (1) {
> @@ -165,17 +190,25 @@ xc_domain_restore(int xc_handle, int io_
>          if (gmfn == INVALID_MFN)
>              break;
>  
> +        if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) {
> +            ERROR("can not populate page 0x%lx", gmfn);
> +            goto out;
> +        }
>          if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
>              goto out;
>      }
>  
>      DPRINTF("Received all pages\n");
>  
> -    /* Get the list of PFNs that are not in the psuedo-phys map */
> -    {
> +    /*
> +     * Get the list of PFNs that are not in the psuedo-phys map
> +     *
> +     * Now we allocate pages on demand so that we don't need to free
> +     * pages here. just read and discard.
> +     */
> +    if (ver == XC_IA64_SR_FORMAT_VER_ONE) {
>          unsigned int count;
>          unsigned long *pfntab;
> -        int rc;
>  
>          if (!read_exact(io_fd, &count, sizeof(count))) {
>              ERROR("Error when reading pfn count");
> @@ -189,34 +222,11 @@ xc_domain_restore(int xc_handle, int io_
>          }
>  
>          if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
> +            free(pfntab);
>              ERROR("Error when reading pfntab");
>              goto out;
>          }
> -
> -        DPRINTF ("Try to free %u pages\n", count);
> -
> -        for (i = 0; i < count; i++) {
> -
> -            volatile unsigned long pfn;
> -
> -            struct xen_memory_reservation reservation = {
> -                .nr_extents   = 1,
> -                .extent_order = 0,
> -                .domid        = dom
> -            };
> -            set_xen_guest_handle(reservation.extent_start,
> -                                 (unsigned long *)&pfn);
> -
> -            pfn = pfntab[i];
> -            rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
> -                              &reservation);
> -            if (rc != 1) {
> -                ERROR("Could not decrease reservation : %d", rc);
> -                goto out;
> -            }
> -        }
> -
> -        DPRINTF("Decreased reservation by %d pages\n", count);
> +        free(pfntab);
>      }
>  
>  
> @@ -274,6 +284,10 @@ xc_domain_restore(int xc_handle, int io_
>      munmap (shared_info, PAGE_SIZE);
>  
>      /* Uncanonicalise the suspend-record frame number and poke resume rec. */
> +    if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) {
> +        ERROR("cannot populate page 0x%lx", gmfn);
> +        goto out;
> +    }
>      start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
>                                        PROT_READ | PROT_WRITE, gmfn);
>      if (start_info == NULL) {
> @@ -309,8 +323,7 @@ xc_domain_restore(int xc_handle, int io_
>      if ((rc != 0) && (dom != 0))
>          xc_domain_destroy(xc_handle, dom);
>  
> -    if (page_array != NULL)
> -        free(page_array);
> +    xc_ia64_p2m_unmap(&p2m_table);
>  
>      unlock_pages(&ctxt, sizeof(ctxt));
>  
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_linux_save.c
> --- a/tools/libxc/ia64/xc_ia64_linux_save.c   Tue Aug 28 13:10:18 2007 +0900
> +++ b/tools/libxc/ia64/xc_ia64_linux_save.c   Tue Aug 28 13:15:06 2007 +0900
> @@ -5,6 +5,9 @@
>   *
>   * Copyright (c) 2003, K A Fraser.
>   *  Rewritten for ia64 by Tristan Gingold <tristan.gingold@xxxxxxxx>
> + *
> + * Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
> + *   Use foreign p2m exposure.
>   */
>  
>  #include <inttypes.h>
> @@ -14,6 +17,9 @@
>  #include <sys/time.h>
>  
>  #include "xg_private.h"
> +#include "xc_ia64.h"
> +#include "xc_ia64_save_restore.h"
> +#include "xc_efi.h"
>  
>  /*
>  ** Default values for important tuning parameters. Can override by passing
> @@ -151,8 +157,6 @@ xc_domain_save(int xc_handle, int io_fd,
>      /* A copy of the CPU context of the guest. */
>      vcpu_guest_context_t ctxt;
>  
> -    unsigned long *page_array = NULL;
> -
>      /* Live mapping of shared info structure */
>      shared_info_t *live_shinfo = NULL;
>  
> @@ -180,6 +184,16 @@ xc_domain_save(int xc_handle, int io_fd,
>      unsigned long *to_skip = NULL;
>  
>      char *mem;
> +
> +    unsigned int memmap_info_num_pages;
> +    unsigned long memmap_size = 0;
> +    xen_ia64_memmap_info_t *memmap_info_live = NULL;
> +    xen_ia64_memmap_info_t *memmap_info = NULL;
> +    void *memmap_desc_start;
> +    void *memmap_desc_end;
> +    void *p;
> +    struct xen_ia64_p2m_table p2m_table;
> +    xc_ia64_p2m_init(&p2m_table);
>  
>      if (debug)
>          fprintf(stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
> @@ -217,12 +231,6 @@ xc_domain_save(int xc_handle, int io_fd,
>      }
>  
>      p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
> -
> -    page_array = malloc(p2m_size * sizeof(unsigned long));
> -    if (page_array == NULL) {
> -        ERROR("Could not allocate memory");
> -        goto out;
> -    }
>  
>      /* This is expected by xm restore.  */
>      if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
> @@ -236,7 +244,7 @@ xc_domain_save(int xc_handle, int io_fd,
>         The version is hard-coded, don't forget to change the restore code
>         too!  */
>      {
> -        unsigned long version = 1;
> +        unsigned long version = XC_IA64_SR_FORMAT_VER_CURRENT;
>  
>          if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
>              ERROR("write: version");
> @@ -304,6 +312,38 @@ xc_domain_save(int xc_handle, int io_fd,
>  
>      }
>  
> +    memmap_info_num_pages = live_shinfo->arch.memmap_info_num_pages;
> +    memmap_size = PAGE_SIZE * memmap_info_num_pages;
> +    memmap_info_live = xc_map_foreign_range(xc_handle, info.domid,
> +                                       memmap_size, PROT_READ,
> +                                            live_shinfo->arch.memmap_info_pfn);
> +    if (memmap_info_live == NULL) {
> +        PERROR("Could not map memmap info.");
> +        goto out;
> +    }
> +    memmap_info = malloc(memmap_size);
> +    if (memmap_info == NULL) {
> +        PERROR("Could not allocate memmap info memory");
> +        goto out;
> +    }
> +    memcpy(memmap_info, memmap_info_live, memmap_size);
> +    munmap(memmap_info_live, memmap_size);
> +    memmap_info_live = NULL;
> +    
> +    if (xc_ia64_p2m_map(&p2m_table, xc_handle, dom, memmap_info, 0) < 0) {
> +        PERROR("xc_ia64_p2m_map");
> +        goto out;
> +    }
> +    if (!write_exact(io_fd,
> +                     &memmap_info_num_pages, sizeof(memmap_info_num_pages))) {
> +        PERROR("write: arch.memmap_info_num_pages");
> +        goto out;
> +    }
> +    if (!write_exact(io_fd, memmap_info, memmap_size)) {
> +        PERROR("write: memmap_info");
> +        goto out;
> +    }
> +
>      sent_last_iter = p2m_size;
>      total_sent = 0;
>  
> @@ -313,13 +353,6 @@ xc_domain_save(int xc_handle, int io_fd,
>  
>          sent_this_iter = 0;
>          skip_this_iter = 0;
> -
> -        /* Get the pfn list, as it may change.  */
> -        if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
> -                                 0, p2m_size) != p2m_size) {
> -            ERROR("Could not get the page frame list");
> -            goto out;
> -        }
>  
>          /* Dirtied pages won't be saved.
>             slightly wasteful to peek the whole array evey time,
> @@ -334,45 +367,64 @@ xc_domain_save(int xc_handle, int io_fd,
>          }
>  
>          /* Start writing out the saved-domain record. */
> -        for (N = 0; N < p2m_size; N++) {
> -            if (page_array[N] == INVALID_MFN)
> +        memmap_desc_start = &memmap_info->memdesc;
> +        memmap_desc_end = memmap_desc_start + memmap_info->efi_memmap_size;
> +        for (p = memmap_desc_start;
> +             p < memmap_desc_end;
> +             p += memmap_info->efi_memdesc_size) {
> +            efi_memory_desc_t *md = p;
> +            if (md->type != EFI_CONVENTIONAL_MEMORY ||
> +                md->attribute != EFI_MEMORY_WB ||
> +                md->num_pages == 0)
>                  continue;
> -            if (!last_iter) {
> -                if (test_bit(N, to_skip) && test_bit(N, to_send))
> -                    skip_this_iter++;
> -                if (test_bit(N, to_skip) || !test_bit(N, to_send))
> +            
> +            for (N = md->phys_addr >> PAGE_SHIFT;
> +                 N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
> +                     PAGE_SHIFT;
> +                 N++) {
> +
> +                if (!xc_ia64_p2m_allocated(&p2m_table, N))
>                      continue;
> +
> +                if (!last_iter) {
> +                    if (test_bit(N, to_skip) && test_bit(N, to_send))
> +                        skip_this_iter++;
> +                    if (test_bit(N, to_skip) || !test_bit(N, to_send))
> +                        continue;
> +                }
> +
> +                if (debug)
> +                    fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
> +                            xc_ia64_p2m_mfn(&p2m_table, N),
> +                            N, p2m_size);
> +
> +                mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
> +                                           PROT_READ|PROT_WRITE, N);
> +                if (mem == NULL) {
> +                    /* The page may have move.
> +                       It will be remarked dirty.
> +                       FIXME: to be tracked.  */
> +                    fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n",
> +                            xc_ia64_p2m_mfn(&p2m_table, N),
> +                            N, safe_strerror(errno));
> +                    continue;
> +                }
> +
> +                if (!write_exact(io_fd, &N, sizeof(N))) {
> +                    ERROR("write: p2m_size");
> +                    munmap(mem, PAGE_SIZE);
> +                    goto out;
> +                }
> +
> +                if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
> +                    ERROR("Error when writing to state file (5)");
> +                    munmap(mem, PAGE_SIZE);
> +                    goto out;
> +                }
> +                munmap(mem, PAGE_SIZE);
> +                sent_this_iter++;
> +                total_sent++;
>              }
> -
> -            if (debug)
> -                fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
> -                        page_array[N], N, p2m_size);
> -
> -            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
> -                                       PROT_READ|PROT_WRITE, N);
> -            if (mem == NULL) {
> -                /* The page may have move.
> -                   It will be remarked dirty.
> -                   FIXME: to be tracked.  */
> -                fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n",
> -                        page_array[N], N, safe_strerror(errno));
> -                continue;
> -            }
> -
> -            if (!write_exact(io_fd, &N, sizeof(N))) {
> -                ERROR("write: p2m_size");
> -                munmap(mem, PAGE_SIZE);
> -                goto out;
> -            }
> -
> -            if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
> -                ERROR("Error when writing to state file (5)");
> -                munmap(mem, PAGE_SIZE);
> -                goto out;
> -            }
> -            munmap(mem, PAGE_SIZE);
> -            sent_this_iter++;
> -            total_sent++;
>          }
>  
>          if (last_iter)
> @@ -420,38 +472,6 @@ xc_domain_save(int xc_handle, int io_fd,
>          }
>      }
>  
> -    /* Send through a list of all the PFNs that were not in map at the close */
> -    {
> -        unsigned int i,j;
> -        unsigned long pfntab[1024];
> -
> -        for (i = 0, j = 0; i < p2m_size; i++) {
> -            if (page_array[i] == INVALID_MFN)
> -                j++;
> -        }
> -
> -        if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
> -            ERROR("Error when writing to state file (6a)");
> -            goto out;
> -        }
> -
> -        for (i = 0, j = 0; i < p2m_size; ) {
> -
> -            if (page_array[i] == INVALID_MFN)
> -                pfntab[j++] = i;
> -
> -            i++;
> -            if (j == 1024 || i == p2m_size) {
> -                if (!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
> -                    ERROR("Error when writing to state file (6b)");
> -                    goto out;
> -                }
> -                j = 0;
> -            }
> -        }
> -
> -    }
> -
>      if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
>          ERROR("Could not get vcpu context");
>          goto out;
> @@ -494,13 +514,17 @@ xc_domain_save(int xc_handle, int io_fd,
>          }
>      }
>  
> -    free(page_array);
>      unlock_pages(to_send, bitmap_size);
>      free(to_send);
>      unlock_pages(to_skip, bitmap_size);
>      free(to_skip);
>      if (live_shinfo)
>          munmap(live_shinfo, PAGE_SIZE);
> +    if (memmap_info_live)
> +        munmap(memmap_info_live, memmap_size);
> +    if (memmap_info)
> +        free(memmap_info);
> +    xc_ia64_p2m_unmap(&p2m_table);
>  
>      fprintf(stderr,"Save exit rc=%d\n",rc);
>  
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_save_restore.h
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/libxc/ia64/xc_ia64_save_restore.h Tue Aug 28 13:15:06 2007 +0900
> @@ -0,0 +1,44 @@
> +/******************************************************************************
> + * xc_ia64_save_restore.h
> + *
> + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + *
> + */
> +
> +#ifndef XC_IA64_SAVE_RESTORE_H
> +#define XC_IA64_SR_H
> +
> +        /* introduced changeset 10692:306d7857928c of xen-ia64-unstable.ht */
> +#define XC_IA64_SR_FORMAT_VER_ONE       1UL
> +        /* using foreign p2m exposure version */
> +#define XC_IA64_SR_FORMAT_VER_TWO       2UL
> +#define XC_IA64_SR_FORMAT_VER_MAX       2UL
> +
> +#define XC_IA64_SR_FORMAT_VER_CURRENT   XC_IA64_SR_FORMAT_VER_TWO
> +
> +#endif /* XC_IA64_SAVE_RESTORE_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */


> _______________________________________________
> Xen-ia64-devel mailing list
> Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-ia64-devel

-- 
yamahata

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.