
Re: [Xen-devel] [PATCH 2/5] [POST-4.0]: HVM NUMA guest: pass NUMA information to libxc



On Thu, Feb 04, 2010 at 10:54:22PM +0100, Andre Przywara wrote:
> This patch adds a "struct numa_guest_info" to libxc, which makes it
> possible to specify a guest's NUMA topology along with the appropriate
> host binding. For now the information is filled in by the Python
> wrapper (I left out the libxl part): the caller only has to specify
> the number of guest nodes, and the wrapper fills the rest of the
> struct with sensible defaults (equal distribution). The information is
> passed on to the hvm_info_table; the corresponding memory allocation
> is done in another patch.
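
Just to check that I read the new interface correctly: a caller that does
not care about NUMA now passes NULL for the new argument, as the libxl hunk
further down does. A sketch only, matching the new prototype this patch
introduces:

    /* sketch: non-NUMA caller of the extended builder interface */
    sts = xc_hvm_build_target_mem(xc_handle, domid, memsize, target,
                                  NULL /* no NUMA topology */, image_name);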
>
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
>
> Regards,
> Andre.
>
> -- 
> Andre Przywara
> AMD-Operating System Research Center (OSRC), Dresden, Germany
> Tel: +49 351 488-3567-12

> commit b4ee75af6f6220bdafccf75c9bc6e4915a413b18
> Author: Andre Przywara <andre.przywara@xxxxxxx>
> Date:   Mon Feb 1 12:36:28 2010 +0100
> 
>     add struct numa_guest_info to the interface, parse the NUMA
>     information passed by xend and pass it on to libxc
> 
> diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
> index 8fc7ac5..02103b1 100644
> --- a/tools/libxc/xc_hvm_build.c
> +++ b/tools/libxc/xc_hvm_build.c
> @@ -106,6 +106,7 @@ static int loadelfimage(
>  
>  static int setup_guest(int xc_handle,
>                         uint32_t dom, int memsize, int target,
> +                       struct numa_guest_info *numainfo,
>                         char *image, unsigned long image_size)
>  {
>      xen_pfn_t *page_array = NULL;
> @@ -334,6 +335,7 @@ static int xc_hvm_build_internal(int xc_handle,
>                                   uint32_t domid,
>                                   int memsize,
>                                   int target,
> +                                 struct numa_guest_info *numainfo,
>                                   char *image,
>                                   unsigned long image_size)
>  {
> @@ -343,7 +345,8 @@ static int xc_hvm_build_internal(int xc_handle,
>          return -1;
>      }
>  
> -    return setup_guest(xc_handle, domid, memsize, target, image, image_size);
> +    return setup_guest(xc_handle, domid, memsize, target,
> +        numainfo, image, image_size);
>  }
>  
>  /* xc_hvm_build:
> @@ -352,6 +355,7 @@ static int xc_hvm_build_internal(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numainfo,
>                   const char *image_name)
>  {
>      char *image;
> @@ -362,7 +366,8 @@ int xc_hvm_build(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>  
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> +                                numainfo, image, image_size);
>  
>      free(image);
>  
> @@ -379,6 +384,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>                             uint32_t domid,
>                             int memsize,
>                             int target,
> +                           struct numa_guest_info *numainfo,
>                             const char *image_name)
>  {
>      char *image;
> @@ -389,7 +395,8 @@ int xc_hvm_build_target_mem(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>  
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target,
> +                                numainfo, image, image_size);
>  
>      free(image);
>  
> @@ -402,6 +409,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numainfo,
>                       const char *image_buffer,
>                       unsigned long image_size)
>  {
> @@ -425,7 +433,7 @@ int xc_hvm_build_mem(int xc_handle,
>      }
>  
>      sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> -                                img, img_len);
> +                                numainfo, img, img_len);
>  
>      /* xc_inflate_buffer may return the original buffer pointer (for
>         for already inflated buffers), so exercise some care in freeing */
> diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
> index 851f769..110b0c9 100644
> --- a/tools/libxc/xenguest.h
> +++ b/tools/libxc/xenguest.h
> @@ -62,6 +62,13 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
>                        unsigned int console_evtchn, unsigned long *console_mfn,
>                        unsigned int hvm, unsigned int pae, int superpages);
>  
> +struct numa_guest_info {
> +     int num_nodes;
> +     int *guest_to_host_node;
> +     int *vcpu_to_node;
> +     uint64_t *node_mem;
> +};
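
If I follow the Python side below correctly, a two-node guest would be
described roughly like this (example values only; node_mem seems to be in
kB at this level of the interface):

    int g2h[2] = { 0, 1 };                   /* guest node -> host node */
    int v2n[4] = { 0, 0, 1, 1 };             /* vcpu -> guest node */
    uint64_t mem[2] = { 2097152, 2097152 };  /* 2 GB per node, in kB (example) */
    struct numa_guest_info numainfo = {
        .num_nodes          = 2,
        .guest_to_host_node = g2h,
        .vcpu_to_node       = v2n,
        .node_mem           = mem,
    };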
> +
>  /**
>   * This function will create a domain for a paravirtualized Linux
>   * using file names pointing to kernel and ramdisk
> @@ -143,17 +150,20 @@ int xc_linux_build_mem(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numa_info,
>                   const char *image_name);
>  
>  int xc_hvm_build_target_mem(int xc_handle,
>                              uint32_t domid,
>                              int memsize,
>                              int target,
> +                            struct numa_guest_info *numa_info,
>                              const char *image_name);
>  
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numa_info,
>                       const char *image_buffer,
>                       unsigned long image_size);
>  
> diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
> index 457001c..15d1b71 100644
> --- a/tools/libxc/xg_private.c
> +++ b/tools/libxc/xg_private.c
> @@ -177,6 +177,7 @@ __attribute__((weak))
>      int xc_hvm_build(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numainfo,
>                       const char *image_name)
>  {
>      errno = ENOSYS;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 7196aa8..154253a 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -168,7 +168,7 @@ int build_hvm(struct libxl_ctx *ctx, uint32_t domid,
>  {
>      int ret;
>  
> -    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, info->kernel);
> +    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, NULL, info->kernel);
>      if (ret) {
>          XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
>          return ERROR_FAIL;
> diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
> index 1932758..ecd7800 100644
> --- a/tools/python/xen/lowlevel/xc/xc.c
> +++ b/tools/python/xen/lowlevel/xc/xc.c
> @@ -915,15 +915,19 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      int i;
>      char *image;
>      int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
> +    int numanodes;
> +    struct numa_guest_info numainfo;
>      PyObject *vcpu_avail_handle = NULL;
> +    PyObject *nodemem_handle = NULL;
>      uint8_t vcpu_avail[HVM_MAX_VCPUS/8];
>  
> -    static char *kwd_list[] = { "domid",
> -                                "memsize", "image", "target", "vcpus", 
> -                                "vcpu_avail", "acpi", "apic", NULL };
> -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
> +    static char *kwd_list[] = { "domid", "memsize", "image", "target",
> +                                "vcpus", "vcpu_avail", "acpi", "apic",
> +                                "nodes", "nodemem", NULL };
> +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiiO", kwd_list,
>                                       &dom, &memsize, &image, &target, &vcpus,
> -                                      &vcpu_avail_handle, &acpi, &apic) )
> +                                      &vcpu_avail_handle, &acpi, &apic,
> +                                      &numanodes, &nodemem_handle) )
>          return NULL;
>  
>      memset(vcpu_avail, 0, sizeof(vcpu_avail));
> @@ -954,8 +958,50 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      if ( target == -1 )
>          target = memsize;
>  
> -    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
> -                                 target, image) != 0 )
> +    numainfo.num_nodes = numanodes;
> +    numainfo.guest_to_host_node = malloc(numanodes * sizeof(int)); 
> +    numainfo.vcpu_to_node = malloc(vcpus * sizeof(int));
> +    numainfo.node_mem = malloc(numanodes * sizeof(uint64_t)); 
> +    if (numanodes > 1)
> +    {
> +        int vcpu_per_node, remainder;
> +        int n;
> +        vcpu_per_node = vcpus / numanodes;
> +        remainder = vcpus % numanodes;
> +        n = remainder * (vcpu_per_node + 1);
> +        for (i = 0; i < vcpus; i++)

I don't know the coding style, but you seem to have two different
versions of it. Here you write 'for (i = ...'
> +        {
> +            if (i < n)
> +                numainfo.vcpu_to_node[i] = i / (vcpu_per_node + 1);
> +            else
> +                numainfo.vcpu_to_node[i] = remainder + (i - n) / vcpu_per_node;
> +        }
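
For what it's worth, I worked through the distribution once to convince
myself it matches the "equal distribution" from the description. A
standalone version of the formula above, with vcpus = 6 and numanodes = 4:

    int vcpus = 6, numanodes = 4;
    int vcpu_per_node = vcpus / numanodes;               /* 1 */
    int remainder     = vcpus % numanodes;               /* 2 */
    int n             = remainder * (vcpu_per_node + 1); /* 4 */
    /* i:    0  1  2  3  4  5
     * node: 0  0  1  1  2  3
     * i.e. the first 'remainder' nodes get one extra vcpu each */

which looks right to me.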
> +        for (i = 0; i < numanodes; i++)
> +            numainfo.guest_to_host_node[i] = i % 2;
> +        if ( nodemem_handle != NULL && PyList_Check(nodemem_handle) )
> +        {

> +            PyObject *item;
> +            for ( i = 0; i < numanodes; i++)

and here it is 'for ( i = 0; ...'.

I've failed in the past to find the coding style document, so I am not
exactly sure which one is correct.
> +            {
> +                item = PyList_GetItem(nodemem_handle, i);
> +                numainfo.node_mem[i] = PyInt_AsLong(item);
> +                fprintf(stderr, "NUMA: node %i has %lu kB\n", i,
> +                    numainfo.node_mem[i]);

Isn't there any other way to print this out? Can't you use the
xc_dom_printf routines?

> +            }
> +        } else {
> +            unsigned int mempernode;
> +            if ( nodemem_handle != NULL && PyInt_Check(nodemem_handle) )
> +                mempernode = PyInt_AsLong(nodemem_handle);
> +            else
> +                mempernode = (memsize << 10) / numanodes;
> +            fprintf(stderr, "NUMA: setting all nodes to %i KB\n", mempernode);

Ditto.
> +            for (i = 0; i < numanodes; i++)

Ditto for the coding guideline.
> +                numainfo.node_mem[i] = mempernode;
> +        }
> +    }
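
Assuming memsize is in MB here (as for the other hvm_build calls),
(memsize << 10) / numanodes gives kB per node, e.g.:

    /* memsize = 4096 MB, numanodes = 4:
     * (4096 << 10) / 4 = 4194304 kB / 4 = 1048576 kB = 1 GB per node */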
> +
> +    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target,
> +                                 &numainfo, image) != 0 )

and I guess here too
>          return pyxc_error_to_exception();
>  
>  #if !defined(__ia64__)
> @@ -970,6 +1016,15 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      va_hvm->apic_mode    = apic;
>      va_hvm->nr_vcpus     = vcpus;
>      memcpy(va_hvm->vcpu_online, vcpu_avail, sizeof(vcpu_avail));
> +
> +    va_hvm->num_nodes    = numanodes;
> +    if (numanodes > 0) {
> +        for (i = 0; i < vcpus; i++)
> +            va_hvm->vcpu_to_node[i] = numainfo.vcpu_to_node[i];
> +        for (i = 0; i < numanodes; i++)
> +            va_hvm->node_mem[i] = numainfo.node_mem[i] >> 10;

Would it make sense to have 10 be #defined somewhere?
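Something like the following is what I had in mind; the name is only a
placeholder, and if I read the units right node_mem is kept in kB here
while hvm_info ends up with MB:

    #define NUMA_NODE_MEM_SHIFT 10  /* kB -> MB; name is only a suggestion */

    va_hvm->node_mem[i] = numainfo.node_mem[i] >> NUMA_NODE_MEM_SHIFT;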

> +    }
> +
>      for ( i = 0, sum = 0; i < va_hvm->length; i++ )
>          sum += ((uint8_t *)va_hvm)[i];
>      va_hvm->checksum -= sum;
> @@ -1174,7 +1229,7 @@ static PyObject *pyxc_physinfo(XcObject *self)
>              nr_nodes++;
>      }
>  
> -    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
> +    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",

What changed in that line? My eyes can't seem to find the difference.

>                              "nr_nodes",         nr_nodes,
>                              "max_node_id",      info.max_node_id,
>                              "max_cpu_id",       info.max_cpu_id,
> diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
> index f3b2cc5..f06d6e2 100644
> --- a/tools/python/xen/xend/image.py
> +++ b/tools/python/xen/xend/image.py
> @@ -961,7 +961,8 @@ class HVMImageHandler(ImageHandler):
>                            vcpus          = self.vm.getVCpuCount(),
>                            vcpu_avail     = self.vm.getVCpuAvail(),
>                            acpi           = self.acpi,
> -                          apic           = self.apic)
> +                          apic           = self.apic,
> +                          nodes          = 0)
>          rc['notes'] = { 'SUSPEND_CANCEL': 1 }
>  
>          rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

