
Re: [Xen-devel] [PATCH 1/4] xen: report how much memory a domain has on each NUMA node


  • To: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
  • Date: Wed, 05 Mar 2014 15:50:05 +0100
  • Cc: Ian Campbell <Ian.Campbell@xxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>, Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
  • Delivery-date: Wed, 05 Mar 2014 14:50:20 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

On 05.03.2014 15:36, Dario Faggioli wrote:
by means of a new hypercall, XEN_DOMCTL_numainfo, doing something
similar to what XEN_SYSCTL_numainfo does, but on a per-domain basis.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
  xen/common/domctl.c                 |   45 +++++++++++++++++++++++++++++++++++
  xen/include/public/domctl.h         |   22 +++++++++++++++++
  xen/xsm/flask/hooks.c               |    3 ++
  xen/xsm/flask/policy/access_vectors |    2 ++
  4 files changed, 72 insertions(+)

diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 7cf610a..96bf326 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -574,6 +574,51 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
      }
      break;

+    case XEN_DOMCTL_numainfo:
+    {
+        uint32_t node, max_node_index, last_online_node;
+        xen_domctl_numainfo_t *ni = &op->u.numainfo;
+        uint64_t *memkb_on_node;
+        struct page_info *page;
+
+        /*
+         * We report back info on the smaller of how many nodes the
+         * caller can handle and how many are actually online.
+         */
+        last_online_node = last_node(node_online_map);
+        max_node_index = min_t(uint32_t, ni->max_node_index, last_online_node);
+        ni->max_node_index = max_node_index;
+
+        ret = -ENOMEM;
+        memkb_on_node = xzalloc_array(uint64_t, max_node_index + 1);
+        if ( !memkb_on_node )
+            break;
+
+        spin_lock(&d->page_alloc_lock);
+        page_list_for_each(page, &d->page_list)
+        {
+            node = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
+            /* For nodes that are offline, don't touch the counter */
+            if ( node <= max_node_index && node_online(node) )
+                memkb_on_node[node]++;
+        }

This loop will run quite a long time for huge domains. Wouldn't it be better
to do the accounting during page allocation?
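
Roughly what I have in mind (an illustrative sketch only, not code from the
tree; the numa_pages field and the exact hook point are assumptions of mine):

/* Hypothetical addition to struct domain, protected by d->page_alloc_lock: */
/*     unsigned long numa_pages[MAX_NUMNODES];                              */

/* Called from the page assignment/free paths, with delta = +1 / -1. */
static void domain_numa_account(struct domain *d, struct page_info *pg,
                                long delta)
{
    unsigned int node = phys_to_nid(page_to_maddr(pg));

    if ( node < MAX_NUMNODES )
        d->numa_pages[node] += delta;
}

The domctl would then be O(nr_nodes) instead of O(nr_pages):

    spin_lock(&d->page_alloc_lock);
    for ( node = 0; node <= max_node_index; node++ )
        memkb_on_node[node] = d->numa_pages[node] << (PAGE_SHIFT - 10);
    spin_unlock(&d->page_alloc_lock);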

+        spin_unlock(&d->page_alloc_lock);
+
+        for ( node = 0; node <= max_node_index; node++ )
+        {
+            memkb_on_node[node] <<= (PAGE_SHIFT-10);

If you already use a 64-bit element you could use bytes as the unit.
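
I.e. (sketch only; mem_on_node is just the array renamed), with a 64-bit
element there is no overflow concern even when counting bytes:

    mem_on_node[node] <<= PAGE_SHIFT;            /* pages -> bytes */

instead of

    memkb_on_node[node] <<= (PAGE_SHIFT - 10);   /* pages -> KB */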

+            if ( copy_to_guest_offset(ni->memkb_on_node, node,
+                                      &memkb_on_node[node], 1) )
+                break;
+        }
+
+        ret = ((node <= max_node_index) || copy_to_guest(u_domctl, op, 1))
+            ? -EFAULT : 0;
+        xfree(memkb_on_node);
+    }
+    break;
+
      case XEN_DOMCTL_destroydomain:
      {
          ret = domain_kill(d);
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index f22fe2e..a455d78 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -315,6 +315,26 @@ typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
  DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);


+/* XEN_DOMCTL_numainfo */
+struct xen_domctl_numainfo {
+    /*
+     * IN: maximum addressable entry in the caller-provided arrays.
+     * OUT: minimum between the maximum addressable entry in the
+     *      caller-provided arrays and largest online node identifier
+     *      in the system.
+     */
+    uint32_t max_node_index;

Add explicit padding?
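
Something along these lines (sketch; the field name is just an example), so
the layout doesn't depend on implicit compiler padding in front of the
64-bit handle:

    uint32_t max_node_index;
    uint32_t pad;              /* explicit padding, should be checked as 0 */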

+
+    /*
+     * OUT: memory, in KB, on each node. The i-th element equal to 0 means
+     *      either "no memory on node i" or "node i offline".
+     */
+    XEN_GUEST_HANDLE_64(uint64) memkb_on_node;
+};
+typedef struct xen_domctl_numainfo xen_domctl_numainfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_numainfo_t);
+
+
  /* XEN_DOMCTL_scheduler_op */
  /* Scheduler types. */
  #define XEN_SCHEDULER_SEDF     4
@@ -966,6 +986,7 @@ struct xen_domctl {
  #define XEN_DOMCTL_getnodeaffinity               69
  #define XEN_DOMCTL_set_max_evtchn                70
  #define XEN_DOMCTL_cacheflush                    71
+#define XEN_DOMCTL_numainfo                      72
  #define XEN_DOMCTL_gdbsx_guestmemio            1000
  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -986,6 +1007,7 @@ struct xen_domctl {
          struct xen_domctl_vcpucontext       vcpucontext;
          struct xen_domctl_getvcpuinfo       getvcpuinfo;
          struct xen_domctl_max_vcpus         max_vcpus;
+        struct xen_domctl_numainfo          numainfo;
          struct xen_domctl_scheduler_op      scheduler_op;
          struct xen_domctl_setdomainhandle   setdomainhandle;
          struct xen_domctl_setdebugging      setdebugging;
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 96276ac..edc1d34 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -727,6 +727,9 @@ static int flask_domctl(struct domain *d, int cmd)
      case XEN_DOMCTL_cacheflush:
          return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);

+    case XEN_DOMCTL_numainfo:
+        return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__NUMAINFO);
+
      default:
          printk("flask_domctl: Unknown op %d\n", cmd);
          return -EPERM;
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index a0ed13d..e218992 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -198,6 +198,8 @@ class domain2
      set_max_evtchn
  # XEN_DOMCTL_cacheflush
      cacheflush
+# XEN_DOMCTL_numainfo
+    numainfo
  }

  # Similar to class domain, but primarily contains domctls related to HVM domains
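
FWIW, from the caller's side I'd expect usage roughly like the following
(purely a hypothetical sketch against the interface added by this patch;
MAX_HANDLED_NODES and issue_domctl() are made-up stand-ins for whatever the
toolstack really uses, and interface-version/hypercall-buffer handling is
omitted):

    uint64_t memkb[MAX_HANDLED_NODES];
    struct xen_domctl domctl = { 0 };

    domctl.cmd = XEN_DOMCTL_numainfo;
    domctl.domain = domid;
    domctl.u.numainfo.max_node_index = MAX_HANDLED_NODES - 1;
    set_xen_guest_handle(domctl.u.numainfo.memkb_on_node, memkb);

    if ( issue_domctl(&domctl) )
        return -1;

    /* Entries 0 .. domctl.u.numainfo.max_node_index are now valid. */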


Juergen

--
Juergen Gross                 Principal Developer Operating Systems
PBG PDG ES&S SWE OS6                   Telephone: +49 (0) 89 62060 2932
Fujitsu                                   e-mail: juergen.gross@xxxxxxxxxxxxxx
Mies-van-der-Rohe-Str. 8                Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

