
Re: [Xen-devel] [PATCH v4 3/9] sysctl: Make XEN_SYSCTL_topologyinfo sysctl a little more efficient



On 03/10/2015 10:29 AM, Andrew Cooper wrote:
On 10/03/15 02:27, Boris Ostrovsky wrote:
Instead of copying data for each field in xen_sysctl_topologyinfo separately,
put cpu/socket/node into a single structure and do a single copy for each
processor.

Do not use max_cpu_index, which is almost always used for calculating the number
of CPUs (thus requiring adding or subtracting one); replace it with num_cpus.

There is no need to copy the whole op back to the user at the end of the sysctl;
we only need num_cpus.

Rename xen_sysctl_topologyinfo and XEN_SYSCTL_topologyinfo to reflect the fact
that these are used for CPU topology. A subsequent patch will add support for a
PCI topology sysctl.

Replace INVALID_TOPOLOGY_ID with "XEN_"-prefixed macros for each invalid type
(core, socket, node).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
In principle, a good improvement, but I have some specific issues.

---

diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 2aa0dc7..2fd93e0 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1375,7 +1365,7 @@ static PyObject *pyxc_numainfo(XcObject *self)
          for ( j = 0; j <= max_node_index; j++ )
          {
              uint32_t dist = nodes_dist[i*(max_node_index+1) + j];
-            if ( dist == INVALID_TOPOLOGY_ID )
+            if ( dist == ~0u )
              {
                  PyList_Append(node_to_node_dist_obj, Py_None);
              }
This looks like a spurious hunk (perhaps from patch 9?)

No, this is sort of an intermediate step: INVALID_TOPOLOGY_ID is no longer defined and a new macro for distance will show up in the next patch.

And, in fact, it's wrong anyway since the sysctl never sets the distance to INVALID_TOPOLOGY_ID; it sets it explicitly to ~0u. It just so happens that INVALID_TOPOLOGY_ID is also ~0U.

I thought about moving the macro definition into this patch, but then logically it wouldn't belong here, so I figured I'd do what the hypervisor does, which is use a constant. Until the next patch.



diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 0cb6ee1..fe48ee8 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -320,39 +320,61 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
      }
      break;
-    case XEN_SYSCTL_topologyinfo:
+    case XEN_SYSCTL_cputopoinfo:
      {
-        uint32_t i, max_cpu_index, last_online_cpu;
-        xen_sysctl_topologyinfo_t *ti = &op->u.topologyinfo;
+        uint32_t i, num_cpus;
+        xen_sysctl_cputopoinfo_t *ti = &op->u.cputopoinfo;
-        last_online_cpu = cpumask_last(&cpu_online_map);
-        max_cpu_index = min_t(uint32_t, ti->max_cpu_index, last_online_cpu);
-        ti->max_cpu_index = last_online_cpu;
+        if ( guest_handle_is_null(ti->cputopo) )
+        {
+            ret = -EINVAL;
+            break;
+        }
The prevailing hypervisor style is to use a NULL guest handle as an
explicit request for size, i.e. write back num_cpus in ti and return
success.

Yes, this would look better from the interface POV.

-boris
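
For illustration only, a minimal sketch (not part of the patch) of the
size-query convention Andrew describes, reusing the names already used in
the hunk above (ti, u_sysctl, op): a NULL cputopo handle is treated as a
request for the required array size rather than an error.

    /* Illustrative sketch: a NULL guest handle is an explicit request
     * for the size; write back num_cpus and return success. */
    if ( guest_handle_is_null(ti->cputopo) )
    {
        ti->num_cpus = cpumask_last(&cpu_online_map) + 1;
        ret = __copy_field_to_guest(u_sysctl, op, u.cputopoinfo.num_cpus)
              ? -EFAULT : 0;
        break;
    }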


-        for ( i = 0; i <= max_cpu_index; i++ )
+        num_cpus = cpumask_last(&cpu_online_map) + 1;
+        if ( ti->num_cpus != num_cpus )
          {
-            if ( !guest_handle_is_null(ti->cpu_to_core) )
+            uint32_t array_sz = ti->num_cpus;
+
+            ti->num_cpus = num_cpus;
+            if ( __copy_field_to_guest(u_sysctl, op,
+                                       u.cputopoinfo.num_cpus) )
              {
-                uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_core, i, &core, 1) )
-                    break;
+                ret = -EFAULT;
+                break;
+            }
+            num_cpus = min_t(uint32_t, array_sz, num_cpus);
+        }
+
+        for ( i = 0; i < num_cpus; i++ )
+        {
+            xen_sysctl_cputopo_t cputopo;
+
+            if ( cpu_present(i) )
+            {
+                cputopo.core = cpu_to_core(i);
+                if ( cputopo.core == BAD_APICID )
+                    cputopo.core = XEN_INVALID_CORE_ID;
+                cputopo.socket = cpu_to_socket(i);
+                if ( cputopo.socket == BAD_APICID )
+                    cputopo.socket = XEN_INVALID_SOCKET_ID;
+                cputopo.node = cpu_to_node(i);
+                if ( cputopo.node == NUMA_NO_NODE )
+                    cputopo.node = XEN_INVALID_NODE_ID;
              }
-            if ( !guest_handle_is_null(ti->cpu_to_socket) )
+            else
              {
-                uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_socket, i, &socket, 1) )
-                    break;
+                cputopo.core = XEN_INVALID_CORE_ID;
+                cputopo.socket = XEN_INVALID_SOCKET_ID;
+                cputopo.node = XEN_INVALID_NODE_ID;
              }
-            if ( !guest_handle_is_null(ti->cpu_to_node) )
+
+            if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) )
              {
-                uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_node, i, &node, 1) )
-                    break;
+                ret = -EFAULT;
+                break;
              }
          }
-
-        ret = ((i <= max_cpu_index) || copy_to_guest(u_sysctl, op, 1))
-            ? -EFAULT : 0;
      }
      break;
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 8552dc6..f20da69 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -462,31 +462,37 @@ struct xen_sysctl_lockprof_op {
  typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
-/* XEN_SYSCTL_topologyinfo */
-#define INVALID_TOPOLOGY_ID  (~0U)
-struct xen_sysctl_topologyinfo {
+/* XEN_SYSCTL_cputopoinfo */
+#define XEN_INVALID_CORE_ID     (~0U)
+#define XEN_INVALID_SOCKET_ID   (~0U)
+#define XEN_INVALID_NODE_ID     ((uint8_t)~0)
+
+struct xen_sysctl_cputopo {
+    uint32_t core;
+    uint32_t socket;
+    uint8_t node;
+};
+typedef struct xen_sysctl_cputopo xen_sysctl_cputopo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopo_t);
+
+struct xen_sysctl_cputopoinfo {
      /*
-     * IN: maximum addressable entry in the caller-provided arrays.
-     * OUT: largest cpu identifier in the system.
-     * If OUT is greater than IN then the arrays are truncated!
-     * If OUT is leass than IN then the array tails are not written by sysctl.
+     * IN: size of caller-provided cputopo array.
+     * OUT: Number of CPUs in the system.
       */
-    uint32_t max_cpu_index;
+    uint32_t num_cpus;
/*
-     * If not NULL, these arrays are filled with core/socket/node identifier
-     * for each cpu.
-     * If a cpu has no core/socket/node information (e.g., cpu not present)
-     * then the sentinel value ~0u is written to each array.
-     * The number of array elements written by the sysctl is:
-     *   min(@max_cpu_index_IN,@max_cpu_index_OUT)+1
+     * If not NULL, filled with core/socket/node identifier for each cpu
+     * If information for a particular entry is not available it is set to
+     * XEN_INVALID_<xxx>_ID.
+     * The number of array elements for CPU topology written by the sysctl is:
+     *   min(@num_cpus_IN,@num_cpus_OUT)+1.
This is also a problematic interface as it clobbers the actual length of
the valid array.  strace/valgrind have no way of validating the state
after the hypercall, and the user of the interface has to remember the
number they passed in to start with.

It is also the prevailing style to return the number of entries actually
written to, and to fail the hypercall with -ENOBUFS if there is
insufficient space in the destination array.

See the implementation of XEN_DOMCTL_get_vcpu_msrs as an example which
handles each of these corner cases.

~Andrew
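
For illustration only, a hedged sketch (not the actual
XEN_DOMCTL_get_vcpu_msrs code) of the convention Andrew points to: report
the number of entries actually written, and fail with -ENOBUFS when the
caller's array is too small. The local variable "need" is hypothetical;
the other names come from the patch above.

    /* Illustrative sketch: caller's array too small -> report the
     * required size and fail with -ENOBUFS. */
    uint32_t need = cpumask_last(&cpu_online_map) + 1;

    if ( ti->num_cpus < need )
    {
        ti->num_cpus = need;              /* OUT: required size */
        ret = __copy_field_to_guest(u_sysctl, op, u.cputopoinfo.num_cpus)
              ? -EFAULT : -ENOBUFS;
        break;
    }

    /* ... fill ti->cputopo[0 .. need-1] as in the loop above ... */

    ti->num_cpus = need;                  /* OUT: entries actually written */
    ret = __copy_field_to_guest(u_sysctl, op, u.cputopoinfo.num_cpus)
          ? -EFAULT : 0;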

       */
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cputopo_t) cputopo;
  };
-typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
-DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t);
/* XEN_SYSCTL_numainfo */
  #define INVALID_NUMAINFO_ID (~0U)
@@ -672,7 +678,7 @@ struct xen_sysctl {
  #define XEN_SYSCTL_pm_op                         12
  #define XEN_SYSCTL_page_offline_op               14
  #define XEN_SYSCTL_lockprof_op                   15
-#define XEN_SYSCTL_topologyinfo                  16
+#define XEN_SYSCTL_cputopoinfo                   16
  #define XEN_SYSCTL_numainfo                      17
  #define XEN_SYSCTL_cpupool_op                    18
  #define XEN_SYSCTL_scheduler_op                  19
@@ -683,7 +689,7 @@ struct xen_sysctl {
          struct xen_sysctl_readconsole       readconsole;
          struct xen_sysctl_tbuf_op           tbuf_op;
          struct xen_sysctl_physinfo          physinfo;
-        struct xen_sysctl_topologyinfo      topologyinfo;
+        struct xen_sysctl_cputopoinfo       cputopoinfo;
          struct xen_sysctl_numainfo          numainfo;
          struct xen_sysctl_sched_id          sched_id;
          struct xen_sysctl_perfc_op          perfc_op;


