
[Xen-changelog] [xen-unstable] xend: Balloon down memory to achieve enough DMA32 memory for PV guests



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1258186190 0
# Node ID 01f4bb96bf8536a91043c8f3bb9e55705c5191fa
# Parent  b6b2e97f8db91d66d60c0a389ee33bea9cfad9dd
xend: Balloon down memory to achieve enough DMA32 memory for PV guests
with PCI pass-through to successfully launch.

If the user hasn't used the dom0_mem= boot parameter, the privileged
domain usurps all of the memory. During launch of PV guests with PCI
pass-through we ratchet down the memory for the privileged domain to
the memory required by the PV guest. However, for PV guests with PCI
pass-through we do not take into account that the PV guest is going to
swap its SWIOTLB memory for DMA32 memory - in fact, swap 64MB of
it. This patch balloons down the privileged domain so that 64MB of
DMA32 memory is available.

From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 tools/python/xen/lowlevel/xc/xc.c       |   18 ++++++++++
 tools/python/xen/xend/XendConfig.py     |    7 ++++
 tools/python/xen/xend/XendDomainInfo.py |   56 ++++++++++++++++++++++++++++++--
 tools/python/xen/xend/XendNode.py       |   12 ++++--
 4 files changed, 87 insertions(+), 6 deletions(-)
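
In outline, the new xend logic is a retry loop: read the per-node free
DMA32 memory, and while the chosen node has less than 64MB (plus a
little slack) free below 4GB, ask the balloon driver to release that
much from dom0 and re-check. A simplified sketch of that loop, assuming
xc.physinfo() reports 'node_to_dma32_mem' in KiB (added below) and that
balloon.free() takes a KiB amount plus the domain being built:

    import xen.lowlevel.xc
    from xen.xend import balloon

    xc = xen.lowlevel.xc.xc()

    def free_dma32(node, dominfo, retries=2000):
        # Target: 64MB free below 4GB, plus 2MB of slack (all in KiB).
        target = 64 * 1024 + 2048
        ask_for = 0
        while retries > 0:
            dma32_free = xc.physinfo()['node_to_dma32_mem'][node]
            need = target - dma32_free
            if need <= 0:
                return True            # enough DMA32 memory is now free
            # Ask for at least the shortfall, growing the request each retry.
            ask_for = max(need, ask_for)
            balloon.free(ask_for, dominfo)
            ask_for += 2048
            retries -= 1
        return False                   # best effort; log a warning and go on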

diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/lowlevel/xc/xc.c Sat Nov 14 08:09:50 2009 +0000
@@ -1059,6 +1059,7 @@ static PyObject *pyxc_physinfo(XcObject 
     int i, j, max_cpu_id;
     uint64_t free_heap;
     PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
+    PyObject *node_to_dma32_mem_obj;
     xc_cpu_to_node_t map[MAX_CPU_ID + 1];
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
@@ -1128,10 +1129,27 @@ static PyObject *pyxc_physinfo(XcObject 
         Py_DECREF(pyint);
     }
 
+    xc_dom_loginit();
+    /* Free DMA32 (below-4GB) memory per node, reported in KiB. */
+    node_to_dma32_mem_obj = PyList_New(0);
+
+    for ( i = 0; i < info.nr_nodes; i++ )
+    {
+        PyObject *pyint;
+
+        xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
+        xc_dom_printf("Node:%d: DMA32:%ld\n", i, free_heap);
+        pyint = PyInt_FromLong(free_heap / 1024);
+        PyList_Append(node_to_dma32_mem_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     Py_DECREF(node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
     Py_DECREF(node_to_memory_obj);
+    PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
+    Py_DECREF(node_to_dma32_mem_obj);
  
     return ret_obj;
 #undef MAX_CPU_ID
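
With this hunk applied, a Python caller sees the new per-node DMA32
figures alongside the existing fields; a small usage sketch (one KiB
value per NUMA node, mirroring 'node_to_memory'):

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.xc()
    info = xc.physinfo()
    for node, kib in enumerate(info['node_to_dma32_mem']):
        print 'node%d: %d KiB free below 4GB' % (node, kib)
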
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendConfig.py       Sat Nov 14 08:09:50 2009 +0000
@@ -2111,6 +2111,13 @@ class XendConfig(dict):
     def is_hap(self):
         return self['platform'].get('hap', 0)
 
+    def is_pv_and_has_pci(self):
+        for dev_type, dev_info in self.all_devices_sxpr():
+            if dev_type != 'pci':
+                continue
+            return not self.is_hvm()
+        return False
+
     def update_platform_pci(self):
         pci = []
         for dev_type, dev_info in self.all_devices_sxpr():
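
Since is_pv_and_has_pci() returns on the first 'pci' device it finds,
it behaves like this equivalent sketch: True only when the guest has at
least one PCI device and is not HVM.

    def is_pv_and_has_pci(self):
        has_pci = any(dev_type == 'pci'
                      for dev_type, dev_info in self.all_devices_sxpr())
        return has_pci and not self.is_hvm()
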
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py   Sat Nov 14 08:09:50 2009 +0000
@@ -2580,7 +2580,8 @@ class XendDomainInfo:
 
 
     def _setCPUAffinity(self):
-        """ Repin domain vcpus if a restricted cpus list is provided
+        """ Repin domain vcpus if a restricted cpus list is provided.
+            Returns the chosen node number.
         """
 
         def has_cpus():
@@ -2597,6 +2598,7 @@ class XendDomainInfo:
                         return True
             return False
 
+        index = 0
         if has_cpumap():
             for v in range(0, self.info['VCPUs_max']):
                 if self.info['vcpus_params'].has_key('cpumap%i' % v):
@@ -2647,6 +2649,54 @@ class XendDomainInfo:
                 cpumask = info['node_to_cpu'][index]
                 for v in range(0, self.info['VCPUs_max']):
                     xc.vcpu_setaffinity(self.domid, v, cpumask)
+        return index
+
+    def _freeDMAmemory(self, node):
+
+       # If we are PV and have PCI devices the guest will
+       # turn on a SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32
+       # zone (under 4GB). To do so, we need to balloon down Dom0 to where
+       # there is enough (64MB) memory under the 4GB mark. This ballooning
+       # might take more memory out than just 64MB though :-(
+       if not self.info.is_pv_and_has_pci():
+               return
+
+       retries = 2000
+       ask_for_mem = 0;
+       need_mem = 0
+       try:
+           while (retries > 0):
+               physinfo = xc.physinfo()
+               free_mem = physinfo['free_memory']
+               nr_nodes = physinfo['nr_nodes']
+               node_to_dma32_mem = physinfo['node_to_dma32_mem']
+               if (node > nr_nodes):
+                    return;
+               # Extra 2MB above 64MB seems to do the trick.
+               need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node]
+               # Our starting point. We ask just for the difference needed
+               # to have an extra 64MB under 4GB.
+               ask_for_mem = max(need_mem, ask_for_mem);
+               if (need_mem > 0):
+                    log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. '
+                              'Asking for %dKiB', retries, need_mem,
+                              ask_for_mem)
+
+                    balloon.free(ask_for_mem, self)
+                    ask_for_mem = ask_for_mem + 2048;
+               else:
+                    # OK. We got enough DMA memory.
+                    break
+               retries  = retries - 1
+       except:
+           # This is best-effort after all.
+           need_mem = max(1, need_mem);
+           pass
+
+       if (need_mem > 0):
+           log.warn('We tried our best to balloon down DMA memory to '
+                    'accommodate your PV guest. We need %dKiB extra memory.',
+                    need_mem)
 
     def _setSchedParams(self):
         if XendNode.instance().xenschedinfo() == 'credit':
@@ -2668,7 +2718,7 @@ class XendDomainInfo:
             # repin domain vcpus if a restricted cpus list is provided
             # this is done prior to memory allocation to aide in memory
             # distribution for NUMA systems.
-            self._setCPUAffinity()
+            node = self._setCPUAffinity()
 
             # Set scheduling parameters.
             self._setSchedParams()
@@ -2729,6 +2779,8 @@ class XendDomainInfo:
             self._introduceDomain()
             if self.info.target():
                 self._setTarget(self.info.target())
+
+            self._freeDMAmemory(node)
 
             self._createDevices()
 
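The shortfall computation in _freeDMAmemory() works in KiB throughout.
A worked example with a hypothetical node that already has 40MB free
below 4GB:

    target   = 64 * 1024 + 2048    # 67584 KiB = 64MB + 2MB of slack
    dma32    = 40 * 1024           # hypothetical: 40MB already free below 4GB
    need_mem = target - dma32      # 26624 KiB to balloon out of dom0
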
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendNode.py Sat Nov 14 08:09:50 2009 +0000
@@ -872,11 +872,11 @@ class XendNode:
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo):
+    def format_node_to_memory(self, pinfo, key):
         str=''
         whitespace=''
         try:
-            node_to_memory=pinfo['node_to_memory']
+            node_to_memory=pinfo[key]
             for i in range(0, pinfo['nr_nodes']):
                 str+='%snode%d:%d\n' % (whitespace,
                                         i,
@@ -896,7 +896,10 @@ class XendNode:
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
         info['node_to_cpu']  = self.format_node_to_cpu(info)
-        info['node_to_memory'] = self.format_node_to_memory(info)
+        info['node_to_memory'] = self.format_node_to_memory(info,
+                                       'node_to_memory')
+        info['node_to_dma32_mem'] = self.format_node_to_memory(info,
+                                       'node_to_dma32_mem')
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -908,7 +911,8 @@ class XendNode:
                       'total_memory',
                       'free_memory',
                       'node_to_cpu',
-                      'node_to_memory'
+                      'node_to_memory',
+                      'node_to_dma32_mem'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
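
With the XendNode changes, 'xm info' gains a node_to_dma32_mem row next
to node_to_memory, formatted the same way (hypothetical single-node
values):

    node_to_cpu            : node0:0-3
    node_to_memory         : node0:3964
    node_to_dma32_mem      : node0:2840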
