# HG changeset patch # Parent 9e3587566acea30c0e242f647d14e32f50d34fa0 xend: Add support for passing in the host's E820 for PCI passthrough The code that populates the E820 is triggered whenever the guest configuration has 'e820_hole=1'. xend calls xc_get_machine_memory_map to retrieve the system's E820. The E820 is then sanitized to weed out entries below 16MB, as well as to remove any E820_RAM or E820_UNUSED regions, as the guest does not need to know about them. The guest only needs the E820_ACPI, E820_NVS and E820_RESERVED regions to get an idea of where the PCI I/O space is. Mostly. The Linux kernel treats any gap in the E820 as PCI I/O space, which means that if we give the guest 2GB, and the E820_ACPI region and its friends start at 3GB, the gap between 2GB and 3GB will be considered PCI I/O space. To guard against that we also create an E820_UNUSABLE region from 'target_kb' (called ram_end in the code) up to the first E820_[ACPI,NVS,RESERVED] region. Lastly, xc_domain_set_memory_map is called to install the new E820. When tested with another PV guest (NetBSD 5.1) the modified E820 gave it no trouble. The code has also been tested successfully with the older "classic" Xen Linux and with the newer "pvops" kernels (SLES11, RHEL5, Ubuntu Lucid, Debian Squeeze, 2.6.37, 2.6.38, 2.6.39). Memory that is slack or reserved for ballooning (i.e. 'maxmem' in the guest configuration) is put behind the machine E820, which in most cases is above 4GB. The reason for fetching the E820 with the hypercall in the toolstack (instead of having the guest do it) is that when a guest issues the 'XENMEM_machine_memory_map' hypercall it retrieves an E820 with I/O range caps added in, meaning that the region after 4GB up to the end of possible memory would be marked as unusable and the kernel would not have any space to allocate a balloon region. 
Signed-off-by: Konrad Rzeszutek Wilk diff -r 9e3587566ace tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Wed Nov 16 17:10:09 2011 -0500 +++ b/tools/python/xen/xend/XendConfig.py Tue Dec 13 14:01:06 2011 -0500 @@ -240,6 +240,7 @@ XENAPI_CFG_TYPES = { 'machine_address_size': int, 'suppress_spurious_page_faults': bool0, 's3_integrity' : int, + 'e820_hole' : int, 'superpages' : int, 'memory_sharing': int, 'pool_name' : str, @@ -422,6 +423,7 @@ class XendConfig(dict): 'target': 0, 'pool_name' : 'Pool-0', 'superpages': 0, + 'e820_hole': 0, 'description': '', } @@ -511,6 +513,9 @@ class XendConfig(dict): if 'nomigrate' not in self['platform']: self['platform']['nomigrate'] = 0 + if 'e820_hole' not in self['platform']: + self['platform']['e820_hole'] = 0 + if self.is_hvm(): if 'timer_mode' not in self['platform']: self['platform']['timer_mode'] = 1 @@ -538,6 +543,8 @@ class XendConfig(dict): self['platform']['loader'] = auxbin.pathTo("hvmloader") if not os.path.exists(self['platform']['loader']): raise VmError("kernel '%s' not found" % str(self['platform']['loader'])) + if int(self['platform'].get('e820_hole', 0)) == 1: + raise VmError("e820_hole can only be used with PV guests!") # Compatibility hack, can go away soon. 
if 'soundhw' not in self['platform'] and \ @@ -2139,6 +2146,8 @@ class XendConfig(dict): image.append(['args', self['PV_args']]) if self.has_key('superpages'): image.append(['superpages', self['superpages']]) + if self.has_key('e820_hole'): + image.append(['e820_hole', self['e820_hole']]) for key in XENAPI_PLATFORM_CFG_TYPES.keys(): if key in self['platform']: @@ -2183,6 +2192,10 @@ class XendConfig(dict): val = sxp.child_value(image_sxp, 'superpages') if val is not None: self['superpages'] = val + + val = sxp.child_value(image_sxp, 'e820_hole') + if val is not None: + self['e820_hole'] = val val = sxp.child_value(image_sxp, 'memory_sharing') if val is not None: diff -r 9e3587566ace tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Wed Nov 16 17:10:09 2011 -0500 +++ b/tools/python/xen/xend/image.py Tue Dec 13 14:01:06 2011 -0500 @@ -705,12 +705,14 @@ class LinuxImageHandler(ImageHandler): ostype = "linux" flags = 0 vhpt = 0 + e820_hole = 0 def configure(self, vmConfig): ImageHandler.configure(self, vmConfig) self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024 self.is_stubdom = (self.kernel.find('stubdom') >= 0) self.superpages = int(vmConfig['superpages']) + self.e820_hole = int(vmConfig['e820_hole']) def buildDomain(self): store_evtchn = self.vm.getStorePort() @@ -731,6 +733,7 @@ class LinuxImageHandler(ImageHandler): log.debug("superpages = %d", self.superpages) if arch.type == "ia64": log.debug("vhpt = %d", self.vhpt) + log.debug("e820_hole = %d", self.e820_hole) return xc.linux_build(domid = self.vm.getDomid(), memsize = mem_mb, @@ -1065,7 +1068,12 @@ class X86_Linux_ImageHandler(LinuxImageH # set physical mapping limit # add an 8MB slack to balance backend allocations. 
mem_kb = self.getRequiredMaximumReservation() + (8 * 1024) - xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb) + if self.e820_hole: + mem_kb = self.getRequiredMaximumReservation() + balloon_kb = 8 * 1024 + xc.domain_set_e820_hole(self.vm.getDomid(), mem_kb, balloon_kb) + else: + xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb) rc = LinuxImageHandler.buildDomain(self) self.setCpuid() return rc diff -r 9e3587566ace tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Wed Nov 16 17:10:09 2011 -0500 +++ b/tools/python/xen/xm/create.py Tue Dec 13 14:01:06 2011 -0500 @@ -349,6 +349,12 @@ gopts.var('pci', val='BUS:DEV.FUNC[@VSLO If power_mgmt is set, the guest OS will be able to program the power states D0-D3hot of the device, HVM only. Default=0.""") +gopts.var('e820_hole', val='0|1', + fn=set_int, default=0, + use="""Expose hosts' E820 map to PV guest? + (Default is 0).""") + + gopts.var('vscsi', val='PDEV,VDEV[,DOM]', fn=append_value, default=[], use="""Add a SCSI device to a domain. 
The physical device is PDEV, @@ -1138,7 +1144,7 @@ def make_config(vals): 'on_reboot', 'on_crash', 'features', 'on_xend_start', 'on_xend_stop', 'target', 'cpuid', 'cpuid_check', 'machine_address_size', 'suppress_spurious_page_faults', - 'description']) + 'description', 'e820_hole']) vcpu_conf() if vals.uuid is not None: diff -r 9e3587566ace tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Wed Nov 16 17:10:09 2011 -0500 +++ b/tools/python/xen/xm/xenapi_create.py Tue Dec 13 14:01:06 2011 -0500 @@ -285,6 +285,8 @@ class xenapi_create: vm.attributes["s3_integrity"].value, "superpages": vm.attributes["superpages"].value, + "e820_hole": + vm.attributes["e820_hole"].value, "memory_static_max": get_child_node_attribute(vm, "memory", "static_max"), "memory_static_min": @@ -697,6 +699,8 @@ class sxp2xml: = str(get_child_by_name(config, "s3_integrity", 0)) vm.attributes["superpages"] \ = str(get_child_by_name(config, "superpages", 0)) + vm.attributes["e820_hole"] \ + = str(get_child_by_name(config, "e820_hole", 0)) vm.attributes["pool_name"] \ = str(get_child_by_name(config, "pool_name", "Pool-0")) @@ -1111,7 +1115,8 @@ class sxp2xml: 'pci_msitranslate', 'pci_power_mgmt', 'xen_platform_pci', - 'tsc_mode' + 'tsc_mode', + 'e820_hole', 'description', 'nomigrate' ]