[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/3] dom0 linux: Use _CRS for PCI resource allocation.



This patch adds code to use _CRS for PCI resource allocation.
To use _CRS, add "pci=use_crs" to the dom0 Linux boot parameters.


Without this patch, MMIO resources are allocated from the e820 gap. But the
e820 gap covers only the low MMIO area, whereas _CRS reports the high MMIO
area as well as the low MMIO area. With this patch, the high MMIO area
becomes usable.


Most of the code is backported from 2.6.26.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff -r cdc6729dc702 arch/i386/pci/acpi.c
--- a/arch/i386/pci/acpi.c      Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/acpi.c      Mon Dec 01 19:09:12 2008 +0900
@@ -5,27 +5,228 @@
 #include <asm/numa.h>
 #include "pci.h"
 
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int 
domain, int busnum)
+/* Per-root-bridge state for the _CRS walks; backported from 2.6.26. */
+struct pci_root_info {
+       char *name;             /* label given to each struct resource */
+       unsigned int res_num;   /* ranges counted, later ranges inserted */
+       struct resource *res;   /* array filled in by setup_resource() */
+       struct pci_bus *bus;    /* bus whose resource[] gets populated */
+       int busnum;             /* not set or read in the visible hunks */
+};
+
+struct pci_sysdata {   /* NOTE(review): also added to include/asm-i386/pci.h by this patch; confirm no redefinition */
+       int     domain;         /* PCI domain */
+       int     node;           /* NUMA node */
+};
+
+/* From 2.6.26: AE_OK only for a non-empty memory or I/O producer range. */
+static acpi_status __devinit
+resource_to_addr(struct acpi_resource *resource,
+                       struct acpi_resource_address64 *addr)
+{
+       acpi_status status;
+
+       status = acpi_resource_to_address64(resource, addr);
+       if (ACPI_SUCCESS(status) &&
+           (addr->resource_type == ACPI_MEMORY_RANGE ||
+           addr->resource_type == ACPI_IO_RANGE) &&
+           addr->address_length > 0 &&
+           addr->producer_consumer == ACPI_PRODUCER) {
+               return AE_OK;
+       }
+       return AE_ERROR;        /* conversion failed or range is not usable */
+}
+
+/* From 2.6.26: _CRS walk callback; counts usable ranges in info->res_num. */
+static acpi_status __devinit
+count_resource(struct acpi_resource *acpi_res, void *data)
+{
+       struct pci_root_info *info = data;
+       struct acpi_resource_address64 addr;
+       acpi_status status;
+
+       if (info->res_num >= PCI_BUS_NUM_RESOURCES)
+               return AE_OK;   /* stop counting at PCI_BUS_NUM_RESOURCES */
+
+       status = resource_to_addr(acpi_res, &addr);
+       if (ACPI_SUCCESS(status))
+               info->res_num++;
+
+       return AE_OK;           /* always AE_OK, even for skipped entries */
+}
+
+/* From 2.6.26: _CRS walk callback that claims one window for info->bus. */
+static acpi_status __devinit
+setup_resource(struct acpi_resource *acpi_res, void *data)
+{
+       struct pci_root_info *info = data;
+       struct resource *res;
+       struct acpi_resource_address64 addr;
+       acpi_status status;
+       unsigned long flags;
+       struct resource *root;
+
+       if (info->res_num >= PCI_BUS_NUM_RESOURCES)
+               return AE_OK;   /* cap reached: skip this entry */
+
+       status = resource_to_addr(acpi_res, &addr);
+       if (!ACPI_SUCCESS(status)) {
+               return AE_OK;   /* not a usable range: skip this entry */
+       }
+
+       if (addr.resource_type == ACPI_MEMORY_RANGE) {
+               root = &iomem_resource;
+               flags = IORESOURCE_MEM;
+               if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
+                       flags |= IORESOURCE_PREFETCH;
+       } else if (addr.resource_type == ACPI_IO_RANGE) {
+               root = &ioport_resource;
+               flags = IORESOURCE_IO;
+       } else
+               return AE_OK;
+
+       res = &info->res[info->res_num];        /* next unused array slot */
+       res->name = info->name;
+       res->flags = flags;
+       res->start = addr.minimum + addr.translation_offset;
+       res->end = res->start + addr.address_length - 1;        /* inclusive end */
+       res->child = NULL;
+       printk(KERN_DEBUG "PCI: ACPI resource [%llx-%llx:%lx] for %s\n",
+               (unsigned long long)res->start, (unsigned long long)res->end,
+               (unsigned long)res->flags, info->name);
+
+       if (insert_resource(root, res)) {
+               printk(KERN_ERR "PCI: Failed to allocate %llx-%llx from %s"
+                       " for %s\n", (unsigned long long)res->start,
+                       (unsigned long long)res->end, root->name, info->name);
+       } else {
+               info->bus->resource[info->res_num] = res;
+               info->res_num++;        /* advance only on success; a failed slot is reused */
+       }
+       return AE_OK;
+}
+
+/* From 2.6.26: transparent PCI-PCI bridges reuse the parent bus windows. */
+static void __devinit adjust_transparent_bridge_resources(struct pci_bus *bus)
+{
+       struct pci_dev *dev;
+
+       list_for_each_entry(dev, &bus->devices, bus_list) {
+               int i;
+               u16 class = dev->class >> 8;
+
+               if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) {
+                       for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++)      /* child slot i <- parent slot i - 3 */
+                               dev->subordinate->resource[i] =
+                                               dev->bus->resource[i - 3];
+               }
+       }
+}
+
+/*
+ * Gather the root bridge's _CRS windows into bus->resource[] (from 2.6.26).
+ * name/res stay allocated once a window is inserted: the tree points at them.
+ */
+static void __devinit
+get_current_resources(struct acpi_device *device, int busnum,
+                       int domain, struct pci_bus *bus)
+{
+       struct pci_root_info info;
+       const size_t name_len = sizeof("PCI Bus 0000:00");
+
+       info.bus = bus;
+       info.res_num = 0;
+       info.name = kmalloc(name_len, GFP_KERNEL);
+       if (!info.name)
+               return;
+       snprintf(info.name, name_len, "PCI Bus %04x:%02x", domain, busnum);
+
+       /* Two passes over _CRS: count the windows, then fill the array. */
+       acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+                               count_resource, &info);
+       if (!info.res_num)
+               goto free_name; /* no usable windows: the label is unused */
+
+       info.res = kmalloc(sizeof(*info.res) * info.res_num, GFP_KERNEL);
+       if (!info.res) {
+               printk(KERN_ERR "PCI: Failed to allocate resource structure "
+                               "for %s\n", info.name);
+               goto free_name;
+       }
+
+       info.res_num = 0;
+       acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+                               setup_resource, &info);
+       if (info.res_num) {
+               adjust_transparent_bridge_resources(bus);
+               return;
+       }
+       kfree(info.res);        /* nothing was inserted, so nothing uses it */
+free_name:
+       kfree(info.name);
+}
+
+/* From 2.6.26: scan (or reuse) root bus domain:busnum and set its sysdata. */
+struct pci_bus * __devinit 
+pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
 {
        struct pci_bus *bus;
+       struct pci_sysdata *sd;
+       int node;
+#ifdef CONFIG_ACPI_NUMA
+       int pxm;
+#endif
 
-       if (domain != 0) {
-               printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+       node = -1;
+#ifdef CONFIG_ACPI_NUMA
+       pxm = acpi_get_pxm(device->handle);
+       if (pxm >= 0)
+               node = pxm_to_node(pxm);
+#endif
+
+       /* Allocate per-root-bus (not per bus) arch-specific data.
+        * TODO: leak; this memory is never freed.
+        * It's arguable whether it's worth the trouble to care.
+        */
+       sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+       if (!sd) {
+               printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
                return NULL;
        }
 
-       bus = pcibios_scan_root(busnum);
+       sd->domain = domain;
+       sd->node = node;
+       /*
+        * Maybe the desired pci bus has been already scanned. In such case
+        * it is unnecessary to scan the pci bus with the given domain,busnum.
+        */
+       bus = pci_find_bus(domain, busnum);
+       if (bus) {
+               /*
+                * The bus already exists: overwrite its sysdata contents with
+                * sd. NOTE(review): assumes bus->sysdata is non-NULL; confirm.
+                */
+               memcpy(bus->sysdata, sd, sizeof(*sd));
+               kfree(sd);
+       } else
+               bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+
+       if (!bus)
+               kfree(sd);      /* only reached when the scan above failed */
+
 #ifdef CONFIG_ACPI_NUMA
-       if (bus != NULL) {
-               int pxm = acpi_get_pxm(device->handle);
+       if (bus) {
                if (pxm >= 0) {
-                       bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
-                       printk("bus %d -> pxm %d -> node %ld\n",
-                               busnum, pxm, (long)(bus->sysdata));
+                       printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
+                               busnum, pxm, pxm_to_node(pxm));
                }
        }
 #endif
-       
+
+       if (bus && (pci_probe & PCI_USE__CRS)) {
+               get_current_resources(device, busnum, domain, bus);
+       }
+
        return bus;
 }
 
diff -r cdc6729dc702 arch/i386/pci/common.c
--- a/arch/i386/pci/common.c    Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/common.c    Mon Dec 01 19:09:12 2008 +0900
@@ -260,6 +260,9 @@ char * __devinit  pcibios_setup(char *st
        } else if (!strcmp(str, "assign-busses")) {
                pci_probe |= PCI_ASSIGN_ALL_BUSSES;
                return NULL;
+       } else if (!strcmp(str, "use_crs")) {
+               pci_probe |= PCI_USE__CRS;
+               return NULL;
        } else if (!strcmp(str, "routeirq")) {
                pci_routeirq = 1;
                return NULL;
diff -r cdc6729dc702 arch/i386/pci/pci.h
--- a/arch/i386/pci/pci.h       Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/pci.h       Mon Dec 01 19:09:12 2008 +0900
@@ -25,6 +25,7 @@
 #define PCI_ASSIGN_ROMS                0x1000
 #define PCI_BIOS_IRQ_SCAN      0x2000
 #define PCI_ASSIGN_ALL_BUSSES  0x4000
+#define PCI_USE__CRS           0x10000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff -r cdc6729dc702 include/asm-i386/pci.h
--- a/include/asm-i386/pci.h    Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/pci.h    Mon Dec 01 19:09:12 2008 +0900
@@ -4,6 +4,22 @@
 
 #ifdef __KERNEL__
 #include <linux/mm.h>          /* for struct page */
+
+struct pci_sysdata {   /* what pci_domain_nr() reads through bus->sysdata */
+       int     domain;         /* PCI domain */
+       int     node;           /* NUMA node */
+};
+
+static inline int pci_domain_nr(struct pci_bus *bus)   /* domain recorded in the bus sysdata */
+{
+       struct pci_sysdata *sd = bus->sysdata;  /* NOTE(review): assumes sysdata is set on every bus; confirm */
+       return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+       return pci_domain_nr(bus);      /* non-zero exactly when the domain number is non-zero */
+}
 
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
@@ -116,4 +132,14 @@ static inline void pci_dma_burst_advice(
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_NUMA
+/* Returns the NUMA node stored in the pci_sysdata that bus->sysdata points to */
+static inline int __pcibus_to_node(struct pci_bus *bus)
+{
+       struct pci_sysdata *sd = bus->sysdata;
+
+       return sd->node;
+}
+#endif
+
 #endif /* __i386_PCI_H */
diff -r cdc6729dc702 include/asm-i386/topology.h
--- a/include/asm-i386/topology.h       Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/topology.h       Mon Dec 01 19:09:12 2008 +0900
@@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int 
        return first_cpu(mask);
 }
 
-#define pcibus_to_node(bus) ((long) (bus)->sysdata)
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
 #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
 
 /* sched_domains SD_NODE_INIT for NUMAQ machines */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.