[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [LINUX] Backport PCI MMCONFIG patch from 2.6.17 -- only use MMCONFIG



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 9837ff37e35499a2758050fdb40a732942d180ac
# Parent  9deecd4f9cf9ce47aaae2caf1ccee85a9ebfecdb
[LINUX] Backport PCI MMCONFIG patch from 2.6.17 -- only use MMCONFIG
access method if the memory area is reserved in the E820 map.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch |  143 +++++++++++++
 patches/linux-2.6.16.29/series                             |    1 
 2 files changed, 144 insertions(+)

diff -r 9deecd4f9cf9 -r 9837ff37e354 patches/linux-2.6.16.29/series
--- a/patches/linux-2.6.16.29/series    Wed Sep 20 09:56:50 2006 +0100
+++ b/patches/linux-2.6.16.29/series    Wed Sep 20 12:02:13 2006 +0100
@@ -10,6 +10,7 @@ net-gso-2-checksum-fix.patch
 net-gso-2-checksum-fix.patch
 net-gso-3-fix-errorcheck.patch
 net-gso-4-kill-warnon.patch
+pci-mmconfig-fix-from-2.6.17.patch
 pmd-shared.patch
 rcu_needs_cpu.patch
 rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
diff -r 9deecd4f9cf9 -r 9837ff37e354 patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch        Wed Sep 20 12:02:13 2006 +0100
@@ -0,0 +1,284 @@
+diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c
+--- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c   2006-09-12 19:02:10.000000000 +0100
++++ ./arch/i386/pci/mmconfig.c 2006-09-20 11:55:32.000000000 +0100
+@@ -12,14 +12,22 @@
+ #include <linux/pci.h>
+ #include <linux/init.h>
+ #include <linux/acpi.h>
++#include <asm/e820.h>
+ #include "pci.h"
+ 
++/* aperture is up to 256MB but BIOS may reserve less */
++#define MMCONFIG_APER_MIN     (2 * 1024*1024)
++#define MMCONFIG_APER_MAX     (256 * 1024*1024)
++
++/* Assume systems with more busses have correct MCFG */
++#define MAX_CHECK_BUS 16
++
+ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
+ 
+ /* The base address of the last MMCONFIG device accessed */
+ static u32 mmcfg_last_accessed_device;
+ 
+-static DECLARE_BITMAP(fallback_slots, 32);
++static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
+ 
+ /*
+  * Functions for accessing PCI configuration space with MMCONFIG accesses
+@@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se
+       int cfg_num = -1;
+       struct acpi_table_mcfg_config *cfg;
+ 
+-      if (seg == 0 && bus == 0 &&
+-          test_bit(PCI_SLOT(devfn), fallback_slots))
++      if (seg == 0 && bus < MAX_CHECK_BUS &&
++          test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots))
+               return 0;
+ 
+       while (1) {
+@@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s
+       unsigned long flags;
+       u32 base;
+ 
+-      if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
++      if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
++              *value = -1;
+               return -EINVAL;
++      }
+ 
+       base = get_base_addr(seg, bus, devfn);
+       if (!base)
+@@ -146,30 +156,62 @@ static struct pci_raw_ops pci_mmcfg = {
+    Normally this can be expressed in the MCFG by not listing them
+    and assigning suitable _SEGs, but this isn't implemented in some BIOS.
+    Instead try to discover all devices on bus 0 that are unreachable using MM
+-   and fallback for them.
+-   We only do this for bus 0/seg 0 */
++   and fallback for them. */
+ static __init void unreachable_devices(void)
+ {
+-      int i;
++      int i, k;
+       unsigned long flags;
+ 
+-      for (i = 0; i < 32; i++) {
+-              u32 val1;
+-              u32 addr;
++      for (k = 0; k < MAX_CHECK_BUS; k++) {
++              for (i = 0; i < 32; i++) {
++                      u32 val1;
++                      u32 addr;
++
++                      pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
++                      if (val1 == 0xffffffff)
++                              continue;
++
++                      /* Locking probably not needed, but safer */
++                      spin_lock_irqsave(&pci_config_lock, flags);
++                      addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
++                      if (addr != 0)
++                              pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
++                      if (addr == 0 ||
++                          readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
+                              set_bit(i + 32*k, fallback_slots);
++                              printk(KERN_NOTICE
++                      "PCI: No mmconfig possible on %x:%x\n", k, i);
++                      }
++                      spin_unlock_irqrestore(&pci_config_lock, flags);
++              }
++      }
++}
+ 
+-              pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1);
+-              if (val1 == 0xffffffff)
++/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. */
++static int __init
++e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
++{
++      u64 start = s;
++      u64 end = e;
++      int i;
++      for (i = 0; i < e820.nr_map; i++) {
++              struct e820entry *ei = &e820.map[i];
++              if (type && ei->type != type)
+                       continue;
+-
+-              /* Locking probably not needed, but safer */
+-              spin_lock_irqsave(&pci_config_lock, flags);
+-              addr = get_base_addr(0, 0, PCI_DEVFN(i, 0));
+-              if (addr != 0)
+-                      pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0));
+-              if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1)
+-                      set_bit(i, fallback_slots);
+-              spin_unlock_irqrestore(&pci_config_lock, flags);
++              /* is the region (part) in overlap with the current region ?*/
++              if (ei->addr >= end || ei->addr + ei->size <= start)
++                      continue;
++              /* if the region is at the beginning of <start,end> we move
++               * start to the end of the region since it's ok until there
++               */
++              if (ei->addr <= start)
++                      start = ei->addr + ei->size;
++              /* if start is now at or beyond end, we're done, full
++               * coverage */
++              if (start >= end)
++                      return 1; /* we're done */
+       }
++      return 0;
+ }
+ 
+ static int __init pci_mmcfg_init(void)
+@@ -183,6 +225,15 @@ static int __init pci_mmcfg_init(void)
+           (pci_mmcfg_config[0].base_address == 0))
+               goto out;
+ 
++      if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
++                      pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
++                      E820_RESERVED)) {
++              printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
++                              pci_mmcfg_config[0].base_address);
++              printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
++              goto out;
++      }
++
+       printk(KERN_INFO "PCI: Using MMCONFIG\n");
+       raw_pci_ops = &pci_mmcfg;
+       pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c
+--- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100
++++ ./arch/x86_64/pci/mmconfig.c       2006-09-20 11:55:41.000000000 +0100
+@@ -9,11 +9,19 @@
+ #include <linux/init.h>
+ #include <linux/acpi.h>
+ #include <linux/bitmap.h>
++#include <asm/e820.h>
++
+ #include "pci.h"
+ 
+-#define MMCONFIG_APER_SIZE (256*1024*1024)
++/* aperture is up to 256MB but BIOS may reserve less */
++#define MMCONFIG_APER_MIN     (2 * 1024*1024)
++#define MMCONFIG_APER_MAX     (256 * 1024*1024)
++
++/* Verify the first 16 busses. We assume that systems with more busses
++   get MCFG right. */
++#define MAX_CHECK_BUS 16
+ 
+-static DECLARE_BITMAP(fallback_slots, 32);
++static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS);
+ 
+ /* Static virtual mapping of the MMCONFIG aperture */
+ struct mmcfg_virt {
+@@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i
+ static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
+ {
+       char __iomem *addr;
+-      if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots))
++      if (seg == 0 && bus < MAX_CHECK_BUS &&
++              test_bit(32*bus + PCI_SLOT(devfn), fallback_slots))
+               return NULL;
+       addr = get_virt(seg, bus);
+       if (!addr)
+@@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s
+       char __iomem *addr;
+ 
+       /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+-      if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
++      if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
++              *value = -1;
+               return -EINVAL;
++      }
+ 
+       addr = pci_dev_base(seg, bus, devfn);
+       if (!addr)
+@@ -129,23 +140,52 @@ static struct pci_raw_ops pci_mmcfg = {
+    Normally this can be expressed in the MCFG by not listing them
+    and assigning suitable _SEGs, but this isn't implemented in some BIOS.
+    Instead try to discover all devices on bus 0 that are unreachable using MM
+-   and fallback for them.
+-   We only do this for bus 0/seg 0 */
++   and fallback for them. */
+ static __init void unreachable_devices(void)
+ {
+-      int i;
+-      for (i = 0; i < 32; i++) {
+-              u32 val1;
+-              char __iomem *addr;
++      int i, k;
++      /* Use the max bus number from ACPI here? */
++      for (k = 0; k < MAX_CHECK_BUS; k++) {
++              for (i = 0; i < 32; i++) {
++                      u32 val1;
++                      char __iomem *addr;
++
++                      pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1);
++                      if (val1 == 0xffffffff)
++                              continue;
++                      addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
++                      if (addr == NULL|| readl(addr) != val1) {
++                              set_bit(i + 32*k, fallback_slots);
++                              printk(KERN_NOTICE
++                              "PCI: No mmconfig possible on device %x:%x\n",
++                                      k, i);
++                      }
++              }
++      }
++}
+ 
+-              pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1);
+-              if (val1 == 0xffffffff)
++/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. */
++static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
++{
++      int i;
++      for (i = 0; i < e820.nr_map; i++) {
++              struct e820entry *ei = &e820.map[i];
++              if (type && ei->type != type)
+                       continue;
+-              addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0));
+-              if (addr == NULL|| readl(addr) != val1) {
+-                      set_bit(i, &fallback_slots);
+-              }
++              /* is the region (part) in overlap with the current region ?*/
++              if (ei->addr >= end || ei->addr + ei->size <= start)
++                      continue;
++
++              /* if the region is at the beginning of <start,end> we move
++               * start to the end of the region since it's ok until there
++               */
++              if (ei->addr <= start)
++                      start = ei->addr + ei->size;
++              /* if start is now at or beyond end, we're done, full coverage */
++              if (start >= end)
++                      return 1; /* we're done */
+       }
++      return 0;
+ }
+ 
+ static int __init pci_mmcfg_init(void)
+@@ -161,6 +201,15 @@ static int __init pci_mmcfg_init(void)
+           (pci_mmcfg_config[0].base_address == 0))
+               return 0;
+ 
++      if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
++                      pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
++                      E820_RESERVED)) {
++              printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
++                              pci_mmcfg_config[0].base_address);
++              printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
++              return 0;
++      }
++
+       /* RED-PEN i386 doesn't do _nocache right now */
+       pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
+       if (pci_mmcfg_virt == NULL) {
+@@ -169,7 +218,8 @@ static int __init pci_mmcfg_init(void)
+       }
+       for (i = 0; i < pci_mmcfg_config_num; ++i) {
+               pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
+-              pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE);
++              pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
++                                                       MMCONFIG_APER_MAX);
+               if (!pci_mmcfg_virt[i].virt) {
+                       printk("PCI: Cannot map mmconfig aperture for segment %d\n",
+                              pci_mmcfg_config[i].pci_segment_group_number);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.