[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-4.1-testing] x86/PCI-MSI: properly determine VF BAR values



# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313504506 -3600
# Node ID 98c98daab56afa001e966c803f242439645a23d0
# Parent  4797cf4823558e8af9cf80e18fa166c8411ea62f
x86/PCI-MSI: properly determine VF BAR values

As was discussed a couple of times on this list, SR-IOV virtual
functions have their BARs read as zero - the physical function's
SR-IOV capability structure must be consulted instead. The bogus
warnings people complained about are being eliminated with this
change.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
xen-unstable changeset:   23766:8d6edc3d26d2
xen-unstable date:        Sat Aug 13 10:14:58 2011 +0100

PCI: consolidate interface for adding devices

The functionality of pci_add_device_ext() can easily be folded into
pci_add_device(), which eliminates the need to change two functions for
future adjustments.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
xen-unstable changeset:   23725:4dc6a9ba90d6
xen-unstable date:        Tue Jul 19 14:14:08 2011 +0100
---


diff -r 4797cf482355 -r 98c98daab56a xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/arch/ia64/xen/hypercall.c     Tue Aug 16 15:21:46 2011 +0100
@@ -662,8 +662,8 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
-            break;
+        ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
+        break;
     }
 
     case PHYSDEVOP_manage_pci_remove: {
@@ -695,10 +695,10 @@
         pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
         pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
         pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
-        ret = pci_add_device_ext(manage_pci_ext.bus,
-                                 manage_pci_ext.devfn,
-                                 &pdev_info);
-            break;
+        ret = pci_add_device(manage_pci_ext.bus,
+                             manage_pci_ext.devfn,
+                             &pdev_info);
+        break;
     }
 
     default:
diff -r 4797cf482355 -r 98c98daab56a xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/arch/x86/msi.c        Tue Aug 16 15:21:46 2011 +0100
@@ -521,12 +521,48 @@
     return 0;
 }
 
-static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
+static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
 {
     u8 limit;
-    u32 addr;
+    u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
 
-    switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
+    if ( vf >= 0 )
+    {
+        struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
+        unsigned int pos = pci_find_ext_capability(0, bus,
+                                                   PCI_DEVFN(slot, func),
+                                                   PCI_EXT_CAP_ID_SRIOV);
+        u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+        u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF);
+        u16 offset = pci_conf_read16(bus, slot, func,
+                                     pos + PCI_SRIOV_VF_OFFSET);
+        u16 stride = pci_conf_read16(bus, slot, func,
+                                     pos + PCI_SRIOV_VF_STRIDE);
+
+        if ( !pdev || !pos ||
+             !(ctrl & PCI_SRIOV_CTRL_VFE) ||
+             !(ctrl & PCI_SRIOV_CTRL_MSE) ||
+             !num_vf || !offset || (num_vf > 1 && !stride) ||
+             bir >= PCI_SRIOV_NUM_BARS ||
+             !pdev->vf_rlen[bir] )
+            return 0;
+        base = pos + PCI_SRIOV_BAR;
+        vf -= PCI_BDF(bus, slot, func) + offset;
+        if ( vf < 0 || (vf && vf % stride) )
+            return 0;
+        if ( stride )
+        {
+            if ( vf % stride )
+                return 0;
+            vf /= stride;
+        }
+        if ( vf >= num_vf )
+            return 0;
+        BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+        disp = vf * pdev->vf_rlen[bir];
+        limit = PCI_SRIOV_NUM_BARS;
+    }
+    else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
     {
     case PCI_HEADER_TYPE_NORMAL:
         limit = 6;
@@ -543,7 +579,7 @@
 
     if ( bir >= limit )
         return 0;
-    addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
+    addr = pci_conf_read32(bus, slot, func, base + bir * 4);
     if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
         return 0;
     if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
@@ -551,11 +587,10 @@
         addr &= PCI_BASE_ADDRESS_MEM_MASK;
         if ( ++bir >= limit )
             return 0;
-        return addr |
-               ((u64)pci_conf_read32(bus, slot, func,
-                                     PCI_BASE_ADDRESS_0 + bir * 4) << 32);
+        return addr + disp +
+               ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
     }
-    return addr & PCI_BASE_ADDRESS_MEM_MASK;
+    return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
 }
 
 /**
@@ -628,11 +663,29 @@
 
     if ( !dev->msix_nr_entries )
     {
+        u8 pbus, pslot, pfunc;
+        int vf;
         u64 pba_paddr;
         u32 pba_offset;
 
+        if ( !dev->info.is_virtfn )
+        {
+            pbus = bus;
+            pslot = slot;
+            pfunc = func;
+            vf = -1;
+        }
+        else
+        {
+            pbus = dev->info.physfn.bus;
+            pslot = PCI_SLOT(dev->info.physfn.devfn);
+            pfunc = PCI_FUNC(dev->info.physfn.devfn);
+            vf = PCI_BDF2(dev->bus, dev->devfn);
+        }
+
         ASSERT(!dev->msix_used_entries);
-        WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
+        WARN_ON(msi->table_base !=
+                read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
 
         dev->msix_nr_entries = nr_entries;
         dev->msix_table.first = PFN_DOWN(table_paddr);
@@ -644,7 +697,7 @@
         pba_offset = pci_conf_read32(bus, slot, func,
                                      msix_pba_offset_reg(pos));
         bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
-        pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
+        pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
         WARN_ON(!pba_paddr);
         pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
 
diff -r 4797cf482355 -r 98c98daab56a xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/arch/x86/physdev.c    Tue Aug 16 15:21:46 2011 +0100
@@ -472,7 +472,7 @@
         if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
             break;
 
-        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+        ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
         break;
     }
 
@@ -509,9 +509,9 @@
         pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
         pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
         pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
-        ret = pci_add_device_ext(manage_pci_ext.bus,
-                                 manage_pci_ext.devfn,
-                                 &pdev_info);
+        ret = pci_add_device(manage_pci_ext.bus,
+                             manage_pci_ext.devfn,
+                             &pdev_info);
         break;
     }
 
diff -r 4797cf482355 -r 98c98daab56a xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/drivers/passthrough/pci.c     Tue Aug 16 15:21:46 2011 +0100
@@ -142,16 +142,101 @@
     pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
 }
 
-int pci_add_device(u8 bus, u8 devfn)
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
 {
     struct pci_dev *pdev;
+    unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+    const char *pdev_type;
     int ret = -ENOMEM;
 
+    if (!info)
+        pdev_type = "device";
+    else if (info->is_extfn)
+        pdev_type = "extended function";
+    else if (info->is_virtfn)
+    {
+        spin_lock(&pcidevs_lock);
+        pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
+        spin_unlock(&pcidevs_lock);
+        if ( !pdev )
+            pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
+        pdev_type = "virtual function";
+    }
+    else
+        return -EINVAL;
+
     spin_lock(&pcidevs_lock);
     pdev = alloc_pdev(bus, devfn);
     if ( !pdev )
         goto out;
 
+    if ( info )
+        pdev->info = *info;
+    else if ( !pdev->vf_rlen[0] )
+    {
+        unsigned int pos = pci_find_ext_capability(0, bus, devfn,
+                                                   PCI_EXT_CAP_ID_SRIOV);
+        u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+
+        if ( !pos )
+            /* Nothing */;
+        else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
+        {
+            unsigned int i;
+
+            BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+            for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
+            {
+                unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
+                u32 bar = pci_conf_read32(bus, slot, func, idx);
+                u32 hi = 0;
+
+                if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
+                     PCI_BASE_ADDRESS_SPACE_IO )
+                {
+                    printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
+                                          " BAR%u in IO space\n",
+                           bus, slot, func, i);
+                    continue;
+                }
+                pci_conf_write32(bus, slot, func, idx, ~0);
+                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
+                {
+                    if ( i >= PCI_SRIOV_NUM_BARS )
+                    {
+                        printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
+                                              " 64-bit vf BAR in last slot\n",
+                               bus, slot, func);
+                        break;
+                    }
+                    hi = pci_conf_read32(bus, slot, func, idx + 4);
+                    pci_conf_write32(bus, slot, func, idx + 4, ~0);
+                }
+                pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
+                                   PCI_BASE_ADDRESS_MEM_MASK;
+                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
+                {
+                    pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
+                                                             idx + 4) << 32;
+                    pci_conf_write32(bus, slot, func, idx + 4, hi);
+                }
+                else if ( pdev->vf_rlen[i] )
+                    pdev->vf_rlen[i] |= (u64)~0 << 32;
+                pci_conf_write32(bus, slot, func, idx, bar);
+                pdev->vf_rlen[i] = -pdev->vf_rlen[i];
+                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
+                    ++i;
+            }
+        }
+        else
+            printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
+                                  " functions already enabled (%04x)\n",
+                   bus, slot, func, ctrl);
+    }
+
     ret = 0;
     if ( !pdev->domain )
     {
@@ -169,8 +254,8 @@
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
-           PCI_SLOT(devfn), PCI_FUNC(devfn));
+    printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
+           bus, slot, func);
     return ret;
 }
 
@@ -197,51 +282,6 @@
     return ret;
 }
 
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info)
-{
-    int ret;
-    char *pdev_type;
-    struct pci_dev *pdev;
-
-    if (info->is_extfn)
-        pdev_type = "Extended Function";
-    else if (info->is_virtfn)
-        pdev_type = "Virtual Function";
-    else
-        return -EINVAL;
-
-
-    ret = -ENOMEM;
-    spin_lock(&pcidevs_lock);
-    pdev = alloc_pdev(bus, devfn);
-    if ( !pdev )
-        goto out;
-
-    pdev->info = *info;
-
-    ret = 0;
-    if ( !pdev->domain )
-    {
-        pdev->domain = dom0;
-        ret = iommu_add_device(pdev);
-        if ( ret )
-        {
-            pdev->domain = NULL;
-            goto out;
-        }
-
-        list_add(&pdev->domain_list, &dom0->arch.pdev_list);
-        pci_enable_acs(pdev);
-    }
-
-out:
-    spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
-           bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
-    return ret;
-}
-
 static void pci_clean_dpci_irqs(struct domain *d)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = NULL;
diff -r 4797cf482355 -r 98c98daab56a xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/include/xen/pci.h     Tue Aug 16 15:21:46 2011 +0100
@@ -57,6 +57,7 @@
     const u8 bus;
     const u8 devfn;
     struct pci_dev_info info;
+    u64 vf_rlen[6];
 };
 
 #define for_each_pdev(domain, pdev) \
@@ -86,9 +87,8 @@
 struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
 
 void pci_release_devices(struct domain *d);
-int pci_add_device(u8 bus, u8 devfn);
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *);
 int pci_remove_device(u8 bus, u8 devfn);
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info);
 struct pci_dev *pci_get_pdev(int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
 
diff -r 4797cf482355 -r 98c98daab56a xen/include/xen/pci_regs.h
--- a/xen/include/xen/pci_regs.h        Tue Aug 16 15:17:43 2011 +0100
+++ b/xen/include/xen/pci_regs.h        Tue Aug 16 15:21:46 2011 +0100
@@ -425,7 +425,7 @@
 #define PCI_EXT_CAP_ID_ACS     13
 #define PCI_EXT_CAP_ID_ARI     14
 #define PCI_EXT_CAP_ID_ATS     15
-#define PCI_EXT_CAP_ID_IOV     16
+#define PCI_EXT_CAP_ID_SRIOV   16
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
@@ -545,4 +545,35 @@
 #define PCI_ACS_CTRL           0x06    /* ACS Control Register */
 #define PCI_ACS_EGRESS_CTL_V   0x08    /* ACS Egress Control Vector */
 
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP          0x04    /* SR-IOV Capabilities */
+#define  PCI_SRIOV_CAP_VFM     0x01    /* VF Migration Capable */
+#define  PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL         0x08    /* SR-IOV Control */
+#define  PCI_SRIOV_CTRL_VFE    0x01    /* VF Enable */
+#define  PCI_SRIOV_CTRL_VFM    0x02    /* VF Migration Enable */
+#define  PCI_SRIOV_CTRL_INTR   0x04    /* VF Migration Interrupt Enable */
+#define  PCI_SRIOV_CTRL_MSE    0x08    /* VF Memory Space Enable */
+#define  PCI_SRIOV_CTRL_ARI    0x10    /* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS       0x0a    /* SR-IOV Status */
+#define  PCI_SRIOV_STATUS_VFM  0x01    /* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF   0x0c    /* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF     0x0e    /* Total VFs */
+#define PCI_SRIOV_NUM_VF       0x10    /* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK    0x12    /* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET    0x14    /* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE    0x16    /* Following VF Stride */
+#define PCI_SRIOV_VF_DID       0x1a    /* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE   0x1c    /* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE   0x20    /* System Page Size */
+#define PCI_SRIOV_BAR          0x24    /* VF BAR0 */
+#define  PCI_SRIOV_NUM_BARS    6       /* Number of VF BARs */
+#define PCI_SRIOV_VFM          0x3c    /* VF Migration State Array Offset*/
+#define  PCI_SRIOV_VFM_BIR(x)  ((x) & 7)       /* State BIR */
+#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)    /* State Offset */
+#define  PCI_SRIOV_VFM_UA      0x0     /* Inactive.Unavailable */
+#define  PCI_SRIOV_VFM_MI      0x1     /* Dormant.MigrateIn */
+#define  PCI_SRIOV_VFM_MO      0x2     /* Active.MigrateOut */
+#define  PCI_SRIOV_VFM_AV      0x3     /* Active.Available */
+
 #endif /* LINUX_PCI_REGS_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.