[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86/PCI-MSI: properly determine VF BAR values
On 08/08/11 10:22, Jan Beulich wrote: > As was discussed a couple of times on this list, SR-IOV virtual > functions have their BARs read as zero - the physical function's SR-IOV > capability structure must be consulted instead. The bogus warnings > people complained about are being eliminated with this change. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> Tested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- a/xen/arch/x86/msi.c > +++ b/xen/arch/x86/msi.c > @@ -522,12 +522,48 @@ static int msi_capability_init(struct pc > return 0; > } > > -static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) > +static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf) > { > u8 limit; > - u32 addr; > + u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0; > > - switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) > + if ( vf >= 0 ) > + { > + struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func)); > + unsigned int pos = pci_find_ext_capability(0, bus, > + PCI_DEVFN(slot, func), > + PCI_EXT_CAP_ID_SRIOV); > + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); > + u16 num_vf = pci_conf_read16(bus, slot, func, pos + > PCI_SRIOV_NUM_VF); > + u16 offset = pci_conf_read16(bus, slot, func, > + pos + PCI_SRIOV_VF_OFFSET); > + u16 stride = pci_conf_read16(bus, slot, func, > + pos + PCI_SRIOV_VF_STRIDE); > + > + if ( !pdev || !pos || > + !(ctrl & PCI_SRIOV_CTRL_VFE) || > + !(ctrl & PCI_SRIOV_CTRL_MSE) || > + !num_vf || !offset || (num_vf > 1 && !stride) || > + bir >= PCI_SRIOV_NUM_BARS || > + !pdev->vf_rlen[bir] ) > + return 0; > + base = pos + PCI_SRIOV_BAR; > + vf -= PCI_BDF(bus, slot, func) + offset; > + if ( vf < 0 || (vf && vf % stride) ) > + return 0; > + if ( stride ) > + { > + if ( vf % stride ) > + return 0; > + vf /= stride; > + } > + if ( vf >= num_vf ) > + return 0; > + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); > + disp = vf * pdev->vf_rlen[bir]; > + limit = PCI_SRIOV_NUM_BARS; > + } > + else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) > { > case PCI_HEADER_TYPE_NORMAL: > limit = 6; > @@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s > > if ( bir >= limit ) > return 0; > - addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4); > + addr = pci_conf_read32(bus, slot, func, base + bir * 4); > if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) > return 0; > if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > PCI_BASE_ADDRESS_MEM_TYPE_64 ) > @@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s > addr &= PCI_BASE_ADDRESS_MEM_MASK; > if ( ++bir >= limit ) > return 0; > - return addr | > - ((u64)pci_conf_read32(bus, slot, func, > - PCI_BASE_ADDRESS_0 + bir * 4) << 32); > + return addr + disp + > + ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32); > } > - return addr & PCI_BASE_ADDRESS_MEM_MASK; > + return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp; > } > > /** > @@ -629,11 +664,29 @@ static int msix_capability_init(struct p > > if ( !dev->msix_nr_entries ) > { > + u8 pbus, pslot, pfunc; > + int vf; > u64 pba_paddr; > u32 pba_offset; > > + if ( !dev->info.is_virtfn ) > + { > + pbus = bus; > + pslot = slot; > + pfunc = func; > + vf = -1; > + } > + else > + { > + pbus = dev->info.physfn.bus; > + pslot = PCI_SLOT(dev->info.physfn.devfn); > + pfunc = PCI_FUNC(dev->info.physfn.devfn); > + vf = PCI_BDF2(dev->bus, dev->devfn); > + } > + > ASSERT(!dev->msix_used_entries); > - WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir)); > + WARN_ON(msi->table_base != > + read_pci_mem_bar(pbus, pslot, pfunc, bir, vf)); > > dev->msix_nr_entries = nr_entries; > dev->msix_table.first = PFN_DOWN(table_paddr); > @@ -645,7 +698,7 @@ static int msix_capability_init(struct p > pba_offset = pci_conf_read32(bus, slot, func, > msix_pba_offset_reg(pos)); > bir = (u8)(pba_offset & PCI_MSIX_BIRMASK); > - pba_paddr = read_pci_mem_bar(bus, slot, func, bir); > + pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf); > WARN_ON(!pba_paddr); > pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK; > > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -146,6 +146,7 @@ void pci_enable_acs(struct pci_dev *pdev > int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) > { > struct pci_dev *pdev; > + unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); > const char *pdev_type; > int ret = -ENOMEM; > > @@ -154,7 +155,14 @@ int pci_add_device(u8 bus, u8 devfn, con > else if (info->is_extfn) > pdev_type = "extended function"; > else if (info->is_virtfn) > + { > + spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn); > + spin_unlock(&pcidevs_lock); > + if ( !pdev ) > + pci_add_device(info->physfn.bus, info->physfn.devfn, NULL); > pdev_type = "virtual function"; > + } > else > return -EINVAL; > > @@ -165,6 +173,70 @@ int pci_add_device(u8 bus, u8 devfn, con > > if ( info ) > pdev->info = *info; > + else if ( !pdev->vf_rlen[0] ) > + { > + unsigned int pos = pci_find_ext_capability(0, bus, devfn, > + PCI_EXT_CAP_ID_SRIOV); > + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); > + > + if ( !pos ) > + /* Nothing */; > + else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) ) > + { > + unsigned int i; > + > + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); > + for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i ) > + { > + unsigned int idx = pos + PCI_SRIOV_BAR + i * 4; > + u32 bar = pci_conf_read32(bus, slot, func, idx); > + u32 hi = 0; > + > + if ( (bar & PCI_BASE_ADDRESS_SPACE) == > + PCI_BASE_ADDRESS_SPACE_IO ) > + { > + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with > vf" > + " BAR%u in IO space\n", > + bus, slot, func, i); > + continue; > + } > + pci_conf_write32(bus, slot, func, idx, ~0); > + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > + PCI_BASE_ADDRESS_MEM_TYPE_64 ) > + { > + if ( i >= PCI_SRIOV_NUM_BARS ) > + { > + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x > with" > + " 64-bit vf BAR in last > slot\n", > + bus, slot, func); > + break; > + } > + hi = pci_conf_read32(bus, slot, func, idx + 4); > + pci_conf_write32(bus, slot, func, idx + 4, ~0); > + } > + pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) & > + PCI_BASE_ADDRESS_MEM_MASK; > + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > + PCI_BASE_ADDRESS_MEM_TYPE_64 ) > + { > + pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func, > + idx + 4) << 32; > + pci_conf_write32(bus, slot, func, idx + 4, hi); > + } > + else if ( pdev->vf_rlen[i] ) > + pdev->vf_rlen[i] |= (u64)~0 << 32; > + pci_conf_write32(bus, slot, func, idx, bar); > + pdev->vf_rlen[i] = -pdev->vf_rlen[i]; > + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > + PCI_BASE_ADDRESS_MEM_TYPE_64 ) > + ++i; > + } > + } > + else > + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its > virtual" > + " functions already enabled (%04x)\n", > + bus, slot, func, ctrl); > + } > > ret = 0; > if ( !pdev->domain ) > @@ -184,7 +256,7 @@ int pci_add_device(u8 bus, u8 devfn, con > out: > spin_unlock(&pcidevs_lock); > printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, > - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); > + bus, slot, func); > return ret; > } > > --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -59,6 +59,7 @@ struct pci_dev { > const u8 bus; > const u8 devfn; > struct pci_dev_info info; > + u64 vf_rlen[6]; > }; > > #define for_each_pdev(domain, pdev) \ > --- a/xen/include/xen/pci_regs.h > +++ b/xen/include/xen/pci_regs.h > @@ -425,7 +425,7 @@ > #define PCI_EXT_CAP_ID_ACS 13 > #define PCI_EXT_CAP_ID_ARI 14 > #define PCI_EXT_CAP_ID_ATS 15 > -#define PCI_EXT_CAP_ID_IOV 16 > +#define PCI_EXT_CAP_ID_SRIOV 16 > > /* Advanced Error Reporting */ > #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ > @@ -545,4 +545,35 @@ > #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ > #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ > > +/* Single Root I/O Virtualization */ > +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ > +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ > +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ > +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ > +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ > +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ > +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ > +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ > +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ > +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ > +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ > +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ > +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ > +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ > +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ > +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ > +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ > +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ > +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ > +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ > +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ > +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ > +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ > +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ > +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ > +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ > +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ > +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ > +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ > + > #endif /* LINUX_PCI_REGS_H */ > > -- Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer T: +44 (0)1223 225 900, http://www.citrix.com _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |