[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 10/10] vpci/sriov: add support for SR-IOV capability



On Wed, Jun 20, 2018 at 04:42:34PM +0200, Roger Pau Monne wrote:
> So that a PCI device that supports SR-IOV (PF) can enable the capability
> and use the virtual functions.
> 
> This code is expected to only be used by privileged domains,
> unprivileged domains should not get access to the SR-IOV capability.
> 
> The current code detects enabling of the virtual functions feature and
> automatically adds the VFs to the domain. It also detects enabling of
> memory space and maps the VFs BARs into the domain p2m. Disabling of
> the VF enable bit removes the devices and the BAR memory map from the
> domain p2m.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
[...]
> +
> +static void modify_memory_mapping(const struct pci_dev *pdev, unsigned int 
> pos,
> +                                  bool enable)
> +{
> +    const struct vpci_sriov *sriov = pdev->vpci->sriov;
> +    unsigned int i;
> +    int rc;
> +
> +    if ( enable )
> +    {
> +        struct pci_dev *pf_dev;
> +
> +        pcidevs_lock();
> +        /*
> +         * NB: a non-const pci_dev of the PF is needed in order to update
> +         * vf_rlen.
> +         */
> +        pf_dev = pci_get_pdev(pdev->seg, pdev->bus, pdev->devfn);
> +        pcidevs_unlock();
> +        ASSERT(pf_dev);
> +
> +        /* Set the BARs addresses and size. */
> +        for ( i = 0; i < PCI_SRIOV_NUM_BARS; i += rc )
> +        {
> +            unsigned int j, idx = pos + PCI_SRIOV_BAR + i * 4;
> +            const pci_sbdf_t sbdf = {
> +                .sbdf = PCI_SBDF3(pdev->seg, pdev->bus, pdev->devfn),
> +            };
> +            uint32_t bar = pci_conf_read32(pdev->seg, pdev->bus,
> +                                           PCI_SLOT(pdev->devfn),
> +                                           PCI_FUNC(pdev->devfn), idx);
> +            uint64_t addr, size;
> +
> +            rc = pci_size_mem_bar(sbdf, idx, &addr, &size,
> +                                  PCI_BAR_VF |
> +                                  ((i == PCI_SRIOV_NUM_BARS - 1) ?
> +                                   PCI_BAR_LAST : 0));

This only returns 1 or 2. The return type is unsigned int which means rc
is never going to be <= 0.

> +            if ( rc <= 0 )
> +            {
> +                gprintk(XENLOG_ERR,
> +                        "%04x:%02x:%02x.%u: failed to size VF BAR\n",
> +                        pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                        PCI_FUNC(pdev->devfn));
> +                domain_crash(pdev->domain);
> +                return;
> +            }
> +
> +            /*
> +             * Update vf_rlen on the PF. According to the spec the size of
> +             * the BARs can change if the system page size register is
> +             * modified, so always update rlen when enabling VFs.
> +             */
> +            pf_dev->vf_rlen[i] = size;
> +
> +            for ( j = 0; j < sriov->num_vfs; j++ )
> +            {
> +                struct vpci_header *header;
> +
> +                if ( !sriov->vf[j] )
> +                    /* Can happen if pci_add_device fails. */
> +                    continue;
> +
> +                spin_lock(&sriov->vf[j]->vpci_lock);
> +                header = &sriov->vf[j]->vpci->header;
> +
> +                if ( !size )
> +                {
> +                    header->bars[i].type = VPCI_BAR_EMPTY;
> +                    spin_unlock(&sriov->vf[j]->vpci_lock);
> +                    continue;
> +                }
> +
> +                header->bars[i].addr = addr + size * j;
> +                header->bars[i].size = size;
> +                header->bars[i].prefetchable =
> +                    bar & PCI_BASE_ADDRESS_MEM_PREFETCH;
> +
> +                switch ( rc )
> +                {
> +                case 1:
> +                    header->bars[i].type = VPCI_BAR_MEM32;
> +                    break;
> +
> +                case 2:
> +                    header->bars[i].type = VPCI_BAR_MEM64_LO;
> +                    header->bars[i + 1].type = VPCI_BAR_MEM64_HI;
> +                    break;
> +
> +                default:
> +                    ASSERT_UNREACHABLE();
> +                    spin_unlock(&sriov->vf[j]->vpci_lock);
> +                    domain_crash(pdev->domain);
> +                    return;
> +                }
> +                spin_unlock(&sriov->vf[j]->vpci_lock);
> +            }
> +        }
> +    }
> +
> +    /* Add/remove mappings for the VFs BARs into the p2m. */
> +    for ( i = 0; i < sriov->num_vfs; i++ )
> +    {
> +        struct pci_dev *vf_pdev = sriov->vf[i];
> +
> +        spin_lock(&vf_pdev->vpci_lock);
> +        rc = vpci_modify_bars(vf_pdev, enable, false);
> +        spin_unlock(&vf_pdev->vpci_lock);
> +        if ( rc )
> +            gprintk(XENLOG_ERR,
> +                    "failed to %smap BARs of VF %04x:%02x:%02x.%u: %d\n",
> +                    enable ? "" : "un", vf_pdev->seg, vf_pdev->bus,
> +                    PCI_SLOT(vf_pdev->devfn), PCI_FUNC(vf_pdev->devfn), rc);
> +    }
> +}
> +
> +static void control_write(const struct pci_dev *pdev, unsigned int reg,
> +                          uint32_t val, void *data)
> +{
> +    struct vpci_sriov *sriov = data;
> +    unsigned int i, pos = reg - PCI_SRIOV_CTRL;
> +    uint16_t control = pci_conf_read16(pdev->seg, pdev->bus,
> +                                       PCI_SLOT(pdev->devfn),
> +                                       PCI_FUNC(pdev->devfn),
> +                                       pos + PCI_SRIOV_CTRL);
> +    bool enabled = control & PCI_SRIOV_CTRL_VFE;
> +    bool mem_enabled = control & PCI_SRIOV_CTRL_MSE;
> +    bool new_enabled = val & PCI_SRIOV_CTRL_VFE;
> +    bool new_mem_enabled = val & PCI_SRIOV_CTRL_MSE;
> +    int rc;
> +
> +    if ( new_enabled != enabled )
> +    {
> +        uint16_t offset = pci_conf_read16(pdev->seg, pdev->bus,
> +                                          PCI_SLOT(pdev->devfn),
> +                                          PCI_FUNC(pdev->devfn),
> +                                          pos + PCI_SRIOV_VF_OFFSET);
> +        uint16_t stride = pci_conf_read16(pdev->seg, pdev->bus,
> +                                          PCI_SLOT(pdev->devfn),
> +                                          PCI_FUNC(pdev->devfn),
> +                                          pos + PCI_SRIOV_VF_STRIDE);
> +
> +        if ( new_enabled )
> +        {
> +            /*
> +             * Only update the number of active VFs when enabling, when
> +             * disabling use the cached value in order to always remove the
> +             * same number of VFs that where active.

where -> were.

> +             */
> +            sriov->num_vfs = pci_conf_read16(pdev->seg, pdev->bus,
> +                                             PCI_SLOT(pdev->devfn),
> +                                             PCI_FUNC(pdev->devfn),
> +                                             pos + PCI_SRIOV_NUM_VF);
> +
> +            /*
> +             * NB: VFE needs to be enabled before calling pci_add_device so 
> Xen
> +             * can access the config space of VFs.
> +             */
> +            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                             PCI_FUNC(pdev->devfn), reg,
> +                             control | PCI_SRIOV_CTRL_VFE);
> +
> +            /*
> +             * The spec states that the software must wait at least 100ms
> +             * before attempting to access VF registers when enabling virtual
> +             * functions on the PF.
> +             */
> +            mdelay(100);

IMHO delaying 100ms like this in an active system is far too long. It
would be better to put this into a loop and process softirqs in between
delays.

> +        }
> +
> +        for ( i = 0; i < sriov->num_vfs; i++ )
> +        {
> +            const pci_sbdf_t bdf = {
> +                .bdf = PCI_BDF2(pdev->bus, pdev->devfn) + offset + stride * 
> i,
> +            };
> +
> +            if ( new_enabled )
> +            {
> +                const struct pci_dev_info info = {
> +                    .is_virtfn = true,
> +                    .physfn.bus = pdev->bus,
> +                    .physfn.devfn = pdev->devfn,
> +                };
> +
> +                rc = pci_add_device(pdev->seg, bdf.bus, bdf.extfunc, &info,
> +                                    pdev->node);
> +            }
> +            else
> +                rc = pci_remove_device(pdev->seg, bdf.bus, bdf.extfunc);
> +            if ( rc )
> +                gprintk(XENLOG_ERR, "failed to %s VF %04x:%02x:%02x.%u: 
> %d\n",
> +                        new_enabled ? "add" : "remove", pdev->seg, bdf.bus,
> +                        bdf.dev, bdf.func, rc);
> +
> +            pcidevs_lock();
> +            sriov->vf[i] = pci_get_pdev(pdev->seg, bdf.bus, bdf.extfunc);
> +            pcidevs_unlock();

So the array is updated regardless of rc?

Wei.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.