[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/3] PCI/MSI: Add pci_enable_msi_partial()



There are PCI devices that require a particular value written
to the Multiple Message Enable (MME) register while aligned on
power of 2 boundary value of actually used MSI vectors 'nvec'
is a lesser of that MME value:

        roundup_pow_of_two(nvec) < 'Multiple Message Enable'

However the existing pci_enable_msi_block() interface is not
able to configure such devices, since the value written to the
MME register is calculated from the number of requested MSIs
'nvec':

        'Multiple Message Enable' = roundup_pow_of_two(nvec)

In this case the result written to the MME register may not
satisfy the aforementioned PCI devices requirement and therefore
the PCI functions will not operate in a desired mode.

This update introduces pci_enable_msi_partial() extension to
pci_enable_msi_block() interface that accepts extra 'nvec_mme'
argument which is then written to MME register while the value
of 'nvec' is still used to setup as many interrupts as requested.

As result of this change, architecture-specific callbacks
arch_msi_check_device() and arch_setup_msi_irqs() get an extra
'nvec_mme' parameter as well, but it is ignored for now.
Therefore, this update is a placeholder for architectures that
wish to support pci_enable_msi_partial() function in the future.

Cc: linux-doc@xxxxxxxxxxxxxxx
Cc: linux-mips@xxxxxxxxxxxxxx
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Cc: linux-s390@xxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
Cc: iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
Cc: linux-ide@xxxxxxxxxxxxxxx
Cc: linux-pci@xxxxxxxxxxxxxxx
Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxx>
---
 Documentation/PCI/MSI-HOWTO.txt |   36 ++++++++++++++--
 arch/mips/pci/msi-octeon.c      |    2 +-
 arch/powerpc/kernel/msi.c       |    4 +-
 arch/s390/pci/pci.c             |    2 +-
 arch/x86/kernel/x86_init.c      |    2 +-
 drivers/pci/msi.c               |   83 ++++++++++++++++++++++++++++++++++-----
 include/linux/msi.h             |    5 +-
 include/linux/pci.h             |    3 +
 8 files changed, 115 insertions(+), 22 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 10a9369..c8a8503 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -195,14 +195,40 @@ By contrast with pci_enable_msi_range() function, 
pci_enable_msi_exact()
 returns zero in case of success, which indicates MSI interrupts have been
 successfully allocated.
 
-4.2.4 pci_disable_msi
+4.2.4 pci_enable_msi_partial
+
+int pci_enable_msi_partial(struct pci_dev *dev, int nvec, int nvec_mme)
+
+This variation on pci_enable_msi_exact() call allows a device driver to
+setup 'nvec_mme' number of multiple MSIs with the PCI function, while
+setup only 'nvec' (which could be a lesser of 'nvec_mme') number of MSIs
+in operating system. The MSI specification only allows 'nvec_mme' to be
+allocated in powers of two, up to a maximum of 2^5 (32).
+
+This function could be used when a PCI function is known to send 'nvec'
+MSIs, but still requires a particular number of MSIs 'nvec_mme' to be
+initialized with. As result, 'nvec_mme' - 'nvec' number of unused MSIs
+do not waste system resources.
+
+If this function returns 0, it has succeeded in allocating 'nvec_mme'
+interrupts and setting up 'nvec' interrupts. In this case, the function
+enables MSI on this device and updates dev->irq to be the lowest of the
+new interrupts assigned to it.  The other interrupts assigned to the
+device are in the range dev->irq to dev->irq + nvec - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+4.2.5 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
-This function should be used to undo the effect of pci_enable_msi_range().
-Calling it restores dev->irq to the pin-based interrupt number and frees
-the previously allocated MSIs.  The interrupts may subsequently be assigned
-to another device, so drivers should not cache the value of dev->irq.
+This function should be used to undo the effect of pci_enable_msi_range()
+or pci_enable_msi_partial(). Calling it restores dev->irq to the pin-based
+interrupt number and frees the previously allocated MSIs.  The interrupts
+may subsequently be assigned to another device, so drivers should not cache
+the value of dev->irq.
 
 Before calling this function, a device driver must always call free_irq()
 on any interrupt for which it previously called request_irq().
diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c
index 2b91b0e..2be7979 100644
--- a/arch/mips/pci/msi-octeon.c
+++ b/arch/mips/pci/msi-octeon.c
@@ -178,7 +178,7 @@ msi_irq_allocated:
        return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int nvec_mme, int type)
 {
        struct msi_desc *entry;
        int ret;
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 8bbc12d..c60aee3 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -13,7 +13,7 @@
 
 #include <asm/machdep.h>
 
-int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
+int arch_msi_check_device(struct pci_dev *dev, int nvec, int nvec_mme, int 
type)
 {
        if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
                pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
@@ -32,7 +32,7 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int 
type)
         return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int nvec_mme, int type)
 {
        return ppc_md.setup_msi_irqs(dev, nvec, type);
 }
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9ddc51e..3cf38a8 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -398,7 +398,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
        }
 }
 
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int nvec_mme, int type)
 {
        struct zpci_dev *zdev = get_zdev(pdev);
        unsigned int hwirq, msi_vecs;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e48b674..b65bf95 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -121,7 +121,7 @@ struct x86_msi_ops x86_msi = {
 };
 
 /* MSI arch specific hooks */
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int nvec_mme, int type)
 {
        return x86_msi.setup_msi_irqs(dev, nvec, type);
 }
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 27a7e67..0410d9b 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -56,7 +56,8 @@ void __weak arch_teardown_msi_irq(unsigned int irq)
        chip->teardown_irq(chip, irq);
 }
 
-int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
+int __weak arch_msi_check_device(struct pci_dev *dev,
+                                int nvec, int nvec_mme, int type)
 {
        struct msi_chip *chip = dev->bus->msi;
 
@@ -66,7 +67,8 @@ int __weak arch_msi_check_device(struct pci_dev *dev, int 
nvec, int type)
        return chip->check_device(chip, dev, nvec, type);
 }
 
-int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int __weak arch_setup_msi_irqs(struct pci_dev *dev,
+                              int nvec, int nvec_mme, int type)
 {
        struct msi_desc *entry;
        int ret;
@@ -598,6 +600,7 @@ error_attrs:
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
  * @nvec: number of interrupts to allocate
+ * @nvec_mme: number of interrupts to write to Multiple Message Enable register
  *
  * Setup the MSI capability structure of the device with the requested
  * number of interrupts.  A return value of zero indicates the successful
@@ -605,7 +608,7 @@ error_attrs:
  * an error, and a positive return value indicates the number of interrupts
  * which could have been allocated.
  */
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, int nvec_mme)
 {
        struct msi_desc *entry;
        int ret;
@@ -640,7 +643,7 @@ static int msi_capability_init(struct pci_dev *dev, int 
nvec)
        list_add_tail(&entry->list, &dev->msi_list);
 
        /* Configure MSI capability structure */
-       ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
+       ret = arch_setup_msi_irqs(dev, nvec, nvec_mme, PCI_CAP_ID_MSI);
        if (ret) {
                msi_mask_irq(entry, mask, ~mask);
                free_msi_irqs(dev);
@@ -758,7 +761,8 @@ static int msix_capability_init(struct pci_dev *dev,
        if (ret)
                return ret;
 
-       ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
+       /* Parameter 'nvec_mme' does not make sense in case of MSI-X */
+       ret = arch_setup_msi_irqs(dev, nvec, 0, PCI_CAP_ID_MSIX);
        if (ret)
                goto out_avail;
 
@@ -812,13 +816,15 @@ out_free:
  * pci_msi_check_device - check whether MSI may be enabled on a device
  * @dev: pointer to the pci_dev data structure of MSI device function
  * @nvec: how many MSIs have been requested ?
+ * @nvec_mme: how many MSIs write to Multiple Message Enable register ?
  * @type: are we checking for MSI or MSI-X ?
  *
  * Look at global flags, the device itself, and its parent buses
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 0, else return an error code.
  **/
-static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
+static int pci_msi_check_device(struct pci_dev *dev,
+                               int nvec, int nvec_mme, int type)
 {
        struct pci_bus *bus;
        int ret;
@@ -846,7 +852,7 @@ static int pci_msi_check_device(struct pci_dev *dev, int 
nvec, int type)
                if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
                        return -EINVAL;
 
-       ret = arch_msi_check_device(dev, nvec, type);
+       ret = arch_msi_check_device(dev, nvec, nvec_mme, type);
        if (ret)
                return ret;
 
@@ -878,6 +884,62 @@ int pci_msi_vec_count(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_msi_vec_count);
 
+/**
+ * pci_enable_msi_partial - configure device's MSI capability structure
+ * @dev: device to configure
+ * @nvec: number of interrupts to configure
+ * @nvec_mme: number of interrupts to write to Multiple Message Enable register
+ *
+ * This function tries to allocate @nvec number of interrupts while setup
+ * device's Multiple Message Enable register with @nvec_mme interrupts.
+ * It returns a negative errno if an error occurs. If it succeeds, it returns
+ * zero and updates the @dev's irq member to the lowest new interrupt number;
+ * the other interrupt numbers allocated to this device are consecutive.
+ */
+int pci_enable_msi_partial(struct pci_dev *dev, int nvec, int nvec_mme)
+{
+       int maxvec;
+       int rc;
+
+       if (dev->current_state != PCI_D0)
+               return -EINVAL;
+
+       WARN_ON(!!dev->msi_enabled);
+
+       /* Check whether driver already requested MSI-X irqs */
+       if (dev->msix_enabled) {
+               dev_info(&dev->dev, "can't enable MSI "
+                        "(MSI-X already enabled)\n");
+               return -EINVAL;
+       }
+
+       if (!is_power_of_2(nvec_mme))
+               return -EINVAL;
+       if (nvec > nvec_mme)
+               return -EINVAL;
+
+       maxvec = pci_msi_vec_count(dev);
+       if (maxvec < 0)
+               return maxvec;
+       else if (nvec_mme > maxvec)
+               return -EINVAL;
+
+       rc = pci_msi_check_device(dev, nvec, nvec_mme, PCI_CAP_ID_MSI);
+       if (rc < 0)
+               return rc;
+       else if (rc > 0)
+               return -ENOSPC;
+
+       rc = msi_capability_init(dev, nvec, nvec_mme);
+       if (rc < 0)
+               return rc;
+       else if (rc > 0)
+               return -ENOSPC;
+
+       return 0;
+}
+EXPORT_SYMBOL(pci_enable_msi_partial);
+
 void pci_msi_shutdown(struct pci_dev *dev)
 {
        struct msi_desc *desc;
@@ -957,7 +1019,7 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry 
*entries, int nvec)
        if (!entries || !dev->msix_cap || dev->current_state != PCI_D0)
                return -EINVAL;
 
-       status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
+       status = pci_msi_check_device(dev, nvec, 0, PCI_CAP_ID_MSIX);
        if (status)
                return status;
 
@@ -1110,7 +1172,8 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, 
int maxvec)
                nvec = maxvec;
 
        do {
-               rc = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
+               rc = pci_msi_check_device(dev, nvec, roundup_pow_of_two(nvec),
+                                         PCI_CAP_ID_MSI);
                if (rc < 0) {
                        return rc;
                } else if (rc > 0) {
@@ -1121,7 +1184,7 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, 
int maxvec)
        } while (rc);
 
        do {
-               rc = msi_capability_init(dev, nvec);
+               rc = msi_capability_init(dev, nvec, roundup_pow_of_two(nvec));
                if (rc < 0) {
                        return rc;
                } else if (rc > 0) {
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 92a2f99..b9f89ee 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -57,9 +57,10 @@ struct msi_desc {
  */
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
 void arch_teardown_msi_irq(unsigned int irq);
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int nvec_mme, int type);
 void arch_teardown_msi_irqs(struct pci_dev *dev);
-int arch_msi_check_device(struct pci_dev* dev, int nvec, int type);
+int arch_msi_check_device(struct pci_dev *dev,
+                         int nvec, int nvec_mme, int type);
 void arch_restore_msi_irqs(struct pci_dev *dev);
 
 void default_teardown_msi_irqs(struct pci_dev *dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 71d9673..7360bd2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1184,6 +1184,7 @@ void pci_disable_msix(struct pci_dev *dev);
 void msi_remove_pci_irq_vectors(struct pci_dev *dev);
 void pci_restore_msi_state(struct pci_dev *dev);
 int pci_msi_enabled(void);
+int pci_enable_msi_partial(struct pci_dev *dev, int nvec, int nvec_mme);
 int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec);
 static inline int pci_enable_msi_exact(struct pci_dev *dev, int nvec)
 {
@@ -1215,6 +1216,8 @@ static inline void pci_disable_msix(struct pci_dev *dev) 
{ }
 static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) { }
 static inline void pci_restore_msi_state(struct pci_dev *dev) { }
 static inline int pci_msi_enabled(void) { return 0; }
+static int pci_enable_msi_partial(struct pci_dev *dev, int nvec, int nvec_mme)
+{ return -ENOSYS; }
 static inline int pci_enable_msi_range(struct pci_dev *dev, int minvec,
                                       int maxvec)
 { return -ENOSYS; }
-- 
1.7.7.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.