[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR


  • To: "roger.pau@xxxxxxxxxx" <roger.pau@xxxxxxxxxx>
  • From: Oleksandr Andrushchenko <Oleksandr_Andrushchenko@xxxxxxxx>
  • Date: Mon, 25 Oct 2021 11:51:57 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=epam.com; dmarc=pass action=none header.from=epam.com; dkim=pass header.d=epam.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=diyFJu3exAB9XQupRDEjylhswIDofa3cZIW7OV+c/EE=; b=fJrH6oiqiuEa3vDHaPujKzEoOQeDnYRmYLeIAKY44UH9+v0oHveFFVh7ydhbm9rYK/QpkUDOX3Kyfpf9Eu0nKAuHuzlxJeDiQrLCK0B3b1LBEcq41xX0eB6uTYX+bSvetZNr9nFIXMpelyKh2VmP1bFeHnWByTH3GUoHgz7sWTwYQ67kRxV95yzwTjYk93+CGM3diwZovHBv/rEUl5pGniarYicKD4tBhT709gdLfuxxYS2WjwydDI8WjJ6CTRsvsK8P6VE/WlTmu2WpTnxGnPW9VtZ2khifV9CRrUv7NWFGBgbVq/sZKujhu112+Fz4zkfyzU0pa3dg+66BkPg9uw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=Vu11CsIGfiN2RqIulJIaMKJJTbjEiCX11z2hYYsBT16/4P9n0TErDcdOje6nfrdSoNpQxPSil/hdqTRAyofJM4VzrG1wpXKKRnB6O7yKS7XWJLG03iwHaiLZuOR0BOBnB6tUWjXeDZrf4t8BKHrZb6OSoMP7fUFThN1MuTttfkXyiRC5fhlAGmfPJGKLWgvKhAerHgyIxvgdyfi99Lh8uRhtBTYnO0g7iIhv+Vd4QNW0J14ij8GC9ezytd4PWj8G19gCYJb1T6zSSfUf9vC6TAWP4F8vWTkh6SXZbJQEwkXGi/FeXb74Kk+3JlwtUDgSqJq52XjfFsqBhJUAOeyJfA==
  • Cc: "julien@xxxxxxx" <julien@xxxxxxx>, "sstabellini@xxxxxxxxxx" <sstabellini@xxxxxxxxxx>, Oleksandr Tyshchenko <Oleksandr_Tyshchenko@xxxxxxxx>, Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, Artem Mygaiev <Artem_Mygaiev@xxxxxxxx>, "jbeulich@xxxxxxxx" <jbeulich@xxxxxxxx>, Bertrand Marquis <bertrand.marquis@xxxxxxx>, Rahul Singh <rahul.singh@xxxxxxx>, Oleksandr Andrushchenko <Oleksandr_Andrushchenko@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • Delivery-date: Mon, 25 Oct 2021 11:52:12 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHXtdAibxRAFQhJuUO1GvFhv7aI/6vjwcoA
  • Thread-topic: [PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR

Hi, Roger!
Could you please take a look at the below?
Jan was questioning the per BAR range set approach, so it
is crucial for the maintainer (you) to answer here.

Thank you in advance,
Oleksandr

On 30.09.21 10:52, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>
> Instead of handling a single range set, that contains all the memory
> regions of all the BARs and ROM, have them per BAR.
>
> This is in preparation of making non-identity mappings in p2m for the
> MMIOs/ROM.
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> ---
>   xen/drivers/vpci/header.c | 172 ++++++++++++++++++++++++++------------
>   xen/include/xen/vpci.h    |   3 +-
>   2 files changed, 122 insertions(+), 53 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index ec4d215f36ff..9c603d26d302 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -131,49 +131,75 @@ static void modify_decoding(const struct pci_dev *pdev, 
> uint16_t cmd,
>   
>   bool vpci_process_pending(struct vcpu *v)
>   {
> -    if ( v->vpci.mem )
> +    if ( v->vpci.num_mem_ranges )
>       {
>           struct map_data data = {
>               .d = v->domain,
>               .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
>           };
> -        int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
> +        struct pci_dev *pdev = v->vpci.pdev;
> +        struct vpci_header *header = &pdev->vpci->header;
> +        unsigned int i;
>   
> -        if ( rc == -ERESTART )
> -            return true;
> +        for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +        {
> +            struct vpci_bar *bar = &header->bars[i];
> +            int rc;
>   
> -        spin_lock(&v->vpci.pdev->vpci->lock);
> -        /* Disable memory decoding unconditionally on failure. */
> -        modify_decoding(v->vpci.pdev,
> -                        rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
> -                        !rc && v->vpci.rom_only);
> -        spin_unlock(&v->vpci.pdev->vpci->lock);
> +            if ( !bar->mem )
> +                continue;
>   
> -        rangeset_destroy(v->vpci.mem);
> -        v->vpci.mem = NULL;
> -        if ( rc )
> -            /*
> -             * FIXME: in case of failure remove the device from the domain.
> -             * Note that there might still be leftover mappings. While this 
> is
> -             * safe for Dom0, for DomUs the domain will likely need to be
> -             * killed in order to avoid leaking stale p2m mappings on
> -             * failure.
> -             */
> -            vpci_remove_device(v->vpci.pdev);
> +            rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> +
> +            if ( rc == -ERESTART )
> +                return true;
> +
> +            spin_lock(&pdev->vpci->lock);
> +            /* Disable memory decoding unconditionally on failure. */
> +            modify_decoding(pdev,
> +                            rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : 
> v->vpci.cmd,
> +                            !rc && v->vpci.rom_only);
> +            spin_unlock(&pdev->vpci->lock);
> +
> +            rangeset_destroy(bar->mem);
> +            bar->mem = NULL;
> +            v->vpci.num_mem_ranges--;
> +            if ( rc )
> +                /*
> +                 * FIXME: in case of failure remove the device from the 
> domain.
> +                 * Note that there might still be leftover mappings. While 
> this is
> +                 * safe for Dom0, for DomUs the domain will likely need to be
> +                 * killed in order to avoid leaking stale p2m mappings on
> +                 * failure.
> +                 */
> +                vpci_remove_device(pdev);
> +        }
>       }
>   
>       return false;
>   }
>   
>   static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> -                            struct rangeset *mem, uint16_t cmd)
> +                            uint16_t cmd)
>   {
>       struct map_data data = { .d = d, .map = true };
> -    int rc;
> +    struct vpci_header *header = &pdev->vpci->header;
> +    int rc = 0;
> +    unsigned int i;
> +
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
>   
> -    while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == 
> -ERESTART )
> -        process_pending_softirqs();
> -    rangeset_destroy(mem);
> +        if ( !bar->mem )
> +            continue;
> +
> +        while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
> +                                              &data)) == -ERESTART )
> +            process_pending_softirqs();
> +        rangeset_destroy(bar->mem);
> +        bar->mem = NULL;
> +    }
>       if ( !rc )
>           modify_decoding(pdev, cmd, false);
>   
> @@ -181,7 +207,7 @@ static int __init apply_map(struct domain *d, const 
> struct pci_dev *pdev,
>   }
>   
>   static void defer_map(struct domain *d, struct pci_dev *pdev,
> -                      struct rangeset *mem, uint16_t cmd, bool rom_only)
> +                      uint16_t cmd, bool rom_only, uint8_t num_mem_ranges)
>   {
>       struct vcpu *curr = current;
>   
> @@ -192,9 +218,9 @@ static void defer_map(struct domain *d, struct pci_dev 
> *pdev,
>        * started for the same device if the domain is not well-behaved.
>        */
>       curr->vpci.pdev = pdev;
> -    curr->vpci.mem = mem;
>       curr->vpci.cmd = cmd;
>       curr->vpci.rom_only = rom_only;
> +    curr->vpci.num_mem_ranges = num_mem_ranges;
>       /*
>        * Raise a scheduler softirq in order to prevent the guest from resuming
>        * execution with pending mapping operations, to trigger the invocation
> @@ -206,42 +232,47 @@ static void defer_map(struct domain *d, struct pci_dev 
> *pdev,
>   static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool 
> rom_only)
>   {
>       struct vpci_header *header = &pdev->vpci->header;
> -    struct rangeset *mem = rangeset_new(NULL, NULL, 0);
>       struct pci_dev *tmp, *dev = NULL;
>       const struct vpci_msix *msix = pdev->vpci->msix;
> -    unsigned int i;
> +    unsigned int i, j;
>       int rc;
> -
> -    if ( !mem )
> -        return -ENOMEM;
> +    uint8_t num_mem_ranges;
>   
>       /*
> -     * Create a rangeset that represents the current device BARs memory 
> region
> +     * Create a rangeset per BAR that represents the current device memory 
> region
>        * and compare it against all the currently active BAR memory regions. 
> If
>        * an overlap is found, subtract it from the region to be 
> mapped/unmapped.
>        *
> -     * First fill the rangeset with all the BARs of this device or with the 
> ROM
> +     * First fill the rangesets with all the BARs of this device or with the 
> ROM
>        * BAR only, depending on whether the guest is toggling the memory 
> decode
>        * bit of the command register, or the enable bit of the ROM BAR 
> register.
>        */
>       for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>       {
> -        const struct vpci_bar *bar = &header->bars[i];
> +        struct vpci_bar *bar = &header->bars[i];
>           unsigned long start = PFN_DOWN(bar->addr);
>           unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
>   
> +        bar->mem = NULL;
> +
>           if ( !MAPPABLE_BAR(bar) ||
>                (rom_only ? bar->type != VPCI_BAR_ROM
>                          : (bar->type == VPCI_BAR_ROM && 
> !header->rom_enabled)) )
>               continue;
>   
> -        rc = rangeset_add_range(mem, start, end);
> +        bar->mem = rangeset_new(NULL, NULL, 0);
> +        if ( !bar->mem )
> +        {
> +            rc = -ENOMEM;
> +            goto fail;
> +        }
> +
> +        rc = rangeset_add_range(bar->mem, start, end);
>           if ( rc )
>           {
>               printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
>                      start, end, rc);
> -            rangeset_destroy(mem);
> -            return rc;
> +            goto fail;
>           }
>       }
>   
> @@ -252,14 +283,21 @@ static int modify_bars(const struct pci_dev *pdev, 
> uint16_t cmd, bool rom_only)
>           unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
>                                        vmsix_table_size(pdev->vpci, i) - 1);
>   
> -        rc = rangeset_remove_range(mem, start, end);
> -        if ( rc )
> +        for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
>           {
> -            printk(XENLOG_G_WARNING
> -                   "Failed to remove MSIX table [%lx, %lx]: %d\n",
> -                   start, end, rc);
> -            rangeset_destroy(mem);
> -            return rc;
> +            const struct vpci_bar *bar = &header->bars[j];
> +
> +            if ( !bar->mem )
> +                continue;
> +
> +            rc = rangeset_remove_range(bar->mem, start, end);
> +            if ( rc )
> +            {
> +                printk(XENLOG_G_WARNING
> +                       "Failed to remove MSIX table [%lx, %lx]: %d\n",
> +                       start, end, rc);
> +                goto fail;
> +            }
>           }
>       }
>   
> @@ -291,7 +329,8 @@ static int modify_bars(const struct pci_dev *pdev, 
> uint16_t cmd, bool rom_only)
>               unsigned long start = PFN_DOWN(bar->addr);
>               unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
>   
> -            if ( !bar->enabled || !rangeset_overlaps_range(mem, start, end) 
> ||
> +            if ( !bar->enabled ||
> +                 !rangeset_overlaps_range(bar->mem, start, end) ||
>                    /*
>                     * If only the ROM enable bit is toggled check against 
> other
>                     * BARs in the same device for overlaps, but not against 
> the
> @@ -300,13 +339,12 @@ static int modify_bars(const struct pci_dev *pdev, 
> uint16_t cmd, bool rom_only)
>                    (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) )
>                   continue;
>   
> -            rc = rangeset_remove_range(mem, start, end);
> +            rc = rangeset_remove_range(bar->mem, start, end);
>               if ( rc )
>               {
>                   printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
>                          start, end, rc);
> -                rangeset_destroy(mem);
> -                return rc;
> +                goto fail;
>               }
>           }
>       }
> @@ -324,12 +362,42 @@ static int modify_bars(const struct pci_dev *pdev, 
> uint16_t cmd, bool rom_only)
>            * will always be to establish mappings and process all the BARs.
>            */
>           ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
> -        return apply_map(pdev->domain, pdev, mem, cmd);
> +        return apply_map(pdev->domain, pdev, cmd);
>       }
>   
> -    defer_map(dev->domain, dev, mem, cmd, rom_only);
> +    /* Find out how many memory ranges has left after MSI and overlaps. */
> +    num_mem_ranges = 0;
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
> +
> +        if ( !rangeset_is_empty(bar->mem) )
> +            num_mem_ranges++;
> +    }
> +
> +    /*
> +     * There are cases when PCI device, root port for example, has neither
> +     * memory space nor IO. In this case PCI command register write is
> +     * missed resulting in the underlying PCI device not functional, so:
> +     *   - if there are no regions write the command register now
> +     *   - if there are regions then defer work and write later on
> +     */
> +    if ( !num_mem_ranges )
> +        pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
> +    else
> +        defer_map(dev->domain, dev, cmd, rom_only, num_mem_ranges);
>   
>       return 0;
> +
> +fail:
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
> +
> +        rangeset_destroy(bar->mem);
> +        bar->mem = NULL;
> +    }
> +    return rc;
>   }
>   
>   static void cmd_write(const struct pci_dev *pdev, unsigned int reg,
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index a0320b22cb36..352e02d0106d 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -80,6 +80,7 @@ struct vpci {
>               /* Guest view of the BAR. */
>               uint64_t guest_addr;
>               uint64_t size;
> +            struct rangeset *mem;
>               enum {
>                   VPCI_BAR_EMPTY,
>                   VPCI_BAR_IO,
> @@ -154,9 +155,9 @@ struct vpci {
>   
>   struct vpci_vcpu {
>       /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
> -    struct rangeset *mem;
>       struct pci_dev *pdev;
>       uint16_t cmd;
> +    uint8_t num_mem_ranges;
>       bool rom_only : 1;
>   };
>   

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.