|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v11 11/17] vpci/header: program p2m with guest BAR view
On Sat, Dec 02, 2023 at 01:27:05AM +0000, Volodymyr Babchuk wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>
> Take into account guest's BAR view and program its p2m accordingly:
> gfn is guest's view of the BAR and mfn is the physical BAR value.
> This way hardware domain sees physical BAR values and guest sees
> emulated ones.
>
> Hardware domain continues getting the BARs identity mapped, while for
> domUs the BARs are mapped at the requested guest address without
> modifying the BAR address in the device PCI config space.
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
> ---
> In v11:
> - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions
> to access guest's view of the VMSIx tables.
> - Use MFN (not GFN) to check access permissions
> - Move page offset check to this patch
> - Call rangeset_remove_range() with correct parameters
> In v10:
> - Moved GFN variable definition outside the loop in map_range()
> - Updated printk error message in map_range()
> - Now BAR address is always stored in bar->guest_addr, even for
> HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars()
> - vmsix_table_base() now uses .guest_addr instead of .addr
> In v9:
> - Extended the commit message
> - Use bar->guest_addr in modify_bars
> - Extended printk error message in map_range
> - Moved map_data initialization so .bar can be initialized during declaration
> Since v5:
> - remove debug print in map_range callback
> - remove "identity" from the debug print
> Since v4:
> - moved start_{gfn|mfn} calculation into map_range
> - pass vpci_bar in the map_data instead of start_{gfn|mfn}
> - s/guest_addr/guest_reg
> Since v3:
> - updated comment (Roger)
> - removed gfn_add(map->start_gfn, rc); which is wrong
> - use v->domain instead of v->vpci.pdev->domain
> - removed odd e.g. in comment
> - s/d%d/%pd in altered code
> - use gdprintk for map/unmap logs
> Since v2:
> - improve readability for data.start_gfn and restructure ?: construct
> Since v1:
> - s/MSI/MSI-X in comments
> ---
> xen/drivers/vpci/header.c | 79 +++++++++++++++++++++++++++++----------
> xen/include/xen/vpci.h | 13 +++++++
> 2 files changed, 73 insertions(+), 19 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 7c84cee5d1..21b3fb5579 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -33,6 +33,7 @@
>
> struct map_data {
> struct domain *d;
> + const struct vpci_bar *bar;
> bool map;
> };
>
> @@ -40,13 +41,24 @@ static int cf_check map_range(
> unsigned long s, unsigned long e, void *data, unsigned long *c)
> {
> const struct map_data *map = data;
> + /* Start address of the BAR as seen by the guest. */
> + unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr);
> + /* Physical start address of the BAR. */
> + mfn_t start_mfn = _mfn(PFN_DOWN(map->bar->addr));
> int rc;
>
> for ( ; ; )
> {
> unsigned long size = e - s + 1;
> + /*
> + * Ranges to be mapped don't always start at the BAR start address,
> as
> + * there can be holes or partially consumed ranges. Account for the
> + * offset of the current address from the BAR start.
> + */
> + mfn_t map_mfn = mfn_add(start_mfn, s - start_gfn);
> + unsigned long m_end = mfn_x(map_mfn) + size - 1;
>
> - if ( !iomem_access_permitted(map->d, s, e) )
> + if ( !iomem_access_permitted(map->d, mfn_x(map_mfn), m_end) )
> {
> printk(XENLOG_G_WARNING
> "%pd denied access to MMIO range [%#lx, %#lx]\n",
> @@ -54,7 +66,8 @@ static int cf_check map_range(
> return -EPERM;
> }
>
> - rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map);
> + rc = xsm_iomem_mapping(XSM_HOOK, map->d, mfn_x(map_mfn), m_end,
> + map->map);
> if ( rc )
> {
> printk(XENLOG_G_WARNING
> @@ -72,8 +85,8 @@ static int cf_check map_range(
> * - {un}map_mmio_regions doesn't support preemption.
> */
>
> - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
> - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
> + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, map_mfn)
> + : unmap_mmio_regions(map->d, _gfn(s), size, map_mfn);
> if ( rc == 0 )
> {
> *c += size;
> @@ -82,8 +95,9 @@ static int cf_check map_range(
> if ( rc < 0 )
> {
> printk(XENLOG_G_WARNING
> - "Failed to identity %smap [%lx, %lx] for d%d: %d\n",
> - map->map ? "" : "un", s, e, map->d->domain_id, rc);
> + "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n",
> + map->map ? "" : "un", s, e, mfn_x(map_mfn),
> + mfn_x(map_mfn) + size, map->d, rc);
Seeing the amount of mfn_x calls, it might be better to do the
calculations as unsigned long, and use _mfn() in the
{,un}map_mmio_regions() calls.
> break;
> }
> ASSERT(rc < size);
> @@ -162,10 +176,6 @@ static void modify_decoding(const struct pci_dev *pdev,
> uint16_t cmd,
> bool vpci_process_pending(struct vcpu *v)
> {
> struct pci_dev *pdev = v->vpci.pdev;
> - struct map_data data = {
> - .d = v->domain,
> - .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> - };
> struct vpci_header *header = NULL;
> unsigned int i;
>
> @@ -185,6 +195,11 @@ bool vpci_process_pending(struct vcpu *v)
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct map_data data = {
> + .d = v->domain,
> + .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> + .bar = bar,
> + };
> int rc;
>
> if ( rangeset_is_empty(bar->mem) )
> @@ -235,7 +250,6 @@ bool vpci_process_pending(struct vcpu *v)
> static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> uint16_t cmd)
> {
> - struct map_data data = { .d = d, .map = true };
> struct vpci_header *header = &pdev->vpci->header;
> int rc = 0;
> unsigned int i;
> @@ -245,6 +259,7 @@ static int __init apply_map(struct domain *d, const
> struct pci_dev *pdev,
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct map_data data = { .d = d, .map = true, .bar = bar };
>
> if ( rangeset_is_empty(bar->mem) )
> continue;
> @@ -310,12 +325,16 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> * First fill the rangesets with the BAR of this device or with the ROM
> * BAR only, depending on whether the guest is toggling the memory decode
> * bit of the command register, or the enable bit of the ROM BAR
> register.
> + *
> + * For non-hardware domain we use guest physical addresses.
> */
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> unsigned long start = PFN_DOWN(bar->addr);
> unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
> + unsigned long start_guest = PFN_DOWN(bar->guest_addr);
> + unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
>
> if ( !bar->mem )
> continue;
> @@ -335,11 +354,25 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> continue;
> }
>
> - rc = rangeset_add_range(bar->mem, start, end);
> + /*
> + * Make sure that the guest set address has the same page offset
> + * as the physical address on the host or otherwise things won't
> work as
> + * expected.
> + */
> + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) )
> + {
> + gprintk(XENLOG_G_WARNING,
> + "%pp: Can't map BAR%d because of page offset mismatch:
> %lx vs %lx\n",
> + &pdev->sbdf, i, PAGE_OFFSET(bar->guest_addr),
> + PAGE_OFFSET(bar->addr));
> + return -EINVAL;
> + }
> +
> + rc = rangeset_add_range(bar->mem, start_guest, end_guest);
> if ( rc )
> {
> printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
> - start, end, rc);
> + start_guest, end_guest, rc);
> return rc;
> }
>
> @@ -351,12 +384,12 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> if ( rangeset_is_empty(prev_bar->mem) )
> continue;
>
> - rc = rangeset_remove_range(prev_bar->mem, start, end);
> + rc = rangeset_remove_range(prev_bar->mem, start_guest,
> end_guest);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> "%pp: failed to remove overlapping range [%lx, %lx]:
> %d\n",
> - &pdev->sbdf, start, end, rc);
> + &pdev->sbdf, start_guest, end_guest, rc);
> return rc;
> }
> }
> @@ -365,8 +398,8 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> /* Remove any MSIX regions if present. */
> for ( i = 0; msix && i < ARRAY_SIZE(msix->tables); i++ )
> {
> - unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
> - unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
> + unsigned long start = PFN_DOWN(vmsix_guest_table_addr(pdev->vpci,
> i));
> + unsigned long end = PFN_DOWN(vmsix_guest_table_addr(pdev->vpci, i) +
> vmsix_table_size(pdev->vpci, i) - 1);
>
> for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
> @@ -424,8 +457,8 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ )
> {
> const struct vpci_bar *remote_bar =
> &tmp->vpci->header.bars[i];
> - unsigned long start = PFN_DOWN(remote_bar->addr);
> - unsigned long end = PFN_DOWN(remote_bar->addr +
> + unsigned long start = PFN_DOWN(remote_bar->guest_addr);
> + unsigned long end = PFN_DOWN(remote_bar->guest_addr +
> remote_bar->size - 1);
>
> if ( !remote_bar->enabled )
> @@ -512,6 +545,8 @@ static void cf_check bar_write(
> struct vpci_bar *bar = data;
> bool hi = false;
>
> + ASSERT(is_hardware_domain(pdev->domain));
> +
> if ( bar->type == VPCI_BAR_MEM64_HI )
> {
> ASSERT(reg > PCI_BASE_ADDRESS_0);
> @@ -542,6 +577,10 @@ static void cf_check bar_write(
> */
> bar->addr &= ~(0xffffffffULL << (hi ? 32 : 0));
> bar->addr |= (uint64_t)val << (hi ? 32 : 0);
> + /*
> + * Update guest address as well, so hardware domain sees BAR identity
> mapped
> + */
> + bar->guest_addr = bar->addr;
>
> /* Make sure Xen writes back the same value for the BAR RO bits. */
> if ( !hi )
> @@ -793,6 +832,7 @@ static int cf_check init_bars(struct pci_dev *pdev)
> }
>
> bars[i].addr = addr;
> + bars[i].guest_addr = addr;
> bars[i].size = size;
> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH;
>
> @@ -814,6 +854,7 @@ static int cf_check init_bars(struct pci_dev *pdev)
> rom->type = VPCI_BAR_ROM;
> rom->size = size;
> rom->addr = addr;
> + rom->guest_addr = addr;
I think you are missing updating guest_addr also in rom_write()?
> header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) &
> PCI_ROM_ADDRESS_ENABLE;
>
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index 18a0eca3da..c39fab4832 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -205,6 +205,19 @@ static inline paddr_t vmsix_table_addr(const struct vpci
> *vpci, unsigned int nr)
> (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
> }
>
> +static inline paddr_t vmsix_guest_table_base(const struct vpci *vpci,
> + unsigned int nr)
> +{
> + return (vpci->header.bars[vpci->msix->tables[nr] &
> + PCI_MSIX_BIRMASK].guest_addr & PCI_BASE_ADDRESS_MEM_MASK);
> +}
> +
> +static inline paddr_t vmsix_guest_table_addr(const struct vpci *vpci,
> + unsigned int nr)
> +{
> + return vmsix_guest_table_base(vpci, nr) +
> + (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
> +}
Do we really need guest versions of this? I was expecting that for
guests those functions should always returns the guest addresses of
the MSI-X structures, which in the dom0 case matches the host address.
If we really need guest specific versions, it's worth mentioning in
the commit message explicitly why.
Thanks, Roger.
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |