[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 11/11] xen/arm: Translate virtual PCI bus topology for guests



Hi Oleksandr,

On 30/09/2021 08:52, Oleksandr Andrushchenko wrote:
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>

There are three originators for the PCI configuration space access:
1. The domain that owns physical host bridge: MMIO handlers are
there so we can update vPCI register handlers with the values
written by the hardware domain, e.g. physical view of the registers
vs guest's view on the configuration space.
2. Guest access to the passed through PCI devices: we need to properly
map virtual bus topology to the physical one, e.g. pass the configuration
space access to the corresponding physical devices.
3. Emulated host PCI bridge access. It doesn't exist in the physical
topology, e.g. it can't be mapped to some physical host bridge.
So, all access to the host bridge itself needs to be trapped and
emulated.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>

---
Since v2:
  - pass struct domain instead of struct vcpu
  - constify arguments where possible
  - gate relevant code with CONFIG_HAS_VPCI_GUEST_SUPPORT
New in v2
---
  xen/arch/arm/domain.c         |  1 +
  xen/arch/arm/vpci.c           | 86 +++++++++++++++++++++++++++++++----
  xen/arch/arm/vpci.h           |  3 ++
  xen/drivers/passthrough/pci.c | 25 ++++++++++
  xen/include/asm-arm/pci.h     |  1 +
  xen/include/xen/pci.h         |  1 +
  xen/include/xen/sched.h       |  2 +
  7 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index fa6fcc5e467c..095671742ad8 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -797,6 +797,7 @@ void arch_domain_destroy(struct domain *d)
                         get_order_from_bytes(d->arch.efi_acpi_len));
  #endif
      domain_io_free(d);
+    domain_vpci_free(d);
  }
void arch_domain_shutdown(struct domain *d)
diff --git a/xen/arch/arm/vpci.c b/xen/arch/arm/vpci.c
index 5d6c29c8dcd9..26ec2fa7cf2d 100644
--- a/xen/arch/arm/vpci.c
+++ b/xen/arch/arm/vpci.c
@@ -17,6 +17,14 @@
#define REGISTER_OFFSET(addr) ( (addr) & 0x00000fff) +struct vpci_mmio_priv {
+    /*
+     * Set to true if the MMIO handlers were set up for the emulated
+     * ECAM host PCI bridge.
+     */
+    bool is_virt_ecam;
+};
+
  /* Do some sanity checks. */
  static bool vpci_mmio_access_allowed(unsigned int reg, unsigned int len)
  {
@@ -38,6 +46,7 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
      pci_sbdf_t sbdf;
      unsigned long data = ~0UL;
      unsigned int size = 1U << info->dabt.size;
+    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;

This cast is unnecessary. Same...

sbdf.sbdf = MMCFG_BDF(info->gpa);
      reg = REGISTER_OFFSET(info->gpa);
@@ -45,6 +54,13 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
      if ( !vpci_mmio_access_allowed(reg, size) )
          return 0;
+ /*
+     * For the passed through devices we need to map their virtual SBDF
+     * to the physical PCI device being passed through.
+     */
+    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v->domain, &sbdf) 
)
+            return 1;
+
      data = vpci_read(sbdf, reg, min(4u, size));
      if ( size == 8 )
          data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;
@@ -61,6 +77,7 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
      pci_sbdf_t sbdf;
      unsigned long data = r;
      unsigned int size = 1U << info->dabt.size;
+    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;

... here. But is it meant to be modified? If not, then I think you want to add a const in both cases.

sbdf.sbdf = MMCFG_BDF(info->gpa);
      reg = REGISTER_OFFSET(info->gpa);
@@ -68,6 +85,13 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
      if ( !vpci_mmio_access_allowed(reg, size) )
          return 0;
+ /*
+     * For the passed through devices we need to map their virtual SBDF
+     * to the physical PCI device being passed through.
+     */
+    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v->domain, &sbdf) 
)
+            return 1;
+
      vpci_write(sbdf, reg, min(4u, size), data);
      if ( size == 8 )
          vpci_write(sbdf, reg + 4, 4, data >> 32);
@@ -80,13 +104,48 @@ static const struct mmio_handler_ops vpci_mmio_handler = {
      .write = vpci_mmio_write,
  };
+/*
+ * There are three  originators for the PCI configuration space access:
+ * 1. The domain that owns physical host bridge: MMIO handlers are
+ *    there so we can update vPCI register handlers with the values
+ *    written by the hardware domain, e.g. physical view of the registers/
+ *    configuration space.
+ * 2. Guest access to the passed through PCI devices: we need to properly
+ *    map virtual bus topology to the physical one, e.g. pass the configuration
+ *    space access to the corresponding physical devices.
+ * 3. Emulated host PCI bridge access. It doesn't exist in the physical
+ *    topology, e.g. it can't be mapped to some physical host bridge.
+ *    So, all access to the host bridge itself needs to be trapped and
+ *    emulated.
+ */
  static int vpci_setup_mmio_handler(struct domain *d,
                                     struct pci_host_bridge *bridge)
  {
-    struct pci_config_window *cfg = bridge->cfg;
+    struct vpci_mmio_priv *priv;
+
+    priv = xzalloc(struct vpci_mmio_priv);
+    if ( !priv )
+        return -ENOMEM;
+
+    priv->is_virt_ecam = !is_hardware_domain(d);
- register_mmio_handler(d, &vpci_mmio_handler,
-                          cfg->phys_addr, cfg->size, NULL);
+    if ( is_hardware_domain(d) )
+    {
+        struct pci_config_window *cfg = bridge->cfg;
+
+        bridge->mmio_priv = priv;
+        register_mmio_handler(d, &vpci_mmio_handler,
+                              cfg->phys_addr, cfg->size,
+                              priv);
+    }
+    else
+    {
+        d->vpci_mmio_priv = priv;

Something feels odd to me in this code. The if ( !is_hardware_domain(d) ) part seems to suggest that this can be called on multiple bridges. But here you are directly assigning priv to d->vpci_mmio_priv.

The call...

+        /* Guest domains use what is programmed in their device tree. */
+        register_mmio_handler(d, &vpci_mmio_handler,
+                              GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE,
+                              priv);
+    }
      return 0;
  }
@@ -95,14 +154,25 @@ int domain_vpci_init(struct domain *d)
      if ( !has_vpci(d) )
          return 0;
+ return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);

... here seems to confirm that you may (in theory) have multiple bridges. So the 'else' would want some rework to avoid assuming a single bridge.

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 5b963d75d1ba..b7dffb769cfd 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -889,6 +889,31 @@ int pci_remove_virtual_device(struct domain *d, const 
struct pci_dev *pdev)
      xfree(vdev);
      return 0;
  }
+
+/*
+ * Find the physical device which is mapped to the virtual device
+ * and translate virtual SBDF to the physical one.
+ */
+bool pci_translate_virtual_device(const struct domain *d, pci_sbdf_t *sbdf)
+{
+    struct vpci_dev *vdev;
+    bool found = false;
+
+    pcidevs_lock();
+    list_for_each_entry ( vdev, &d->vdev_list, list )

I haven't looked at the rest of the series yet. But I am a bit concerned to see code that iterates through a list accessible by the guest. 1) What safety mechanism do we have in place to ensure that the list is going to be small? 2) If there is a limit, do we have any documentation on top of this limit to make clear this can't be bumped without removing the list?

Cheers,

--
Julien Grall



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.