[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 07/15] xen/overlay: Enable device tree overlay assignment to running domains


  • To: <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Henry Wang <xin.wang2@xxxxxxx>
  • Date: Wed, 24 Apr 2024 11:34:41 +0800
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass (sender ip is 165.204.84.17) smtp.rcpttodomain=lists.xenproject.org smtp.mailfrom=amd.com; dmarc=pass (p=quarantine sp=quarantine pct=100) action=none header.from=amd.com; dkim=none (message not signed); arc=none (0)
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=DTkFoRXnpKJLc0+QsW9+qktSbxTVG+nGYUGcq/+U60w=; b=PJFYV6JxgxLJIcKT3wDdeshkOvzFUTSdtbWqmiBYNmvORLnI+8vKrVmWgUECoiYygBD3BDeKyGOrIDH5P5XK6Yg+uLVXzRl2ZaAQRMqCI6+ZzoQilILVfsUOoGKREpvYlUMJjWbqDLV1N4EwkZWQCbc3KyyroHE/XwTCk8U3b8zVDlq1OieDi4ekDxOV3VPLI/PVinRw7tf3Ch540rPyz6asBXDOviX75DCJWdgMScJkjqn7r1nuHv4NIAhLDcldfbHh6PqrG81U5e2cV8PquMdYd+Vs4s9G9h0MTXb5IkMlllq+Z1okRC2DwbGTRdHF62pqw5WXWfx7zpoANcYg/Q==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=MQ6JSNrzJpeerxjmsbGM+jY1aBs+y+YCk4oE7pRxuTHbAX3LTsdlx4v8TgkZP3SbeNRh2M8WG8aL827wzo2R2n5aFaIDD3c2UDB4B9ElW2EDJ89HAihAiRTHBKtpvcSYgBTNS2m68RGyqY8zBR3mRZ3goc+4UkEUg+/A/LtRvH3o9ix1ELlMZTNkSiDL4VEiOlO+6x1kgmdVwFjqLpoQ3LhiVAmZXOoqOPhw2PycSBgcNPlCFqRc2vZoAPUKd77wigfZ2uM9U2dM7NLsJv6SMgYWAqkSBH0a70uCqq/vLvPvFV8QVYaCIEGS59NwHOTPwLXgzEY2vFUi9DN2wVMyww==
  • Cc: Vikram Garhwal <fnu.vikram@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Bertrand Marquis <bertrand.marquis@xxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, "Volodymyr Babchuk" <Volodymyr_Babchuk@xxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, "Jan Beulich" <jbeulich@xxxxxxxx>, Vikram Garhwal <vikram.garhwal@xxxxxxx>, "Henry Wang" <xin.wang2@xxxxxxx>
  • Delivery-date: Wed, 24 Apr 2024 03:43:57 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

From: Vikram Garhwal <fnu.vikram@xxxxxxxxxx>

Following changes are done to enable dtbo assignment to a running vm with given
domain_id:
1. Support for irq map and unmap for running domain. We store the IRQ nums for
    each overlay and while removing those IRQ nums are removed.
2. Support iommu assign/reassign to running domains.
3. Added "map_nodes" options to support two modes:
    a. Add the nodes in Xen device tree and map the nodes to given VM
    b. Just add them in xen device tree without mapping.

Change device.c: handle_device() function with input domain_mapping flag.

Signed-off-by: Vikram Garhwal <vikram.garhwal@xxxxxxx>
Signed-off-by: Michal Orzel <michal.orzel@xxxxxxx>
Signed-off-by: Henry Wang <xin.wang2@xxxxxxx>
---
 xen/arch/arm/device.c            |   8 +-
 xen/arch/arm/domain_build.c      |   2 +-
 xen/arch/arm/include/asm/setup.h |   3 +-
 xen/common/dt-overlay.c          | 218 ++++++++++++++++++++++++++-----
 xen/include/public/sysctl.h      |   4 +-
 5 files changed, 199 insertions(+), 36 deletions(-)

diff --git a/xen/arch/arm/device.c b/xen/arch/arm/device.c
index 3e02cff008..df5035f9a8 100644
--- a/xen/arch/arm/device.c
+++ b/xen/arch/arm/device.c
@@ -317,7 +317,8 @@ static int map_device_children(const struct dt_device_node 
*dev,
  *  - Map the IRQs and iomem regions to DOM0
  */
 int handle_device(struct domain *d, struct dt_device_node *dev, p2m_type_t 
p2mt,
-                  struct rangeset *iomem_ranges, struct rangeset *irq_ranges)
+                  struct rangeset *iomem_ranges, struct rangeset *irq_ranges,
+                  bool device_mapping)
 {
     unsigned int naddr;
     unsigned int i;
@@ -334,9 +335,10 @@ int handle_device(struct domain *d, struct dt_device_node 
*dev, p2m_type_t p2mt,
     struct map_range_data mr_data = {
         .d = d,
         .p2mt = p2mt,
-        .skip_mapping = !own_device ||
+        .skip_mapping = (!own_device ||
                         (is_pci_passthrough_enabled() &&
-                        (device_get_class(dev) == DEVICE_PCI_HOSTBRIDGE)),
+                        (device_get_class(dev) == DEVICE_PCI_HOSTBRIDGE))) &&
+                        !device_mapping,
         .iomem_ranges = iomem_ranges,
         .irq_ranges = irq_ranges
     };
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 54232ed4cb..a525aed175 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -1699,7 +1699,7 @@ static int __init handle_node(struct domain *d, struct 
kernel_info *kinfo,
                "WARNING: Path %s is reserved, skip the node as we may re-use 
the path.\n",
                path);
 
-    res = handle_device(d, node, p2mt, NULL, NULL);
+    res = handle_device(d, node, p2mt, NULL, NULL, false);
     if ( res)
         return res;
 
diff --git a/xen/arch/arm/include/asm/setup.h b/xen/arch/arm/include/asm/setup.h
index d15a88d2e0..d6d8c28d02 100644
--- a/xen/arch/arm/include/asm/setup.h
+++ b/xen/arch/arm/include/asm/setup.h
@@ -172,7 +172,8 @@ u32 device_tree_get_u32(const void *fdt, int node,
                         const char *prop_name, u32 dflt);
 
 int handle_device(struct domain *d, struct dt_device_node *dev, p2m_type_t 
p2mt,
-                  struct rangeset *iomem_ranges, struct rangeset *irq_ranges);
+                  struct rangeset *iomem_ranges, struct rangeset *irq_ranges,
+                  bool device_mapping);
 
 int map_device_irqs_to_domain(struct domain *d, struct dt_device_node *dev,
                               bool need_mapping, struct rangeset *irq_ranges);
diff --git a/xen/common/dt-overlay.c b/xen/common/dt-overlay.c
index 39e4ba59dd..8840ea756b 100644
--- a/xen/common/dt-overlay.c
+++ b/xen/common/dt-overlay.c
@@ -356,24 +356,133 @@ static int overlay_get_nodes_info(const void *fdto, char 
**nodes_full_path)
     return 0;
 }
 
+static int remove_all_irqs(struct rangeset *irq_ranges,
+                           struct domain *d, bool domain_mapping)
+{
+    struct range *x;
+    int rc = 0;
+
+    read_lock(&irq_ranges->lock);
+
+    for ( x = first_range(irq_ranges); x != NULL;
+          x = next_range(irq_ranges, x) )
+    {
+        /*
+         * Handle invalid use cases 1:
+         * Where user assigned the nodes to dom0 along with their irq
+         * mappings but now just wants to remove the nodes entry from Xen 
device
+         * device tree without unmapping the irq.
+         */
+        if ( !domain_mapping && vgic_get_hw_irq_desc(d, NULL, x->s) )
+        {
+            printk(XENLOG_ERR "Removing node from device tree without 
releasing it's IRQ/IOMMU is not allowed\n");
+            rc = -EINVAL;
+            goto out;
+        }
+
+        /*
+         * IRQ should always have access unless there are duplication of
+         * of irqs in device tree. There are few cases of xen device tree
+         * where there are duplicate interrupts for the same node.
+         */
+        if (!irq_access_permitted(d, x->s))
+            continue;
+        /*
+         * TODO: We don't handle shared IRQs for now. So, it is assumed that
+         * the IRQs was not shared with another domain.
+         */
+        rc = irq_deny_access(d, x->s);
+        if ( rc )
+        {
+            printk(XENLOG_ERR "unable to revoke access for irq %ld\n", x->s);
+            goto out;
+        }
+
+        if ( domain_mapping )
+        {
+            rc = release_guest_irq(d, x->s);
+            if ( rc )
+            {
+                printk(XENLOG_ERR "unable to release irq %ld\n", x->s);
+                goto out;
+            }
+        }
+    }
+
+out:
+    read_unlock(&irq_ranges->lock);
+    return rc;
+}
+
+static int remove_all_iomems(struct rangeset *iomem_ranges,
+                             struct domain *d,
+                             bool domain_mapping)
+{
+    struct range *x;
+    int rc = 0;
+    p2m_type_t t;
+    mfn_t mfn;
+
+    read_lock(&iomem_ranges->lock);
+
+    for ( x = first_range(iomem_ranges); x != NULL;
+          x = next_range(iomem_ranges, x) )
+    {
+        mfn = p2m_lookup(d, _gfn(x->s), &t);
+        if ( mfn_x(mfn) == 0 || mfn_x(mfn) == ~0UL )
+        {
+            rc = -EINVAL;
+            goto out;
+        }
+
+        rc = iomem_deny_access(d, x->s, x->e);
+        if ( rc )
+        {
+            printk(XENLOG_ERR "Unable to remove dom%d access to %#lx - %#lx\n",
+                   d->domain_id,
+                   x->s, x->e);
+            goto out;
+        }
+
+        rc = unmap_mmio_regions(d, _gfn(x->s), x->e - x->s,
+                                _mfn(x->s));
+        if ( rc )
+            goto out;
+    }
+
+out:
+    read_unlock(&iomem_ranges->lock);
+    return rc;
+}
+
 /* Check if node itself can be removed and remove node from IOMMU. */
-static int remove_node_resources(struct dt_device_node *device_node)
+static int remove_node_resources(struct dt_device_node *device_node,
+                                 struct domain *d, bool domain_mapping)
 {
     int rc = 0;
     unsigned int len;
     domid_t domid;
 
-    domid = dt_device_used_by(device_node);
+    if ( !d )
+    {
+        domid = dt_device_used_by(device_node);
 
-    dt_dprintk("Checking if node %s is used by any domain\n",
-               device_node->full_name);
+        /*
+         * We also check if device is assigned to DOMID_IO as when a domain
+         * is destroyed device is assigned to DOMID_IO or for the case when
+         * device was never mapped to a running domain.
+         */
+        if ( domid != hardware_domain->domain_id && domid != DOMID_IO )
+        {
+            printk(XENLOG_ERR "Device %s is being assigned to %u. Device is 
assigned to %d\n",
+                   device_node->full_name, DOMID_IO, domid);
+            return -EINVAL;
+        }
 
-    /* Remove the node if only it's assigned to hardware domain or domain io. 
*/
-    if ( domid != hardware_domain->domain_id && domid != DOMID_IO )
-    {
-        printk(XENLOG_ERR "Device %s is being used by domain %u. Removing 
nodes failed\n",
-               device_node->full_name, domid);
-        return -EINVAL;
+        /*
+         * Device is assigned to hardware domain.
+         */
+         d = hardware_domain;
     }
 
     /* Check if iommu property exists. */
@@ -382,6 +491,16 @@ static int remove_node_resources(struct dt_device_node 
*device_node)
         if ( dt_device_is_protected(device_node) )
         {
             write_lock(&dt_host_lock);
+            if ( domain_mapping && !list_empty(&device_node->domain_list) )
+            {
+                rc = iommu_deassign_dt_device(d, device_node);
+                if ( rc < 0 )
+                {
+                    write_unlock(&dt_host_lock);
+                    return rc;
+                }
+            }
+
             rc = iommu_remove_dt_device(device_node);
             if ( rc < 0 )
             {
@@ -397,7 +516,8 @@ static int remove_node_resources(struct dt_device_node 
*device_node)
 
 /* Remove all descendants from IOMMU. */
 static int
-remove_descendant_nodes_resources(const struct dt_device_node *device_node)
+remove_descendant_nodes_resources(const struct dt_device_node *device_node,
+                                  struct domain *d, bool domain_mapping)
 {
     int rc = 0;
     struct dt_device_node *child_node;
@@ -407,12 +527,13 @@ remove_descendant_nodes_resources(const struct 
dt_device_node *device_node)
     {
         if ( child_node->child )
         {
-            rc = remove_descendant_nodes_resources(child_node);
+            rc = remove_descendant_nodes_resources(child_node, d,
+                                                   domain_mapping);
             if ( rc )
                 return rc;
         }
 
-        rc = remove_node_resources(child_node);
+        rc = remove_node_resources(child_node, d, domain_mapping);
         if ( rc )
             return rc;
     }
@@ -421,12 +542,13 @@ remove_descendant_nodes_resources(const struct 
dt_device_node *device_node)
 }
 
 /* Remove nodes from dt_host. */
-static int remove_nodes(const struct overlay_track *tracker)
+static int remove_nodes(const struct overlay_track *tracker,
+                        struct domain *d)
 {
     int rc = 0;
     struct dt_device_node *overlay_node;
     unsigned int j;
-    struct domain *d = hardware_domain;
+    bool domain_mapping = (d != NULL);
 
     for ( j = 0; j < tracker->num_nodes; j++ )
     {
@@ -434,11 +556,19 @@ static int remove_nodes(const struct overlay_track 
*tracker)
         if ( overlay_node == NULL )
             return -EINVAL;
 
-        rc = remove_descendant_nodes_resources(overlay_node);
+        rc = remove_descendant_nodes_resources(overlay_node, d, 
domain_mapping);
+        if ( rc )
+            return rc;
+
+        rc = remove_node_resources(overlay_node, d, domain_mapping);
+        if ( rc )
+            return rc;
+
+        rc = remove_all_irqs(tracker->irq_ranges, d, domain_mapping);
         if ( rc )
             return rc;
 
-        rc = remove_node_resources(overlay_node);
+        rc = remove_all_iomems(tracker->iomem_ranges, d, domain_mapping);
         if ( rc )
             return rc;
 
@@ -481,7 +611,8 @@ static int remove_nodes(const struct overlay_track *tracker)
  * while destroying the domain.
  */
 static long handle_remove_overlay_nodes(const void *overlay_fdt,
-                                        uint32_t overlay_fdt_size)
+                                        uint32_t overlay_fdt_size,
+                                        struct domain *d)
 {
     int rc;
     struct overlay_track *entry, *temp, *track;
@@ -520,7 +651,7 @@ static long handle_remove_overlay_nodes(const void 
*overlay_fdt,
 
     }
 
-    rc = remove_nodes(entry);
+    rc = remove_nodes(entry, d);
     if ( rc )
     {
         printk(XENLOG_ERR "Removing node failed\n");
@@ -560,12 +691,21 @@ static void free_nodes_full_path(unsigned int num_nodes, 
char **nodes_full_path)
     xfree(nodes_full_path);
 }
 
-static long add_nodes(struct overlay_track *tr, char **nodes_full_path)
+static long add_nodes(struct overlay_track *tr, char **nodes_full_path,
+                      struct domain *d)
 
 {
     int rc;
     unsigned int j;
     struct dt_device_node *overlay_node;
+    bool domain_mapping = (d != NULL);
+
+    /*
+     * If domain is NULL, then we add the devices into hardware domain and skip
+     * IRQs/IOMMUs mapping.
+     */
+    if ( d == NULL )
+        d = hardware_domain;
 
     for ( j = 0; j < tr->num_nodes; j++ )
     {
@@ -609,8 +749,6 @@ static long add_nodes(struct overlay_track *tr, char 
**nodes_full_path)
             return rc;
         }
 
-        write_unlock(&dt_host_lock);
-
         prev_node->allnext = next_node;
 
         overlay_node = dt_find_node_by_path(overlay_node->full_name);
@@ -618,18 +756,23 @@ static long add_nodes(struct overlay_track *tr, char 
**nodes_full_path)
         {
             /* Sanity check. But code will never come here. */
             ASSERT_UNREACHABLE();
+            write_unlock(&dt_host_lock);
             return -EFAULT;
         }
 
-        rc = handle_device(hardware_domain, overlay_node, p2m_mmio_direct_c,
+        rc = handle_device(d, overlay_node, p2m_mmio_direct_c,
                            tr->iomem_ranges,
-                           tr->irq_ranges);
+                           tr->irq_ranges,
+                           domain_mapping);
         if ( rc )
         {
             printk(XENLOG_ERR "Adding IRQ and IOMMU failed\n");
+            write_unlock(&dt_host_lock);
             return rc;
         }
 
+        write_unlock(&dt_host_lock);
+
         /* Keep overlay_node address in tracker. */
         tr->nodes_address[j] = (unsigned long)overlay_node;
     }
@@ -643,7 +786,8 @@ static long add_nodes(struct overlay_track *tr, char 
**nodes_full_path)
  * to hardware domain done by handle_node().
  */
 static long handle_add_overlay_nodes(void *overlay_fdt,
-                                     uint32_t overlay_fdt_size)
+                                     uint32_t overlay_fdt_size,
+                                     struct domain *d)
 {
     int rc;
     unsigned int j;
@@ -788,7 +932,7 @@ static long handle_add_overlay_nodes(void *overlay_fdt,
         goto err;
     }
 
-    rc = add_nodes(tr, nodes_full_path);
+    rc = add_nodes(tr, nodes_full_path, d);
     if ( rc )
     {
         printk(XENLOG_ERR "Adding nodes failed. Removing the partially added 
nodes.\n");
@@ -810,7 +954,7 @@ static long handle_add_overlay_nodes(void *overlay_fdt,
  */
  remove_node:
     tr->num_nodes = j;
-    rc = remove_nodes(tr);
+    rc = remove_nodes(tr, d);
 
     if ( rc )
     {
@@ -855,6 +999,7 @@ long dt_overlay_sysctl(struct xen_sysctl_dt_overlay *op)
 {
     long ret;
     void *overlay_fdt;
+    struct domain *d = NULL;
 
     if ( op->overlay_op != XEN_SYSCTL_DT_OVERLAY_ADD &&
          op->overlay_op != XEN_SYSCTL_DT_OVERLAY_REMOVE )
@@ -863,7 +1008,7 @@ long dt_overlay_sysctl(struct xen_sysctl_dt_overlay *op)
     if ( op->overlay_fdt_size == 0 || op->overlay_fdt_size > KB(500) )
         return -EINVAL;
 
-    if ( op->pad[0] || op->pad[1] || op->pad[2] )
+    if ( op->pad[0] || op->pad[1] )
         return -EINVAL;
 
     overlay_fdt = xmalloc_bytes(op->overlay_fdt_size);
@@ -880,13 +1025,26 @@ long dt_overlay_sysctl(struct xen_sysctl_dt_overlay *op)
         return -EFAULT;
     }
 
+    /*
+     * If domain_mapping == false, domain_id can be ignored as we don't need to
+     * map resource to any domain.
+     *
+     * If domain_mapping == true, domain_id, get the target domain for the
+     * mapping.
+     */
+    if ( op->domain_mapping )
+        d = rcu_lock_domain_by_id(op->domain_id);
+
     if ( op->overlay_op == XEN_SYSCTL_DT_OVERLAY_REMOVE )
-        ret = handle_remove_overlay_nodes(overlay_fdt, op->overlay_fdt_size);
+        ret = handle_remove_overlay_nodes(overlay_fdt, op->overlay_fdt_size, 
d);
     else
-        ret = handle_add_overlay_nodes(overlay_fdt, op->overlay_fdt_size);
+        ret = handle_add_overlay_nodes(overlay_fdt, op->overlay_fdt_size, d);
 
     xfree(overlay_fdt);
 
+    if ( op->domain_mapping )
+        rcu_unlock_domain(d);
+
     return ret;
 }
 
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 9b19679cae..f5435f8890 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -1197,7 +1197,9 @@ struct xen_sysctl_dt_overlay {
 #define XEN_SYSCTL_DT_OVERLAY_ADD                   1
 #define XEN_SYSCTL_DT_OVERLAY_REMOVE                2
     uint8_t overlay_op;                     /* IN: Add or remove. */
-    uint8_t pad[3];                         /* IN: Must be zero. */
+    bool domain_mapping;                    /* IN: True of False. */
+    uint8_t pad[2];                         /* IN: Must be zero. */
+    uint32_t domain_id;
 };
 #endif
 
-- 
2.34.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.