[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] tools/xenstore: remove nodes owned by destroyed domain



commit 755d3f9debf8879448211fffb018f556136f6a79
Author:     Juergen Gross <jgross@xxxxxxxx>
AuthorDate: Tue Sep 13 07:35:12 2022 +0200
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Nov 1 13:05:44 2022 +0000

    tools/xenstore: remove nodes owned by destroyed domain
    
    In case a domain is removed from Xenstore, remove all nodes owned by
    it per default.
    
    This tackles the problem that nodes might be created by a domain
    outside its home path in Xenstore, leading to Xenstore hogging more
    and more memory. Domain quota don't work in this case if the guest is
    rebooting in between.
    
    Since XSA-322 ownership of such stale nodes is transferred to dom0,
    which is helping against unintended access, but not against OOM of
    Xenstore.
    
    As a fallback for weird cases add a Xenstore start parameter for
    keeping today's way to handle stale nodes, adding the risk of Xenstore
    hitting an OOM situation.
    
    This is part of XSA-419 / CVE-2022-42322.
    
    Fixes: 496306324d8d ("tools/xenstore: revoke access rights for removed 
domains")
    Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
    Reviewed-by: Julien Grall <jgrall@xxxxxxxxxx>
---
 tools/xenstore/xenstored_core.c   | 17 +++++---
 tools/xenstore/xenstored_core.h   |  4 ++
 tools/xenstore/xenstored_domain.c | 84 +++++++++++++++++++++++++++++----------
 tools/xenstore/xenstored_domain.h |  2 +-
 4 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bdc14679ad..13e48aaa73 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -80,6 +80,7 @@ static bool verbose = false;
 LIST_HEAD(connections);
 int tracefd = -1;
 static bool recovery = true;
+bool keep_orphans = false;
 static int reopen_log_pipe[2];
 static int reopen_log_pipe0_pollfd_idx = -1;
 char *tracefile = NULL;
@@ -753,7 +754,7 @@ struct node *read_node(struct connection *conn, const void 
*ctx,
        node->perms.p = hdr->perms;
        node->acc.domid = node->perms.p[0].id;
        node->acc.memory = data.dsize;
-       if (domain_adjust_node_perms(conn, node))
+       if (domain_adjust_node_perms(node))
                goto error;
 
        /* If owner is gone reset currently accounted memory size. */
@@ -796,7 +797,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, 
struct node *node,
        void *p;
        struct xs_tdb_record_hdr *hdr;
 
-       if (domain_adjust_node_perms(conn, node))
+       if (domain_adjust_node_perms(node))
                return errno;
 
        data.dsize = sizeof(*hdr)
@@ -1647,7 +1648,7 @@ static int delnode_sub(const void *ctx, struct connection 
*conn,
        return WALK_TREE_RM_CHILDENTRY;
 }
 
-static int _rm(struct connection *conn, const void *ctx, const char *name)
+int rm_node(struct connection *conn, const void *ctx, const char *name)
 {
        struct node *parent;
        char *parentname = get_parent(ctx, name);
@@ -1711,7 +1712,7 @@ static int do_rm(const void *ctx, struct connection *conn,
        if (streq(name, "/"))
                return EINVAL;
 
-       ret = _rm(conn, ctx, name);
+       ret = rm_node(conn, ctx, name);
        if (ret)
                return ret;
 
@@ -2618,6 +2619,8 @@ static void usage(void)
 "  -R, --no-recovery       to request that no recovery should be attempted 
when\n"
 "                          the store is corrupted (debug only),\n"
 "  -I, --internal-db       store database in memory, not on disk\n"
+"  -K, --keep-orphans      don't delete nodes owned by a domain when the\n"
+"                          domain is deleted (this is a security risk!)\n"
 "  -V, --verbose           to request verbose execution.\n");
 }
 
@@ -2642,6 +2645,7 @@ static struct option options[] = {
        { "timeout", 1, NULL, 'w' },
        { "no-recovery", 0, NULL, 'R' },
        { "internal-db", 0, NULL, 'I' },
+       { "keep-orphans", 0, NULL, 'K' },
        { "verbose", 0, NULL, 'V' },
        { "watch-nb", 1, NULL, 'W' },
 #ifndef NO_LIVE_UPDATE
@@ -2721,7 +2725,7 @@ int main(int argc, char *argv[])
        orig_argc = argc;
        orig_argv = argv;
 
-       while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:q:T:RVW:w:U",
+       while ((opt = getopt_long(argc, argv, "DE:F:HKNPS:t:A:M:Q:q:T:RVW:w:U",
                                  options, NULL)) != -1) {
                switch (opt) {
                case 'D':
@@ -2757,6 +2761,9 @@ int main(int argc, char *argv[])
                case 'I':
                        tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
                        break;
+               case 'K':
+                       keep_orphans = true;
+                       break;
                case 'V':
                        verbose = true;
                        break;
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index acb00ad969..37006d508d 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -240,6 +240,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, 
struct node *node,
 struct node *read_node(struct connection *conn, const void *ctx,
                       const char *name);
 
+/* Remove a node and its children. */
+int rm_node(struct connection *conn, const void *ctx, const char *name);
+
 void setup_structure(bool live_update);
 struct connection *new_connection(const struct interface_funcs *funcs);
 struct connection *get_connection_by_id(unsigned int conn_id);
@@ -284,6 +287,7 @@ extern int quota_req_outstanding;
 extern int quota_trans_nodes;
 extern int quota_memory_per_domain_soft;
 extern int quota_memory_per_domain_hard;
+extern bool keep_orphans;
 
 extern unsigned int timeout_watch_event_msec;
 
diff --git a/tools/xenstore/xenstored_domain.c 
b/tools/xenstore/xenstored_domain.c
index 98b401fdec..84b7817cd5 100644
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -227,10 +227,64 @@ static void unmap_interface(void *interface)
        xengnttab_unmap(*xgt_handle, interface, 1);
 }
 
+static int domain_tree_remove_sub(const void *ctx, struct connection *conn,
+                                 struct node *node, void *arg)
+{
+       struct domain *domain = arg;
+       TDB_DATA key;
+       int ret = WALK_TREE_OK;
+
+       if (node->perms.p[0].id != domain->domid)
+               return WALK_TREE_OK;
+
+       if (keep_orphans) {
+               set_tdb_key(node->name, &key);
+               domain->nbentry--;
+               node->perms.p[0].id = priv_domid;
+               node->acc.memory = 0;
+               domain_entry_inc(NULL, node);
+               if (write_node_raw(NULL, &key, node, true)) {
+                       /* That's unfortunate. We only can try to continue. */
+                       syslog(LOG_ERR,
+                              "error when moving orphaned node %s to dom0\n",
+                              node->name);
+               } else
+                       trace("orphaned node %s moved to dom0\n", node->name);
+       } else {
+               if (rm_node(NULL, ctx, node->name)) {
+                       /* That's unfortunate. We only can try to continue. */
+                       syslog(LOG_ERR,
+                              "error when deleting orphaned node %s\n",
+                              node->name);
+               } else
+                       trace("orphaned node %s deleted\n", node->name);
+
+               /* Skip children in all cases in order to avoid more errors. */
+               ret = WALK_TREE_SKIP_CHILDREN;
+       }
+
+       return domain->nbentry > 0 ? ret : WALK_TREE_SUCCESS_STOP;
+}
+
+static void domain_tree_remove(struct domain *domain)
+{
+       int ret;
+       struct walk_funcs walkfuncs = { .enter = domain_tree_remove_sub };
+
+       if (domain->nbentry > 0) {
+               ret = walk_node_tree(domain, NULL, "/", &walkfuncs, domain);
+               if (ret == WALK_TREE_ERROR_STOP)
+                       syslog(LOG_ERR,
+                              "error when looking for orphaned nodes\n");
+       }
+}
+
 static int destroy_domain(void *_domain)
 {
        struct domain *domain = _domain;
 
+       domain_tree_remove(domain);
+
        list_del(&domain->list);
 
        if (!domain->introduced)
@@ -883,15 +937,15 @@ int domain_entry_inc(struct connection *conn, struct node 
*node)
        struct domain *d;
        unsigned int domid;
 
-       if (!conn)
+       if (!node->perms.p)
                return 0;
 
-       domid = node->perms.p ? node->perms.p[0].id : conn->id;
+       domid = node->perms.p[0].id;
 
-       if (conn->transaction) {
+       if (conn && conn->transaction) {
                transaction_entry_inc(conn->transaction, domid);
        } else {
-               d = (domid == conn->id && conn->domain) ? conn->domain
+               d = (conn && domid == conn->id && conn->domain) ? conn->domain
                    : find_or_alloc_existing_domain(domid);
                if (d)
                        d->nbentry++;
@@ -952,23 +1006,11 @@ int domain_alloc_permrefs(struct node_perms *perms)
  * Remove permissions for no longer existing domains in order to avoid a new
  * domain with the same domid inheriting the permissions.
  */
-int domain_adjust_node_perms(struct connection *conn, struct node *node)
+int domain_adjust_node_perms(struct node *node)
 {
        unsigned int i;
        int ret;
 
-       ret = chk_domain_generation(node->perms.p[0].id, node->generation);
-
-       /* If the owner doesn't exist any longer give it to priv domain. */
-       if (!ret) {
-               /*
-                * In theory we'd need to update the number of dom0 nodes here,
-                * but we could be called for a read of the node. So better
-                * avoid the risk to overflow the node count of dom0.
-                */
-               node->perms.p[0].id = priv_domid;
-       }
-
        for (i = 1; i < node->perms.num; i++) {
                if (node->perms.p[i].perms & XS_PERM_IGNORE)
                        continue;
@@ -986,15 +1028,15 @@ void domain_entry_dec(struct connection *conn, struct 
node *node)
        struct domain *d;
        unsigned int domid;
 
-       if (!conn)
+       if (!node->perms.p)
                return;
 
        domid = node->perms.p ? node->perms.p[0].id : conn->id;
 
-       if (conn->transaction) {
+       if (conn && conn->transaction) {
                transaction_entry_dec(conn->transaction, domid);
        } else {
-               d = (domid == conn->id && conn->domain) ? conn->domain
+               d = (conn && domid == conn->id && conn->domain) ? conn->domain
                    : find_domain_struct(domid);
                if (d) {
                        d->nbentry--;
@@ -1113,7 +1155,7 @@ int domain_memory_add(unsigned int domid, int mem, bool 
no_quota_check)
                 * exist, as accounting is done either for a domain related to
                 * the current connection, or for the domain owning a node
                 * (which is always existing, as the owner of the node is
-                * tested to exist and replaced by domid 0 if not).
+                * tested to exist and deleted or replaced by domid 0 if not).
                 * So not finding the related domain MUST be an error in the
                 * data base.
                 */
diff --git a/tools/xenstore/xenstored_domain.h 
b/tools/xenstore/xenstored_domain.h
index 7fe0a21d9e..b38c82991d 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -62,7 +62,7 @@ const char *get_implicit_path(const struct connection *conn);
 bool domain_is_unprivileged(struct connection *conn);
 
 /* Remove node permissions for no longer existing domains. */
-int domain_adjust_node_perms(struct connection *conn, struct node *node);
+int domain_adjust_node_perms(struct node *node);
 int domain_alloc_permrefs(struct node_perms *perms);
 
 /* Quota manipulation */
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.