[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] tools/xenstore: limit outstanding requests



commit 36de433a273f55d614c83b89c9a8972287a1e475
Author:     Juergen Gross <jgross@xxxxxxxx>
AuthorDate: Tue Sep 13 07:35:08 2022 +0200
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Nov 1 13:05:44 2022 +0000

    tools/xenstore: limit outstanding requests
    
    Add another quota for limiting the number of outstanding requests of a
    guest. As the way to specify quotas on the command line is becoming
    rather nasty, switch to a new scheme using [--quota|-Q] <what>=<val>
    allowing to add more quotas in future easily.
    
    Set the default value to 20 (basically a random value not seeming to
    be too high or too low).
    
    A request is said to be outstanding if any message generated by this
    request (the direct response plus potential watch events) is not yet
    completely stored into a ring buffer. The initial watch event sent as
    a result of registering a watch is an exception.
    
    Note that across a live update the relation to buffered watch events
    for other domains is lost.
    
    Use talloc_zero() for allocating the domain structure in order to have
    all per-domain quota zeroed initially.
    
    This is part of XSA-326 / CVE-2022-42312.
    
    Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
    Acked-by: Julien Grall <jgrall@xxxxxxxxxx>
---
 tools/xenstore/xenstored_core.c   | 88 +++++++++++++++++++++++++++++++++++++--
 tools/xenstore/xenstored_core.h   | 20 ++++++++-
 tools/xenstore/xenstored_domain.c | 38 ++++++++++++++---
 tools/xenstore/xenstored_domain.h |  3 ++
 tools/xenstore/xenstored_watch.c  | 15 +++++--
 5 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index cce02f24b5..54e6add1a1 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -107,6 +107,7 @@ int quota_max_entry_size = 2048; /* 2K */
 int quota_max_transaction = 10;
 int quota_nb_perms_per_node = 5;
 int quota_max_path_len = XENSTORE_REL_PATH_MAX;
+int quota_req_outstanding = 20;
 
 unsigned int timeout_watch_event_msec = 20000;
 
@@ -219,12 +220,24 @@ static uint64_t get_now_msec(void)
        return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
 }
 
+/*
+ * Remove a struct buffered_data from the list of outgoing data.
+ * A struct buffered_data related to a request having caused watch events to be
+ * sent is kept until all those events have been written out.
+ * Each watch event is referencing the related request via pend.req, while the
+ * number of watch events caused by a request is kept in pend.ref.event_cnt
+ * (those two cases are mutually exclusive, so the two fields can share memory
+ * via a union).
+ * The struct buffered_data is freed only if no related watch event is
+ * referencing it. The related return data can be freed right away.
+ */
 static void free_buffered_data(struct buffered_data *out,
                               struct connection *conn)
 {
        struct buffered_data *req;
 
        list_del(&out->list);
+       out->on_out_list = false;
 
        /*
         * Update conn->timeout_msec with the next found timeout value in the
@@ -240,6 +253,30 @@ static void free_buffered_data(struct buffered_data *out,
                }
        }
 
+       if (out->hdr.msg.type == XS_WATCH_EVENT) {
+               req = out->pend.req;
+               if (req) {
+                       req->pend.ref.event_cnt--;
+                       if (!req->pend.ref.event_cnt && !req->on_out_list) {
+                               if (req->on_ref_list) {
+                                       domain_outstanding_domid_dec(
+                                               req->pend.ref.domid);
+                                       list_del(&req->list);
+                               }
+                               talloc_free(req);
+                       }
+               }
+       } else if (out->pend.ref.event_cnt) {
+               /* Hang out off from conn. */
+               talloc_steal(NULL, out);
+               if (out->buffer != out->default_buffer)
+                       talloc_free(out->buffer);
+               list_add(&out->list, &conn->ref_list);
+               out->on_ref_list = true;
+               return;
+       } else
+               domain_outstanding_dec(conn);
+
        talloc_free(out);
 }
 
@@ -401,6 +438,7 @@ int delay_request(struct connection *conn, struct 
buffered_data *in,
 static int destroy_conn(void *_conn)
 {
        struct connection *conn = _conn;
+       struct buffered_data *req;
 
        /* Flush outgoing if possible, but don't block. */
        if (!conn->domain) {
@@ -414,6 +452,11 @@ static int destroy_conn(void *_conn)
                                break;
                close(conn->fd);
        }
+
+       conn_free_buffered_data(conn);
+       list_for_each_entry(req, &conn->ref_list, list)
+               req->on_ref_list = false;
+
         if (conn->target)
                 talloc_unlink(conn, conn->target);
        list_del(&conn->list);
@@ -889,6 +932,8 @@ void send_reply(struct connection *conn, enum 
xsd_sockmsg_type type,
 
        /* Queue for later transmission. */
        list_add_tail(&bdata->list, &conn->out_list);
+       bdata->on_out_list = true;
+       domain_outstanding_inc(conn);
 }
 
 /*
@@ -896,7 +941,8 @@ void send_reply(struct connection *conn, enum 
xsd_sockmsg_type type,
  * As this is not directly related to the current command, errors can't be
  * reported.
  */
-void send_event(struct connection *conn, const char *path, const char *token)
+void send_event(struct buffered_data *req, struct connection *conn,
+               const char *path, const char *token)
 {
        struct buffered_data *bdata;
        unsigned int len;
@@ -926,8 +972,13 @@ void send_event(struct connection *conn, const char *path, 
const char *token)
                        conn->timeout_msec = bdata->timeout_msec;
        }
 
+       bdata->pend.req = req;
+       if (req)
+               req->pend.ref.event_cnt++;
+
        /* Queue for later transmission. */
        list_add_tail(&bdata->list, &conn->out_list);
+       bdata->on_out_list = true;
 }
 
 /* Some routines (write, mkdir, etc) just need a non-error return */
@@ -1714,6 +1765,7 @@ static void handle_input(struct connection *conn)
                        return;
        }
        in = conn->in;
+       in->pend.ref.domid = conn->id;
 
        /* Not finished header yet? */
        if (in->inhdr) {
@@ -1787,6 +1839,7 @@ struct connection *new_connection(const struct 
interface_funcs *funcs)
        new->is_stalled = false;
        new->transaction_started = 0;
        INIT_LIST_HEAD(&new->out_list);
+       INIT_LIST_HEAD(&new->ref_list);
        INIT_LIST_HEAD(&new->watches);
        INIT_LIST_HEAD(&new->transaction_list);
        INIT_LIST_HEAD(&new->delayed);
@@ -2270,6 +2323,9 @@ static void usage(void)
 "  -t, --transaction <nb>  limit the number of transaction allowed per 
domain,\n"
 "  -A, --perm-nb <nb>      limit the number of permissions per node,\n"
 "  -M, --path-max <chars>  limit the allowed Xenstore node path length,\n"
+"  -Q, --quota <what>=<nb> set the quota <what> to the value <nb>, allowed\n"
+"                          quotas are:\n"
+"                          outstanding: number of outstanding requests\n"
 "  -w, --timeout <what>=<seconds>   set the timeout in seconds for <what>,\n"
 "                          allowed timeout candidates are:\n"
 "                          watch-event: time a watch-event is kept pending\n"
@@ -2295,6 +2351,7 @@ static struct option options[] = {
        { "transaction", 1, NULL, 't' },
        { "perm-nb", 1, NULL, 'A' },
        { "path-max", 1, NULL, 'M' },
+       { "quota", 1, NULL, 'Q' },
        { "timeout", 1, NULL, 'w' },
        { "no-recovery", 0, NULL, 'R' },
        { "internal-db", 0, NULL, 'I' },
@@ -2342,6 +2399,20 @@ static void set_timeout(const char *arg)
                barf("unknown timeout \"%s\"\n", arg);
 }
 
+static void set_quota(const char *arg)
+{
+       const char *eq = strchr(arg, '=');
+       int val;
+
+       if (!eq)
+               barf("quotas must be specified via <what>=<nb>\n");
+       val = get_optval_int(eq + 1);
+       if (what_matches(arg, "outstanding"))
+               quota_req_outstanding = val;
+       else
+               barf("unknown quota \"%s\"\n", arg);
+}
+
 int main(int argc, char *argv[])
 {
        int opt;
@@ -2356,8 +2427,8 @@ int main(int argc, char *argv[])
        orig_argc = argc;
        orig_argv = argv;
 
-       while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", 
options,
-                                 NULL)) != -1) {
+       while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:T:RVW:w:U",
+                                 options, NULL)) != -1) {
                switch (opt) {
                case 'D':
                        no_domain_init = true;
@@ -2406,6 +2477,9 @@ int main(int argc, char *argv[])
                        quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
                                                 quota_max_path_len);
                        break;
+               case 'Q':
+                       set_quota(optarg);
+                       break;
                case 'w':
                        set_timeout(optarg);
                        break;
@@ -2848,6 +2922,14 @@ static void add_buffered_data(struct buffered_data 
*bdata,
 
        /* Queue for later transmission. */
        list_add_tail(&bdata->list, &conn->out_list);
+       bdata->on_out_list = true;
+       /*
+        * Watch events are never "outstanding", but the request causing them
+        * are instead kept "outstanding" until all watch events caused by that
+        * request have been delivered.
+        */
+       if (bdata->hdr.msg.type != XS_WATCH_EVENT)
+               domain_outstanding_inc(conn);
 }
 
 void read_state_buffered_data(const void *ctx, struct connection *conn,
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 745262af96..acb6b9fe2a 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -56,6 +56,8 @@ struct xs_state_connection;
 struct buffered_data
 {
        struct list_head list;
+       bool on_out_list;
+       bool on_ref_list;
 
        /* Are we still doing the header? */
        bool inhdr;
@@ -63,6 +65,17 @@ struct buffered_data
        /* How far are we? */
        unsigned int used;
 
+       /* Outstanding request accounting. */
+       union {
+               /* ref is being used for requests. */
+               struct {
+                       unsigned int event_cnt; /* # of outstanding events. */
+                       unsigned int domid;     /* domid of request. */
+               } ref;
+               /* req is being used for watch events. */
+               struct buffered_data *req;      /* request causing event. */
+       } pend;
+
        union {
                struct xsd_sockmsg msg;
                char raw[sizeof(struct xsd_sockmsg)];
@@ -123,6 +136,9 @@ struct connection
        struct list_head out_list;
        uint64_t timeout_msec;
 
+       /* Referenced requests no longer pending. */
+       struct list_head ref_list;
+
        /* Transaction context for current request (NULL if none). */
        struct transaction *transaction;
 
@@ -191,7 +207,8 @@ unsigned int get_string(const struct buffered_data *data, 
unsigned int offset);
 
 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
                const void *data, unsigned int len);
-void send_event(struct connection *conn, const char *path, const char *token);
+void send_event(struct buffered_data *req, struct connection *conn,
+               const char *path, const char *token);
 
 /* Some routines (write, mkdir, etc) just need a non-error return */
 void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
@@ -245,6 +262,7 @@ extern int dom0_domid;
 extern int dom0_event;
 extern int priv_domid;
 extern int quota_nb_entry_per_domain;
+extern int quota_req_outstanding;
 
 extern unsigned int timeout_watch_event_msec;
 
diff --git a/tools/xenstore/xenstored_domain.c 
b/tools/xenstore/xenstored_domain.c
index de349e2a77..c0a37712f8 100644
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -78,6 +78,9 @@ struct domain
        /* number of watch for this domain */
        int nbwatch;
 
+       /* Number of outstanding requests. */
+       int nboutstanding;
+
        /* write rate limit */
        wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
        struct wrl_timestampt wrl_timestamp;
@@ -183,8 +186,12 @@ static bool domain_can_read(struct connection *conn)
 {
        struct xenstore_domain_interface *intf = conn->domain->interface;
 
-       if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
-               return false;
+       if (domain_is_unprivileged(conn)) {
+               if (conn->domain->wrl_credit < 0)
+                       return false;
+               if (conn->domain->nboutstanding >= quota_req_outstanding)
+                       return false;
+       }
 
        return (intf->req_cons != intf->req_prod);
 }
@@ -331,7 +338,7 @@ static struct domain *alloc_domain(const void *context, 
unsigned int domid)
 {
        struct domain *domain;
 
-       domain = talloc(context, struct domain);
+       domain = talloc_zero(context, struct domain);
        if (!domain) {
                errno = ENOMEM;
                return NULL;
@@ -392,9 +399,6 @@ static int new_domain(struct domain *domain, int port, bool 
restore)
        domain->conn->domain = domain;
        domain->conn->id = domain->domid;
 
-       domain->nbentry = 0;
-       domain->nbwatch = 0;
-
        return 0;
 }
 
@@ -970,6 +974,28 @@ int domain_watch(struct connection *conn)
                : 0;
 }
 
+void domain_outstanding_inc(struct connection *conn)
+{
+       if (!conn || !conn->domain)
+               return;
+       conn->domain->nboutstanding++;
+}
+
+void domain_outstanding_dec(struct connection *conn)
+{
+       if (!conn || !conn->domain)
+               return;
+       conn->domain->nboutstanding--;
+}
+
+void domain_outstanding_domid_dec(unsigned int domid)
+{
+       struct domain *d = find_domain_by_domid(domid);
+
+       if (d)
+               d->nboutstanding--;
+}
+
 static wrl_creditt wrl_config_writecost      = WRL_FACTOR;
 static wrl_creditt wrl_config_rate           = WRL_RATE   * WRL_FACTOR;
 static wrl_creditt wrl_config_dburst         = WRL_DBURST * WRL_FACTOR;
diff --git a/tools/xenstore/xenstored_domain.h 
b/tools/xenstore/xenstored_domain.h
index 4a37de67a0..617d0acfd7 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -65,6 +65,9 @@ int domain_entry(struct connection *conn);
 void domain_watch_inc(struct connection *conn);
 void domain_watch_dec(struct connection *conn);
 int domain_watch(struct connection *conn);
+void domain_outstanding_inc(struct connection *conn);
+void domain_outstanding_dec(struct connection *conn);
+void domain_outstanding_domid_dec(unsigned int domid);
 
 /* Special node permission handling. */
 int set_perms_special(struct connection *conn, const char *name,
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index 205d9d8ea1..0755ffa375 100644
--- a/tools/xenstore/xenstored_watch.c
+++ b/tools/xenstore/xenstored_watch.c
@@ -142,6 +142,7 @@ void fire_watches(struct connection *conn, const void *ctx, 
const char *name,
                  struct node *node, bool exact, struct node_perms *perms)
 {
        struct connection *i;
+       struct buffered_data *req;
        struct watch *watch;
 
        /* During transactions, don't fire watches, but queue them. */
@@ -150,6 +151,8 @@ void fire_watches(struct connection *conn, const void *ctx, 
const char *name,
                return;
        }
 
+       req = domain_is_unprivileged(conn) ? conn->in : NULL;
+
        /* Create an event for each watch. */
        list_for_each_entry(i, &connections, list) {
                /* introduce/release domain watches */
@@ -164,12 +167,12 @@ void fire_watches(struct connection *conn, const void 
*ctx, const char *name,
                list_for_each_entry(watch, &i->watches, list) {
                        if (exact) {
                                if (streq(name, watch->node))
-                                       send_event(i,
+                                       send_event(req, i,
                                                   get_watch_path(watch, name),
                                                   watch->token);
                        } else {
                                if (is_child(name, watch->node))
-                                       send_event(i,
+                                       send_event(req, i,
                                                   get_watch_path(watch, name),
                                                   watch->token);
                        }
@@ -269,8 +272,12 @@ int do_watch(struct connection *conn, struct buffered_data 
*in)
        trace_create(watch, "watch");
        send_ack(conn, XS_WATCH);
 
-       /* We fire once up front: simplifies clients and restart. */
-       send_event(conn, get_watch_path(watch, watch->node), watch->token);
+       /*
+        * We fire once up front: simplifies clients and restart.
+        * This event will not be linked to the XS_WATCH request.
+        */
+       send_event(NULL, conn, get_watch_path(watch, watch->node),
+                  watch->token);
 
        return 0;
 }
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.