[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Require that xenstored writes to a domain complete in a single chunk


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: David Edmondson <dme@xxxxxxx>
  • Date: Mon, 26 Feb 2007 16:24:23 +0000
  • Cancel-lock: sha1:m217nwzsu46bAYTPrG/IG32ZhNw=
  • Delivery-date: Mon, 26 Feb 2007 08:21:54 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

If xenstored is part-way through writing a header+payload into the
buffer shared with a guest domain when the guest domain decides to
suspend, the buffer is corrupted, as xenstored doesn't know that it
has a partial write to complete when the domain revives.  The domain
is expecting proper completion of the partial header+payload and is
disappointed.

The attached patch modifies xenstored such that it checks for
sufficient space for header+payload before making any changes to the
shared buffer.

It is against 3.0.4-1, but the code in unstable looks the same.

Require that writes to a domain complete in a single chunk (i.e
both header and payload are written at the same time) to avoid leaving
the ring in an inconsistent state across suspend/resume.

Signed-off-by: David Edmondson <dme@xxxxxxx>

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -264,10 +264,17 @@ static bool write_messages(struct connec
 {
        int ret;
        struct buffered_data *out;
+       unsigned int space_required = 0;
+       bool r = false;
 
        out = list_top(&conn->out_list, struct buffered_data, list);
        if (out == NULL)
                return true;
+
+       if (out->inhdr)
+               space_required += sizeof (out->hdr);
+       space_required += out->hdr.msg.len;
+       space_required -= out->used;
 
        if (out->inhdr) {
                if (verbose)
@@ -277,36 +284,59 @@ static bool write_messages(struct connec
                                out->buffer, conn);
                ret = conn->write(conn, out->hdr.raw + out->used,
                                  sizeof(out->hdr) - out->used);
-               if (ret < 0)
-                       return false;
-
+               if (ret < 0) {
+                       r = false;
+                       goto done;
+               }
+
+               space_required -= ret;
                out->used += ret;
-               if (out->used < sizeof(out->hdr))
-                       return true;
+               if (out->used < sizeof(out->hdr)) {
+                       r = true;
+                       goto done;
+               }
 
                out->inhdr = false;
                out->used = 0;
 
                /* Second write might block if non-zero. */
-               if (out->hdr.msg.len && !conn->domain)
-                       return true;
+               if ((out->hdr.msg.len != 0) &&
+                   (conn->domain == NULL)) {
+                       r = true;
+                       goto done;
+               }
        }
 
        ret = conn->write(conn, out->buffer + out->used,
                          out->hdr.msg.len - out->used);
-       if (ret < 0)
-               return false;
-
+       if (ret < 0) {
+               r = false;
+               goto done;
+       }
+
+       space_required -= ret;
        out->used += ret;
-       if (out->used != out->hdr.msg.len)
-               return true;
-
+       if (out->used != out->hdr.msg.len) {
+               r = true;
+               goto done;
+       }
+
+       r = true;
        trace_io(conn, "OUT", out);
 
        list_del(&out->list);
        talloc_free(out);
 
-       return true;
+done:
+       if ((conn->domain != NULL) && (space_required != 0))
+               /*
+                * Not all of the data was consumed.  This can leave
+                * the shared ring pointers in an inconsistent state
+                * across suspend/resume.
+                */
+               eprintf("partial write to shared page of domain");
+
+       return r;
 }
 
 static int destroy_conn(void *_conn)
@@ -1997,7 +2027,7 @@ int main(int argc, char *argv[])
                                goto more;
                        }
 
-                       if (domain_can_write(i) && !list_empty(&i->out_list)) {
+                       if (domain_can_write(i)) {
                                handle_output(i);
                                goto more;
                        }
diff --git a/tools/xenstore/xenstored_domain.c 
b/tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -109,23 +109,29 @@ static int writechn(struct connection *c
        void *dest;
        struct xenstore_domain_interface *intf = conn->domain->interface;
        XENSTORE_RING_IDX cons, prod;
-
-       /* Must read indexes once, and before anything else, and verified. */
-       cons = intf->rsp_cons;
-       prod = intf->rsp_prod;
-       mb();
-       if (!check_indexes(cons, prod)) {
-               errno = EIO;
-               return -1;
-       }
-
-       dest = get_output_chunk(cons, prod, intf->rsp, &avail);
-       if (avail < len)
-               len = avail;
-
-       memcpy(dest, data, len);
-       mb();
-       intf->rsp_prod += len;
+       unsigned int remain = len;
+
+       while (remain > 0) {
+               /* Must read indexes once, and before anything else,
+                * and verified. */
+               cons = intf->rsp_cons;
+               prod = intf->rsp_prod;
+               mb();
+               if (!check_indexes(cons, prod)) {
+                       errno = EIO;
+                       return -1;
+               }
+
+               dest = get_output_chunk(cons, prod, intf->rsp, &avail);
+               if (avail > remain)
+                       avail = remain;
+               memcpy(dest, data, avail);
+               data += avail;
+               remain -= avail;
+
+               mb();
+               intf->rsp_prod += avail;
+       }
 
        xc_evtchn_notify(xce_handle, conn->domain->port);
 
@@ -236,7 +242,30 @@ bool domain_can_write(struct connection 
 bool domain_can_write(struct connection *conn)
 {
        struct xenstore_domain_interface *intf = conn->domain->interface;
-       return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+       struct buffered_data *out;
+       unsigned int space_required = 0, space_available;
+
+       out = list_top(&conn->out_list, struct buffered_data, list);
+       if (out == NULL)
+               return false;
+
+       if (out->inhdr)
+               space_required += sizeof (out->hdr);
+       space_required += out->hdr.msg.len;
+       space_required -= out->used;
+
+       if (space_required > XENSTORE_RING_SIZE) {
+               eprintf("attempt to write %d bytes "
+                       "to domain %d will never succeed",
+                       space_required, conn->domain->domid);
+               talloc_free(conn);
+               return false;
+       }
+
+       space_available = XENSTORE_RING_SIZE -
+               (intf->rsp_prod - intf->rsp_cons);
+
+       return (space_available >= space_required);
 }
 
 static char *talloc_domain_path(void *context, unsigned int domid)
dme.
-- 
David Edmondson, Sun Microsystems, http://www.dme.org
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.