[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] tools/libxl: Fixes to stream v2 task joining logic



During review of the libxl migration v2 series, I changes the task
joining logic, but clearly didn't think the result through
properly. This patch fixes several errors.

1) Do not call check_all_finished() in the success cases of
libxl__xc_domain_{save,restore}_done().  It serves no specific purpose
as the save helper state will report itself as inactive by this point,
and avoids triggering a second stream->completion_callback() in the case
that write_toolstack_record()/stream_continue() record errors
synchronously themselves.

2) Only ever set stream->rc in stream_complete().  The first version of
the migration v2 series had separate rc and joined_rc parameters, where
this logic worked.  However when combining the two, the teardown path
fails to trigger if stream_done() records stream->rc itself.  A side
effect of this is that stream_done() needs to take an rc parameter.

3) Avoid stacking of check_all_finished() via synchronous teardown of
tasks.  If the _abort() functions call back synchronously,
stream->completion_callback() ends up getting called twice, as first and
last check_all_finished() frames observe each task being finished.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>

---
I found this while working to fix the toolstack record issue, but am
posting this bugfix ahead of the other work as OSSTest has tripped over
the issue.
---
 tools/libxl/libxl_internal.h     |    2 ++
 tools/libxl/libxl_stream_read.c  |   32 ++++++++++++++++++++++++++------
 tools/libxl/libxl_stream_write.c |   32 ++++++++++++++++++++++++++------
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3c09668..73b4d62 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2996,6 +2996,7 @@ struct libxl__stream_write_state {
     int rc;
     bool running;
     bool in_checkpoint;
+    bool sync_teardown;
     libxl__save_helper_state shs;
 
     /* Main stream-writing data. */
@@ -3337,6 +3338,7 @@ struct libxl__stream_read_state {
     int rc;
     bool running;
     bool in_checkpoint;
+    bool sync_teardown;
     libxl__save_helper_state shs;
     libxl__conversion_helper_state chs;
 
diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 32a3551..70d3e5f 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -112,7 +112,7 @@ static void stream_complete(libxl__egc *egc,
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_read_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_read_state *stream);
+                        libxl__stream_read_state *stream, int rc);
 static void conversion_done(libxl__egc *egc,
                             libxl__conversion_helper_state *chs, int rc);
 static void check_all_finished(libxl__egc *egc,
@@ -176,6 +176,7 @@ void libxl__stream_read_init(libxl__stream_read_state 
*stream)
     stream->rc = 0;
     stream->running = false;
     stream->in_checkpoint = false;
+    stream->sync_teardown = false;
     libxl__save_helper_init(&stream->shs);
     libxl__conversion_helper_init(&stream->chs);
     FILLZERO(stream->dc);
@@ -669,9 +670,7 @@ static void stream_complete(libxl__egc *egc,
         return;
     }
 
-    if (!stream->rc)
-        stream->rc = rc;
-    stream_done(egc, stream);
+    stream_done(egc, stream, rc);
 }
 
 static void checkpoint_done(libxl__egc *egc,
@@ -695,7 +694,7 @@ static void checkpoint_done(libxl__egc *egc,
 }
 
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_read_state *stream)
+                        libxl__stream_read_state *stream, int rc)
 {
     libxl__sr_record_buf *rec, *trec;
 
@@ -720,7 +719,7 @@ static void stream_done(libxl__egc *egc,
     LIBXL_STAILQ_FOREACH_SAFE(rec, &stream->record_queue, entry, trec)
         free_record(rec);
 
-    check_all_finished(egc, stream, stream->rc);
+    check_all_finished(egc, stream, rc);
 }
 
 void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
@@ -744,6 +743,7 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void 
*dcs_void,
      * libxl records from it.
      */
     stream_continue(egc, stream);
+    return;
 
  err:
     check_all_finished(egc, stream, rc);
@@ -762,13 +762,33 @@ static void check_all_finished(libxl__egc *egc,
 {
     STATE_AO_GC(stream->ao);
 
+    /*
+     * In the case of a failure, the _abort()'s below might cancel
+     * synchronously on top of us, or asynchronously at a later point.
+     *
+     * We must avoid the situation where all _abort() cancel
+     * synchronously and the completion_callback() gets called twice;
+     * once by the first error and once by the final stacked abort(),
+     * both of whom will find that all of the tasks have stopped.
+     *
+     * To avoid this problem, any stacked re-entry into this function is
+     * ineligible to fire the completion callback.  The outermost
+     * instance will take care of completing, once the stack has
+     * unwound.
+     */
+    if (stream->sync_teardown)
+        return;
+
     if (!stream->rc && rc) {
         /* First reported failure. Tear everything down. */
         stream->rc = rc;
+        stream->sync_teardown = true;
 
         libxl__stream_read_abort(egc, stream, rc);
         libxl__save_helper_abort(egc, &stream->shs);
         libxl__conversion_helper_abort(egc, &stream->chs, rc);
+
+        stream->sync_teardown = false;
     }
 
     /* Don't fire the callback until all our parallel tasks have stopped. */
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 5bff52b..5135bc3 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -55,7 +55,7 @@ static void stream_success(libxl__egc *egc,
 static void stream_complete(libxl__egc *egc,
                             libxl__stream_write_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_write_state *stream);
+                        libxl__stream_write_state *stream, int rc);
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc);
@@ -158,6 +158,7 @@ void libxl__stream_write_init(libxl__stream_write_state 
*stream)
     stream->rc = 0;
     stream->running = false;
     stream->in_checkpoint = false;
+    stream->sync_teardown = false;
     FILLZERO(stream->dc);
     stream->record_done_callback = NULL;
     FILLZERO(stream->emu_dc);
@@ -275,6 +276,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void 
*dss_void,
     }
 
     write_toolstack_record(egc, stream);
+    return;
 
  err:
     check_all_finished(egc, stream, rc);
@@ -492,13 +494,11 @@ static void stream_complete(libxl__egc *egc,
         return;
     }
 
-    if (!stream->rc)
-        stream->rc = rc;
-    stream_done(egc, stream);
+    stream_done(egc, stream, rc);
 }
 
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_write_state *stream)
+                        libxl__stream_write_state *stream, int rc)
 {
     assert(stream->running);
     stream->running = false;
@@ -507,7 +507,7 @@ static void stream_done(libxl__egc *egc,
         libxl__carefd_close(stream->emu_carefd);
     free(stream->emu_body);
 
-    check_all_finished(egc, stream, stream->rc);
+    check_all_finished(egc, stream, rc);
 }
 
 static void checkpoint_done(libxl__egc *egc,
@@ -526,12 +526,32 @@ static void check_all_finished(libxl__egc *egc,
 {
     STATE_AO_GC(stream->ao);
 
+    /*
+     * In the case of a failure, the _abort()'s below might cancel
+     * synchronously on top of us, or asynchronously at a later point.
+     *
+     * We must avoid the situation where all _abort() cancel
+     * synchronously and the completion_callback() gets called twice;
+     * once by the first error and once by the final stacked abort(),
+     * both of whom will find that all of the tasks have stopped.
+     *
+     * To avoid this problem, any stacked re-entry into this function is
+     * ineligible to fire the completion callback.  The outermost
+     * instance will take care of completing, once the stack has
+     * unwound.
+     */
+    if (stream->sync_teardown)
+        return;
+
     if (!stream->rc && rc) {
         /* First reported failure. Tear everything down. */
         stream->rc = rc;
+        stream->sync_teardown = true;
 
         libxl__stream_write_abort(egc, stream, rc);
         libxl__save_helper_abort(egc, &stream->shs);
+
+        stream->sync_teardown = false;
     }
 
     /* Don't fire the callback until all our parallel tasks have stopped. */
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.