[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 08/10] libxl: Kill QEMU by uid when possible



The privcmd fd that a dm_restrict'ed QEMU has gives it permission to
one specific domain ID.  This domain ID will probably eventually be
used again.  It is therefore necessary to make absolutely sure that a
rogue QEMU process cannot hang around after its domain has exited.

Killing QEMU by pid is insufficient in this situation, because QEMU
may be able to fork() to escape killing.  It is surprisingly tricky to
kill a process which can call fork() without races; the only reliable
way is to use kill(-1) to kill all processes with a given uid.

We can use this method only when we're sure that there's only one QEMU
instance per uid.  Add a dm_uid into the domain_build_state struct,
and set it in libxl__domain_get_device_model_uid() when it's safe to
kill by UID.  Store this in xenstore next to device-model-pid.

On domain destroy, check to see if device-model-uid is present in
xenstore.  If so, fork off a reaper process, setuid to that uid, and
do kill(-9) to kill all uids of that type.  Otherwise, carry on
destroying by pid.

While we're here, make libxl__destroy_device_model() consistently:
 1. Return an error when anything fails
 2. But continue to do as much clean-up as possible

NOTE that this is not yet completely safe: with ruid == dm_uid, the
device model may be able to kill(-9) the 'reaper' process before the
reaper process can kill it.  Further patches will address this.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx>
---
v2:
- Rebase on top of previous "goto out" refactoring
- Rather than introducing a `uid` string, Introduce a boolean,
  "kill_by_uid"; and do the GCSPRINTF() once if that is set.
- Fix typo "starting"
- Always call kill_device_model_uid_cb(); only call
  libxl__qmp_cleanup() from there
- Refactor libxl__destroy_device_model() to follow "goto out on error"
  pattern
- Retain and report errors even when we continue trying to clean up
- Report errors removing DM xenstore directory (except -ENOENT)
- Report errors reading device-model-uid
- Put "kill by uid" child logic in a separate function
- Refactor "kill by uid" to follow "goto out on error" pattern
- Change "kill by uid" to return libxl-style error, rather than errno
- Document the intention of when to return errors
- Assert that dm_uid != 0
- Log what the reaper process setresuid'd to

CC: Ian Jackson <ian.jackson@xxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
CC: Anthony Perard <anthony.perard@xxxxxxxxxx>
---
 tools/libxl/libxl_dm.c       | 206 +++++++++++++++++++++++++++++++++--
 tools/libxl/libxl_internal.h |   4 +-
 2 files changed, 200 insertions(+), 10 deletions(-)

diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 7f9c6a62fe..53fdf8daf7 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -129,6 +129,8 @@ static int libxl__domain_get_device_model_uid(libxl__gc *gc,
     int rc;
     char *user;
     uid_t intended_uid;
+    bool kill_by_uid;
+
 
     /* Only qemu-upstream can run as a different uid */
     if (b_info->device_model_version != LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN)
@@ -148,8 +150,10 @@ static int libxl__domain_get_device_model_uid(libxl__gc 
*gc,
             LOGD(ERROR, guest_domid, "Couldn't find device_model_user %s",
                  user);
             rc = ERROR_INVAL;
-        } else
+        } else {
             intended_uid = user_base->pw_uid;
+            kill_by_uid = true;
+        }
 
         goto out;
     }
@@ -192,11 +196,14 @@ static int libxl__domain_get_device_model_uid(libxl__gc 
*gc,
         LOGD(DEBUG, guest_domid, "using uid %ld", (long)intended_uid);
         user = GCSPRINTF("%ld:%ld", (long)intended_uid,
                          (long)user_base->pw_gid);
+        kill_by_uid = true;
         goto out;
     }
 
     /*
-     * We couldn't find QEMU_USER_BASE_RANGE; look for QEMU_USER_SHARED
+     * We couldn't find QEMU_USER_BASE_RANGE; look for
+     * QEMU_USER_SHARED.  NB for QEMU_USER_SHARED, all QEMU will run
+     * as the same UID, we can't kill by uid; therefore don't set uid.
      */
     user = LIBXL_QEMU_USER_SHARED;
     rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base);
@@ -206,6 +213,7 @@ static int libxl__domain_get_device_model_uid(libxl__gc *gc,
         LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s",
              LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED);
         intended_uid = user_base->pw_uid;
+        kill_by_uid = false;
         goto out;
     }
 
@@ -226,6 +234,8 @@ out:
         }
 
         state->dm_runas = user;
+        if (kill_by_uid)
+            state->dm_uid = GCSPRINTF("%ld", (long)intended_uid);
     }
 
     return rc;
@@ -2408,6 +2418,15 @@ void libxl__spawn_local_dm(libxl__egc *egc, 
libxl__dm_spawn_state *dmss)
 
     const char *dom_path = libxl__xs_get_dompath(gc, domid);
 
+    /*
+     * If we're starting the dm with a non-root UID, save the UID so
+     * that we can reliably kill it and any subprocesses
+     */
+    if (state->dm_uid)
+        libxl__xs_printf(gc, XBT_NULL,
+                         GCSPRINTF("%s/image/device-model-uid", dom_path),
+                         "%s", state->dm_uid);
+
     if (vnc && vnc->passwd) {
         /* This xenstore key will only be used by qemu-xen-traditionnal.
          * The code to supply vncpasswd to qemu-xen is later. */
@@ -2677,25 +2696,194 @@ out:
     return rc;
 }
 
+/* Asynchronous device model destroy functions */
+
+static int kill_device_model_uid_child(libxl__destroy_devicemodel_state *ddms,
+                                       const char *dm_uid_str);
+
+static void kill_device_model_uid_cb(libxl__egc *egc,
+                                     libxl__ev_child *destroyer,
+                                     pid_t pid, int status);
+
+/*
+ * If we have a uid, we shouldn't kill by pid.  This is because a
+ * hostile QEMU might have exited, in which case the pid we have may
+ * be that of another process.
+ *
+ * The running devicemodel has permission over a specific domain id;
+ * this means that ideally we wouldn't the domain in question (freeing
+ * up the domain id for reuse) until we're confident that we've killed
+ * the domain.
+ *
+ * In general, destroy as much as we can; but return an error if there
+ * are any errors, so that the domain destroy will be aborted, and the
+ * domain itself will remain, giving the admin an opportunity to fix
+ * any issues and re-try the domain destroy.
+ */
+#define PROPAGATE_RC if(!ddms->rc) ddms->rc = rc
+
 void libxl__destroy_device_model(libxl__egc *egc,
                                  libxl__destroy_devicemodel_state *ddms)
 {
     STATE_AO_GC(ddms->ao);
     int rc;
     int domid = ddms->domid;
-    char *path = DEVICE_MODEL_XS_PATH(gc, LIBXL_TOOLSTACK_DOMID, domid, "");
+    char *path;
+    const char *dm_uid_str = NULL;
+    int reaper_pid;
 
-    if (!xs_rm(CTX->xsh, XBT_NULL, path))
+    ddms->rc = 0;
+
+    path = DEVICE_MODEL_XS_PATH(gc, LIBXL_TOOLSTACK_DOMID, domid, "");
+    rc = libxl__xs_rm_checked(gc, XBT_NULL, path);
+    if (rc) {
+        PROPAGATE_RC;
         LOGD(ERROR, domid, "xs_rm failed for %s", path);
+    }
+
+    /*
+     * See if we should try to kill by uid
+     */
+    path = GCSPRINTF("/local/domain/%d/image/device-model-uid", domid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &dm_uid_str);
+
+    /*
+     * If there was an error here, accumulate the error and fall back
+     * to killing by pid.
+     */
+    if (rc) {
+        PROPAGATE_RC;
+        LOGD(ERROR, domid, "Reading dm UID path failed for %s", path);
+    }
+
+    /* The DM has its own uid; Attempt to kill all processes with that UID */
+    if (dm_uid_str) {
+        LOGD(DEBUG, domid, "Found DM uid %s, destroying by uid", dm_uid_str);
+
+        reaper_pid = libxl__ev_child_fork(gc, &ddms->destroyer,
+                                          kill_device_model_uid_cb);
+        if (reaper_pid < 0) {
+            rc = ERROR_FAIL;
+            PROPAGATE_RC;
+            /*
+             * Note that if this fails, we still don't kill by pid, to
+             * make sure that an untrusted DM has not "maliciously"
+             * exited (potentially causing us to kill an unrelated
+             * process which happened to get the same pid).
+             */
+            goto out;
+        }
+
+        if (!reaper_pid) {  /* child */
+            rc = kill_device_model_uid_child(ddms, dm_uid_str);
+            _exit(rc);
+        }
+
+        /*
+         * Parent of successful fork; execution will pick up in
+         * kill_device_model_uid_cb when child exits
+         */
+        return;
+    }
 
-    /* We should try to destroy the device model anyway. */
-    rc = kill_device_model(gc,
-              GCSPRINTF("/local/domain/%d/image/device-model-pid", domid));
+    /*
+     * No uid to kill; attept to kill by pid.
+     */
+    LOGD(DEBUG, domid, "Didn't find dm UID; destroying by pid");
+
+    path = GCSPRINTF("/local/domain/%d/image/device-model-pid", domid);
+    rc = kill_device_model(gc, path);
+
+    if (rc) {
+        PROPAGATE_RC;
+        LOGD(ERROR, domid, "Killing device model pid from path %s", path);
+    }
+
+out:
+    /*
+     * NB that we always return '0' here for the "status of exited
+     * process"; since there is no process, it always "succeeds".
+     * Errors are accumulated in ddms->rc and will be handled
+     * correctly.
+     */
+    kill_device_model_uid_cb(egc, &ddms->destroyer, -1, 0);
+    return;
+}
+
+/*
+ * Destroy all processes of the given uid by setresuid to the
+ * specified uid and kill(-1).  NB this MUST BE CALLED FROM A SEPARATE
+ * PROCESS from the normal libxl process.  Returns a libxl-style error
+ * code.
+ */
+static int kill_device_model_uid_child(libxl__destroy_devicemodel_state *ddms,
+                                       const char *dm_uid_str) {
+    STATE_AO_GC(ddms->ao);
+    int domid = ddms->domid;
+    int r, rc = 0;
+    uid_t dm_uid = atoi(dm_uid_str);
+
+    /*
+     * FIXME: the second uid needs to be distinct to avoid being
+     * killed by a potential rogue process
+     */
+
+    /*
+     * Should never happen; but if it does, better to have the
+     * toolstack crash with an error than nuking dom0.
+      */
+    assert(dm_uid);
+
+    LOGD(DEBUG, domid, "DM reaper: calling setresuid(%d, %d, 0)",
+         dm_uid, dm_uid);
+    r = setresuid(dm_uid, dm_uid, 0);
+    if (r) {
+        LOGED(ERROR, domid, "setresuid to (%d, %d, 0)", dm_uid, dm_uid);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /*
+     * And kill everyone but me.
+     *
+     * NB that it's not clear from either POSIX or the Linux man page
+     * that ESRCH would be returned with a pid value of -1, but it
+     * doesn't hurt to check.
+     */
+    r = kill(-1, 9);
+    if (r && errno != ESRCH) {
+        LOGED(ERROR, domid, "kill(-1,9)");
+        rc = ERROR_FAIL;
+    }
+
+out:
+    return rc;
+}
+
+static void kill_device_model_uid_cb(libxl__egc *egc,
+                                    libxl__ev_child *destroyer,
+                                    pid_t pid, int status)
+{
+    libxl__destroy_devicemodel_state *ddms = CONTAINER_OF(destroyer, *ddms, 
destroyer);
+    STATE_AO_GC(ddms->ao);
+
+    if (status) {
+        int rc = ERROR_FAIL;;
+
+        if (WIFEXITED(status))
+            rc = WEXITSTATUS(status) - 128;
+
+        PROPAGATE_RC;
+        libxl_report_child_exitstatus(CTX, XTL_ERROR,
+                                      "async domain destroy", pid, status);
+    }
 
-    libxl__qmp_cleanup(gc, domid);
+    /* Always try to clean up qmp, even if something went wrong */
+    libxl__qmp_cleanup(gc, ddms->domid);
 
-    ddms->callback(egc, ddms, rc);
+    ddms->callback(egc, ddms, ddms->rc);
 }
+#undef PROPAGATE_RC
 
 /* Return 0 if no dm needed, 1 if needed and <0 if error. */
 int libxl__need_xenpv_qemu(libxl__gc *gc, libxl_domain_config *d_config)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index f9e0bf6578..cd3208f4b8 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1135,7 +1135,7 @@ typedef struct {
     const char *shim_cmdline;
     const char *pv_cmdline;
 
-    char *dm_runas;
+    char *dm_runas, *dm_uid;
 
     xen_vmemrange_t *vmemranges;
     uint32_t num_vmemranges;
@@ -3706,6 +3706,8 @@ struct libxl__destroy_devicemodel_state {
     uint32_t domid;
     libxl__devicemodel_destroy_cb *callback; /* May be called re-entrantly */
     /* private to implementation */
+    libxl__ev_child destroyer;
+    int rc; /* Accumulated return value for the destroy operation */
 };
 
 struct libxl__destroy_domid_state {
-- 
2.19.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.