[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Watchdog timers for domains



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1275647723 -3600
# Node ID 497bda800505b740c9aea42647031fc27abd2e8c
# Parent  d4a91417a0231ff4a434d1a5b46749acda1dfe13
Watchdog timers for domains

Each domain is allowed to set, reset and disable its timers; when any
timer runs out the domain is killed.

Patch from Christian Limpach <Christian.Limpach@xxxxxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
 .hgignore                  |    1 
 tools/libxc/xc_domain.c    |   28 +++++++++++++
 tools/libxc/xenctrl.h      |    4 +
 tools/misc/Makefile        |    8 ++-
 tools/misc/xen-watchdog    |   59 +++++++++++++++++++++++++++
 tools/misc/xenwatchdogd.c  |   96 +++++++++++++++++++++++++++++++++++++++++++++
 xen/common/domain.c        |   11 ++++-
 xen/common/keyhandler.c    |    5 ++
 xen/common/schedule.c      |   85 +++++++++++++++++++++++++++++++++++++++
 xen/common/shutdown.c      |    9 ++++
 xen/include/public/sched.h |   17 +++++++
 xen/include/xen/sched.h    |   11 ++++-
 12 files changed, 328 insertions(+), 6 deletions(-)

diff -r d4a91417a023 -r 497bda800505 .hgignore
--- a/.hgignore Fri Jun 04 10:46:32 2010 +0100
+++ b/.hgignore Fri Jun 04 11:35:23 2010 +0100
@@ -237,6 +237,7 @@
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
 ^tools/xcutils/readnotes$
+^tools/misc/xenwatchdogd$
 ^tools/xenfb/sdlfb$
 ^tools/xenfb/vncfb$
 ^tools/xenmon/xentrace_setmask$
diff -r d4a91417a023 -r 497bda800505 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/libxc/xc_domain.c   Fri Jun 04 11:35:23 2010 +0100
@@ -364,6 +364,34 @@ int xc_vcpu_getcontext(xc_interface *xch
     unlock_pages(ctxt, sz);
 
     return rc;
+}
+
+int xc_watchdog(xc_interface *xch,   /* handle passed to do_xen_hypercall() */
+                uint32_t id,         /* copied into the sched_watchdog_t id field */
+                uint32_t timeout)    /* copied into the sched_watchdog_t timeout field */
+{
+    int ret = -1;                    /* returned as-is when lock_pages() fails */
+    sched_watchdog_t arg;
+    DECLARE_HYPERCALL;
+    /* Issue sched_op(SCHEDOP_watchdog, &arg) and return the hypercall result. */
+    hypercall.op     = __HYPERVISOR_sched_op;
+    hypercall.arg[0] = (unsigned long)SCHEDOP_watchdog;
+    hypercall.arg[1] = (unsigned long)&arg;
+    arg.id = id;
+    arg.timeout = timeout;
+    /* The argument lives on the stack; lock its memory around the call. */
+    if ( lock_pages(&arg, sizeof(arg)) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out1;
+    }
+
+    ret = do_xen_hypercall(xch, &hypercall);
+
+    unlock_pages(&arg, sizeof(arg));
+
+ out1:
+    return ret;
 }
 
 
diff -r d4a91417a023 -r 497bda800505 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/libxc/xenctrl.h     Fri Jun 04 11:35:23 2010 +0100
@@ -331,6 +331,10 @@ int xc_domain_shutdown(xc_interface *xch
 int xc_domain_shutdown(xc_interface *xch,
                        uint32_t domid,
                        int reason);
+
+int xc_watchdog(xc_interface *xch,
+               uint32_t id,
+               uint32_t timeout);
 
 int xc_vcpu_setaffinity(xc_interface *xch,
                         uint32_t domid,
diff -r d4a91417a023 -r 497bda800505 tools/misc/Makefile
--- a/tools/misc/Makefile       Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/misc/Makefile       Fri Jun 04 11:35:23 2010 +0100
@@ -10,7 +10,7 @@ CFLAGS   += $(INCLUDES)
 
 HDRS     = $(wildcard *.h)
 
-TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
 TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx
 TARGETS := $(TARGETS-y)
 
@@ -22,7 +22,7 @@ INSTALL_BIN-$(CONFIG_X86) += xen-detect
 INSTALL_BIN-$(CONFIG_X86) += xen-detect
 INSTALL_BIN := $(INSTALL_BIN-y)
 
-INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
 INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx
 INSTALL_SBIN := $(INSTALL_SBIN-y)
 
@@ -37,8 +37,10 @@ install: build
 install: build
        $(INSTALL_DIR) $(DESTDIR)$(BINDIR)
        $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(CONFIG_DIR)/init.d
        $(INSTALL_PYTHON_PROG) $(INSTALL_BIN) $(DESTDIR)$(BINDIR)
        $(INSTALL_PYTHON_PROG) $(INSTALL_SBIN) $(DESTDIR)$(SBINDIR)
+       $(INSTALL_PROG) xen-watchdog $(DESTDIR)$(CONFIG_DIR)/init.d
        set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d install-recurse; done
 
 .PHONY: clean
@@ -49,7 +51,7 @@ clean:
 %.o: %.c $(HDRS) Makefile
        $(CC) -c $(CFLAGS) -o $@ $<
 
-xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool: %: %.o Makefile
+xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool xenwatchdogd: %: %.o Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
 
 gtraceview: %: %.o Makefile
diff -r d4a91417a023 -r 497bda800505 tools/misc/xen-watchdog
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/misc/xen-watchdog   Fri Jun 04 11:35:23 2010 +0100
@@ -0,0 +1,59 @@
+#! /bin/bash
+#
+# xen-watchdog
+#
+# chkconfig: 2345 21 79
+# description: Run domain watchdog daemon
+#
+
+# Source function library.
+. /etc/init.d/functions
+
+start() {
+       local r
+       base="watchdogd"   # label used in the success/failure messages below
+       echo -n $"Starting domain watchdog daemon: "
+       # Arguments are <timeout> <sleep>, per the daemon's own usage message.
+       /usr/sbin/xenwatchdogd 30 15
+       r=$?
+       [ "$r" -eq 0 ] && success $"$base startup" || failure $"$base startup"   # presumably helpers from /etc/init.d/functions - confirm
+       echo
+
+       return $r   # propagate the daemon launcher's exit status
+}
+
+stop() {
+       local r
+       base="watchdogd"   # label used in the success/failure messages below
+       echo -n $"Stopping domain watchdog daemon: "
+       # The daemon is installed and started as xenwatchdogd, so that is the
+       # name killall must match. SIGUSR1 makes it disarm the watchdog and exit.
+       killall -USR1 xenwatchdogd 2>/dev/null
+       r=$?
+       [ "$r" -eq 0 ] && success $"$base stop" || failure $"$base stop"
+       echo
+       return $r   # non-zero when no daemon process was signalled
+}
+
+case "$1" in   # dispatch on the requested init action
+  start)
+       start
+       ;;
+  stop)
+       stop
+       ;;
+  restart)
+       stop
+       start
+       ;;
+  status)   # accepted, but prints nothing and performs no check
+       ;;
+  condrestart)   # NOTE(review): restarts unconditionally, identical to restart
+       stop
+       start
+       ;;
+  *)   # anything else: print usage and fail
+       echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+       exit 1
+esac
+
diff -r d4a91417a023 -r 497bda800505 tools/misc/xenwatchdogd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/misc/xenwatchdogd.c Fri Jun 04 11:35:23 2010 +0100
@@ -0,0 +1,96 @@
+
+#include <err.h>
+#include <limits.h>
+#include "xenctrl.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+
+xc_interface *h;
+int id = 0;
+
+void daemonize(void)   /* detach from the caller: fork, new session, cwd "/", stdio on /dev/null */
+{
+    switch (fork()) {
+    case -1:
+       err(1, "fork");   /* err() exits, so there is no fall-through into case 0 */
+    case 0:
+       break;            /* child carries on as the daemon */
+    default:
+       exit(0);          /* parent reports success as soon as the fork worked */
+    }
+    umask(0);
+    if (setsid() < 0)
+       err(1, "setsid");
+    if (chdir("/") < 0)
+       err(1, "chdir /");
+    freopen("/dev/null", "r", stdin);    /* return values are not checked */
+    freopen("/dev/null", "w", stdout);
+    freopen("/dev/null", "w", stderr);   /* later err() output is discarded from here on */
+}
+
+void catch_exit(int sig)   /* handler main() installs for SIGHUP, SIGINT, SIGQUIT and SIGTERM */
+{
+    if (id)                /* id stays 0 until main() has created the watchdog */
+        xc_watchdog(h, id, 300);   /* re-arm with timeout 300 before exiting; presumably a grace period - confirm */
+    exit(0);
+}
+
+void catch_usr1(int sig)   /* handler main() installs for SIGUSR1 (sent by the init script's stop) */
+{
+    if (id)                /* id stays 0 until main() has created the watchdog */
+        xc_watchdog(h, id, 0);   /* timeout 0 with a non-zero id destroys the timer */
+    exit(0);
+}
+
+int main(int argc, char **argv)   /* usage: xenwatchdogd <timeout> [<sleep>] */
+{
+    int t, s;   /* t: watchdog timeout, s: pause between pokes (both seconds) */
+    int ret;
+
+    if (argc < 2)
+       errx(1, "usage: %s <timeout> <sleep>", argv[0]);
+
+    /* Validate before daemonizing so errors reach the caller; a zero or
+     * unparsable timeout would arm a watchdog that expires immediately. */
+    t = strtoul(argv[1], NULL, 0);
+    if (t <= 0)
+       errx(1, "invalid timeout '%s'", argv[1]);
+    s = (argc == 3) ? (int)strtoul(argv[2], NULL, 0) : t / 2;
+    if (s <= 0)
+       errx(1, "sleep interval must be at least 1 second");
+
+    daemonize();
+
+    h = xc_interface_open(NULL, NULL, 0);
+    if (h == NULL)
+       err(1, "xc_interface_open");
+
+    /* The handlers read the global id; it stays 0 until the watchdog
+     * exists, so a signal arriving before setup simply exits. */
+    if (signal(SIGHUP, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGINT, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGQUIT, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGTERM, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGUSR1, &catch_usr1) == SIG_ERR)
+       err(1, "signal");
+
+    id = xc_watchdog(h, 0, t);   /* id 0 asks Xen to allocate a new timer */
+    if (id <= 0)
+        err(1, "xc_watchdog setup");
+
+    for (;;) {                   /* poke the timer every s seconds, forever */
+        sleep(s);
+        ret = xc_watchdog(h, id, t);
+        if (ret != 0)
+            err(1, "xc_watchdog");
+    }
+}
diff -r d4a91417a023 -r 497bda800505 xen/common/domain.c
--- a/xen/common/domain.c       Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/domain.c       Fri Jun 04 11:35:23 2010 +0100
@@ -209,8 +209,8 @@ struct domain *domain_create(
     domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
 {
     struct domain *d, **pd;
-    enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
-           INIT_gnttab = 1u<<3, INIT_arch = 1u<<4 };
+    enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
+           INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
     int init_status = 0;
     int poolid = CPUPOOLID_NONE;
 
@@ -224,6 +224,9 @@ struct domain *domain_create(
     if ( xsm_alloc_security_domain(d) != 0 )
         goto fail;
     init_status |= INIT_xsm;
+
+    watchdog_domain_init(d);
+    init_status |= INIT_watchdog;
 
     atomic_set(&d->refcnt, 1);
     spin_lock_init_prof(d, domain_lock);
@@ -327,6 +330,8 @@ struct domain *domain_create(
     }
     if ( init_status & INIT_rangeset )
         rangeset_domain_destroy(d);
+    if ( init_status & INIT_watchdog )
+        watchdog_domain_destroy(d);
     if ( init_status & INIT_xsm )
         xsm_free_security_domain(d);
     xfree(d->pirq_mask);
@@ -604,6 +609,8 @@ static void complete_domain_destroy(stru
 
     arch_domain_destroy(d);
 
+    watchdog_domain_destroy(d);
+
     rangeset_domain_destroy(d);
 
     cpupool_rm_domain(d);
diff -r d4a91417a023 -r 497bda800505 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/keyhandler.c   Fri Jun 04 11:35:23 2010 +0100
@@ -241,6 +241,7 @@ static void dump_domains(unsigned char k
 
     for_each_domain ( d )
     {
+        unsigned int i;
         printk("General information for domain %u:\n", d->domain_id);
         cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
         printk("    refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
@@ -254,6 +255,10 @@ static void dump_domains(unsigned char k
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
                d->handle[12], d->handle[13], d->handle[14], d->handle[15],
                d->vm_assist);
+        for (i = 0 ; i < NR_DOMAIN_WATCHDOG_TIMERS; i++)
+            if ( test_bit(i, &d->watchdog_inuse_map) )
+                printk("    watchdog %d expires in %d seconds\n",
+                       i, (u32)((d->watchdog_timer[i].expires - NOW()) >> 30));
 
         arch_dump_domain_info(d);
 
diff -r d4a91417a023 -r 497bda800505 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/schedule.c     Fri Jun 04 11:35:23 2010 +0100
@@ -632,6 +632,78 @@ static long do_yield(void)
     return 0;
 }
 
+static void domain_watchdog_timeout(void *data)   /* timer callback; data is the owning struct domain */
+{
+    struct domain *d = data;
+    /* Nothing to do if the domain is already shutting down or dying. */
+    if ( d->is_shutting_down || d->is_dying )
+        return;
+
+    printk("Watchdog timer fired for domain %u\n", d->domain_id);
+    domain_shutdown(d, SHUTDOWN_watchdog);   /* shut the domain down with the watchdog reason code */
+}
+
+static long domain_watchdog(struct domain *d, uint32_t id, uint32_t timeout)
+{   /* id 0: allocate and arm a timer; id 1..NR: poke it, or destroy it when timeout is 0 */
+    if ( id > NR_DOMAIN_WATCHDOG_TIMERS )   /* ids are 1-based, so id == NR is still valid */
+        return -EINVAL;
+
+    spin_lock(&d->watchdog_lock);
+
+    if ( id == 0 )
+    {
+        for ( id = 0; id < NR_DOMAIN_WATCHDOG_TIMERS; id++ )   /* id is reused as the slot index */
+        {
+            if ( test_and_set_bit(id, &d->watchdog_inuse_map) )
+                continue;   /* slot already taken, try the next one */
+            set_timer(&d->watchdog_timer[id], NOW() + SECONDS(timeout));
+            break;
+        }
+        spin_unlock(&d->watchdog_lock);
+        return id == NR_DOMAIN_WATCHDOG_TIMERS ? -EEXIST : id + 1;   /* no free slot, else the 1-based id */
+    }
+
+    id -= 1;   /* convert the caller's 1-based id to a slot index */
+    if ( !test_bit(id, &d->watchdog_inuse_map) )
+    {
+        spin_unlock(&d->watchdog_lock);
+        return -EEXIST;   /* the named timer was never allocated, or was already destroyed */
+    }
+
+    if ( timeout == 0 )
+    {
+        stop_timer(&d->watchdog_timer[id]);   /* destroy: stop the timer and free the slot */
+        clear_bit(id, &d->watchdog_inuse_map);
+    }
+    else
+    {
+        set_timer(&d->watchdog_timer[id], NOW() + SECONDS(timeout));   /* poke: restart the countdown */
+    }
+
+    spin_unlock(&d->watchdog_lock);
+    return 0;
+}
+
+void watchdog_domain_init(struct domain *d)   /* set up a domain's watchdog lock, in-use map and timers */
+{
+    unsigned int i;
+
+    spin_lock_init(&d->watchdog_lock);
+
+    d->watchdog_inuse_map = 0;   /* no watchdog slot is allocated yet */
+    /* Every timer calls domain_watchdog_timeout(d); none is armed here. */
+    for ( i = 0; i < NR_DOMAIN_WATCHDOG_TIMERS; i++ )
+        init_timer(&d->watchdog_timer[i], domain_watchdog_timeout, d, 0);   /* last argument presumably selects CPU 0 - confirm */
+}
+
+void watchdog_domain_destroy(struct domain *d)   /* tear down all of a domain's watchdog timers */
+{
+    unsigned int i;
+    /* Kill every slot, whether or not it is marked in use. */
+    for ( i = 0; i < NR_DOMAIN_WATCHDOG_TIMERS; i++ )
+        kill_timer(&d->watchdog_timer[i]);
+}
+
 long do_sched_op_compat(int cmd, unsigned long arg)
 {
     long ret = 0;
@@ -770,6 +842,19 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN
         rcu_unlock_domain(d);
         ret = 0;
 
+        break;
+    }
+
+    case SCHEDOP_watchdog:
+    {
+        struct sched_watchdog sched_watchdog;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&sched_watchdog, arg, 1) )
+            break;
+
+        ret = domain_watchdog(
+            current->domain, sched_watchdog.id, sched_watchdog.timeout);
         break;
     }
 
diff -r d4a91417a023 -r 497bda800505 xen/common/shutdown.c
--- a/xen/common/shutdown.c     Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/shutdown.c     Fri Jun 04 11:35:23 2010 +0100
@@ -5,6 +5,7 @@
 #include <xen/domain.h>
 #include <xen/delay.h>
 #include <xen/shutdown.h>
+#include <xen/console.h>
 #include <asm/debugger.h>
 #include <public/sched.h>
 
@@ -53,6 +54,14 @@ void dom0_shutdown(u8 reason)
         break; /* not reached */
     }
 
+    case SHUTDOWN_watchdog:
+    {
+        printk("Domain 0 shutdown: watchdog rebooting machine.\n");
+        kexec_crash();
+        machine_restart(0);
+        break; /* not reached */
+    }
+
     default:
     {
         printk("Domain 0 shutdown (unknown reason %u): ", reason);
diff -r d4a91417a023 -r 497bda800505 xen/include/public/sched.h
--- a/xen/include/public/sched.h        Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/include/public/sched.h        Fri Jun 04 11:35:23 2010 +0100
@@ -106,6 +106,22 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
 #define SCHEDOP_shutdown_code 5
 
 /*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ *               after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog    6
+struct sched_watchdog {         /* argument block for SCHEDOP_watchdog */
+    uint32_t id;                /* watchdog ID; 0 requests a new timer */
+    uint32_t timeout;           /* timeout in seconds; 0 destroys timer 'id' */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
+
+/*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
  * software to determine the appropriate action. For the most part, Xen does
  * not care about the shutdown code.
@@ -114,6 +130,7 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
 #define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
 #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
 #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
 
 #endif /* __XEN_PUBLIC_SCHED_H__ */
 
diff -r d4a91417a023 -r 497bda800505 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/include/xen/sched.h   Fri Jun 04 11:35:23 2010 +0100
@@ -191,7 +191,7 @@ struct mem_event_domain
     /* event channel port (vcpu0 only) */
     int xen_port;
 };
- 
+
 struct domain
 {
     domid_t          domain_id;
@@ -294,6 +294,12 @@ struct domain
     /* OProfile support. */
     struct xenoprof *xenoprof;
     int32_t time_offset_seconds;
+
+    /* Domain watchdog. */
+#define NR_DOMAIN_WATCHDOG_TIMERS 2
+    spinlock_t watchdog_lock;
+    uint32_t watchdog_inuse_map;
+    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];
 
     struct rcu_head rcu;
 
@@ -598,6 +604,9 @@ uint64_t get_cpu_idle_time(unsigned int 
      cpu_online(cpu) &&                         \
      !per_cpu(tasklet_work_to_do, cpu))
 
+void watchdog_domain_init(struct domain *d);
+void watchdog_domain_destroy(struct domain *d);
+
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.