[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 09/13] xsplice: Implement support for applying/reverting/replacing patches. (v2)



On Tue, Jan 19, 2016 at 02:39:40PM +0000, Ross Lagerwall wrote:
> On 01/14/2016 09:47 PM, Konrad Rzeszutek Wilk wrote:
> >From: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
> >
> >Implement support for the apply, revert and replace actions.
> >
> snip
> >+#include <xen/cpu.h>
> >  #include <xen/guest_access.h>
> >  #include <xen/keyhandler.h>
> >  #include <xen/lib.h>
> >@@ -10,25 +11,38 @@
> >  #include <xen/mm.h>
> >  #include <xen/sched.h>
> >  #include <xen/smp.h>
> >+#include <xen/softirq.h>
> >  #include <xen/spinlock.h>
> >+#include <xen/wait.h>
> >  #include <xen/xsplice_elf.h>
> >  #include <xen/xsplice.h>
> >
> >  #include <asm/event.h>
> >+#include <asm/nmi.h>
> >  #include <public/sysctl.h>
> >
> >-static DEFINE_SPINLOCK(payload_list_lock);
> >+/*
> >+ * Protects against payload_list operations and also allows only one
> >+ * caller in schedule_work.
> >+ */
> >+static DEFINE_SPINLOCK(payload_lock);
> 
> I think it would be cleaner if all the payload_list_lock changes were folded
> into Patch 3.

Good idea.
> 
> >  static LIST_HEAD(payload_list);
> >
> >+static LIST_HEAD(applied_list);
> >+
> >  static unsigned int payload_cnt;
> >  static unsigned int payload_version = 1;
> >
> >  struct payload {
> >      int32_t state;                       /* One of the XSPLICE_STATE_*. */
> >      int32_t rc;                          /* 0 or -XEN_EXX. */
> >+    uint32_t timeout;                    /* Timeout to do the operation. */
> 
> This should go into struct xsplice_work.

/me nods.
> 
> >      struct list_head list;               /* Linked to 'payload_list'. */
> >      void *payload_address;               /* Virtual address mapped. */
> >      size_t payload_pages;                /* Nr of the pages. */
> >+    struct list_head applied_list;       /* Linked to 'applied_list'. */
> >+    struct xsplice_patch_func *funcs;    /* The array of functions to 
> >patch. */
> >+    unsigned int nfuncs;                 /* Nr of functions to patch. */
> >
> >      char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
> >  };
> >@@ -36,6 +50,23 @@ struct payload {
> >  static int load_payload_data(struct payload *payload, uint8_t *raw, 
> > ssize_t len);
> >  static void free_payload_data(struct payload *payload);
> >
> >+/* Defines an outstanding patching action. */
> >+struct xsplice_work
> >+{
> >+    atomic_t semaphore;          /* Used for rendezvous. First to grab it 
> >will
> >+                                    do the patching. */
> >+    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
> >+    struct payload *data;        /* The payload on which to act. */
> >+    volatile bool_t do_work;     /* Signals work to do. */
> >+    volatile bool_t ready;       /* Signals all CPUs synchronized. */
> >+    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
> >+};
> >+
> >+/* There can be only one outstanding patching action. */
> >+static struct xsplice_work xsplice_work;
> >+
> >+static int schedule_work(struct payload *data, uint32_t cmd);
> >+
> snip
> >+
> >+/*
> >+ * This function is executed having all other CPUs with no stack (we may
> >+ * have cpu_idle on it) and IRQs disabled.
> >+ */
> >+static int revert_payload(struct payload *data)
> >+{
> >+    unsigned int i;
> >+
> >+    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
> >+
> >+    for ( i = 0; i < data->nfuncs; i++ )
> >+        xsplice_revert_jmp(data->funcs + i);
> >+
> >+    list_del(&data->applied_list);
> >+
> >+    return 0;
> >+}
> >+
> >+/* Must be holding the payload_list lock. */
> 
> payload lock?
> 
> >+static int schedule_work(struct payload *data, uint32_t cmd)
> >+{
> >+    /* Fail if an operation is already scheduled. */
> >+    if ( xsplice_work.do_work )
> >+        return -EAGAIN;
> 
> Hmm, I don't think EAGAIN is correct. It will cause xen-xsplice to poll for
> a status update, but the operation hasn't actually been submitted.

-EBUSY -EDEADLK ?
> 
> >+
> >+    xsplice_work.cmd = cmd;
> >+    xsplice_work.data = data;
> >+    atomic_set(&xsplice_work.semaphore, -1);
> >+    atomic_set(&xsplice_work.irq_semaphore, -1);
> >+
> >+    xsplice_work.ready = 0;
> >+    smp_wmb();
> >+    xsplice_work.do_work = 1;
> >+    smp_wmb();
> >+
> >+    return 0;
> >+}
> >+
> >+/*
> >+ * Note that because of this NOP code the do_nmi is not safely patchable.
> >+ * Also if we do receive 'real' NMIs we have lost them.
> >+ */
> >+static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
> >+{
> >+    return 1;
> >+}
> >+
> >+static void reschedule_fn(void *unused)
> >+{
> >+    smp_mb(); /* Synchronize with setting do_work */
> >+    raise_softirq(SCHEDULE_SOFTIRQ);
> >+}
> >+
> >+static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
> >+                           unsigned int total_cpus, const char *s)
> >+{
> >+    int rc = 0;
> >+
> >+    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
> >+        cpu_relax();
> >+
> >+    /* Log & abort. */
> >+    if ( atomic_read(counter) != total_cpus )
> >+    {
> >+        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
> >+               s, atomic_read(counter), total_cpus);
> >+        rc = -EBUSY;
> >+        xsplice_work.data->rc = rc;
> >+        xsplice_work.do_work = 0;
> >+        smp_wmb();
> >+        return rc;
> >+    }
> >+    return rc;
> >+}
> >+
> >+static void xsplice_do_single(unsigned int total_cpus)
> >+{
> >+    nmi_callback_t saved_nmi_callback;
> >+    s_time_t timeout;
> >+    struct payload *data, *tmp;
> >+    int rc;
> >+
> >+    data = xsplice_work.data;
> >+    timeout = data->timeout ? data->timeout : MILLISECS(30);
> 
> The design doc says that a timeout of 0 means infinity.

True. Let's update the document.
> 
> >+    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
> >+           timeout / MILLISECS(1));
> >+
> >+    timeout += NOW();
> >+
> >+    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> >+                         "Timed out on CPU semaphore") )
> >+        return;
> >+
> >+    /* "Mask" NMIs. */
> >+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> >+
> >+    /* All CPUs are waiting, now signal to disable IRQs. */
> >+    xsplice_work.ready = 1;
> >+    smp_wmb();
> >+
> >+    atomic_inc(&xsplice_work.irq_semaphore);
> >+    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> >+                         "Timed out on IRQ semaphore.") )
> >+        return;
> >+
> >+    local_irq_disable();
> 
> As far as I can tell, the mechanics of how this works haven't changed, the
> code has just been reorganized. Which means the points that Martin raised
> about this mechanism are still outstanding.

A bit. I added the extra timeout on both of the 'spin-around' and also
moved some of the barriers around. Also removed your spin-lock and used
the atomic_t mechanism to synchronize.

But the one thing that I didn't do was the timeout on the 'workers' that
are just spinning idly. They will do that forever if, say, the 'master'
hasn't gone to the IRQ semaphore part.

My thinking was that the 'workers' could also use the timeout feature
but just multiply it by two?

> 
> -- 
> Ross Lagerwall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.