[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 8/8] evtchn: add FIFO-based event channel hypercalls and port ops



From: David Vrabel <david.vrabel@xxxxxxxxxx>

Add the implementation for the FIFO-based event channel ABI.  The new
hypercall sub-ops (EVTCHNOP_init_control, EVTCHNOP_expand_array,
EVTCHNOP_set_priority) and the required evtchn_ops (set_pending,
unmask, etc.).

This current implementation has three main limitations:

- EVTCHNOP_set_limit is not yet implemented so any guest will be able
  to use up to 2^17 (requiring 128 global mapping pages for a fully
  populated event array).

- The control block frames are not required to be shared with the
  vcpu_info structure.  This requires an additional global mapping
  page per-VCPU.  This does makes the guest implementation cleaner
  though so perhaps we do not need to fix this?

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 xen/common/Makefile          |    1 +
 xen/common/event_channel.c   |   37 ++++
 xen/common/event_fifo.c      |  491 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/event.h      |    6 +-
 xen/include/xen/event_fifo.h |   56 +++++
 xen/include/xen/sched.h      |    4 +
 6 files changed, 593 insertions(+), 2 deletions(-)
 create mode 100644 xen/common/event_fifo.c
 create mode 100644 xen/include/xen/event_fifo.h

diff --git a/xen/common/Makefile b/xen/common/Makefile
index ef03eac..afd7d40 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -7,6 +7,7 @@ obj-y += domctl.o
 obj-y += domain.o
 obj-y += event_2l.o
 obj-y += event_channel.o
+obj-y += event_fifo.o
 obj-y += grant_table.o
 obj-y += irq.o
 obj-y += kernel.o
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 67dcdbc..a048107 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -26,6 +26,7 @@
 #include <xen/compat.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
+#include <xen/event_fifo.h>
 #include <asm/current.h>
 
 #include <public/xen.h>
@@ -1053,6 +1054,40 @@ long do_event_channel_op(int cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case EVTCHNOP_init_control: {
+        struct evtchn_init_control init_control;
+        if ( copy_from_guest(&init_control, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_init_control(&init_control);
+        if ( !rc && __copy_to_guest(arg, &init_control, 1) )
+            rc = -EFAULT;
+        break;
+    }
+
+    case EVTCHNOP_expand_array: {
+        struct evtchn_expand_array expand_array;
+        if ( copy_from_guest(&expand_array, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_expand_array(&expand_array);
+        break;
+    }
+
+    case EVTCHNOP_set_priority: {
+        struct evtchn_set_priority set_priority;
+        if ( copy_from_guest(&set_priority, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_set_priority(&set_priority);
+        break;
+    }
+
+    case EVTCHNOP_set_limit: {
+        struct evtchn_set_limit set_limit;
+        if ( copy_from_guest(&set_limit, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_set_limit(&set_limit);
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
@@ -1214,6 +1249,8 @@ void evtchn_destroy(struct domain *d)
         (void)__evtchn_close(d, i);
     }
 
+    evtchn_fifo_destroy(d);
+
     /* Free all event-channel buckets. */
     spin_lock(&d->event_lock);
     for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c
new file mode 100644
index 0000000..a7b5dc9
--- /dev/null
+++ b/xen/common/event_fifo.c
@@ -0,0 +1,491 @@
+/*
+ * FIFO event channel management.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ * 
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/event_fifo.h>
+#include <xen/paging.h>
+#include <xen/mm.h>
+
+#include <public/event_channel.h>
+
+static inline event_word_t *evtchn_fifo_word_from_port(struct domain *d,
+                                                       unsigned port)
+{
+    unsigned p, w;
+
+    if ( unlikely(port >= d->evtchn_fifo->num_evtchns) )
+        return NULL;
+
+    p = port / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    w = port % EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return d->evtchn_fifo->event_array[p].virt + w;
+}
+
+static bool_t evtchn_fifo_set_link(event_word_t *word, uint32_t link)
+{
+    event_word_t n, o, w;
+
+    w = *word;
+
+    do {
+        if ( !(w & (1 << EVTCHN_FIFO_LINKED)) )
+            return 0;
+        o = w;
+        n = (w & ~EVTCHN_FIFO_LINK_MASK) | link;
+    } while ( (w = cmpxchg(word, o, n)) != o );
+
+    return 1;
+}
+
+static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
+{
+    struct domain *d = v->domain;
+    unsigned port;
+    event_word_t *word;
+    struct evtchn_fifo_queue *q;
+    unsigned long flags;
+    bool_t was_pending;
+
+    port = evtchn->port;
+    word = evtchn_fifo_word_from_port(d, port);
+    if ( unlikely(!word) )
+        return;
+
+    /*
+     * No locking around getting the queue. This may race with
+     * changing the priority but we are allowed to signal the event
+     * once on the old priority.
+     */
+    q = evtchn->queue;
+
+    was_pending = test_and_set_bit(EVTCHN_FIFO_PENDING, word);
+
+    /*
+     * Link the event if it unmasked and not already linked.
+     */
+    if ( !test_bit(EVTCHN_FIFO_MASKED, word)
+         && !test_and_set_bit(EVTCHN_FIFO_LINKED, word) )
+    {
+        event_word_t *tail_word;
+        bool_t linked = 0;
+
+        spin_lock_irqsave(&q->lock, flags);
+
+        /*
+         * Atomically link the tail to port iff the tail is linked.
+         * If the tail is unlinked the queue is empty.
+         *
+         * If port is the same as tail, the queue is empty but q->tail
+         * will appear linked as we just set LINKED above.
+         *
+         * If the queue is empty (i.e., we haven't linked to the new
+         * event), head must be updated.
+         */
+        if ( port != q->tail )
+        {
+            tail_word = evtchn_fifo_word_from_port(d, q->tail);
+            linked = evtchn_fifo_set_link(tail_word, port);
+        }
+        if ( !linked )
+            write_atomic(q->head, port);
+        q->tail = port;
+
+        spin_unlock_irqrestore(&q->lock, flags);
+
+        if ( !test_and_set_bit(q->priority, 
&v->evtchn_fifo->control_block->ready) )
+            vcpu_mark_events_pending(v);
+    }
+
+    if ( !was_pending )
+        evtchn_check_pollers(d, port);
+}
+
+static void evtchn_fifo_clear_pending(struct domain *d, struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return;
+
+    /*
+     * Just clear the P bit.
+     *
+     * No need to unlink as the guest will unlink and ignore
+     * non-pending events.
+     */
+    clear_bit(EVTCHN_FIFO_PENDING, word);
+}
+
+static void evtchn_fifo_unmask(struct domain *d, struct evtchn *evtchn)
+{
+    struct vcpu *v = d->vcpu[evtchn->notify_vcpu_id];
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return;
+
+    clear_bit(EVTCHN_FIFO_MASKED, word);
+
+    /* Relink if pending. */
+    if ( test_bit(EVTCHN_FIFO_PENDING, word ) )
+        evtchn_fifo_set_pending(v, evtchn);
+}
+
+static bool_t evtchn_fifo_is_pending(struct domain *d,
+                                     const struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return 0;
+
+    return test_bit(EVTCHN_FIFO_PENDING, word);
+}
+
+static bool_t evtchn_fifo_is_masked(struct domain *d,
+                                    const struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return 1;
+
+    return test_bit(EVTCHN_FIFO_MASKED, word);
+}
+
+static void evtchn_fifo_bind_to_vcpu(struct domain *d, struct evtchn *evtchn,
+                                     struct vcpu *v)
+{
+    unsigned priority;
+
+    /* Keep the same priority if possible, otherwise use the
+       default. */
+    if ( evtchn->queue )
+        priority = evtchn->queue->priority;
+    else
+        priority = EVTCHN_FIFO_PRIORITY_DEFAULT;
+
+    evtchn->queue = &v->evtchn_fifo->queue[priority];
+}
+
+static const struct evtchn_port_ops evtchn_port_ops_fifo =
+{
+    .set_pending   = evtchn_fifo_set_pending,
+    .clear_pending = evtchn_fifo_clear_pending,
+    .unmask        = evtchn_fifo_unmask,
+    .is_pending    = evtchn_fifo_is_pending,
+    .is_masked     = evtchn_fifo_is_masked,
+    .bind_to_vcpu  = evtchn_fifo_bind_to_vcpu,
+};
+
+static int map_guest_page(struct domain *d, uint64_t gfn,
+                          struct page_info **page, void **virt)
+{
+    struct page_info *p;
+
+    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+    if ( !p )
+        return -EINVAL;
+
+    if ( !get_page_type(p, PGT_writable_page) )
+    {
+        put_page(p);
+        return -EINVAL;
+    }
+
+    *virt = map_domain_page_global(gfn);
+    if ( *virt == NULL )
+    {
+        put_page_and_type(p);
+        return -ENOMEM;
+    }
+    *page = p;
+    return 0;
+}
+
+static void unmap_guest_page(struct page_info *page, void *virt)
+{
+    if ( page == NULL )
+        return;
+
+    unmap_domain_page_global(virt);
+    put_page_and_type(page);
+}
+
+static void cleanup_control_block(struct vcpu *v)
+{
+    if ( v->evtchn_fifo )
+    {
+        unmap_guest_page(v->evtchn_fifo->cb_page, 
v->evtchn_fifo->control_block);
+        xfree(v->evtchn_fifo);
+        v->evtchn_fifo = NULL;
+    }
+}
+
+static void init_queue(struct vcpu *v, struct evtchn_fifo_queue *q, unsigned i)
+{
+    spin_lock_init(&q->lock);
+    q->priority = i;
+    q->head = &v->evtchn_fifo->control_block->head[i];
+}
+
+static int setup_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
+{
+    struct domain *d = v->domain;
+    struct evtchn_fifo_vcpu *efv;
+    struct page_info *page;
+    void *virt;
+    unsigned i;
+    int rc;
+
+    if ( v->evtchn_fifo )
+        return -EINVAL;
+
+    efv = xzalloc(struct evtchn_fifo_vcpu);
+    if ( efv == NULL )
+        return -ENOMEM;
+
+    rc = map_guest_page(d, gfn, &page, &virt);
+    if ( rc < 0 )
+    {
+        xfree(efv);
+        return rc;
+    }
+
+    v->evtchn_fifo = efv;
+
+    v->evtchn_fifo->cb_page       = page;
+    v->evtchn_fifo->control_block = virt + offset;
+
+    for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
+        init_queue(v, &v->evtchn_fifo->queue[i], i);
+ 
+    return 0;
+}
+
+/*
+ * Setup an event array with no pages.
+ */
+static int setup_event_array(struct domain *d)
+{
+    if ( d->evtchn_fifo )
+        return 0;
+
+    d->evtchn_fifo = xzalloc(struct evtchn_fifo_domain);
+    if ( d->evtchn_fifo == NULL )
+        return -ENOMEM;
+
+    d->evtchn_fifo->num_evtchns = 0;
+
+    return 0;
+}
+
+/*
+ * Some ports may already be bound, bind them to the correct VCPU so
+ * they have a valid queue.
+ *
+ * Note: any events that are currently pending will not be resent and
+ * will be lost.
+ */
+static void rebind_all_ports(struct domain *d)
+{
+    unsigned port;
+
+    for ( port = 1; port < d->max_evtchns; port++ )
+    {
+        struct evtchn *evtchn;
+
+        if ( !port_is_valid(d, port) )
+            break;
+
+        evtchn = evtchn_from_port(d, port);
+        switch ( evtchn->state )
+        {
+        case ECS_INTERDOMAIN:
+        case ECS_PIRQ:
+        case ECS_VIRQ:
+            evtchn_port_bind_to_vcpu(d, evtchn, 
d->vcpu[evtchn->notify_vcpu_id]);
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+static void cleanup_event_array(struct domain *d)
+{
+    unsigned i;
+
+    if ( d->evtchn_fifo == NULL )
+        return;
+
+    for ( i = 0; i < EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES; i++ )
+    {
+        unmap_guest_page(d->evtchn_fifo->event_array[i].page,
+                         d->evtchn_fifo->event_array[i].virt);
+    }
+    xfree(d->evtchn_fifo);
+}
+
+int evtchn_fifo_init_control(struct evtchn_init_control *init_control)
+{
+    struct domain *d = current->domain;
+    uint32_t vcpu_id;
+    uint64_t gfn;
+    uint32_t offset;
+    struct vcpu *v;
+    int rc;
+
+    init_control->link_bits = EVTCHN_FIFO_LINK_BITS;
+
+    vcpu_id = init_control->vcpu;
+    gfn     = init_control->control_mfn;
+    offset  = init_control->offset;
+
+    if ( (vcpu_id >= d->max_vcpus) || (d->vcpu[vcpu_id] == NULL) )
+        return -ENOENT;
+    v = d->vcpu[vcpu_id];
+
+    /* Must not cross page boundary. */
+    if ( offset > (PAGE_SIZE - sizeof(evtchn_fifo_control_block_t)) )
+        return -EINVAL;
+
+    /* Must be 8-bytes aligned. */
+    if ( offset & (8 - 1) )
+        return -EINVAL;
+
+    spin_lock(&d->event_lock);
+
+    rc = setup_control_block(v, gfn, offset);
+
+    /* If this is the first control block, setup an empty event array
+       and switch to the fifo port ops. */
+    if ( d->evtchn_fifo == NULL )
+    {
+        rc = setup_event_array(d);
+        if ( rc < 0 )
+            cleanup_control_block(v);
+        else
+        {
+            d->evtchn_port_ops = &evtchn_port_ops_fifo;
+            d->max_evtchns = 1 << EVTCHN_FIFO_LINK_BITS;
+            rebind_all_ports(d);
+        }
+    }
+
+    spin_unlock(&d->event_lock);
+
+    return rc;
+}
+
+static int add_page_to_event_array(struct domain *d, unsigned long gfn)
+{
+    struct page_info *page = NULL;
+    void *virt;
+    unsigned slot;
+    int rc;
+
+    slot = d->evtchn_fifo->num_evtchns / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    if ( slot >= EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES )
+        return -ENOSPC;
+
+    rc = map_guest_page(d, gfn, &page, &virt);
+    if ( rc < 0 )
+        return rc;
+
+    d->evtchn_fifo->event_array[slot].page = page;
+    d->evtchn_fifo->event_array[slot].virt = virt;
+
+    d->evtchn_fifo->num_evtchns += EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return 0;
+}
+
+int evtchn_fifo_expand_array(struct evtchn_expand_array *expand_array)
+{
+    struct domain *d = current->domain;
+    int rc;
+
+    spin_lock(&d->event_lock);
+    rc = add_page_to_event_array(d, expand_array->array_mfn);
+    spin_unlock(&d->event_lock);
+
+    return rc;
+}
+
+int evtchn_fifo_set_priority(struct evtchn_set_priority *set_priority)
+{
+    struct domain *d = current->domain;
+    struct vcpu *v;
+    uint32_t priority;
+    unsigned port;
+    struct evtchn *evtchn;
+
+    priority = set_priority->priority;
+    port     = set_priority->port;
+    
+    if ( priority > EVTCHN_FIFO_PRIORITY_MIN )
+        return -EINVAL;
+
+    spin_lock(&d->event_lock);
+
+    if ( !port_is_valid(d, port) )
+    {
+        spin_unlock(&d->event_lock);
+        return -EINVAL;
+    }
+
+    /*
+     * Switch to the new queue for future events. If the event is
+     * already pending or in the process of being linked it will be on
+     * the old queue -- this is fine.
+     */
+    evtchn = evtchn_from_port(d, port);
+    v = d->vcpu[evtchn->notify_vcpu_id];
+    evtchn->queue = &v->evtchn_fifo->queue[priority];
+
+    spin_unlock(&d->event_lock);
+
+    return 0;
+}
+
+int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit)
+{
+    /* FIXME: not supported yet. */
+    return -ENOSYS;
+}
+
+void evtchn_fifo_destroy(struct domain *d)
+{
+    struct vcpu *v;
+
+    for_each_vcpu( d, v )
+        cleanup_control_block(v);
+    cleanup_event_array(d);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index a795ae6..3a71956 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -127,12 +127,14 @@ struct evtchn_port_ops {
                          struct vcpu *vcpu);
 };
 
-static inline void evtchn_port_set_pending(struct vcpu *v, struct evtchn 
*evtchn)
+static inline void evtchn_port_set_pending(struct vcpu *v,
+                                           struct evtchn *evtchn)
 {
     v->domain->evtchn_port_ops->set_pending(v, evtchn);
 }
 
-static inline void evtchn_port_clear_pending(struct domain *d, struct evtchn 
*evtchn)
+static inline void evtchn_port_clear_pending(struct domain *d,
+                                             struct evtchn *evtchn)
 {
     d->evtchn_port_ops->clear_pending(d, evtchn);
 }
diff --git a/xen/include/xen/event_fifo.h b/xen/include/xen/event_fifo.h
new file mode 100644
index 0000000..2d7b7d0
--- /dev/null
+++ b/xen/include/xen/event_fifo.h
@@ -0,0 +1,56 @@
+/*
+ * FIFO-based event channel ABI.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+#ifndef __XEN_EVENT_FIFO_H__
+#define __XEN_EVENT_FIFO_H__
+
+struct evtchn_fifo_queue {
+    uint32_t *head; /* points into control block */
+    uint32_t tail;
+    spinlock_t lock;
+    uint8_t priority;
+};
+
+struct evtchn_fifo_vcpu {
+    struct page_info *cb_page;
+    struct evtchn_fifo_control_block *control_block;
+    struct evtchn_fifo_queue queue[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+#define EVTCHN_FIFO_EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+#define EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES \
+    ((1 << EVTCHN_FIFO_LINK_BITS) / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE)
+
+
+struct evtchn_fifo_array_page {
+    struct page_info *page;
+    event_word_t *virt;
+};
+
+struct evtchn_fifo_domain {
+    struct evtchn_fifo_array_page 
event_array[EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES];
+    unsigned num_evtchns;
+};
+
+int evtchn_fifo_init_control(struct evtchn_init_control *init_control);
+int evtchn_fifo_expand_array(struct evtchn_expand_array *expand_array);
+int evtchn_fifo_set_priority(struct evtchn_set_priority *set_priority);
+int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit);
+void evtchn_fifo_destroy(struct domain *domain);
+
+#endif /* __XEN_EVENT_FIFO_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index f95cf99..4c5cc20 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -96,6 +96,7 @@ struct evtchn
         } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
+    struct evtchn_fifo_queue *queue;
 #ifdef FLASK_ENABLE
     void *ssid;
 #endif
@@ -210,6 +211,8 @@ struct vcpu
     /* Guest-specified relocation of vcpu_info. */
     unsigned long vcpu_info_mfn;
 
+    struct evtchn_fifo_vcpu *evtchn_fifo;
+
     struct arch_vcpu arch;
 };
 
@@ -290,6 +293,7 @@ struct domain
     unsigned         max_evtchns;
     spinlock_t       event_lock;
     const struct evtchn_port_ops *evtchn_port_ops;
+    struct evtchn_fifo_domain *evtchn_fifo;
 
     struct grant_table *grant_table;
 
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.