
Re: [Xen-devel] [PATCH 12/12] xen/events: use the FIFO-based ABI if available



On Fri, Aug 09, 2013 at 07:15:21PM +0100, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
> 
> If the hypervisor supports the FIFO-based ABI, enable it by
> initializing the control block for the boot VCPU and subsequent VCPUs
> as they are brought up.  The event array is expanded as required when
> event ports are setup.
> 
> This implementation has some known limitations:
> 
> - Migration will not work as the control blocks or event arrays are
>   not remapped by Xen at the destination.
> 

Is it possible to hook into xen_vcpu_restore to make it work?
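
Something along these lines perhaps (untested sketch; it assumes
fifo_init_control_block() is reworked so it can re-register an
already-allocated control block after migration):

        /*
         * Untested sketch: re-register each online VCPU's control
         * block with the new hypervisor on restore, e.g. called from
         * (or near) xen_vcpu_restore().  Error handling kept minimal.
         */
        static void fifo_resume(void)
        {
                int cpu;

                for_each_online_cpu(cpu) {
                        if (fifo_init_control_block(cpu))
                                pr_warn("xen: CPU%d control block re-init failed\n",
                                        cpu);
                }
        }

The event array pages would presumably need to be re-registered with
EVTCHNOP_expand_array in the same way.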

> - The timer VIRQ which previously was treated as the highest priority
>   event has the default priority.
> 

So what's missing from the series is a patch that allows the kernel to
actually make use of the priority queues? IMHO it's not just about the
timer VIRQ. How do you plan to expose this interface to drivers?
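
If the plan is to use the EVTCHNOP_set_priority sub-op from the FIFO
ABI design, I would have expected a helper roughly like the one below
(hypothetical name and signature, not part of this series):

        /*
         * Hypothetical helper exposing event channel priorities to
         * drivers.  Assumes the EVTCHNOP_set_priority sub-op and
         * struct evtchn_set_priority from the FIFO ABI design.
         */
        int xen_set_irq_priority(unsigned irq, unsigned priority)
        {
                struct evtchn_set_priority set_priority;

                set_priority.port = evtchn_from_irq(irq);
                set_priority.priority = priority;

                return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
                                                   &set_priority);
        }

The timer code could then ask for the highest priority explicitly
instead of relying on special treatment.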

> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  drivers/xen/events/Makefile          |    1 +
>  drivers/xen/events/events.c          |    7 +-
>  drivers/xen/events/events_internal.h |    1 +
>  drivers/xen/events/fifo.c            |  335 ++++++++++++++++++++++++++++++++++
>  4 files changed, 343 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/xen/events/fifo.c
> 
> diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
> index aea331e..74644d0 100644
> --- a/drivers/xen/events/Makefile
> +++ b/drivers/xen/events/Makefile
> @@ -1,2 +1,3 @@
>  obj-y += events.o
> +obj-y += fifo.o
>  obj-y += n-level.o
> diff --git a/drivers/xen/events/events.c b/drivers/xen/events/events.c
> index 55d33d1..2e7416e 100644
> --- a/drivers/xen/events/events.c
> +++ b/drivers/xen/events/events.c
> @@ -1589,8 +1589,13 @@ void xen_callback_vector(void) {}
>  void __init xen_init_IRQ(void)
>  {
>       int i;
> +     int ret;
>  
> -     xen_evtchn_init_nlevel();
> +     ret = xen_evtchn_init_fifo();
> +     if (ret < 0) {
> +             printk(KERN_INFO "xen: falling back to n-level event channels\n");
> +             xen_evtchn_init_nlevel();
> +     }
>  
>       evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
>                               sizeof(*evtchn_to_irq), GFP_KERNEL);
> diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
> index 9d8b70c..e14f616 100644
> --- a/drivers/xen/events/events_internal.h
> +++ b/drivers/xen/events/events_internal.h
> @@ -138,5 +138,6 @@ static inline void xen_evtchn_handle_events(int cpu)
>  }
>  
>  void xen_evtchn_init_nlevel(void);
> +int xen_evtchn_init_fifo(void);
>  
>  #endif /* #ifndef __EVENTS_INTERNAL_H__ */
> diff --git a/drivers/xen/events/fifo.c b/drivers/xen/events/fifo.c
> new file mode 100644
> index 0000000..b07a603
> --- /dev/null
> +++ b/drivers/xen/events/fifo.c
> @@ -0,0 +1,335 @@
> +/*
> + * Xen event channels (FIFO-based ABI)
> + *
> + * Copyright (C) 2013 Citrix Systems R&D ltd.
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2 or later.  See the file COPYING for more details.
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/smp.h>
> +#include <linux/percpu.h>
> +#include <linux/cpu.h>
> +
> +#include <asm/sync_bitops.h>
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/page.h>
> +
> +#include <xen/xen.h>
> +#include <xen/xen-ops.h>
> +#include <xen/events.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/event_channel.h>
> +
> +#include "events_internal.h"
> +
> +#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
> +#define MAX_EVENT_ARRAY_PAGES ((1 << EVTCHN_FIFO_LINK_BITS)  \
> +                            / EVENT_WORDS_PER_PAGE)
> +
> +struct evtchn_fifo_queue {
> +     uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
> +};
> +
> +static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
> +static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
> +static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES];
> +static unsigned event_array_pages;
> +
> +#define BM(w) ((unsigned long *)(w))
> +
> +static inline event_word_t *event_word_from_port(int port)
> +{
> +     int i = port / EVENT_WORDS_PER_PAGE;
> +
> +     if (i >= event_array_pages)
> +             return NULL;
> +     return event_array[i] + port % EVENT_WORDS_PER_PAGE;
> +}
> +
> +static unsigned fifo_max_channels(void)
> +{
> +     return 1 << EVTCHN_FIFO_LINK_BITS;
> +}
> +
> +static unsigned fifo_nr_channels(void)
> +{
> +     return event_array_pages * EVENT_WORDS_PER_PAGE;
> +}
> +
> +static int fifo_setup(struct irq_info *info)
> +{
> +     int port = info->evtchn;
> +     int i;
> +     int ret = -ENOMEM;
> +
> +     i = port / EVENT_WORDS_PER_PAGE;
> +
> +     if (i >= MAX_EVENT_ARRAY_PAGES)
> +             return -EINVAL;
> +
> +     while (i >= event_array_pages) {
> +             struct page *array_page = NULL;
> +             struct evtchn_expand_array expand_array;
> +
> +             array_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +             if (array_page == NULL)
> +                     goto error;
> +
> +             expand_array.array_mfn = virt_to_mfn(page_address(array_page));
> +
> +             ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
> +             if (ret < 0) {
> +                     __free_page(array_page);
> +                     goto error;
> +             }
> +
> +             event_array[event_array_pages++] = page_address(array_page);
> +     }
> +     return 0;
> +
> +  error:
> +     if (event_array_pages == 0)
> +             panic("xen: unable to expand event array with initial page (%d)\n", ret);
> +     else
> +             printk(KERN_ERR "xen: unable to expand event array (%d)\n", ret);
> +     return ret;
> +}
> +
> +static void fifo_bind_to_cpu(struct irq_info *info, int cpu)
> +{
> +     /* no-op */
> +}
> +
> +static void fifo_clear_pending(int port)
> +{
> +     event_word_t *word = event_word_from_port(port);
> +     sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static void fifo_set_pending(int port)
> +{
> +     event_word_t *word = event_word_from_port(port);
> +     sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static bool fifo_is_pending(int port)
> +{
> +     event_word_t *word = event_word_from_port(port);
> +     return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static bool fifo_test_and_set_mask(int port)
> +{
> +     event_word_t *word = event_word_from_port(port);
> +     return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
> +}
> +
> +static void fifo_mask(int port)
> +{
> +     event_word_t *word = event_word_from_port(port);
> +     if (word)
> +             sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
> +}
> +
> +static void fifo_unmask(int port)
> +{
> +     unsigned int cpu = get_cpu();
> +     bool do_hypercall = false;
> +     bool evtchn_pending = false;
> +
> +     BUG_ON(!irqs_disabled());
> +
> +     if (unlikely((cpu != cpu_from_evtchn(port))))
> +             do_hypercall = true;
> +     else {
> +             event_word_t *word = event_word_from_port(port);
> +
> +             sync_clear_bit(EVTCHN_FIFO_MASKED, BM(word));
> +             evtchn_pending = sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
> +             if (evtchn_pending)
> +                     do_hypercall = true;
> +     }
> +
> +     if (do_hypercall) {
> +             struct evtchn_unmask unmask = { .port = port };
> +             (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
> +     }
> +
> +     put_cpu();
> +}
> +
> +static uint32_t clear_linked(volatile event_word_t *word)
> +{
> +    event_word_t n, o, w;
> +
> +    w = *word;
> +
> +    do {
> +        o = w;
> +        n = (w & ~((1 << EVTCHN_FIFO_LINKED) | EVTCHN_FIFO_LINK_MASK));
> +    } while ( (w = sync_cmpxchg(word, o, n)) != o );
> +
> +    return w & EVTCHN_FIFO_LINK_MASK;
> +}
> +
> +static void handle_irq_for_port(int port)
> +{
> +     int irq;
> +     struct irq_desc *desc;
> +
> +     irq = get_evtchn_to_irq(port);
> +     if (irq != -1) {
> +             desc = irq_to_desc(irq);
> +             if (desc)
> +                     generic_handle_irq_desc(irq, desc);
> +     }
> +}
> +
> +static void consume_one_event(int cpu,
> +                           struct evtchn_fifo_control_block *control_block,
> +                           int priority, uint32_t *ready)
> +{
> +     struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
> +     uint32_t head;
> +     int port;
> +     event_word_t *word;
> +
> +     head = q->head[priority];
> +
> +     /* Reached the tail last time?  Read the new HEAD from the
> +        control block. */
> +     if (head == 0) {
> +             rmb(); /* Ensure word is up-to-date before reading head. */
> +             head = control_block->head[priority];
> +     }
> +
> +     port = head;
> +     word = event_word_from_port(port);
> +     head = clear_linked(word);
> +
> +     /*
> +      * If the link is non-zero, there are more events in the
> +      * queue, otherwise the queue is empty.
> +      *
> +      * If the queue is empty, clear this priority from our local
> +      * copy of the ready word.
> +      */
> +     if (head == 0)
> +             clear_bit(priority, BM(ready));
> +
> +     if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
> +         && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
> +             handle_irq_for_port(port);
> +
> +     q->head[priority] = head;
> +}
> +
> +#define EVTCHN_FIFO_READY_MASK ((1 << EVTCHN_FIFO_MAX_QUEUES) - 1)
> +
> +static void fifo_handle_events(int cpu)
> +{
> +     struct evtchn_fifo_control_block *control_block;
> +     uint32_t ready;
> +     int q;
> +
> +     control_block = per_cpu(cpu_control_block, cpu);
> +
> +     ready = xchg(&control_block->ready, 0);
> +
> +     while (ready & EVTCHN_FIFO_READY_MASK) {
> +             q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
> +             consume_one_event(cpu, control_block, q, &ready);
> +             ready |= xchg(&control_block->ready, 0);
> +     }
> +}
> +
> +static const struct evtchn_ops evtchn_ops_fifo = {
> +     .max_channels      = fifo_max_channels,
> +     .nr_channels       = fifo_nr_channels,
> +     .setup             = fifo_setup,
> +     .bind_to_cpu       = fifo_bind_to_cpu,
> +     .clear_pending     = fifo_clear_pending,
> +     .set_pending       = fifo_set_pending,
> +     .is_pending        = fifo_is_pending,
> +     .test_and_set_mask = fifo_test_and_set_mask,
> +     .mask              = fifo_mask,
> +     .unmask            = fifo_unmask,
> +     .handle_events     = fifo_handle_events,
> +};
> +
> +static int __cpuinit fifo_init_control_block(int cpu)
> +{
> +     struct page *control_block = NULL;
> +     struct evtchn_init_control init_control;
> +     int ret = -ENOMEM;
> +
> +     control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +     if (control_block == NULL)
> +             goto error;
> +
> +     init_control.control_mfn = virt_to_mfn(page_address(control_block));
> +     init_control.offset      = 0;
> +     init_control.vcpu        = cpu;
> +
> +     ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
> +     if (ret < 0)
> +             goto error;
> +
> +     per_cpu(cpu_control_block, cpu) = page_address(control_block);
> +
> +     return 0;
> +
> +  error:
> +     __free_page(control_block);
> +     return ret;
> +}
> +
> +static int __cpuinit fifo_cpu_notification(struct notifier_block *self,
> +                                        unsigned long action, void *hcpu)
> +{
> +     int cpu = (long)hcpu;
> +     int ret = 0;
> +
> +     switch (action) {
> +     case CPU_UP_PREPARE:
> +             ret = fifo_init_control_block(cpu);
> +             break;

On the hypervisor side, fifo_init_control_block would return -EINVAL if
this CPU already has a mapped control block.

Do you need to tear down the control block when you offline a CPU? (That
would mean another sub-op for the interface)
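
Something like the sketch below, say (purely hypothetical:
EVTCHNOP_fini_control and struct evtchn_fini_control do not exist in
the proposed ABI):

        /*
         * Hypothetical teardown for CPU offline, e.g. from a CPU_DEAD
         * notifier case.  Requires a new EVTCHNOP_fini_control sub-op
         * that unmaps the VCPU's control block in the hypervisor.
         */
        static void fifo_fini_control_block(int cpu)
        {
                struct evtchn_fini_control fini_control = { .vcpu = cpu };

                HYPERVISOR_event_channel_op(EVTCHNOP_fini_control,
                                            &fini_control);
                free_page((unsigned long)per_cpu(cpu_control_block, cpu));
                per_cpu(cpu_control_block, cpu) = NULL;
        }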

> +     default:
> +             break;
> +     }
> +     return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
> +}
> +
> +static struct notifier_block fifo_cpu_notifier __cpuinitdata = {
> +     .notifier_call  = fifo_cpu_notification,
> +};
> +
> +
> +int __init xen_evtchn_init_fifo(void)
> +{
> +     int cpu = get_cpu();
> +     int ret;
> +
> +     ret = fifo_init_control_block(cpu);
> +     if (ret < 0)
> +             goto error;
> +
> +     printk(KERN_INFO "xen: switching to FIFO-based event channels\n");
> +
> +     evtchn_ops = &evtchn_ops_fifo;
> +
> +     register_cpu_notifier(&fifo_cpu_notifier);
> +
> +     put_cpu();
> +     return 0;
> +
> +  error:
> +     put_cpu();
> +     return ret;

Could be written as:

        if (ret < 0)
                goto out;

        ...
        register_cpu_notifier();

        ret = 0;

out:
        put_cpu();
        return ret;

It is only a matter of personal taste though.


Wei.

> +}
> -- 
> 1.7.2.5
