[Xen-devel] [PATCH v5 2/5] x86/hpet: Use single apic vector rather than irq_descs for HPET interrupts



This involves rewriting most of the MSI-related HPET code, and as a result
this patch looks very complicated.  It is probably best viewed as an end
result, with the following notes explaining what is going on.

The new logic is as follows:
 * A single high priority vector is allocated and used on all cpus.
 * Reliance on the irq infrastructure is completely removed.
 * Tracking of free hpet channels has changed.  They are now tracked in a
   single bitmap, and allocation is based on winning a test_and_clear_bit()
   operation (see the sketch after this list).
 * There is a notion of strict ownership of hpet channels.
 ** A cpu which owns an HPET channel can program it for a desired deadline.
 ** A cpu which can't find a free HPET channel will have to share:
 ** If an HPET firing at an appropriate time can be found (up to 20us late), a
    CPU will simply request to be woken up by that HPET.
 ** Failing to find an appropriately-timed HPET, a CPU shall find the
    soonest-firing late HPET and program it to fire earlier.
 ** Failing any late HPETs, a CPU shall take its wakeup from the latest early
    HPET it can find.
 ** Failing all else, a CPU shall retry the search for a free HPET.  This
    guarantees that a CPU will never leave hpet_broadcast_enter() without
    arranging an interrupt.
 * Some functions have been renamed to be more descriptive, and some have had
   their parameters changed for consistency.
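
As an illustration of the allocation step above, here is a minimal sketch
(not part of the patch; the function name is illustrative).  It mirrors
hpet_get_free_channel() from the diff below, and assumes Xen's ffs() and
test_and_clear_bit() primitives:

    /* Claim a free channel, or return NULL if none can be won. */
    static struct hpet_event_channel *sketch_get_free_channel(void)
    {
        unsigned int ch, tries;

        for ( tries = num_hpets_used; tries; --tries )
        {
            /* ffs() is 1-based; 0 means the free bitmap is empty. */
            if ( (ch = ffs(free_channels)) == 0 )
                break;
            --ch;

            /* Several cpus can race on the same bit; only one wins it. */
            if ( test_and_clear_bit(ch, &free_channels) )
                return &hpet_events[ch];
        }

        return NULL; /* Caller must fall back to sharing, as described. */
    }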

Contains a folded half bugfix from Frediano:
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Keir Fraser <keir@xxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/hpet.c |  622 ++++++++++++++++++++++++---------------------------
 1 file changed, 294 insertions(+), 328 deletions(-)

diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c
index d7bc29f..441a3cf 100644
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -4,26 +4,21 @@
  * HPET management.
  */
 
-#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpuidle.h>
 #include <xen/errno.h>
-#include <xen/time.h>
-#include <xen/timer.h>
-#include <xen/smp.h>
 #include <xen/softirq.h>
-#include <xen/irq.h>
-#include <xen/numa.h>
+
+#include <mach_apic.h>
+
 #include <asm/fixmap.h>
 #include <asm/div64.h>
 #include <asm/hpet.h>
-#include <asm/msi.h>
-#include <mach_apic.h>
-#include <xen/cpuidle.h>
 
 #define MAX_DELTA_NS MILLISECS(10*1000)
 #define MIN_DELTA_NS MICROSECS(20)
 
-#define HPET_EVT_USED_BIT    0
-#define HPET_EVT_USED       (1 << HPET_EVT_USED_BIT)
 #define HPET_EVT_DISABLE_BIT 1
 #define HPET_EVT_DISABLE    (1 << HPET_EVT_DISABLE_BIT)
 #define HPET_EVT_LEGACY_BIT  2
@@ -36,8 +31,6 @@ struct hpet_event_channel
     s_time_t      next_event;
     cpumask_var_t cpumask;
     spinlock_t    lock;
-    void          (*event_handler)(struct hpet_event_channel *);
-
     unsigned int idx;   /* physical channel idx */
     unsigned int cpu;   /* msi target */
     struct msi_desc msi;/* msi state */
@@ -48,8 +41,20 @@ static struct hpet_event_channel *__read_mostly hpet_events;
 /* msi hpet channels used for broadcast */
 static unsigned int __read_mostly num_hpets_used;
 
-DEFINE_PER_CPU(struct hpet_event_channel *, cpu_bc_channel);
+/* High-priority vector for HPET interrupts */
+static u8 __read_mostly hpet_vector;
 
+/*
+ * HPET channel used for idling.  Either the HPET channel this cpu owns
+ * (indicated by channel->cpu pointing back), or the HPET channel belonging to
+ * another cpu with which we have requested to be woken.
+ */
+static DEFINE_PER_CPU(struct hpet_event_channel *, hpet_channel);
+
+/* Bitmap of currently-free HPET channels. */
+static uint32_t free_channels;
+
+/* Data from the HPET ACPI table */
 unsigned long __initdata hpet_address;
 u8 __initdata hpet_blockid;
 
@@ -161,89 +166,43 @@ static int hpet_program_time(struct hpet_event_channel *ch,
     return ret;
 }
 
-static void evt_do_broadcast(cpumask_t *mask)
+/* Wake up all cpus in the channel mask.  Lock should be held. */
+static void hpet_wake_cpus(struct hpet_event_channel *ch)
 {
-    unsigned int cpu = smp_processor_id();
-
-    if ( cpumask_test_and_clear_cpu(cpu, mask) )
-        raise_softirq(TIMER_SOFTIRQ);
-
-    cpuidle_wakeup_mwait(mask);
-
-    if ( !cpumask_empty(mask) )
-       cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
+    cpuidle_wakeup_mwait(ch->cpumask);
+    cpumask_raise_softirq(ch->cpumask, TIMER_SOFTIRQ);
 }
 
-static void handle_hpet_broadcast(struct hpet_event_channel *ch)
+/* HPET interrupt handler.  Wake all requested cpus.  Lock should be held. */
+static void hpet_interrupt_handler(struct hpet_event_channel *ch)
 {
-    cpumask_t mask;
-    s_time_t now, next_event;
-    unsigned int cpu;
-    unsigned long flags;
-
-    spin_lock_irqsave(&ch->lock, flags);
-
-again:
-    ch->next_event = STIME_MAX;
-
-    spin_unlock_irqrestore(&ch->lock, flags);
-
-    next_event = STIME_MAX;
-    cpumask_clear(&mask);
-    now = NOW();
-
-    /* find all expired events */
-    for_each_cpu(cpu, ch->cpumask)
-    {
-        s_time_t deadline;
-
-        rmb();
-        deadline = per_cpu(timer_deadline, cpu);
-        rmb();
-        if ( !cpumask_test_cpu(cpu, ch->cpumask) )
-            continue;
-
-        if ( deadline <= now )
-            cpumask_set_cpu(cpu, &mask);
-        else if ( deadline < next_event )
-            next_event = deadline;
-    }
-
-    /* wakeup the cpus which have an expired event. */
-    evt_do_broadcast(&mask);
-
-    if ( next_event != STIME_MAX )
-    {
-        spin_lock_irqsave(&ch->lock, flags);
-
-        if ( next_event < ch->next_event &&
-             hpet_program_time(ch, next_event, now, 0) )
-            goto again;
-
-        spin_unlock_irqrestore(&ch->lock, flags);
-    }
+    hpet_wake_cpus(ch);
+    raise_softirq(TIMER_SOFTIRQ);
 }
 
-static void hpet_interrupt_handler(int irq, void *data,
-        struct cpu_user_regs *regs)
+/* HPET interrupt entry.  This is set up as a high priority vector. */
+static void do_hpet_irq(struct cpu_user_regs *regs)
 {
-    struct hpet_event_channel *ch = (struct hpet_event_channel *)data;
-
-    this_cpu(irq_count)--;
+    struct hpet_event_channel *ch = this_cpu(hpet_channel);
 
-    if ( !ch->event_handler )
+    if ( ch )
     {
-        printk(XENLOG_WARNING "Spurious HPET timer interrupt on HPET timer %d\n", ch->idx);
-        return;
+        spin_lock(&ch->lock);
+        if ( ch->cpu == smp_processor_id() )
+        {
+            ch->next_event = 0;
+            hpet_interrupt_handler(ch);
+        }
+        spin_unlock(&ch->lock);
     }
 
-    ch->event_handler(ch);
+    ack_APIC_irq();
 }
 
-static void hpet_msi_unmask(struct irq_desc *desc)
+/* Unmask an HPET MSI channel.  Lock should be held */
+static void hpet_msi_unmask(struct hpet_event_channel *ch)
 {
     u32 cfg;
-    struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg |= HPET_TN_ENABLE;
@@ -251,10 +210,10 @@ static void hpet_msi_unmask(struct irq_desc *desc)
     ch->msi.msi_attrib.masked = 0;
 }
 
-static void hpet_msi_mask(struct irq_desc *desc)
+/* Mask an HPET MSI channel.  Lock should be held */
+static void hpet_msi_mask(struct hpet_event_channel *ch)
 {
     u32 cfg;
-    struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg &= ~HPET_TN_ENABLE;
@@ -262,92 +221,36 @@ static void hpet_msi_mask(struct irq_desc *desc)
     ch->msi.msi_attrib.masked = 1;
 }
 
-static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
+/*
+ * Set up the MSI for an HPET channel to point at the allocated cpu, including
+ * interrupt remapping entries when appropriate.  The channel lock is expected
+ * to be held, and the MSI must currently be masked.
+ */
+static int hpet_setup_msi(struct hpet_event_channel *ch)
 {
-    ch->msi.msg = *msg;
+    ASSERT(ch->cpu != -1);
+    ASSERT(ch->msi.msi_attrib.masked == 1);
+
+    msi_compose_msg(hpet_vector, cpumask_of(ch->cpu), &ch->msi.msg);
 
     if ( iommu_intremap )
     {
-        int rc = iommu_update_ire_from_msi(&ch->msi, msg);
+        int rc = iommu_update_ire_from_msi(&ch->msi, &ch->msi.msg);
 
         if ( rc )
             return rc;
     }
 
-    hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx));
-    hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
-
-    return 0;
-}
-
-static void __maybe_unused
-hpet_msi_read(struct hpet_event_channel *ch, struct msi_msg *msg)
-{
-    msg->data = hpet_read32(HPET_Tn_ROUTE(ch->idx));
-    msg->address_lo = hpet_read32(HPET_Tn_ROUTE(ch->idx) + 4);
-    msg->address_hi = MSI_ADDR_BASE_HI;
-    if ( iommu_intremap )
-        iommu_read_msi_from_ire(&ch->msi, msg);
-}
+    hpet_write32(ch->msi.msg.data, HPET_Tn_ROUTE(ch->idx));
+    hpet_write32(ch->msi.msg.address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
 
-static unsigned int hpet_msi_startup(struct irq_desc *desc)
-{
-    hpet_msi_unmask(desc);
     return 0;
 }
 
-#define hpet_msi_shutdown hpet_msi_mask
-
-static void hpet_msi_ack(struct irq_desc *desc)
-{
-    irq_complete_move(desc);
-    move_native_irq(desc);
-    ack_APIC_irq();
-}
-
-static void hpet_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
-{
-    struct hpet_event_channel *ch = desc->action->dev_id;
-    struct msi_msg msg = ch->msi.msg;
-
-    msg.dest32 = set_desc_affinity(desc, mask);
-    if ( msg.dest32 == BAD_APICID )
-        return;
-
-    msg.data &= ~MSI_DATA_VECTOR_MASK;
-    msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
-    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-    msg.address_lo |= MSI_ADDR_DEST_ID(msg.dest32);
-    if ( msg.data != ch->msi.msg.data || msg.dest32 != ch->msi.msg.dest32 )
-        hpet_msi_write(ch, &msg);
-}
-
-/*
- * IRQ Chip for MSI HPET Devices,
- */
-static hw_irq_controller hpet_msi_type = {
-    .typename   = "HPET-MSI",
-    .startup    = hpet_msi_startup,
-    .shutdown   = hpet_msi_shutdown,
-    .enable        = hpet_msi_unmask,
-    .disable    = hpet_msi_mask,
-    .ack        = hpet_msi_ack,
-    .set_affinity   = hpet_msi_set_affinity,
-};
-
-static int __hpet_setup_msi_irq(struct irq_desc *desc)
-{
-    struct msi_msg msg;
-
-    msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg);
-    return hpet_msi_write(desc->action->dev_id, &msg);
-}
-
-static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
+static int __init hpet_init_msi(struct hpet_event_channel *ch)
 {
     int ret;
     u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
-    irq_desc_t *desc = irq_to_desc(ch->msi.irq);
 
     if ( iommu_intremap )
     {
@@ -358,41 +261,31 @@ static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
     }
 
     /* set HPET Tn as oneshot */
-    cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+    cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC | HPET_TN_ENABLE);
     cfg |= HPET_TN_FSB | HPET_TN_32BIT;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
-
-    desc->handler = &hpet_msi_type;
-    ret = request_irq(ch->msi.irq, hpet_interrupt_handler, "HPET", ch);
-    if ( ret >= 0 )
-        ret = __hpet_setup_msi_irq(desc);
-    if ( ret < 0 )
-    {
-        if ( iommu_intremap )
-            iommu_update_ire_from_msi(&ch->msi, NULL);
-        return ret;
-    }
-
-    desc->msi_desc = &ch->msi;
+    ch->msi.msi_attrib.masked = 1;
 
     return 0;
 }
 
-static int __init hpet_assign_irq(struct hpet_event_channel *ch)
+static void __init hpet_init_channel(struct hpet_event_channel *ch)
 {
-    int irq;
-
-    if ( (irq = create_irq(NUMA_NO_NODE)) < 0 )
-        return irq;
+    u64 hpet_rate = hpet_setup();
 
-    ch->msi.irq = irq;
-    if ( hpet_setup_msi_irq(ch) )
-    {
-        destroy_irq(irq);
-        return -EINVAL;
-    }
+    /*
+     * The period is a femto seconds value. We need to calculate the scaled
+     * math multiplication factor for nanosecond to hpet tick conversion.
+     */
+    ch->mult = div_sc((unsigned long)hpet_rate,
+                                     1000000000ul, 32);
+    ch->shift = 32;
+    ch->next_event = STIME_MAX;
+    spin_lock_init(&ch->lock);
 
-    return 0;
+    ch->msi.irq = -1;
+    ch->msi.msi_attrib.maskbit = 1;
+    ch->msi.msi_attrib.pos = MSI_TYPE_HPET;
 }
 
 static void __init hpet_fsb_cap_lookup(void)
@@ -412,6 +305,8 @@ static void __init hpet_fsb_cap_lookup(void)
     if ( !hpet_events )
         return;
 
+    alloc_direct_apic_vector(&hpet_vector, do_hpet_irq);
+
     for ( i = 0; i < num_chs && num_hpets_used < nr_cpu_ids; i++ )
     {
         struct hpet_event_channel *ch = &hpet_events[num_hpets_used];
@@ -431,10 +326,12 @@ static void __init hpet_fsb_cap_lookup(void)
             break;
         }
 
+        hpet_init_channel(ch);
+
         ch->flags = 0;
         ch->idx = i;
 
-        if ( hpet_assign_irq(ch) == 0 )
+        if ( hpet_init_msi(ch) == 0 )
             num_hpets_used++;
         else
             free_cpumask_var(ch->cpumask);
@@ -444,102 +341,28 @@ static void __init hpet_fsb_cap_lookup(void)
            num_hpets_used, num_chs);
 }
 
-static struct hpet_event_channel *hpet_get_channel(unsigned int cpu)
+/*
+ * Search for, and allocate, a free HPET channel.  Returns a pointer to the
+ * channel, or NULL in the case that none were free.  The caller is
+ * responsible for returning the channel to the free pool.
+ */
+static struct hpet_event_channel *hpet_get_free_channel(void)
 {
-    static unsigned int next_channel;
-    unsigned int i, next;
-    struct hpet_event_channel *ch;
+    unsigned ch, tries;
 
-    if ( num_hpets_used == 0 )
-        return hpet_events;
-
-    if ( num_hpets_used >= nr_cpu_ids )
-        return &hpet_events[cpu];
-
-    do {
-        next = next_channel;
-        if ( (i = next + 1) == num_hpets_used )
-            i = 0;
-    } while ( cmpxchg(&next_channel, next, i) != next );
-
-    /* try unused channel first */
-    for ( i = next; i < next + num_hpets_used; i++ )
+    for ( tries = num_hpets_used; tries; --tries )
     {
-        ch = &hpet_events[i % num_hpets_used];
-        if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
-        {
-            ch->cpu = cpu;
-            return ch;
-        }
-    }
-
-    /* share a in-use channel */
-    ch = &hpet_events[next];
-    if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
-        ch->cpu = cpu;
-
-    return ch;
-}
-
-static void set_channel_irq_affinity(struct hpet_event_channel *ch)
-{
-    struct irq_desc *desc = irq_to_desc(ch->msi.irq);
-
-    ASSERT(!local_irq_is_enabled());
-    spin_lock(&desc->lock);
-    hpet_msi_mask(desc);
-    hpet_msi_set_affinity(desc, cpumask_of(ch->cpu));
-    hpet_msi_unmask(desc);
-    spin_unlock(&desc->lock);
-
-    spin_unlock(&ch->lock);
-
-    /* We may have missed an interrupt due to the temporary masking. */
-    if ( ch->event_handler && ch->next_event < NOW() )
-        ch->event_handler(ch);
-}
-
-static void hpet_attach_channel(unsigned int cpu,
-                                struct hpet_event_channel *ch)
-{
-    ASSERT(!local_irq_is_enabled());
-    spin_lock(&ch->lock);
-
-    per_cpu(cpu_bc_channel, cpu) = ch;
-
-    /* try to be the channel owner again while holding the lock */
-    if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
-        ch->cpu = cpu;
-
-    if ( ch->cpu != cpu )
-        spin_unlock(&ch->lock);
-    else
-        set_channel_irq_affinity(ch);
-}
-
-static void hpet_detach_channel(unsigned int cpu,
-                                struct hpet_event_channel *ch)
-{
-    spin_lock_irq(&ch->lock);
-
-    ASSERT(ch == per_cpu(cpu_bc_channel, cpu));
+        if ( (ch = ffs(free_channels)) == 0 )
+            break;
 
-    per_cpu(cpu_bc_channel, cpu) = NULL;
+        --ch;
+        ASSERT(ch < num_hpets_used);
 
-    if ( cpu != ch->cpu )
-        spin_unlock_irq(&ch->lock);
-    else if ( cpumask_empty(ch->cpumask) )
-    {
-        ch->cpu = -1;
-        clear_bit(HPET_EVT_USED_BIT, &ch->flags);
-        spin_unlock_irq(&ch->lock);
-    }
-    else
-    {
-        ch->cpu = cpumask_first(ch->cpumask);
-        set_channel_irq_affinity(ch);
-        local_irq_enable();
+        if ( test_and_clear_bit(ch, &free_channels) )
+            return &hpet_events[ch];
     }
+
+    return NULL;
 }
 
 #include <asm/mc146818rtc.h>
@@ -563,7 +386,6 @@ void __init hpet_broadcast_init(void)
 {
     u64 hpet_rate = hpet_setup();
     u32 hpet_id, cfg;
-    unsigned int i, n;
 
     if ( hpet_rate == 0 || hpet_broadcast_is_available() )
         return;
@@ -575,7 +397,7 @@ void __init hpet_broadcast_init(void)
     {
         /* Stop HPET legacy interrupts */
         cfg &= ~HPET_CFG_LEGACY;
-        n = num_hpets_used;
+        free_channels = (u32)~0 >> (32 - num_hpets_used);
     }
     else
     {
@@ -587,11 +409,11 @@ void __init hpet_broadcast_init(void)
             hpet_events = xzalloc(struct hpet_event_channel);
         if ( !hpet_events || !zalloc_cpumask_var(&hpet_events->cpumask) )
             return;
-        hpet_events->msi.irq = -1;
+
+        hpet_init_channel(hpet_events);
 
         /* Start HPET legacy interrupts */
         cfg |= HPET_CFG_LEGACY;
-        n = 1;
 
         hpet_events->flags = HPET_EVT_LEGACY;
 
@@ -601,31 +423,13 @@ void __init hpet_broadcast_init(void)
 
     hpet_write32(cfg, HPET_CFG);
 
-    for ( i = 0; i < n; i++ )
+    if ( cfg & HPET_CFG_LEGACY )
     {
-        if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
-        {
-            /* set HPET T0 as oneshot */
-            cfg = hpet_read32(HPET_Tn_CFG(0));
-            cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
-            cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-            hpet_write32(cfg, HPET_Tn_CFG(0));
-        }
-
-        /*
-         * The period is a femto seconds value. We need to calculate the scaled
-         * math multiplication factor for nanosecond to hpet tick conversion.
-         */
-        hpet_events[i].mult = div_sc((unsigned long)hpet_rate,
-                                     1000000000ul, 32);
-        hpet_events[i].shift = 32;
-        hpet_events[i].next_event = STIME_MAX;
-        spin_lock_init(&hpet_events[i].lock);
-        wmb();
-        hpet_events[i].event_handler = handle_hpet_broadcast;
-
-        hpet_events[i].msi.msi_attrib.maskbit = 1;
-        hpet_events[i].msi.msi_attrib.pos = MSI_TYPE_HPET;
+        /* set HPET T0 as oneshot */
+        cfg = hpet_read32(HPET_Tn_CFG(0));
+        cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+        cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+        hpet_write32(cfg, HPET_Tn_CFG(0));
     }
 }
 
@@ -660,15 +464,24 @@ void hpet_broadcast_resume(void)
 
     for ( i = 0; i < n; i++ )
     {
-        if ( hpet_events[i].msi.irq >= 0 )
-            __hpet_setup_msi_irq(irq_to_desc(hpet_events[i].msi.irq));
-
         /* set HPET Tn as oneshot */
         cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
         cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
-        cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-        if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
+        cfg |= HPET_TN_32BIT;
+
+        /*
+         * Legacy HPET channel enabled here.  MSI channels enabled in
+         * hpet_broadcast_init() when claimed by a cpu.
+         */
+        if ( hpet_events[i].flags & HPET_EVT_LEGACY )
+            cfg |= HPET_TN_ENABLE;
+        else
+        {
+            cfg &= ~HPET_TN_ENABLE;
             cfg |= HPET_TN_FSB;
+            hpet_events[i].msi.msi_attrib.masked = 1;
+        }
+
         hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
 
         hpet_events[i].next_event = STIME_MAX;
@@ -705,50 +518,196 @@ void hpet_disable_legacy_broadcast(void)
 void hpet_broadcast_enter(void)
 {
     unsigned int cpu = smp_processor_id();
-    struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
+    struct hpet_event_channel *ch = this_cpu(hpet_channel);
+    s_time_t deadline = this_cpu(timer_deadline);
+
+    ASSERT(!local_irq_is_enabled());
+    ASSERT(ch == NULL);
 
-    if ( per_cpu(timer_deadline, cpu) == 0 )
+    if ( deadline == 0 )
         return;
 
-    if ( !ch )
-        ch = hpet_get_channel(cpu);
+    /* If using HPET in legacy timer mode */
+    if ( num_hpets_used == 0 )
+    {
+        spin_lock(&hpet_events->lock);
 
-    ASSERT(!local_irq_is_enabled());
+        cpumask_set_cpu(cpu, hpet_events->cpumask);
+        if ( deadline < hpet_events->next_event )
+            hpet_program_time(hpet_events, deadline, NOW(), 1);
+
+        spin_unlock(&hpet_events->lock);
+        return;
+    }
+
+retry_free_channel:
+    ch = hpet_get_free_channel();
+
+    if ( ch )
+    {
+        spin_lock(&ch->lock);
+
+        /* This really should be an MSI channel by this point */
+        ASSERT(!(ch->flags & HPET_EVT_LEGACY));
+
+        hpet_msi_mask(ch);
+
+        ch->cpu = cpu;
+        this_cpu(hpet_channel) = ch;
+        cpumask_set_cpu(cpu, ch->cpumask);
+
+        hpet_setup_msi(ch);
+        hpet_program_time(ch, deadline, NOW(), 1);
+        hpet_msi_unmask(ch);
+
+        spin_unlock(&ch->lock);
+    }
+    else
+    {
+        s_time_t best_early_deadline = 0, best_late_deadline = STIME_MAX;
+        unsigned int i, best_early_idx = -1, best_late_idx = -1;
+
+        for ( i = 0; i < num_hpets_used; ++i )
+        {
+            ch = &hpet_events[i];
+            spin_lock(&ch->lock);
+
+            if ( ch->cpu == -1 )
+                goto continue_search;
+
+            /* This channel is going to expire early */
+            if ( ch->next_event < deadline )
+            {
+                if ( ch->next_event > best_early_deadline )
+                {
+                    best_early_idx = i;
+                    best_early_deadline = ch->next_event;
+                }
+                goto continue_search;
+            }
+
+            /* We can deal with being woken up 20us late */
+            if ( ch->next_event <= deadline + MICROSECS(20) )
+                break;
 
-    if ( !(ch->flags & HPET_EVT_LEGACY) )
-        hpet_attach_channel(cpu, ch);
+            /* Otherwise record the best late channel to program forwards */
+            if ( ch->next_event <= best_late_deadline )
+            {
+                best_late_idx = i;
+                best_late_deadline = ch->next_event;
+            }
+
+        continue_search:
+            spin_unlock(&ch->lock);
+            ch = NULL;
+        }
+
+        if ( ch )
+        {
+            /* Found HPET with an appropriate time.  Request to be woken up */
+            cpumask_set_cpu(cpu, ch->cpumask);
+            this_cpu(hpet_channel) = ch;
+            spin_unlock(&ch->lock);
+            goto done_searching;
+        }
+
+        /* Try and program the best late channel forwards a bit */
+        if ( best_late_deadline < STIME_MAX && best_late_idx != -1 )
+        {
+            ch = &hpet_events[best_late_idx];
+            spin_lock(&ch->lock);
+
+            /* If this is still the same channel, good */
+            if ( ch->next_event == best_late_deadline )
+            {
+                cpumask_set_cpu(cpu, ch->cpumask);
+                hpet_program_time(ch, deadline, NOW(), 1);
+                spin_unlock(&ch->lock);
+                goto done_searching;
+            }
+            /* else it has fired and changed ownership. */
+            else
+            {
+                spin_unlock(&ch->lock);
+                goto retry_free_channel;
+            }
+        }
+
+        /* Try to piggyback on an early channel in the hope that when we
+           wake back up, our fortunes will improve. */
+        if ( best_early_deadline > 0 && best_early_idx != -1 )
+        {
+            ch = &hpet_events[best_early_idx];
+            spin_lock(&ch->lock);
+
+            /* If this is still the same channel, good */
+            if ( ch->next_event == best_early_deadline )
+            {
+                cpumask_set_cpu(cpu, ch->cpumask);
+                spin_unlock(&ch->lock);
+                goto done_searching;
+            }
+            /* else it has fired and changed ownership. */
+            else
+            {
+                spin_unlock(&ch->lock);
+                goto retry_free_channel;
+            }
+        }
+
+        /* All else has failed, and we have wasted some time searching.
+         * See whether another channel has become free. */
+        goto retry_free_channel;
+    }
+
+done_searching:
 
     /* Disable LAPIC timer interrupts. */
     disable_APIC_timer();
-    cpumask_set_cpu(cpu, ch->cpumask);
-
-    spin_lock(&ch->lock);
-    /* reprogram if current cpu expire time is nearer */
-    if ( per_cpu(timer_deadline, cpu) < ch->next_event )
-        hpet_program_time(ch, per_cpu(timer_deadline, cpu), NOW(), 1);
-    spin_unlock(&ch->lock);
 }
 
 void hpet_broadcast_exit(void)
 {
     unsigned int cpu = smp_processor_id();
-    struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
+    struct hpet_event_channel *ch = this_cpu(hpet_channel);
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( this_cpu(timer_deadline) == 0 )
+        return;
 
-    if ( per_cpu(timer_deadline, cpu) == 0 )
+    /* If using HPET in legacy timer mode */
+    if ( num_hpets_used == 0 )
+    {
+        /* This is safe without the spinlock, and will reduce contention. */
+        cpumask_clear_cpu(cpu, hpet_events->cpumask);
         return;
+    }
 
     if ( !ch )
-        ch = hpet_get_channel(cpu);
+        return;
 
-    /* Reprogram the deadline; trigger timer work now if it has passed. */
-    enable_APIC_timer();
-    if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
-        raise_softirq(TIMER_SOFTIRQ);
+    spin_lock_irq(&ch->lock);
 
     cpumask_clear_cpu(cpu, ch->cpumask);
 
-    if ( !(ch->flags & HPET_EVT_LEGACY) )
-        hpet_detach_channel(cpu, ch);
+    /* If we own the channel, detach it */
+    if ( ch->cpu == cpu )
+    {
+        hpet_msi_mask(ch);
+        hpet_wake_cpus(ch);
+        ch->cpu = -1;
+        set_bit(ch->idx, &free_channels);
+    }
+
+    this_cpu(hpet_channel) = NULL;
+
+    spin_unlock_irq(&ch->lock);
+
+    /* Reprogram the deadline; trigger timer work now if it has passed. */
+    enable_APIC_timer();
+    if ( !reprogram_timer(this_cpu(timer_deadline)) )
+        raise_softirq(TIMER_SOFTIRQ);
 }
 
 int hpet_broadcast_is_available(void)
@@ -765,7 +724,14 @@ int hpet_legacy_irq_tick(void)
          (hpet_events->flags & (HPET_EVT_DISABLE|HPET_EVT_LEGACY)) !=
          HPET_EVT_LEGACY )
         return 0;
-    hpet_events->event_handler(hpet_events);
+
+    spin_lock_irq(&hpet_events->lock);
+
+    hpet_interrupt_handler(hpet_events);
+    hpet_events->next_event = STIME_MAX;
+
+    spin_unlock_irq(&hpet_events->lock);
+
     return 1;
 }
 
-- 
1.7.10.4

