[Xen-devel] [RFC PATCH V5 09/14] xen: implement 3-level event channel routines
Implement several routines for the 3-level event channel ABI. Some
routines are shared between the 2-level and 3-level ABIs. For an
N-level (currently only 2 and 3) event channel ABI, active events are
processed top-down, i.e. L1 -> L2 -> ... -> L(N-1) -> bitmap. The
selectors are processed recursively; the event bitmap is processed by
a dedicated function, process_port.

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 drivers/xen/events.c | 376 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 293 insertions(+), 83 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ee35ff9..fe1831b 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -76,7 +76,12 @@ static const struct evtchn_ops *eops;
 static xen_ulong_t *evtchn_pending;
 static xen_ulong_t *evtchn_mask;
 /* The following per-cpu var points to selector(s). */
-static DEFINE_PER_CPU(xen_ulong_t *[1], evtchn_sel);
+static DEFINE_PER_CPU(xen_ulong_t *[2], evtchn_sel);
+/*
+ * 2nd-level selector for the 3-level event channel ABI; '8' stands
+ * for 8 bits per byte.
+ */
+static DEFINE_PER_CPU(xen_ulong_t [sizeof(xen_ulong_t) * 8], evtchn_sel_l2);
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -150,6 +155,11 @@ static bool (*pirq_needs_eoi)(unsigned irq);
  */
 #define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
 /*
+ * If xen_ulong_t is 8 bytes it is 64 bits wide (2^6 == 64);
+ * otherwise it is 32 bits wide (2^5 == 32).
+ */
+#define EVTCHN_WORD_BITORDER (sizeof(xen_ulong_t) == 8 ? 6 : 5)
+/*
  * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
  * array. Primarily to avoid long lines (hence the terse name).
  */
@@ -435,6 +445,29 @@ static inline void __unmask_local_port_l2(int port)
 		vcpu_info->evtchn_upcall_pending = 1;
 }
 
+static inline void __unmask_local_port_l3(int port)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	int cpu = smp_processor_id();
+	unsigned int l1bit = port >> (EVTCHN_WORD_BITORDER << 1);
+	unsigned int l2bit = port >> EVTCHN_WORD_BITORDER;
+
+	sync_clear_bit(port, BM(&evtchn_mask[0]));
+
+	/*
+	 * The following is basically the equivalent of
+	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+	 * the interrupt edge' if the channel is masked.
+	 */
+	if (sync_test_bit(port, BM(&evtchn_pending[0])) &&
+	    !sync_test_and_set_bit(l2bit,
+				   BM(per_cpu(evtchn_sel, cpu)[1])) &&
+	    !sync_test_and_set_bit(l1bit,
+				   BM(per_cpu(evtchn_sel, cpu)[0])))
+		vcpu_info->evtchn_upcall_pending = 1;
+
+}
+
 static void unmask_evtchn(int port)
 {
 	unsigned int cpu = get_cpu();
@@ -1326,119 +1359,254 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_EVTCHN_WORD;
+	int i;
+	struct vcpu_info *v;
+
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk(KERN_DEBUG "\npending (only show words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(KERN_DEBUG "  word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_pending[0])*2),
+			       evtchn_pending[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobal mask (only show words which have bits set to 0):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL) {
+			printk(KERN_DEBUG "  word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(KERN_DEBUG "  word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits set to 1):\n ", cpu);
+	for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(KERN_DEBUG "  word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--) {
+		xen_ulong_t pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(KERN_DEBUG "  word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       pending);
+		}
+	}
+
+	printk(KERN_DEBUG "\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			int word_idx = i / (BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD);
+			int word_idx_l2 = i / BITS_PER_EVTCHN_WORD;
+			printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       !sync_test_bit(word_idx, BM(per_cpu(evtchn_sel, cpu)[0]))
+			       ? "" : " l1-clear",
+			       !sync_test_bit(word_idx_l2, BM(per_cpu(evtchn_sel, cpu)[1]))
+			       ? "" : " l2-clear",
+			       sync_test_bit(i, BM(evtchn_mask))
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, BM(cpu_evtchn))
+			       ? "" : " locally-masked");
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+static DEFINE_PER_CPU(unsigned int[3], current_idx);
 
 /*
  * Mask out the i least significant bits of w
  */
 #define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
 
+static __always_inline void process_port(int cpu,
+					 unsigned int base,
+					 unsigned int *idx,
+					 unsigned int *idx_array)
+{
+	xen_ulong_t pending_bits, bits;
+	int port, irq;
+	struct irq_desc *desc;
+
+	pending_bits = active_evtchns(cpu, base >> EVTCHN_WORD_BITORDER);
+
+	do {
+		bits = MASK_LSBS(pending_bits, *idx);
+
+		/* If we masked out all events, move on. */
+		if (bits == 0)
+			break;
+
+		*idx = EVTCHN_FIRST_BIT(bits);
+
+		/* Process port. */
+		port = base + *idx;
+		irq = evtchn_to_irq[port];
+
+		if (irq != -1) {
+			desc = irq_to_desc(irq);
+			if (desc)
+				generic_handle_irq_desc(irq, desc);
+		}
+
+		*idx = (*idx + 1) % BITS_PER_EVTCHN_WORD;
+
+		/* Next caller starts at last processed + 1 */
+		/*
+		 * As this routine is shared by the 2-level and 3-level
+		 * event channel ABIs, we need to write all three
+		 * current_idx elements. In the 2-level case, the caller
+		 * /should/ always set idx_array[2] to ~0U, so in
+		 * practice the write to current_idx[1] is equivalent
+		 * to writing idx_array[1].
+		 */
+		__this_cpu_write(current_idx[0],
+				 idx_array[1] ? idx_array[0] :
+				 (idx_array[0]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[1],
+				 idx_array[2] ? idx_array[1] :
+				 (idx_array[1]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[2], idx_array[2]);
+	} while (*idx != 0);
+}
+
 /*
- * Search the CPUs pending events bitmasks. For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
+ * This function processes active event channels top-down, L1 -> L2 ->
+ * ... -> L(n-1) -> bitmap. The selectors are processed recursively;
+ * the event bitmap is processed by process_port.
  *
- * Xen uses a two-level bitmap to speed searching. The first level is
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
+ * @cpu: current cpu id
+ * @base: accumulated offset along selector processing
+ * @start_idx: array of indices to resume from
+ * @idx: array of indices currently being processed
+ * @sel_idx: selector word index
+ * @level: current processing level, from 0 to highest_level
+ * @highest_level: highest recursion level
+ *
+ * If level == highest_level, we have reached the event bitmap. The
+ * level variable starts from 0, so highest_level for the 2-level ABI
+ * is 1, while for the 3-level ABI it is 2.
  */
-static void __xen_evtchn_do_upcall_l2(void)
+static void process(int cpu,
+		    unsigned int base,
+		    unsigned int *start_idx,
+		    unsigned int *idx,
+		    unsigned int sel_idx,
+		    unsigned short level,
+		    unsigned short highest_level)
 {
-	int start_word_idx, start_bit_idx;
-	int word_idx, bit_idx;
 	int i;
-	int cpu = get_cpu();
-	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-	unsigned count;
+	xen_ulong_t pending_words;
 
-	do {
-		xen_ulong_t pending_words;
+	if (level == highest_level) {
+		process_port(cpu, base, &idx[level], idx);
+		return;
+	}
 
-		vcpu_info->evtchn_upcall_pending = 0;
+	pending_words =
+		xchg_xen_ulong(&per_cpu(evtchn_sel, cpu)[level][sel_idx], 0);
 
-		if (__this_cpu_inc_return(xed_nesting_count) - 1)
-			goto out;
+	/* This loop is used to process selectors. */
+	for (i = 0; pending_words != 0; i++) {
+		xen_ulong_t words;
+		unsigned int saved_base;
+
+		words = MASK_LSBS(pending_words, idx[level]);
 
 		/*
-		 * Master flag must be cleared /before/ clearing
-		 * selector flag. xchg_xen_ulong must contain an
-		 * appropriate barrier.
+		 * If we masked out all events, wrap to beginning.
 		 */
-		pending_words = xchg_xen_ulong(per_cpu(evtchn_sel, cpu)[0], 0);
-
-		start_word_idx = __this_cpu_read(current_word_idx);
-		start_bit_idx = __this_cpu_read(current_bit_idx);
-
-		word_idx = start_word_idx;
+		if (words == 0) {
+			idx[level] = 0;
+			start_idx[level+1] = 0;
+			continue;
+		}
 
-		for (i = 0; pending_words != 0; i++) {
-			xen_ulong_t pending_bits;
-			xen_ulong_t words;
+		idx[level] = EVTCHN_FIRST_BIT(words);
+
+		idx[level+1] = 0; /* usually scan entire word from start */
+		if (idx[level] == start_idx[level]) {
+			/* We scan the starting word in two parts */
+			if (i == 0)
+				/* 1st time: start in the middle */
+				idx[level+1] = start_idx[level+1];
+			else
+				/* 2nd time: mask bits done already */
+				idx[level+1] &= (1UL << start_idx[level+1]) - 1;
+		}
 
-			words = MASK_LSBS(pending_words, word_idx);
+		saved_base = base;
+		base += (idx[level] <<
+			 (EVTCHN_WORD_BITORDER * (highest_level-level)));
 
-			/*
-			 * If we masked out all events, wrap to beginning.
-			 */
-			if (words == 0) {
-				word_idx = 0;
-				bit_idx = 0;
-				continue;
-			}
-			word_idx = EVTCHN_FIRST_BIT(words);
-
-			pending_bits = active_evtchns(cpu, word_idx);
-			bit_idx = 0; /* usually scan entire word from start */
-			if (word_idx == start_word_idx) {
-				/* We scan the starting word in two parts */
-				if (i == 0)
-					/* 1st time: start in the middle */
-					bit_idx = start_bit_idx;
-				else
-					/* 2nd time: mask bits done already */
-					bit_idx &= (1UL << start_bit_idx) - 1;
-			}
+		process(cpu, base, start_idx, idx, idx[level],
			level+1, highest_level);
 
-			do {
-				xen_ulong_t bits;
-				int port, irq;
-				struct irq_desc *desc;
+		base = saved_base;
 
-				bits = MASK_LSBS(pending_bits, bit_idx);
+		/* Scan start_l1i twice; all others once. */
+		if ((idx[level] != start_idx[level]) || (i != 0))
+			pending_words &= ~(1UL << idx[level]);
 
-				/* If we masked out all events, move on. */
-				if (bits == 0)
-					break;
+		idx[level] = (idx[level] + 1) % BITS_PER_EVTCHN_WORD;
+	}
+}
 
-				bit_idx = EVTCHN_FIRST_BIT(bits);
 
-				/* Process port. */
-				port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-				irq = evtchn_to_irq[port];
+/* This routine is shared between the 2-level and 3-level ABIs */
+static void ___xen_evtchn_do_upcall(unsigned int *start_idx,
+				    unsigned int *idx,
+				    unsigned short highest_level)
+{
+	int cpu = get_cpu();
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
 
-				if (irq != -1) {
-					desc = irq_to_desc(irq);
-					if (desc)
-						generic_handle_irq_desc(irq, desc);
-				}
+	do {
+		vcpu_info->evtchn_upcall_pending = 0;
 
-				bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
 
-				/* Next caller starts at last processed + 1 */
-				__this_cpu_write(current_word_idx,
-						 bit_idx ? word_idx :
-						 (word_idx+1) % BITS_PER_EVTCHN_WORD);
-				__this_cpu_write(current_bit_idx, bit_idx);
-			} while (bit_idx != 0);
+		start_idx[0] = __this_cpu_read(current_idx[0]);
+		start_idx[1] = __this_cpu_read(current_idx[1]);
+		start_idx[2] = __this_cpu_read(current_idx[2]);
 
-			/* Scan start_l1i twice; all others once. */
-			if ((word_idx != start_word_idx) || (i != 0))
-				pending_words &= ~(1UL << word_idx);
+		idx[0] = start_idx[0];
 
-			word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
-		}
+		process(cpu, 0 /* base */, start_idx, idx,
+			0 /* selector index */,
+			0 /* starting from L1 (1-1=0) */,
+			highest_level);
 
 		BUG_ON(!irqs_disabled());
 
@@ -1451,6 +1619,42 @@ out:
 	put_cpu();
 }
 
+/*
+ * Search the CPUs pending events bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for
+ * handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ */
+static void __xen_evtchn_do_upcall_l2(void)
+{
+	/*
+	 * Need three elements to feed into process_port, but the
+	 * third element is never used for the 2-level ABI and should
+	 * always be set to ~0U.
+	 */
+	unsigned int start_idx[3] = { 0, 0, ~0U };
+	unsigned int idx[3] = { 0, 0, ~0U };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 1);
+}
+
+static void __xen_evtchn_do_upcall_l3(void)
+{
+	/*
+	 * Unlike the 2-level case, all three elements are used by
+	 * the 3-level ABI, so every index (including the third)
+	 * starts at 0.
+	 */
+	unsigned int start_idx[3] = { 0, 0, 0 };
+	unsigned int idx[3] = { 0, 0, 0 };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 2);
+
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1870,6 +2074,12 @@ const struct evtchn_ops evtchn_l2_ops = {
 	.do_upcall = __xen_evtchn_do_upcall_l2
 };
 
+const struct evtchn_ops evtchn_l3_ops = {
+	.unmask = __unmask_local_port_l3,
+	.debug_interrupt = xen_debug_interrupt_l3,
+	.do_upcall = __xen_evtchn_do_upcall_l3
+};
+
 static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
 					    unsigned long action,
 					    void *hcpu)
-- 
1.7.10.4
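
For readers who want to experiment with the bitmap arithmetic outside the
kernel, below is a minimal standalone userspace sketch (not part of the
patch; the helper names decompose() and scan_word() are invented for
illustration). It assumes a 64-bit xen_ulong_t, as on x86-64 guests.
decompose() mirrors the shifts __unmask_local_port_l3() uses to map a port
onto its L1 and L2 selector bits; scan_word() mirrors the MASK_LSBS()-based
walk in process_port(), with GCC's __builtin_ctzll() standing in for
EVTCHN_FIRST_BIT().

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_WORD	64
#define WORD_BITORDER	6	/* 2^6 == 64 */
#define MASK_LSBS(w, i)	((w) & (~UINT64_C(0) << (i)))

/* Map a port onto its 3-level coordinates, as __unmask_local_port_l3 does. */
static void decompose(unsigned int port)
{
	unsigned int l1bit = port >> (WORD_BITORDER << 1);	/* port / 4096 */
	unsigned int l2bit = port >> WORD_BITORDER;		/* port / 64   */
	unsigned int bit   = port & (BITS_PER_WORD - 1);	/* port % 64   */

	printf("port %6u -> L1 bit %2u, L2 bit %4u, event-word bit %2u\n",
	       port, l1bit, l2bit, bit);
}

/* Service every set bit of 'pending' at or above 'start', lowest first. */
static void scan_word(uint64_t pending, unsigned int start)
{
	unsigned int bit = start;

	do {
		uint64_t bits = MASK_LSBS(pending, bit);

		if (bits == 0)
			break;			/* nothing left at or above 'bit' */

		bit = __builtin_ctzll(bits);	/* lowest remaining set bit */
		printf("service bit %u\n", bit);
		bit = (bit + 1) % BITS_PER_WORD;
	} while (bit != 0);
}

int main(void)
{
	decompose(0);
	decompose(4097);	/* one L2 word in: L1 bit 1, L2 bit 64 */
	decompose(262143);	/* last port: 64^3 - 1 */

	/* Bits 1, 4 and 63 pending; resume at bit 2, so bit 1 is skipped. */
	scan_word((UINT64_C(1) << 63) | 0x12, 2);
	return 0;
}

Note that scan_word() deliberately leaves bit 1 unserviced: in the kernel
that bit is picked up when the outer process() loop revisits the starting
word, which is what the "We scan the starting word in two parts" logic and
the current_idx bookkeeping are for.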