[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 3/3] Xen: implement 3-level event channel routines.
Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 arch/x86/xen/enlighten.c              |    7 +
 drivers/xen/events.c                  |  419 +++++++++++++++++++++++++++++++--
 include/xen/events.h                  |    2 +
 include/xen/interface/event_channel.h |   24 ++
 include/xen/interface/xen.h           |    2 +-
 5 files changed, 437 insertions(+), 17 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc893e7..f471881 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -43,6 +43,7 @@
 #include <xen/hvm.h>
 #include <xen/hvc-console.h>
 #include <xen/acpi.h>
+#include <xen/events.h>
 #include <asm/paravirt.h>
 #include <asm/apic.h>
 
@@ -195,6 +196,9 @@ void xen_vcpu_restore(void)
 		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 			BUG();
 	}
+
+	if (evtchn_level_param == 3)
+		xen_event_channel_setup_3level();
 }
 
 static void __init xen_banner(void)
@@ -1028,6 +1032,9 @@ void xen_setup_vcpu_info_placement(void)
 	for_each_possible_cpu(cpu)
 		xen_vcpu_setup(cpu);
 
+	if (evtchn_level_param == 3)
+		xen_event_channel_setup_3level();
+
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
 	if (have_vcpu_info_placement) {
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index f60ba76..adb94e9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -52,9 +52,15 @@
 #include <xen/interface/hvm/params.h>
 
 /* N-level event channel, starting from 2 */
+unsigned int evtchn_level_param = -1;
 unsigned int evtchn_level = 2;
 EXPORT_SYMBOL_GPL(evtchn_level);
 
+/* 3-level event channel */
+DEFINE_PER_CPU(unsigned long [sizeof(unsigned long)*8], evtchn_sel_l2);
+unsigned long evtchn_pending[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss;
+unsigned long evtchn_mask[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss;
+
 struct evtchn_ops {
 	unsigned long (*active_evtchns)(unsigned int,
 					struct shared_info*, unsigned int);
@@ -142,6 +148,29 @@ static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
 static void disable_dynirq(struct irq_data *data);
 
+static int __init parse_evtchn_level(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	if (strcmp(arg, "3") == 0)
+		evtchn_level_param = 3;
+
+	return 0;
+}
+early_param("evtchn_level", parse_evtchn_level);
+
+static inline int __is_masked_l2(int chn)
+{
+	struct shared_info *sh = HYPERVISOR_shared_info;
+	return sync_test_and_set_bit(chn, sh->evtchn_mask);
+}
+
+static inline int __is_masked_l3(int chn)
+{
+	return sync_test_and_set_bit(chn, evtchn_mask);
+}
+
 /* Get info for IRQ */
 static struct irq_info *info_for_irq(unsigned irq)
 {
@@ -311,6 +340,15 @@ static inline unsigned long __active_evtchns_l2(unsigned int cpu,
 		~sh->evtchn_mask[idx];
 }
 
+static inline unsigned long __active_evtchns_l3(unsigned int cpu,
+						struct shared_info *sh,
+						unsigned int idx)
+{
+	return evtchn_pending[idx] &
+		per_cpu(cpu_evtchn_mask, cpu)[idx] &
+		~evtchn_mask[idx];
+}
+
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
 	int irq = evtchn_to_irq[chn];
@@ -351,18 +389,33 @@ static inline void __clear_evtchn_l2(int port)
 	sync_clear_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void __clear_evtchn_l3(int port)
+{
+	sync_clear_bit(port, &evtchn_pending[0]);
+}
+
 static inline void __set_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void __set_evtchn_l3(int port)
+{
+	sync_set_bit(port, &evtchn_pending[0]);
+}
+
 static inline int __test_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	return sync_test_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline int __test_evtchn_l3(int port)
+{
+	return sync_test_bit(port, &evtchn_pending[0]);
+}
+
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
  * @irq: irq of event channel to send event to
@@ -386,6 +439,11 @@ static void __mask_evtchn_l2(int port)
 	sync_set_bit(port, &s->evtchn_mask[0]);
 }
 
+static void __mask_evtchn_l3(int port)
+{
+	sync_set_bit(port, &evtchn_mask[0]);
+}
+
 static void __unmask_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
@@ -416,6 +474,36 @@ static void __unmask_evtchn_l2(int port)
 	put_cpu();
 }
 
+static void __unmask_evtchn_l3(int port)
+{
+	unsigned int cpu = get_cpu();
+	int l1cb = BITS_PER_LONG * BITS_PER_LONG;
+	int l2cb = BITS_PER_LONG;
+
+	if (unlikely(cpu != cpu_from_evtchn(port))) {
+		struct evtchn_unmask unmask = { .port = port };
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+	} else {
+		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+		sync_clear_bit(port, &evtchn_mask[0]);
+
+		/*
+		 * The following is basically the equivalent of
+		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+		 * the interrupt edge' if the channel is masked.
+		 */
+		if (sync_test_bit(port, &evtchn_pending[0]) &&
+		    !sync_test_and_set_bit(port / l2cb,
+					   &per_cpu(evtchn_sel_l2, cpu)[0]) &&
+		    !sync_test_and_set_bit(port / l1cb,
+					   &vcpu_info->evtchn_pending_sel))
+			vcpu_info->evtchn_upcall_pending = 1;
+	}
+
+	put_cpu();
+}
+
 static void xen_irq_init(unsigned irq)
 {
 	struct irq_info *info;
@@ -1181,6 +1269,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 	notify_remote_via_irq(irq);
 }
 
+static DEFINE_SPINLOCK(debug_lock);
 static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 {
 	struct shared_info *sh = HYPERVISOR_shared_info;
@@ -1188,7 +1277,6 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
 	int i;
 	unsigned long flags;
-	static DEFINE_SPINLOCK(debug_lock);
 	struct vcpu_info *v;
 
 	spin_lock_irqsave(&debug_lock, flags);
@@ -1196,13 +1284,13 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 	printk("\nvcpu %d\n ", cpu);
 
 	for_each_online_cpu(i) {
-		int pending;
+		int masked;
 		v = per_cpu(xen_vcpu, i);
-		pending = (get_irq_regs() && i == cpu)
+		masked = (get_irq_regs() && i == cpu)
 			? xen_irqs_disabled(get_irq_regs())
 			: v->evtchn_upcall_mask;
 		printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
-		       pending, v->evtchn_upcall_pending,
+		       masked, v->evtchn_upcall_pending,
 		       (int)(sizeof(v->evtchn_pending_sel)*2),
 		       v->evtchn_pending_sel);
 	}
@@ -1227,7 +1315,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 		       i % 8 == 0 ? "\n " : " ");
 
 	printk("\nlocal cpu%d mask:\n ", cpu);
-	for (i = (NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG)-1; i >= 0; i--)
+	for (i = (NR_EVENT_CHANNELS(2)/BITS_PER_LONG)-1; i >= 0; i--)
 		printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
 		       cpu_evtchn[i],
 		       i % 8 == 0 ? "\n " : " ");
@@ -1242,7 +1330,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 	}
 
 	printk("\npending list:\n");
-	for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) {
+	for (i = 0; i < NR_EVENT_CHANNELS(2); i++) {
 		if (sync_test_bit(i, sh->evtchn_pending)) {
 			int word_idx = i / BITS_PER_LONG;
 			printk(" %d: event %d -> irq %d%s%s%s\n",
@@ -1262,15 +1350,110 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t __xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	int i, j;
+	unsigned long flags;
+	struct vcpu_info *v;
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("\nvcpu %d\n ", cpu);
+
+	for_each_online_cpu(i) {
+		int masked;
+
+		v = per_cpu(xen_vcpu, i);
+		masked = (get_irq_regs() && i == cpu)
+			? xen_irqs_disabled(get_irq_regs())
+			: v->evtchn_upcall_mask;
+		printk("%d: masked=%d pending=%d event_sel_l1 %0*lx\n ", i,
+		       masked, v->evtchn_upcall_pending,
+		       (int)(sizeof(v->evtchn_pending_sel)*2),
+		       v->evtchn_pending_sel);
+
+		printk("\nevtchn_sel_l2:\n ");
+		for (j = (sizeof(unsigned long)*8)-1; j >= 0; j--)
+			printk("%0*lx%s",
+			       (int)(sizeof(evtchn_sel_l2[0])*2),
+			       per_cpu(evtchn_sel_l2, i)[j],
+			       j % 8 == 0 ? "\n " : " ");
+	}
+
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk("\npending:\n ");
+	for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--)
+		printk("%0*lx%s", (int)(sizeof(evtchn_pending[0])*2),
+		       evtchn_pending[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nglobal mask:\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+		       evtchn_mask[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nglobally unmasked:\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+		       evtchn_pending[i] & ~evtchn_mask[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nlocal cpu%d mask:\n ", cpu);
+	for (i = (NR_EVENT_CHANNELS(3)/BITS_PER_LONG)-1; i >= 0; i--)
+		printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
+		       cpu_evtchn[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nlocally unmasked:\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) {
+		unsigned long pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+		       pending, i % 8 == 0 ? "\n " : " ");
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS(3); i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			int word_idx_l1 = i / (BITS_PER_LONG * BITS_PER_LONG);
+			int word_idx_l2 = i / BITS_PER_LONG;
+			printk(" %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       sync_test_bit(word_idx_l1, &v->evtchn_pending_sel)
+			       ? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu))
+			       ? "" : " l2-clear",
+			       !sync_test_bit(i, evtchn_mask)
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, cpu_evtchn)
+			       ? "" : " locally-masked");
+		}
+	}
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
 	return eops->xen_debug_interrupt(irq, dev_id);
 }
 
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+
+/* 2-level event channel does not use current_word_idx_l2 */
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
 
+
 /*
  * Mask out the i least significant bits of w
  */
@@ -1303,7 +1486,8 @@ static void __xen_evtchn_do_upcall_l2(void)
 		if (__this_cpu_inc_return(xed_nesting_count) - 1)
 			goto out;
 
-#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
 		/* Clear master flag /before/ clearing selector flag. */
 		wmb();
 #endif
@@ -1392,6 +1576,155 @@ out:
 	put_cpu();
 }
 
+void __xen_evtchn_do_upcall_l3(void)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
+	int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+	int word_idx_l1, word_idx_l2, bit_idx;
+	int i, j;
+	unsigned long l1cb, l2cb;
+	int cpu = get_cpu();
+
+	l1cb = BITS_PER_LONG * BITS_PER_LONG;
+	l2cb = BITS_PER_LONG;
+
+	do {
+		unsigned long pending_words_l1;
+
+		vcpu_info->evtchn_upcall_pending = 0;
+
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#endif
+		/* Atomically fetch and clear the l1 pending selector */
+		pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		start_word_idx_l1 = __this_cpu_read(current_word_idx);
+		start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+		start_bit_idx = __this_cpu_read(current_bit_idx);
+
+		word_idx_l1 = start_word_idx_l1;
+
+		/* Walk the set l1 bits; each one selects an l2 selector word */
+		for (i = 0; pending_words_l1 != 0; i++) {
+			unsigned long words_l1;
+			unsigned long pending_words_l2;
+			unsigned long pwl2idx;
+
+			words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+			if (words_l1 == 0) {
+				word_idx_l1 = 0;
+				start_word_idx_l2 = 0;
+				continue;
+			}
+
+			word_idx_l1 = __ffs(words_l1);
+
+			pwl2idx = word_idx_l1; /* evtchn_sel_l2[] holds one word per l1 bit */
+
+			pending_words_l2 =
+				xchg(&per_cpu(evtchn_sel_l2, cpu)[pwl2idx],
+				     0);
+
+			word_idx_l2 = 0;
+			if (word_idx_l1 == start_word_idx_l1) {
+				if (i == 0)
+					word_idx_l2 = start_word_idx_l2;
+				else
+					word_idx_l2 &= (1UL << start_word_idx_l2) - 1;
+			}
+
+			for (j = 0; pending_words_l2 != 0; j++) {
+				unsigned long pending_bits;
+				unsigned long words_l2;
+				unsigned long idx;
+
+				words_l2 = MASK_LSBS(pending_words_l2,
+						     word_idx_l2);
+
+				if (words_l2 == 0) {
+					word_idx_l2 = 0;
+					bit_idx = 0;
+					continue;
+				}
+
+				word_idx_l2 = __ffs(words_l2);
+
+				idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+				pending_bits =
+					eops->active_evtchns(cpu, NULL, idx);
+
+				bit_idx = 0;
+				if (word_idx_l2 == start_word_idx_l2) {
+					if (j == 0)
+						bit_idx = start_bit_idx;
+					else
+						bit_idx &= (1UL<<start_bit_idx)-1;
+				}
+
+				/* Dispatch each active port left in this pending word */
+				do {
+					unsigned long bits;
+					int port, irq;
+					struct irq_desc *desc;
+
+					bits = MASK_LSBS(pending_bits, bit_idx);
+
+					if (bits == 0)
+						break;
+
+					bit_idx = __ffs(bits);
+
+					port = word_idx_l1 * l1cb +
+						word_idx_l2 * l2cb +
+						bit_idx;
+
+					irq = evtchn_to_irq[port];
+
+					if (irq != -1) {
+						desc = irq_to_desc(irq);
+						if (desc)
+							generic_handle_irq_desc(irq, desc);
+					}
+
+					bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+					__this_cpu_write(current_bit_idx, bit_idx);
+					__this_cpu_write(current_word_idx_l2,
+							 bit_idx ? word_idx_l2 :
+							 (word_idx_l2+1) % BITS_PER_LONG);
+					__this_cpu_write(current_word_idx,
+							 word_idx_l2 ? word_idx_l1 :
+							 (word_idx_l1+1) % BITS_PER_LONG);
+				} while (bit_idx != 0);
+
+				if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+					pending_words_l2 &= ~(1UL << word_idx_l2);
+
+				word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+			}
+
+			if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+				pending_words_l1 &= ~(1UL << word_idx_l1);
+
+			word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+		}
+
+		BUG_ON(!irqs_disabled());
+		count = __this_cpu_read(xed_nesting_count);
+		__this_cpu_write(xed_nesting_count, 0);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+	put_cpu();
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1525,12 +1858,6 @@ static void mask_ack_dynirq(struct irq_data *data)
 	ack_dynirq(data);
 }
 
-static inline int __is_masked_l2(int chn)
-{
-	struct shared_info *sh = HYPERVISOR_shared_info;
-	return sync_test_and_set_bit(chn, sh->evtchn_mask);
-}
-
 static int retrigger_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
@@ -1821,14 +2148,74 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
 	.xen_debug_interrupt = __xen_debug_interrupt_l2,
 };
 
+static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
+	.active_evtchns = __active_evtchns_l3,
+	.clear_evtchn = __clear_evtchn_l3,
+	.set_evtchn = __set_evtchn_l3,
+	.test_evtchn = __test_evtchn_l3,
+	.mask_evtchn = __mask_evtchn_l3,
+	.unmask_evtchn = __unmask_evtchn_l3,
+	.is_masked = __is_masked_l3,
+	.xen_evtchn_do_upcall = __xen_evtchn_do_upcall_l3,
+	.xen_debug_interrupt = __xen_debug_interrupt_l3,
+};
+
+int xen_event_channel_setup_3level(void)
+{
+	evtchn_register_nlevel_t reg;
+	int i, nr_pages, cpu;
+	unsigned long mfns[nr_cpu_ids];
+	unsigned long offsets[nr_cpu_ids];
+	int rc = -EINVAL;
+
+	memset(&reg, 0, sizeof(reg));
+
+	reg.level = 3;
+	nr_pages = (sizeof(unsigned long) == 4 ? 1 : 8);
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long offset = PAGE_SIZE * i;
+		reg.u.l3.evtchn_pending[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_pending+offset));
+		reg.u.l3.evtchn_mask[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_mask+offset));
+	}
+
+	reg.u.l3.l2sel_mfn = mfns;
+	reg.u.l3.l2sel_offset = offsets;
+	reg.u.l3.nr_vcpus = nr_cpu_ids;
+
+	for_each_possible_cpu(cpu) {
+		reg.u.l3.l2sel_mfn[cpu] =
+			arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu));
+		reg.u.l3.l2sel_offset[cpu] =
+			offset_in_page(&per_cpu(evtchn_sel_l2, cpu));
+	}
+
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_nlevel, &reg);
+
+	if (rc == 0)
+		evtchn_level = 3;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xen_event_channel_setup_3level);
+
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
 	int cpu;
 
-	/* Setup 2-level event channel */
-	eops = &evtchn_ops_l2;
-	evtchn_level = 2;
+	switch (evtchn_level) {
+	case 2:
+		eops = &evtchn_ops_l2; break;
+	case 3:
+		eops = &evtchn_ops_l3; break;
+	default:
+		BUG();
+	}
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS(evtchn_level),
 				sizeof(*evtchn_to_irq),
diff --git a/include/xen/events.h b/include/xen/events.h
index bc10f22..87696fc 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -111,5 +111,7 @@ int xen_test_irq_shared(int irq);
 
 /* N-level event channels */
 extern unsigned int evtchn_level;
+extern unsigned int evtchn_level_param;
+int xen_event_channel_setup_3level(void);
 
 #endif /* _XEN_EVENTS_H */
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f494292..f764d21 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -190,6 +190,30 @@ struct evtchn_reset {
 };
 typedef struct evtchn_reset evtchn_reset_t;
 
+/*
+ * EVTCHNOP_register_nlevel: Register N level event channels.
+ * NOTES:
+ *   1. currently only 3-level is supported.
+ *   2. should fall back to basic 2-level if this call fails.
+ */
+#define EVTCHNOP_register_nlevel 11
+#define MAX_L3_PAGES 8 /* 8 pages for 64 bits */
+struct evtchn_register_3level {
+	unsigned long evtchn_pending[MAX_L3_PAGES];
+	unsigned long evtchn_mask[MAX_L3_PAGES];
+	unsigned long *l2sel_mfn;
+	unsigned long *l2sel_offset;
+	unsigned int nr_vcpus;
+};
+
+struct evtchn_register_nlevel {
+	uint32_t level;
+	union {
+		struct evtchn_register_3level l3;
+	} u;
+};
+typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
+
 struct evtchn_op {
 	uint32_t cmd; /* EVTCHNOP_* */
 	union {
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index c66e1ff..7cb9d8f 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -289,7 +289,7 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
  * 32k if a long is 32 bits; 256k if a long is 64 bits.
  */
 #define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64)
-#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long))
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long) * 8)
 #define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \
 	switch (x) { \
 	case 2: \
-- 
1.7.10.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |