[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86/nmi: Make external NMI injection reliably crash the host
On 26/08/14 11:10, Ross Lagerwall wrote: > Change the watchdog handler to only "tick" if the corresponding perf > counter has overflowed; otherwise, return false from the NMI handler to > indicate that the NMI is not a watchdog tick and let the other handlers > handle it. This allows externally injected NMIs to reliably crash the > host rather than be swallowed by the watchdog handler. More specifically, external NMIs which don't set one of the bits in the System Control Port B Most vendors which support an "Inject NMI" option from their ipmi/web interface do not set any bits in control port B, at which point the attempt to crash the server will be eaten by the watchdog logic. > > Signed-off-by: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx> > CC: Keir Fraser <keir@xxxxxxx> > CC: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- > xen/arch/x86/nmi.c | 76 > ++++++++++++++++++++++++++++++++-------------- > xen/arch/x86/traps.c | 6 ++-- > xen/include/asm-x86/apic.h | 2 +- > 3 files changed, 57 insertions(+), 27 deletions(-) > > diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c > index c4427a6..5bf0e2c 100644 > --- a/xen/arch/x86/nmi.c > +++ b/xen/arch/x86/nmi.c > @@ -15,6 +15,7 @@ > > #include <xen/config.h> > #include <xen/init.h> > +#include <xen/stdbool.h> > #include <xen/lib.h> > #include <xen/mm.h> > #include <xen/irq.h> > @@ -82,6 +83,7 @@ int nmi_active; > #define K7_EVNTSEL_USR (1 << 16) > #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 > #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING > +#define K7_EVENT_WIDTH 32 > > #define P6_EVNTSEL0_ENABLE (1 << 22) > #define P6_EVNTSEL_INT (1 << 20) > @@ -89,10 +91,12 @@ int nmi_active; > #define P6_EVNTSEL_USR (1 << 16) > #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 > #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c > +#define P6_EVENT_WIDTH 32 > > #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) > #define P4_CCCR_OVF_PMI0 (1<<26) > #define P4_CCCR_OVF_PMI1 (1<<27) > +#define P4_CCCR_OVF (1<<31) > #define P4_CCCR_THRESHOLD(N) ((N)<<20) > #define P4_CCCR_COMPLEMENT (1<<19) > #define P4_CCCR_COMPARE (1<<18) > @@ -432,35 +436,23 @@ int __init watchdog_setup(void) > return 0; > } > > -void nmi_watchdog_tick(const struct cpu_user_regs *regs) > +/* Returns true if this was a watchdog NMI, false otherwise */ > +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs) > { > + bool_t watchdog_tick = 0; > unsigned int sum = this_cpu(nmi_timer_ticks); > > - if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() ) > - { > - /* > - * Ayiee, looks like this CPU is stuck ... wait for the timeout > - * before doing the oops ... > - */ > - this_cpu(alert_counter)++; > - if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz ) > - { > - console_force_unlock(); > - printk("Watchdog timer detects that CPU%d is stuck!\n", > - smp_processor_id()); > - fatal_trap(TRAP_nmi, regs); > - } > - } > - else > - { > - this_cpu(last_irq_sums) = sum; > - this_cpu(alert_counter) = 0; > - } > - > if ( nmi_perfctr_msr ) > { > + uint64_t msr_content; > + > + /* Work out if this is a watchdog tick by checking for overflow. */ > if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 ) > { > + rdmsrl(MSR_P4_IQ_CCCR0, msr_content); > + if ( msr_content & P4_CCCR_OVF ) > + watchdog_tick = 1; > + > /* > * P4 quirks: > * - An overflown perfctr will assert its interrupt > @@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs) > } > else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 ) > { > + rdmsrl(MSR_P6_PERFCTR0, msr_content); > + watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH)); > + > /* > * Only P6 based Pentium M need to re-unmask the apic vector but > * it doesn't hurt other P6 variants. > */ > apic_write(APIC_LVTPC, APIC_DM_NMI); > } > - write_watchdog_counter(NULL); > + else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 ) > + { > + rdmsrl(MSR_K7_PERFCTR0, msr_content); > + watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH)); > + } > + > + if ( watchdog_tick ) > + { > + unsigned int *this_alert_counter = &this_cpu(alert_counter); > + unsigned int *this_last_irq_sums = &this_cpu(last_irq_sums); > + > + write_watchdog_counter(NULL); > + > + if ( (*this_last_irq_sums == sum) && watchdog_enabled() ) > + { > + /* > + * Ayiee, looks like this CPU is stuck ... wait for the > timeout > + * before doing the oops ... > + */ > + ++*this_alert_counter; > + if ( *this_alert_counter == opt_watchdog_timeout * nmi_hz ) > + { > + console_force_unlock(); > + printk("Watchdog timer detects that CPU%d is stuck!\n", > + smp_processor_id()); > + fatal_trap(TRAP_nmi, regs); > + } > + } > + else > + { > + *this_last_irq_sums = sum; > + this_alert_counter = 0; > + } > + } > } > + > + return watchdog_tick; > } > > /* > diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c > index 71be2ae..38f42fa 100644 > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs) > if ( nmi_callback(regs, cpu) ) > return; > > - if ( nmi_watchdog ) > - nmi_watchdog_tick(regs); > + if ( nmi_watchdog && nmi_watchdog_tick(regs) ) > + return; > > /* Only the BSP gets external NMIs from the system. */ > if ( cpu == 0 ) > @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs) > pci_serr_error(regs); > if ( reason & 0x40 ) > io_check_error(regs); > - if ( !(reason & 0xc0) && !nmi_watchdog ) > + if ( !(reason & 0xc0) ) > unknown_nmi_error(regs, reason); > } > } > diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h > index 5d7623f..6697245 100644 > --- a/xen/include/asm-x86/apic.h > +++ b/xen/include/asm-x86/apic.h > @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void); > extern void self_nmi(void); > extern void disable_timer_nmi_watchdog(void); > extern void enable_timer_nmi_watchdog(void); > -extern void nmi_watchdog_tick (const struct cpu_user_regs *regs); > +extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs); > extern int APIC_init_uniprocessor (void); > extern void disable_APIC_timer(void); > extern void enable_APIC_timer(void); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |