|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v18 14/16] x86/VPMU: NMI-based VPMU support
Add support for using NMIs as PMU interrupts to allow profiling the hypervisor
when interrupts are disabled.
Most of the processing is still performed by vpmu_do_interrupt(). However,
since certain operations are not NMI-safe, we defer them to a softint that
vpmu_do_interrupt() will schedule:
* For PV guests that would be send_guest_vcpu_virq()
* For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the latter
can be called in privileged profiling mode when the interrupted guest is an HVM
one).
With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and vlapic
accesses for HVM moved to softint, the only routines/macros that
vpmu_do_interrupt() calls in NMI mode are:
* memcpy()
* querying domain type (is_XX_domain())
* guest_cpu_user_regs()
* XLAT_cpu_user_regs()
* raise_softirq()
* vcpu_vpmu()
* vpmu_ops->arch_vpmu_save()
* vpmu_ops->do_interrupt()
The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
which would not be NMI-safe).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
docs/misc/xen-command-line.markdown | 8 +-
xen/arch/x86/hvm/svm/vpmu.c | 3 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 3 +-
xen/arch/x86/hvm/vpmu.c | 229 ++++++++++++++++++++++++++++--------
xen/include/asm-x86/hvm/vpmu.h | 4 +-
xen/include/asm-x86/softirq.h | 3 +-
6 files changed, 193 insertions(+), 57 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown
b/docs/misc/xen-command-line.markdown
index bc316be..0ab1188 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1330,11 +1330,11 @@ Use Virtual Processor ID support if available. This
prevents the need for TLB
flushes on VM entry and exit, increasing performance.
### vpmu
-> `= ( bts )`
+> `= ( [nmi,][bts] )`
> Default: `off`
-Switch on the virtualized performance monitoring unit for HVM guests.
+Switch on the virtualized performance monitoring unit.
If the current cpu isn't supported a message like
'VPMU: Initialization failed. ...'
@@ -1348,6 +1348,10 @@ feature is switched on on Intel processors supporting
this feature.
Note that if **watchdog** option is also specified vpmu will be turned off.
+If 'vpmu=nmi' is specified the PMU interrupt will cause an NMI instead of a
+regular vector interrupt (which is the default). This can be useful for sampling
+hypervisor code that is executed with interrupts disabled.
+
*Warning:*
As the BTS virtualisation is not 100% safe and because of the nehalem quirk
don't use the vpmu flag on production systems with Intel cpus!
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 68113c7..7ddce33 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -168,7 +168,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
msr_bitmap_off(vpmu);
}
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
return 1;
}
@@ -220,6 +220,7 @@ static inline void context_save(struct vpmu_struct *vpmu)
rdmsrl(counters[i], counter_regs[i]);
}
+/* Must be NMI-safe */
static int amd_vpmu_save(struct vpmu_struct *vpmu)
{
struct vcpu *v;
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c
b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 8067d83..0c7fd74 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -305,6 +305,7 @@ static inline void __core2_vpmu_save(struct vpmu_struct
*vpmu)
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
+/* Must be NMI-safe */
static int core2_vpmu_save(struct vpmu_struct *vpmu)
{
struct vcpu *v = vpmu_vcpu(vpmu);
@@ -720,7 +721,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
}
}
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *v = current;
u64 msr_content;
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 651cb00..cdead13 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -56,29 +56,47 @@ static bool_t __read_mostly vpmu_disabled = 1;
static void parse_vpmu_param(char *s);
custom_param("vpmu", parse_vpmu_param);
+static void pmu_softnmi(void);
+
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
+
+static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
static void __init parse_vpmu_param(char *s)
{
- switch ( parse_bool(s) )
- {
- case 0:
- break;
- default:
- if ( !strcmp(s, "bts") )
- vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
- else if ( *s )
+ char *ss;
+
+ vpmu_mode = XENPMU_MODE_SELF;
+ vpmu_disabled = 0;
+ if (*s == '\0')
+ return;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ switch ( parse_bool(s) )
{
- printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
- break;
+ default:
+ if ( !strcmp(s, "nmi") )
+ vpmu_interrupt_type = APIC_DM_NMI;
+ else if ( !strcmp(s, "bts") )
+ vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+ else
+ {
+ printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+ case 0:
+ vpmu_mode = XENPMU_MODE_OFF;
+ vpmu_disabled = 1;
+ case 1:
+ return;
+ }
}
- /* fall through */
- case 1:
- /* Default VPMU mode */
- vpmu_mode = XENPMU_MODE_SELF;
- vpmu_disabled = 0;
- break;
- }
+
+ s = ss + 1;
+ } while ( ss );
}
void vpmu_lvtpc_update(uint32_t val)
@@ -92,7 +110,7 @@ void vpmu_lvtpc_update(uint32_t val)
curr = current;
vpmu = vcpu_vpmu(curr);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
/* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
@@ -100,6 +118,30 @@ void vpmu_lvtpc_update(uint32_t val)
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
+static void vpmu_send_interrupt(struct vcpu *v)
+{
+ struct vlapic *vlapic;
+ u32 vlapic_lvtpc;
+
+ ASSERT(is_hvm_vcpu(v));
+
+ vlapic = vcpu_vlapic(v);
+ if ( !is_vlapic_lvtpc_enabled(vlapic) )
+ return;
+
+ vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+ switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ {
+ case APIC_MODE_FIXED:
+ vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+ break;
+ case APIC_MODE_NMI:
+ v->nmi_pending = 1;
+ break;
+ }
+}
+
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write)
{
@@ -157,7 +199,7 @@ static struct vcpu *choose_hwdom_vcpu(void)
return hardware_domain->vcpu[idx];
}
-void vpmu_do_interrupt(struct cpu_user_regs *regs)
+int vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *sampled = current, *sampling;
struct vpmu_struct *vpmu;
@@ -171,7 +213,7 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
sampling = choose_hwdom_vcpu();
if ( !sampling )
- return;
+ return 0;
}
else
sampling = sampled;
@@ -185,15 +227,15 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
uint32_t domid;
if ( !vpmu->xenpmu_data )
- return;
+ return 0;
if ( is_pvh_vcpu(sampling) &&
!(vpmu_mode & XENPMU_MODE_ALL) &&
!vpmu->arch_vpmu_ops->do_interrupt(regs) )
- return;
+ return 0;
if ( *flags & PMU_CACHED )
- return;
+ return 1;
/* PV guest will be reading PMU MSRs from xenpmu_data */
vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
@@ -260,15 +302,20 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
}
else
{
- struct segment_register seg;
-
- hvm_get_segment_register(sampled, x86_seg_cs, &seg);
- r->cs = seg.sel;
- hvm_get_segment_register(sampled, x86_seg_ss, &seg);
- r->ss = seg.sel;
- r->cpl = seg.attr.fields.dpl;
if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
*flags |= PMU_SAMPLE_REAL;
+
+ /* Unsafe in NMI context, defer to softint later. */
+ if ( vpmu_interrupt_type != APIC_DM_NMI )
+ {
+ struct segment_register seg;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ }
}
}
@@ -280,35 +327,37 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
*flags |= PMU_CACHED;
- send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
+ }
+ else
+ send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
- return;
+ return 1;
}
if ( vpmu->arch_vpmu_ops )
{
- struct vlapic *vlapic = vcpu_vlapic(sampling);
- u32 vlapic_lvtpc;
-
/* We don't support (yet) HVM dom0 */
ASSERT(sampling == sampled);
- if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
- !is_vlapic_lvtpc_enabled(vlapic) )
- return;
-
- vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+ if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+ return 0;
- switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
{
- case APIC_MODE_FIXED:
- vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
- break;
- case APIC_MODE_NMI:
- sampling->nmi_pending = 1;
- break;
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
}
+ else
+ vpmu_send_interrupt(sampling);
+
+ return 1;
}
+
+ return 0;
}
void vpmu_do_cpuid(unsigned int input,
@@ -336,6 +385,9 @@ static void vpmu_save_force(void *arg)
vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
per_cpu(last_vcpu, smp_processor_id()) = NULL;
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_save(struct vpmu_struct *vpmu)
@@ -352,7 +404,10 @@ void vpmu_save(struct vpmu_struct *vpmu)
if ( vpmu->arch_vpmu_ops->arch_vpmu_save(vpmu) )
vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_load(struct vpmu_struct *vpmu)
@@ -403,6 +458,9 @@ void vpmu_load(struct vpmu_struct *vpmu)
(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) )
return;
+ /* Make sure there are no outstanding PMU NMIs from previous vcpu */
+ pmu_softnmi();
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
{
apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
@@ -426,7 +484,7 @@ void vpmu_initialise(struct vcpu *v)
vpmu_destroy(v);
vpmu_clear(vpmu);
vpmu->context = NULL;
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
switch ( vendor )
{
@@ -487,6 +545,55 @@ void vpmu_destroy(struct vcpu *v)
}
}
+/* Process the softirq set by PMU NMI handler */
+static void pmu_softnmi(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *v, *sampled = per_cpu(sampled_vcpu, cpu);
+
+ if ( sampled == NULL )
+ return;
+
+ per_cpu(sampled_vcpu, cpu) = NULL;
+
+ if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+ (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+ {
+ v = choose_hwdom_vcpu();
+ if ( !v )
+ return;
+ }
+ else
+ {
+ if ( is_hvm_vcpu(sampled) )
+ {
+ vpmu_send_interrupt(sampled);
+ return;
+ }
+ v = sampled;
+ }
+
+ if ( has_hvm_container_vcpu(sampled) )
+ {
+ struct segment_register seg;
+ struct xen_pmu_arch *pmu = &v->arch.vpmu.xenpmu_data->pmu;
+ struct xen_pmu_regs *r = &pmu->r.regs;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ }
+
+ send_guest_vcpu_virq(v, VIRQ_XENPMU);
+}
+
+int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
+{
+ return vpmu_do_interrupt(regs);
+}
+
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
struct vcpu *v;
@@ -502,6 +609,7 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t
*params)
(d->vcpu[params->vcpu] == NULL) )
return -EINVAL;
+ v = d->vcpu[params->vcpu];
if ( v->arch.vpmu.xenpmu_data )
return -EINVAL;
@@ -515,7 +623,6 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t
*params)
return -EINVAL;
}
- v = d->vcpu[params->vcpu];
vpmu = vcpu_vpmu(v);
spin_lock(&vpmu->vpmu_lock);
@@ -832,6 +939,21 @@ static int __init vpmu_init(void)
return 0;
}
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ if ( reserve_lapic_nmi() != 0 )
+ {
+ printk(XENLOG_WARNING "VPMU: Can't reserve NMI, will use"
+ " APIC vector 0x%x\n", PMU_APIC_VECTOR);
+ vpmu_interrupt_type = PMU_APIC_VECTOR;
+ }
+ else
+ {
+ set_nmi_callback(pmu_nmi_interrupt);
+ open_softirq(PMU_SOFTIRQ, pmu_softnmi);
+ }
+ }
+
switch ( vendor )
{
case X86_VENDOR_AMD:
@@ -853,7 +975,14 @@ static int __init vpmu_init(void)
printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
__stringify(XENPMU_VER_MIN) "\n");
else
+ {
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ unset_nmi_callback();
+ release_lapic_nmi();
+ }
vpmu_disabled = 1;
+ }
return 0;
}
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 2c888cc..ed5dc8c 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -53,7 +53,7 @@ struct arch_vpmu_ops {
int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
uint64_t supported);
int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
- int (*do_interrupt)(struct cpu_user_regs *regs);
+ int (*do_interrupt)(const struct cpu_user_regs *regs);
void (*do_cpuid)(unsigned int input,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -102,7 +102,7 @@ static inline bool_t vpmu_are_all_set(const struct
vpmu_struct *vpmu,
void vpmu_lvtpc_update(uint32_t val);
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write);
-void vpmu_do_interrupt(struct cpu_user_regs *regs);
+int vpmu_do_interrupt(const struct cpu_user_regs *regs);
void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void vpmu_initialise(struct vcpu *v);
diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h
index ec787d6..fca110f 100644
--- a/xen/include/asm-x86/softirq.h
+++ b/xen/include/asm-x86/softirq.h
@@ -8,7 +8,8 @@
#define MACHINE_CHECK_SOFTIRQ (NR_COMMON_SOFTIRQS + 3)
#define PCI_SERR_SOFTIRQ (NR_COMMON_SOFTIRQS + 4)
#define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 5)
-#define NR_ARCH_SOFTIRQS 6
+#define PMU_SOFTIRQ (NR_COMMON_SOFTIRQS + 6)
+#define NR_ARCH_SOFTIRQS 7
bool_t arch_skip_send_event_check(unsigned int cpu);
--
1.8.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |