[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 6/8] x86: get per domain CQM information
On 21/11/13 07:20, dongxiao.xu@xxxxxxxxx wrote: > From: Dongxiao Xu <dongxiao.xu@xxxxxxxxx> > > Retrive CQM information for certain domain, which reflects the L3 cache > occupancy for a socket. > > Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx> > Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx> > --- > xen/arch/x86/pqos.c | 57 ++++++++++++++++++++++++++++++++++ > xen/arch/x86/sysctl.c | 64 > +++++++++++++++++++++++++++++++++++++++ > xen/include/asm-x86/msr-index.h | 4 +++ > xen/include/asm-x86/pqos.h | 14 +++++++++ > xen/include/public/domctl.h | 14 +++++++++ > xen/include/public/sysctl.h | 15 +++++++++ > 6 files changed, 168 insertions(+) > > diff --git a/xen/arch/x86/pqos.c b/xen/arch/x86/pqos.c > index e294799..d95784c 100644 > --- a/xen/arch/x86/pqos.c > +++ b/xen/arch/x86/pqos.c > @@ -19,14 +19,30 @@ > * Place - Suite 330, Boston, MA 02111-1307 USA. > */ > #include <asm/processor.h> > +#include <asm/msr.h> > #include <xen/init.h> > #include <xen/spinlock.h> > #include <xen/sched.h> > +#include <public/domctl.h> > #include <asm/pqos.h> > > static bool_t pqos_enabled = 1; > boolean_param("pqos", pqos_enabled); > > +static void read_qm_data(void *arg) > +{ > + struct qm_element *qm_element = arg; > + > + wrmsr(MSR_IA32_QOSEVTSEL, qm_element->evtid, qm_element->rmid); > + rdmsrl(MSR_IA32_QMC, qm_element->qm_data); > +} > + > +static void get_generic_qm_info(struct qm_element *qm_element) > +{ > + unsigned int cpu = qm_element->cpu; > + on_selected_cpus(cpumask_of(cpu), read_qm_data, qm_element, 1); > +} > + > unsigned int cqm_res_count = 0; > unsigned int cqm_upscaling_factor = 0; > bool_t cqm_enabled = 0; > @@ -86,6 +102,23 @@ bool_t system_supports_cqm(void) > return cqm_enabled; > } > > +unsigned int get_cqm_count(void) > +{ > + return cqm_res_count; > +} > + > +unsigned int get_cqm_avail(void) > +{ > + unsigned int cqm_avail = 0; > + int i; unsigned int please. If cqm_res_count has its top bit set, the following loop may never terminate. > + > + for ( i = 0; i < cqm_res_count; i++ ) > + if ( !cqm_res_array[i].inuse ) > + cqm_avail++; > + > + return cqm_avail; > +} > + > int alloc_cqm_rmid(struct domain *d) > { > int rmid, rc = 0; > @@ -137,6 +170,30 @@ void free_cqm_rmid(struct domain *d) > d->arch.pqos_cqm_rmid = 0; > } > > +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map, A cpumask_t is already quite large, and will get larger in the future. Pass by pointer please. > + struct xen_domctl_getdomcqminfo *info) > +{ > + struct qm_element element; > + unsigned int cpu, i; > + > + for_each_cpu ( cpu, &cpu_cqmdata_map ) > + { > + element.cpu = cpu; > + element.rmid = rmid; > + element.evtid = QOS_MONITOR_EVTID_L3; > + > + get_generic_qm_info(&element); > + > + i = cpu_to_socket(cpu); cpu_to_socket() can return BAD_APICID. > + info->socket_cqmdata[i].valid = > + (element.qm_data & IA32_QM_CTR_ERROR_MASK) ? 0 : 1; info->socket_cqmdata[i].valid = !(element.qm_data & IA32_QM_CTR_ERROR_MASK); > + if ( info->socket_cqmdata[i].valid ) > + info->socket_cqmdata[i].l3c_occupancy = element.qm_data * > cqm_upscaling_factor; > + else > + info->socket_cqmdata[i].l3c_occupancy = 0; > + } > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c > index 15d4b91..d631769 100644 > --- a/xen/arch/x86/sysctl.c > +++ b/xen/arch/x86/sysctl.c > @@ -28,6 +28,7 @@ > #include <xen/nodemask.h> > #include <xen/cpu.h> > #include <xsm/xsm.h> > +#include <asm/pqos.h> > > #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) > > @@ -101,6 +102,69 @@ long arch_do_sysctl( > } > break; > > + case XEN_SYSCTL_getdomcqminfolist: > + { This whole hypercall makes me somewhat uneasy. > + struct domain *d; > + struct xen_domctl_getdomcqminfo info; > + uint32_t resource_count; > + uint32_t resource_avail; > + uint32_t num_domains = 0; > + cpumask_t cpu_cqmdata_map; > + DECLARE_BITMAP(sockets, QOS_MAX_SOCKETS); > + unsigned int cpu; > + > + if ( !system_supports_cqm() ) > + { > + ret = -ENODEV; > + break; > + } > + > + resource_count = get_cqm_count(); > + resource_avail = get_cqm_avail(); > + > + cpumask_clear(&cpu_cqmdata_map); > + bitmap_zero(sockets, QOS_MAX_SOCKETS); > + for_each_online_cpu(cpu) > + { > + int i = cpu_to_socket(cpu); > + if ( test_and_set_bit(i, sockets) ) > + continue; > + cpumask_set_cpu(cpu, &cpu_cqmdata_map); > + } What is this doing? It appears to be finding the first cpu on each socket. > + > + rcu_read_lock(&domlist_read_lock); > + for_each_domain ( d ) > + { > + if ( d->domain_id < sysctl->u.getdomaininfolist.first_domain ) > + continue; > + if ( num_domains == sysctl->u.getdomaininfolist.max_domains ) > + break; > + if ( d->arch.pqos_cqm_rmid <= 0 ) > + continue; Is there any case where pqos_cqm_rmid can be negative? alloc_cqm_rmid() never assigns a negative number now in v2, in which case d->arch.pqos_cqm_rmid can probably be unsigned (and related int rmid's can be similarly promoted to unsigned) > + memset(&info, 0, sizeof(struct xen_domctl_getdomcqminfo)); > + info.domain = d->domain_id; > + get_cqm_info(d->arch.pqos_cqm_rmid, cpu_cqmdata_map, &info); So for a domain the hypercallee is interested in, we get its rmid, and ask get_cqm_info() to individually IPI each one cpu from a socket to fill in the info field? The IPIs are quite expensive, and this system will currently monopolise the first cpu on each socket. > + > + if ( copy_to_guest_offset(sysctl->u.getdomcqminfolist.buffer, > + num_domains, &info, 1) ) > + { > + ret = -EFAULT; > + break; > + } > + > + num_domains++; So this loop is primarily bounded by the number of domains, where each domain with a valid rmid will result in a spate of IPI? This looks like it needs hypercall continuation logic. Also, how well does this intersect with updating the rmid assignment? > + } > + rcu_read_unlock(&domlist_read_lock); > + > + sysctl->u.getdomcqminfolist.num_domains = num_domains; > + sysctl->u.getdomcqminfolist.resource_count = resource_count; > + sysctl->u.getdomcqminfolist.resource_avail = resource_avail; > + > + if ( copy_to_guest(u_sysctl, sysctl, 1) ) > + ret = -EFAULT; > + } > + break; break should be inside the brace. > + > default: > ret = -ENOSYS; > break; > diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h > index e597a28..46ef165 100644 > --- a/xen/include/asm-x86/msr-index.h > +++ b/xen/include/asm-x86/msr-index.h > @@ -488,4 +488,8 @@ > /* Geode defined MSRs */ > #define MSR_GEODE_BUSCONT_CONF0 0x00001900 > > +/* Platform QoS register */ > +#define MSR_IA32_QOSEVTSEL 0x00000c8d > +#define MSR_IA32_QMC 0x00000c8e > + > #endif /* __ASM_MSR_INDEX_H */ > diff --git a/xen/include/asm-x86/pqos.h b/xen/include/asm-x86/pqos.h > index 88de139..5c86c5d 100644 > --- a/xen/include/asm-x86/pqos.h > +++ b/xen/include/asm-x86/pqos.h > @@ -27,15 +27,29 @@ > /* QoS Monitoring Event ID */ > #define QOS_MONITOR_EVTID_L3 0x1 > > +/* IA32_QM_CTR */ > +#define IA32_QM_CTR_ERROR_MASK (0x3ul << 62) > + > struct cqm_res_struct { > domid_t domain_id; > bool_t inuse; > }; > > +struct qm_element { > + uint64_t qm_data; > + uint32_t cpu; > + uint32_t rmid; > + uint8_t evtid; > +}; > + > void init_platform_qos(void); > > bool_t system_supports_cqm(void); > int alloc_cqm_rmid(struct domain *); > void free_cqm_rmid(struct domain *); > +unsigned int get_cqm_count(void); > +unsigned int get_cqm_avail(void); > +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map, > + struct xen_domctl_getdomcqminfo *info); > > #endif > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h > index f5d7062..fe8b37f 100644 > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > @@ -883,6 +883,20 @@ struct xen_domctl_qos_resource { > typedef struct xen_domctl_qos_resource xen_domctl_qos_resource_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_qos_resource_t); > > +struct xen_socket_cqmdata { > + uint64_t l3c_occupancy; > + uint8_t valid; > +}; > + > +struct xen_domctl_getdomcqminfo { > + /* OUT variables. */ > + domid_t domain; > +#define QOS_MAX_SOCKETS 128 Baking this into the ABI seems short sighted, and in this specific case looks to blow the 128 byte union size in a domctl structure. The toolstack should be able to find the number of sockets on the system, and provide a GUEST_HANDLE to an array of socket_cqmdata's of the appropriate length. > + struct xen_socket_cqmdata socket_cqmdata[QOS_MAX_SOCKETS]; > +}; > +typedef struct xen_domctl_getdomcqminfo xen_domctl_getdomcqminfo_t; > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomcqminfo_t); > + > struct xen_domctl { > uint32_t cmd; > #define XEN_DOMCTL_createdomain 1 > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h > index 8437d31..0def306 100644 > --- a/xen/include/public/sysctl.h > +++ b/xen/include/public/sysctl.h > @@ -632,6 +632,19 @@ struct xen_sysctl_coverage_op { > typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t; > DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t); > > +/* XEN_SYSCTL_getdomcqminfolist */ > +struct xen_sysctl_getdomcqminfolist { > + /* IN variables. */ > + domid_t first_domain; > + uint32_t max_domains; > + XEN_GUEST_HANDLE_64(xen_domctl_getdomcqminfo_t) buffer; > + /* OUT variables. */ > + uint32_t num_domains; num_domains and max_domains can be folded together as both an in and an out parameter. Also, "max_domains" is confusingly close to "max_domain" at a glance. ~Andrew > + uint32_t resource_count; > + uint32_t resource_avail; > +}; > +typedef struct xen_sysctl_getdomcqminfolist xen_sysctl_getdomcqminfolist_t; > +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomcqminfolist_t); > > struct xen_sysctl { > uint32_t cmd; > @@ -654,6 +667,7 @@ struct xen_sysctl { > #define XEN_SYSCTL_cpupool_op 18 > #define XEN_SYSCTL_scheduler_op 19 > #define XEN_SYSCTL_coverage_op 20 > +#define XEN_SYSCTL_getdomcqminfolist 21 > uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ > union { > struct xen_sysctl_readconsole readconsole; > @@ -675,6 +689,7 @@ struct xen_sysctl { > struct xen_sysctl_cpupool_op cpupool_op; > struct xen_sysctl_scheduler_op scheduler_op; > struct xen_sysctl_coverage_op coverage_op; > + struct xen_sysctl_getdomcqminfolist getdomcqminfolist; > uint8_t pad[128]; > } u; > }; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |