Re: [Xen-devel] [PATCH v2 6/8] x86: get per domain CQM information
> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@xxxxxxxxxx]
> Sent: Thursday, November 21, 2013 10:09 PM
> To: Xu, Dongxiao
> Cc: xen-devel@xxxxxxxxxxxxx
> Subject: Re: [Xen-devel] [PATCH v2 6/8] x86: get per domain CQM information
>
> On 21/11/13 07:20, dongxiao.xu@xxxxxxxxx wrote:
> > From: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
> >
> > Retrive CQM information for certain domain, which reflects the L3 cache
> > occupancy for a socket.
> >
> > Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
> > Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
> > ---
> >  xen/arch/x86/pqos.c             | 57 ++++++++++++++++++++++++++++++++++
> >  xen/arch/x86/sysctl.c           | 64 +++++++++++++++++++++++++++++++++++++++
> >  xen/include/asm-x86/msr-index.h |  4 +++
> >  xen/include/asm-x86/pqos.h      | 14 +++++++
> >  xen/include/public/domctl.h     | 14 +++++++
> >  xen/include/public/sysctl.h     | 15 +++++++
> >  6 files changed, 168 insertions(+)
> >
> > diff --git a/xen/arch/x86/pqos.c b/xen/arch/x86/pqos.c
> > index e294799..d95784c 100644
> > --- a/xen/arch/x86/pqos.c
> > +++ b/xen/arch/x86/pqos.c
> > @@ -19,14 +19,30 @@
> >   * Place - Suite 330, Boston, MA 02111-1307 USA.
> >   */
> >  #include <asm/processor.h>
> > +#include <asm/msr.h>
> >  #include <xen/init.h>
> >  #include <xen/spinlock.h>
> >  #include <xen/sched.h>
> > +#include <public/domctl.h>
> >  #include <asm/pqos.h>
> >
> >  static bool_t pqos_enabled = 1;
> >  boolean_param("pqos", pqos_enabled);
> >
> > +static void read_qm_data(void *arg)
> > +{
> > +    struct qm_element *qm_element = arg;
> > +
> > +    wrmsr(MSR_IA32_QOSEVTSEL, qm_element->evtid, qm_element->rmid);
> > +    rdmsrl(MSR_IA32_QMC, qm_element->qm_data);
> > +}
> > +
> > +static void get_generic_qm_info(struct qm_element *qm_element)
> > +{
> > +    unsigned int cpu = qm_element->cpu;
> > +    on_selected_cpus(cpumask_of(cpu), read_qm_data, qm_element, 1);
> > +}
> > +
> >  unsigned int cqm_res_count = 0;
> >  unsigned int cqm_upscaling_factor = 0;
> >  bool_t cqm_enabled = 0;
> > @@ -86,6 +102,23 @@ bool_t system_supports_cqm(void)
> >      return cqm_enabled;
> >  }
> >
> > +unsigned int get_cqm_count(void)
> > +{
> > +    return cqm_res_count;
> > +}
> > +
> > +unsigned int get_cqm_avail(void)
> > +{
> > +    unsigned int cqm_avail = 0;
> > +    int i;
>
> unsigned int please.  If cqm_res_count has its top bit set, the
> following loop may never terminate.

OK.

>
> > +
> > +    for ( i = 0; i < cqm_res_count; i++ )
> > +        if ( !cqm_res_array[i].inuse )
> > +            cqm_avail++;
> > +
> > +    return cqm_avail;
> > +}
> > +
> >  int alloc_cqm_rmid(struct domain *d)
> >  {
> >      int rmid, rc = 0;
> > @@ -137,6 +170,30 @@ void free_cqm_rmid(struct domain *d)
> >      d->arch.pqos_cqm_rmid = 0;
> >  }
> >
> > +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
>
> A cpumask_t is already quite large, and will get larger in the future.
> Pass by pointer please.

OK.

>
> > +                  struct xen_domctl_getdomcqminfo *info)
> > +{
> > +    struct qm_element element;
> > +    unsigned int cpu, i;
> > +
> > +    for_each_cpu ( cpu, &cpu_cqmdata_map )
> > +    {
> > +        element.cpu = cpu;
> > +        element.rmid = rmid;
> > +        element.evtid = QOS_MONITOR_EVTID_L3;
> > +
> > +        get_generic_qm_info(&element);
> > +
> > +        i = cpu_to_socket(cpu);
>
> cpu_to_socket() can return BAD_APICID.

OK, will check the return value correctness.

>
> > +        info->socket_cqmdata[i].valid =
> > +            (element.qm_data & IA32_QM_CTR_ERROR_MASK) ? 0 : 1;
>
> info->socket_cqmdata[i].valid = !(element.qm_data & IA32_QM_CTR_ERROR_MASK);

OK.
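For reference, the reworked pqos.c helpers could end up looking roughly like
this in v3 (untested sketch; it keeps the existing names such as cqm_res_array
and cqm_upscaling_factor and only folds in the changes agreed above, and an
upper bound check on the socket index is still needed on top of this):

/* Rough, untested sketch of the agreed changes. */
unsigned int get_cqm_avail(void)
{
    unsigned int cqm_avail = 0;
    unsigned int i;               /* unsigned, so the loop always terminates */

    for ( i = 0; i < cqm_res_count; i++ )
        if ( !cqm_res_array[i].inuse )
            cqm_avail++;

    return cqm_avail;
}

void get_cqm_info(uint32_t rmid, const cpumask_t *cpu_cqmdata_map,
                  struct xen_domctl_getdomcqminfo *info)
{
    struct qm_element element;
    unsigned int cpu, socket;

    for_each_cpu ( cpu, cpu_cqmdata_map )     /* mask now passed by pointer */
    {
        element.cpu = cpu;
        element.rmid = rmid;
        element.evtid = QOS_MONITOR_EVTID_L3;

        get_generic_qm_info(&element);

        socket = cpu_to_socket(cpu);
        if ( socket == BAD_APICID )           /* no valid socket for this CPU */
            continue;

        info->socket_cqmdata[socket].valid =
            !(element.qm_data & IA32_QM_CTR_ERROR_MASK);
        info->socket_cqmdata[socket].l3c_occupancy =
            info->socket_cqmdata[socket].valid ?
            element.qm_data * cqm_upscaling_factor : 0;
    }
}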
>
> > +        if ( info->socket_cqmdata[i].valid )
> > +            info->socket_cqmdata[i].l3c_occupancy = element.qm_data * cqm_upscaling_factor;
> > +        else
> > +            info->socket_cqmdata[i].l3c_occupancy = 0;
> > +    }
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> > index 15d4b91..d631769 100644
> > --- a/xen/arch/x86/sysctl.c
> > +++ b/xen/arch/x86/sysctl.c
> > @@ -28,6 +28,7 @@
> >  #include <xen/nodemask.h>
> >  #include <xen/cpu.h>
> >  #include <xsm/xsm.h>
> > +#include <asm/pqos.h>
> >
> >  #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
> >
> > @@ -101,6 +102,69 @@ long arch_do_sysctl(
> >      }
> >      break;
> >
> > +    case XEN_SYSCTL_getdomcqminfolist:
> > +    {
>
> This whole hypercall makes me somewhat uneasy.
>
> > +        struct domain *d;
> > +        struct xen_domctl_getdomcqminfo info;
> > +        uint32_t resource_count;
> > +        uint32_t resource_avail;
> > +        uint32_t num_domains = 0;
> > +        cpumask_t cpu_cqmdata_map;
> > +        DECLARE_BITMAP(sockets, QOS_MAX_SOCKETS);
> > +        unsigned int cpu;
> > +
> > +        if ( !system_supports_cqm() )
> > +        {
> > +            ret = -ENODEV;
> > +            break;
> > +        }
> > +
> > +        resource_count = get_cqm_count();
> > +        resource_avail = get_cqm_avail();
> > +
> > +        cpumask_clear(&cpu_cqmdata_map);
> > +        bitmap_zero(sockets, QOS_MAX_SOCKETS);
> > +        for_each_online_cpu(cpu)
> > +        {
> > +            int i = cpu_to_socket(cpu);
> > +            if ( test_and_set_bit(i, sockets) )
> > +                continue;
> > +            cpumask_set_cpu(cpu, &cpu_cqmdata_map);
> > +        }
>
> What is this doing?  It appears to be finding the first cpu on each socket.

Yes, the CQM info is per-socket, so one CPU within each socket is enough to
get/set the data.

>
> > +
> > +        rcu_read_lock(&domlist_read_lock);
> > +        for_each_domain ( d )
> > +        {
> > +            if ( d->domain_id < sysctl->u.getdomaininfolist.first_domain )
> > +                continue;
> > +            if ( num_domains == sysctl->u.getdomaininfolist.max_domains )
> > +                break;
> > +            if ( d->arch.pqos_cqm_rmid <= 0 )
> > +                continue;
>
> Is there any case where pqos_cqm_rmid can be negative?  alloc_cqm_rmid()
> never assigns a negative number now in v2, in which case
> d->arch.pqos_cqm_rmid can probably be unsigned (and related int rmid's
> can be similarly promoted to unsigned)

Yes, it should be changed in the v2 patch, sorry.

>
> > +            memset(&info, 0, sizeof(struct xen_domctl_getdomcqminfo));
> > +            info.domain = d->domain_id;
> > +            get_cqm_info(d->arch.pqos_cqm_rmid, cpu_cqmdata_map, &info);
>
> So for a domain the hypercallee is interested in, we get its rmid, and
> ask get_cqm_info() to individually IPI each one cpu from a socket to
> fill in the info field?
>
> The IPIs are quite expensive, and this system will currently monopolise
> the first cpu on each socket.

Since the L3 cache usage is per-socket, the current solution is:
- Find the first CPU of each socket and mark it in cpu_cqmdata_map.
- get_cqm_info() issues an IPI to each CPU marked in cpu_cqmdata_map to get
  the per-socket data.
Do you have a proposed solution for this scenario?

>
> > +
> > +            if ( copy_to_guest_offset(sysctl->u.getdomcqminfolist.buffer,
> > +                                      num_domains, &info, 1) )
> > +            {
> > +                ret = -EFAULT;
> > +                break;
> > +            }
> > +
> > +            num_domains++;
>
> So this loop is primarily bounded by the number of domains, where each
> domain with a valid rmid will result in a spate of IPI?
>
> This looks like it needs hypercall continuation logic.

OK, what about a preemption check every domain?
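Something along these lines, perhaps (just an untested sketch; the bookkeeping
for restarting at the right domain and at the right offset into the guest
buffer still needs to be worked out):

for_each_domain ( d )
{
    /*
     * Sketch: periodically give up the CPU and arrange for the sysctl
     * to be restarted from the domain we have reached so far.
     */
    if ( hypercall_preempt_check() )
    {
        sysctl->u.getdomcqminfolist.first_domain = d->domain_id;
        if ( copy_to_guest(u_sysctl, sysctl, 1) )
            ret = -EFAULT;
        else
            ret = hypercall_create_continuation(
                __HYPERVISOR_sysctl, "h", u_sysctl);
        break;
    }

    /* ... existing per-domain CQM handling ... */
}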
>
> Also, how well does this intersect with updating the rmid assignment?

It needs to acquire cqm_lock to protect against simultaneous access to the
CQM related data.

>
> > +        }
> > +        rcu_read_unlock(&domlist_read_lock);
> > +
> > +        sysctl->u.getdomcqminfolist.num_domains = num_domains;
> > +        sysctl->u.getdomcqminfolist.resource_count = resource_count;
> > +        sysctl->u.getdomcqminfolist.resource_avail = resource_avail;
> > +
> > +        if ( copy_to_guest(u_sysctl, sysctl, 1) )
> > +            ret = -EFAULT;
> > +    }
> > +    break;
>
> break should be inside the brace.

I referred to other cases in arch/x86/sysctl.c and arch/x86/domctl.c, and the
"break" is mostly placed outside the brace...

>
> > +
> >      default:
> >          ret = -ENOSYS;
> >          break;
> > diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
> > index e597a28..46ef165 100644
> > --- a/xen/include/asm-x86/msr-index.h
> > +++ b/xen/include/asm-x86/msr-index.h
> > @@ -488,4 +488,8 @@
> >  /* Geode defined MSRs */
> >  #define MSR_GEODE_BUSCONT_CONF0     0x00001900
> >
> > +/* Platform QoS register */
> > +#define MSR_IA32_QOSEVTSEL          0x00000c8d
> > +#define MSR_IA32_QMC                0x00000c8e
> > +
> >  #endif /* __ASM_MSR_INDEX_H */
> > diff --git a/xen/include/asm-x86/pqos.h b/xen/include/asm-x86/pqos.h
> > index 88de139..5c86c5d 100644
> > --- a/xen/include/asm-x86/pqos.h
> > +++ b/xen/include/asm-x86/pqos.h
> > @@ -27,15 +27,29 @@
> >  /* QoS Monitoring Event ID */
> >  #define QOS_MONITOR_EVTID_L3        0x1
> >
> > +/* IA32_QM_CTR */
> > +#define IA32_QM_CTR_ERROR_MASK      (0x3ul << 62)
> > +
> >  struct cqm_res_struct {
> >      domid_t  domain_id;
> >      bool_t   inuse;
> >  };
> >
> > +struct qm_element {
> > +    uint64_t qm_data;
> > +    uint32_t cpu;
> > +    uint32_t rmid;
> > +    uint8_t  evtid;
> > +};
> > +
> >  void init_platform_qos(void);
> >
> >  bool_t system_supports_cqm(void);
> >  int alloc_cqm_rmid(struct domain *);
> >  void free_cqm_rmid(struct domain *);
> > +unsigned int get_cqm_count(void);
> > +unsigned int get_cqm_avail(void);
> > +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
> > +                  struct xen_domctl_getdomcqminfo *info);
> >
> >  #endif
> > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > index f5d7062..fe8b37f 100644
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -883,6 +883,20 @@ struct xen_domctl_qos_resource {
> >  typedef struct xen_domctl_qos_resource xen_domctl_qos_resource_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_qos_resource_t);
> >
> > +struct xen_socket_cqmdata {
> > +    uint64_t l3c_occupancy;
> > +    uint8_t  valid;
> > +};
> > +
> > +struct xen_domctl_getdomcqminfo {
> > +    /* OUT variables. */
> > +    domid_t domain;
> > +#define QOS_MAX_SOCKETS 128
>
> Baking this into the ABI seems short sighted, and in this specific case
> looks to blow the 128 byte union size in a domctl structure.

Oh, thanks for catching this, it exceeds 128 bytes.

>
> The toolstack should be able to find the number of sockets on the
> system, and provide a GUEST_HANDLE to an array of socket_cqmdata's of
> the appropriate length.

OK, will pass the structure from the toolstack side.
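For the revised interface, I am thinking of something roughly like the below
(just a sketch; the num_sockets field name and the exact layout are still to
be decided, and the toolstack would probe the number of sockets and allocate
the array accordingly):

struct xen_socket_cqmdata {
    uint64_t l3c_occupancy;
    uint8_t  valid;
};
typedef struct xen_socket_cqmdata xen_socket_cqmdata_t;
DEFINE_XEN_GUEST_HANDLE(xen_socket_cqmdata_t);

struct xen_domctl_getdomcqminfo {
    /* IN: number of entries in the socket_cqmdata array,
     * as probed by the toolstack. */
    uint32_t num_sockets;
    /* IN: guest buffer of num_sockets entries, filled in by Xen. */
    XEN_GUEST_HANDLE_64(xen_socket_cqmdata_t) socket_cqmdata;
    /* OUT variables. */
    domid_t  domain;
};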
>
> > +    struct xen_socket_cqmdata socket_cqmdata[QOS_MAX_SOCKETS];
> > +};
> > +typedef struct xen_domctl_getdomcqminfo xen_domctl_getdomcqminfo_t;
> > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomcqminfo_t);
> > +
> >  struct xen_domctl {
> >      uint32_t cmd;
> >  #define XEN_DOMCTL_createdomain                   1
> > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> > index 8437d31..0def306 100644
> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -632,6 +632,19 @@ struct xen_sysctl_coverage_op {
> >  typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
> >
> > +/* XEN_SYSCTL_getdomcqminfolist */
> > +struct xen_sysctl_getdomcqminfolist {
> > +    /* IN variables. */
> > +    domid_t               first_domain;
> > +    uint32_t              max_domains;
> > +    XEN_GUEST_HANDLE_64(xen_domctl_getdomcqminfo_t) buffer;
> > +    /* OUT variables. */
> > +    uint32_t              num_domains;
>
> num_domains and max_domains can be folded together as both an in and an
> out parameter.  Also, "max_domains" is confusingly close to "max_domain"
> at a glance.

OK.

Thanks,
Dongxiao

>
> ~Andrew
>
> > +    uint32_t              resource_count;
> > +    uint32_t              resource_avail;
> > +};
> > +typedef struct xen_sysctl_getdomcqminfolist xen_sysctl_getdomcqminfolist_t;
> > +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomcqminfolist_t);
> >
> >  struct xen_sysctl {
> >      uint32_t cmd;
> > @@ -654,6 +667,7 @@ struct xen_sysctl {
> >  #define XEN_SYSCTL_cpupool_op                    18
> >  #define XEN_SYSCTL_scheduler_op                  19
> >  #define XEN_SYSCTL_coverage_op                   20
> > +#define XEN_SYSCTL_getdomcqminfolist             21
> >      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
> >      union {
> >          struct xen_sysctl_readconsole       readconsole;
> > @@ -675,6 +689,7 @@ struct xen_sysctl {
> >          struct xen_sysctl_cpupool_op        cpupool_op;
> >          struct xen_sysctl_scheduler_op      scheduler_op;
> >          struct xen_sysctl_coverage_op       coverage_op;
> > +        struct xen_sysctl_getdomcqminfolist getdomcqminfolist;
> >          uint8_t                             pad[128];
> >      } u;
> >  };

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel