[Xen-devel] [RFC PATCH 3/7] xen: psr: reserve an RMID for each core



This allows a new item to be passed as part of the psr=
boot option: "percpu_cmt". If that is specified, Xen tries,
at boot time, to associate an RMID with each core.
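
For instance, assuming CMT itself is enabled with the
pre-existing "cmt" item (and that nothing else is needed),
per-CPU monitoring would be requested by booting Xen with
something like:

    psr=cmt,percpu_cmt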

XXX This would all be rather straightforward, were it not
    for the fact that running out of RMIDs is, apparently,
    more common than I thought. For example, on a dev box
    we have in Cambridge, there are 144 pCPUs and only 71
    RMIDs.

    In this preliminary version, nothing particularly smart
    happens if we run out of RMIDs: we just fail to attach
    the remaining cores and that's it. In future, I'd
    probably like to:
     + check up front whether the operation has any chance
       of succeeding, by comparing the number of pCPUs with
       the number of available RMIDs (see the sketch below)
     + on unexpected failure, roll back everything... that
       seems to make more sense to me than just leaving
       the system half configured for per-cpu CMT

    Thoughts?
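
    A rough sketch of the up-front check (not part of this
    patch; the helper name and where it would be called
    from are hypothetical, everything it touches is already
    in this series):

    static bool_t __init pcpu_cmt_feasible(void)
    {
        unsigned int rmid, free_rmids = 0;

        /* Count the RMIDs not yet bound to anything. */
        for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
            if ( psr_cmt->rmids[rmid] == DOMID_INVALID )
                free_rmids++;

        /* One RMID per pCPU, or we do not even try. */
        return free_rmids >= num_online_cpus();
    }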

XXX Another idea I just had is to allow the user to
    somehow specify a different 'granularity'. Something
    like allowing 'percpu_cmt'|'percore_cmt'|'persocket_cmt'
    with the following meaning:
     + 'percpu_cmt': as in this patch
     + 'percore_cmt': the same RMID for all the hardware
        threads of a core
     + 'persocket_cmt': the same RMID for all the cores of
        a socket.

    'percore_cmt' would only allow gathering info on a
    per-core basis... still better than nothing if we
    do not have enough RMIDs for all pCPUs.

    'persocket_cmt' would basically only allow tracking the
    amount of free L3 on each socket (by subtracting the
    monitored value from the total). Again, still better
    than nothing; it would use very few RMIDs, and I could
    think of ways of using this information in a few
    places in the scheduler... A sketch of how the three
    granularities could map onto the existing topology
    masks follows below.

    Again, thoughts?
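
    Purely as an illustration of the idea (the option
    variable, the enum and the helper are all made up for
    the sketch; only cpu_sibling_mask and cpu_core_mask are
    the topology masks Xen already maintains), each pCPU
    could inherit the RMID of the first CPU in the relevant
    mask, with only that first CPU actually allocating one:

    /* Hypothetical granularity setting (not in this patch). */
    enum cmt_gran { CMT_PERCPU, CMT_PERCORE, CMT_PERSOCKET };
    static enum cmt_gran __read_mostly opt_cmt_granularity = CMT_PERCPU;

    static unsigned int rmid_owner_cpu(unsigned int cpu)
    {
        switch ( opt_cmt_granularity )
        {
        case CMT_PERCORE:
            /* All hyperthreads of a core share one RMID. */
            return cpumask_first(per_cpu(cpu_sibling_mask, cpu));
        case CMT_PERSOCKET:
            /* All CPUs of a socket share one RMID. */
            return cpumask_first(per_cpu(cpu_core_mask, cpu));
        case CMT_PERCPU:
        default:
            return cpu;
        }
    }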

XXX Finally, when a domain with its own RMID executes on
    a core that also has its own RMID, domain monitoring
    just overrides per-CPU monitoring. That means the
    cache occupancy reported for that pCPU is not accurate.

    For reasons why this situation is difficult to deal
    with properly, see the document in the cover letter.

    Ideas on how to deal with this, either about how to
    make it work or how to handle the thing from a
    'policing' perspective (i.e., which of the two
    mechanisms should be disabled or penalized?), are very
    welcome. One alternative policy is sketched below.
Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
 xen/arch/x86/psr.c        |   72 ++++++++++++++++++++++++++++++++++++---------
 xen/include/asm-x86/psr.h |   11 ++++++-
 2 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index 0f2a6ce..a71391c 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -26,10 +26,13 @@ struct psr_assoc {
 
 struct psr_cmt *__read_mostly psr_cmt;
 static bool_t __initdata opt_psr;
+static bool_t __initdata opt_cpu_cmt;
 static unsigned int __initdata opt_rmid_max = 255;
 static uint64_t rmid_mask;
 static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);
 
+DEFINE_PER_CPU(unsigned int, pcpu_rmid);
+
 static void __init parse_psr_param(char *s)
 {
     char *ss, *val_str;
@@ -57,6 +60,8 @@ static void __init parse_psr_param(char *s)
                                     val_str);
             }
         }
+        else if ( !strcmp(s, "percpu_cmt") )
+            opt_cpu_cmt = 1;
         else if ( val_str && !strcmp(s, "rmid_max") )
             opt_rmid_max = simple_strtoul(val_str, NULL, 0);
 
@@ -94,8 +99,8 @@ static void __init init_psr_cmt(unsigned int rmid_max)
     }
 
     psr_cmt->rmid_max = min(psr_cmt->rmid_max, psr_cmt->l3.rmid_max);
-    psr_cmt->rmid_to_dom = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
-    if ( !psr_cmt->rmid_to_dom )
+    psr_cmt->rmids = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
+    if ( !psr_cmt->rmids )
     {
         xfree(psr_cmt);
         psr_cmt = NULL;
@@ -107,56 +112,86 @@ static void __init init_psr_cmt(unsigned int rmid_max)
      * with it. To reduce the waste of RMID, reserve RMID 0 for all CPUs that
      * have no domain being monitored.
      */
-    psr_cmt->rmid_to_dom[0] = DOMID_XEN;
+    psr_cmt->rmids[0] = DOMID_XEN;
     for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
-        psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
+        psr_cmt->rmids[rmid] = DOMID_INVALID;
 
     printk(XENLOG_INFO "Cache Monitoring Technology enabled, RMIDs: %u\n",
            psr_cmt->rmid_max);
 }
 
-/* Called with domain lock held, no psr specific lock needed */
-int psr_alloc_rmid(struct domain *d)
+static int _psr_alloc_rmid(unsigned int *trmid, unsigned int id)
 {
     unsigned int rmid;
 
     ASSERT(psr_cmt_enabled());
 
-    if ( d->arch.psr_rmid > 0 )
+    if ( *trmid > 0 )
         return -EEXIST;
 
     for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
     {
-        if ( psr_cmt->rmid_to_dom[rmid] != DOMID_INVALID )
+        if ( psr_cmt->rmids[rmid] != DOMID_INVALID )
             continue;
 
-        psr_cmt->rmid_to_dom[rmid] = d->domain_id;
+        psr_cmt->rmids[rmid] = id;
         break;
     }
 
     /* No RMID available, assign RMID=0 by default. */
     if ( rmid > psr_cmt->rmid_max )
     {
-        d->arch.psr_rmid = 0;
+        *trmid = 0;
         return -EUSERS;
     }
 
-    d->arch.psr_rmid = rmid;
+    *trmid = rmid;
 
     return 0;
 }
 
+int psr_alloc_pcpu_rmid(unsigned int cpu)
+{
+    int ret;
+
+    /* XXX Any locking required? */
+    ret = _psr_alloc_rmid(&per_cpu(pcpu_rmid, cpu), DOMID_XEN);
+    if ( !ret )
+        printk(XENLOG_DEBUG "using RMID %u for CPU %u\n",
+               per_cpu(pcpu_rmid, cpu), cpu);
+
+    return ret;
+}
+
 /* Called with domain lock held, no psr specific lock needed */
-void psr_free_rmid(struct domain *d)
+int psr_alloc_rmid(struct domain *d)
 {
-    unsigned int rmid;
+    return _psr_alloc_rmid(&d->arch.psr_rmid, d->domain_id);
+}
 
-    rmid = d->arch.psr_rmid;
+static void _psr_free_rmid(unsigned int rmid)
+{
     /* We do not free system reserved "RMID=0". */
     if ( rmid == 0 )
         return;
 
-    psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
+    psr_cmt->rmids[rmid] = DOMID_INVALID;
+}
+
+void psr_free_pcpu_rmid(unsigned int cpu)
+{
+    printk(XENLOG_DEBUG "Freeing RMID %u. CPU %u no longer monitored\n",
+           per_cpu(pcpu_rmid, cpu), cpu);
+
+    /* XXX Any locking required? */
+    _psr_free_rmid(per_cpu(pcpu_rmid, cpu));
+    per_cpu(pcpu_rmid, cpu) = 0;
+}
+
+/* Called with domain lock held, no psr specific lock needed */
+void psr_free_rmid(struct domain *d)
+{
+    _psr_free_rmid(d->arch.psr_rmid);
     d->arch.psr_rmid = 0;
 }
 
@@ -184,6 +219,10 @@ static inline void psr_assoc_reg_write(struct psr_assoc *psra, uint64_t reg)
 
 static inline void psr_assoc_rmid(uint64_t *reg, unsigned int rmid)
 {
+    /* Domain not monitored: switch to the RMID of the pcpu (if any) */
+    if ( rmid == 0 )
+        rmid = this_cpu(pcpu_rmid);
+
     *reg = (*reg & ~rmid_mask) | (rmid & rmid_mask);
 }
 
@@ -202,6 +241,9 @@ void psr_ctxt_switch_to(struct domain *d)
 
 static void psr_cpu_init(unsigned int cpu)
 {
+    if ( opt_cpu_cmt && !psr_alloc_pcpu_rmid(cpu) )
+        printk(XENLOG_INFO "pcpu %u: using RMID %u\n",
+                cpu, per_cpu(pcpu_rmid, cpu));
     psr_assoc_init();
 }
 
diff --git a/xen/include/asm-x86/psr.h b/xen/include/asm-x86/psr.h
index 585350c..b70f605 100644
--- a/xen/include/asm-x86/psr.h
+++ b/xen/include/asm-x86/psr.h
@@ -33,17 +33,26 @@ struct psr_cmt_l3 {
 struct psr_cmt {
     unsigned int rmid_max;
     unsigned int features;
-    domid_t *rmid_to_dom;
+    domid_t *rmids;
     struct psr_cmt_l3 l3;
 };
 
 extern struct psr_cmt *psr_cmt;
 
+/*
+ * RMID associated to each core, to track the cache
+ * occupancy contribution of the core itself.
+ */
+DECLARE_PER_CPU(unsigned int, pcpu_rmid);
+
 static inline bool_t psr_cmt_enabled(void)
 {
     return !!psr_cmt;
 }
 
+int psr_alloc_pcpu_rmid(unsigned int cpu);
+void psr_free_pcpu_rmid(unsigned int cpu);
+
 int psr_alloc_rmid(struct domain *d);
 void psr_free_rmid(struct domain *d);
 

