[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/3] x86/idle: update to include further package/core residency MSRs



> From: Jan Beulich
> Sent: Wednesday, March 05, 2014 6:43 PM
> 
> >>> On 05.03.14 at 11:37, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
> > With the number of these growing it becomes increasingly desirable to
> > not repeatedly alter the sysctl interface to accommodate them. Replace
> > the explicit listing of numbered states by arrays, unused fields of
> > which will remain untouched by the hypercall.
> 
> Just added this to the description:
> 
> "The adjusted sysctl interface at once fixes an unrelated shortcoming
>  of the original one: The "nr" field, specifying the size of the
>  "triggers" and "residencies" arrays, has to be an input (along with
>  being an output), which the previous implementation didn't obey to."
> 
> Sorry for forgetting the first time through.
> 
> Jan
> 
> > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

the idea looks OK to me, but I'm not an expert on the tools side. If there's
no more concern from Ian, here is my ack:

Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>

> >
> > --- 2014-02-13.orig/tools/libxc/xc_pm.c     2014-03-04 17:43:06.000000000
> +0100
> > +++ 2014-02-13/tools/libxc/xc_pm.c  2014-03-05 08:54:58.000000000 +0100
> > @@ -123,46 +123,90 @@ int xc_pm_get_max_cx(xc_interface *xch,
> >
> >  int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat *cxpt)
> >  {
> > -    DECLARE_SYSCTL;
> > -    DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers, 0,
> > XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> > -    DECLARE_NAMED_HYPERCALL_BOUNCE(residencies,
> cxpt->residencies, 0,
> > XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> > +    uint64_t pc[7], cc[7];
> > +    struct xc_cx_stat_v2 cxpt2 = {
> > +        .triggers = cxpt->triggers,
> > +        .residencies = cxpt->residencies,
> > +        .nr_pc = sizeof(pc) / sizeof(*pc),
> > +        .nr_cc = sizeof(cc) / sizeof(*cc),
> > +        .pc = pc,
> > +        .cc = cc
> > +    };
> >      int max_cx, ret;
> >
> >      if( !cxpt->triggers || !cxpt->residencies )
> >          return -EINVAL;
> >
> >      if ( (ret = xc_pm_get_max_cx(xch, cpuid, &max_cx)) )
> > -        goto unlock_0;
> > +        return ret;
> >
> > -    HYPERCALL_BOUNCE_SET_SIZE(triggers, max_cx * sizeof(uint64_t));
> > -    HYPERCALL_BOUNCE_SET_SIZE(residencies, max_cx *
> sizeof(uint64_t));
> > +    cxpt2.nr = max_cx;
> > +    ret = xc_pm_get_cx_stat(xch, cpuid, &cxpt2);
> > +
> > +    cxpt->nr = cxpt2.nr;
> > +    cxpt->last = cxpt2.last;
> > +    cxpt->idle_time = cxpt2.idle_time;
> > +    cxpt->pc2 = pc[1];
> > +    cxpt->pc3 = pc[2];
> > +    cxpt->pc6 = pc[5];
> > +    cxpt->pc7 = pc[6];
> > +    cxpt->cc3 = cc[2];
> > +    cxpt->cc6 = cc[5];
> > +    cxpt->cc7 = cc[6];
> > +
> > +    return ret;
> > +}
> > +
> > +int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2
> > *cxpt)
> > +{
> > +    DECLARE_SYSCTL;
> > +    DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers,
> > +                                   cxpt->nr *
> sizeof(*cxpt->triggers),
> > +
> XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> > +    DECLARE_NAMED_HYPERCALL_BOUNCE(residencies,
> cxpt->residencies,
> > +                                   cxpt->nr *
> sizeof(*cxpt->residencies),
> > +
> XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> > +    DECLARE_NAMED_HYPERCALL_BOUNCE(pc, cxpt->pc,
> > +                                   cxpt->nr_pc * sizeof(*cxpt->pc),
> > +
> XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> > +    DECLARE_NAMED_HYPERCALL_BOUNCE(cc, cxpt->cc,
> > +                                   cxpt->nr_cc * sizeof(*cxpt->cc),
> > +
> XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> > +    int ret = -1;
> >
> > -    ret = -1;
> >      if ( xc_hypercall_bounce_pre(xch, triggers) )
> >          goto unlock_0;
> >      if ( xc_hypercall_bounce_pre(xch, residencies) )
> >          goto unlock_1;
> > +    if ( xc_hypercall_bounce_pre(xch, pc) )
> > +        goto unlock_2;
> > +    if ( xc_hypercall_bounce_pre(xch, cc) )
> > +        goto unlock_3;
> >
> >      sysctl.cmd = XEN_SYSCTL_get_pmstat;
> >      sysctl.u.get_pmstat.type = PMSTAT_get_cxstat;
> >      sysctl.u.get_pmstat.cpuid = cpuid;
> > +    sysctl.u.get_pmstat.u.getcx.nr = cxpt->nr;
> > +    sysctl.u.get_pmstat.u.getcx.nr_pc = cxpt->nr_pc;
> > +    sysctl.u.get_pmstat.u.getcx.nr_cc = cxpt->nr_cc;
> >      set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.triggers, triggers);
> >      set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.residencies,
> > residencies);
> > +    set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.pc, pc);
> > +    set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.cc, cc);
> >
> >      if ( (ret = xc_sysctl(xch, &sysctl)) )
> > -        goto unlock_2;
> > +        goto unlock_4;
> >
> >      cxpt->nr = sysctl.u.get_pmstat.u.getcx.nr;
> >      cxpt->last = sysctl.u.get_pmstat.u.getcx.last;
> >      cxpt->idle_time = sysctl.u.get_pmstat.u.getcx.idle_time;
> > -    cxpt->pc2 = sysctl.u.get_pmstat.u.getcx.pc2;
> > -    cxpt->pc3 = sysctl.u.get_pmstat.u.getcx.pc3;
> > -    cxpt->pc6 = sysctl.u.get_pmstat.u.getcx.pc6;
> > -    cxpt->pc7 = sysctl.u.get_pmstat.u.getcx.pc7;
> > -    cxpt->cc3 = sysctl.u.get_pmstat.u.getcx.cc3;
> > -    cxpt->cc6 = sysctl.u.get_pmstat.u.getcx.cc6;
> > -    cxpt->cc7 = sysctl.u.get_pmstat.u.getcx.cc7;
> > +    cxpt->nr_pc = sysctl.u.get_pmstat.u.getcx.nr_pc;
> > +    cxpt->nr_cc = sysctl.u.get_pmstat.u.getcx.nr_cc;
> >
> > +unlock_4:
> > +    xc_hypercall_bounce_post(xch, cc);
> > +unlock_3:
> > +    xc_hypercall_bounce_post(xch, pc);
> >  unlock_2:
> >      xc_hypercall_bounce_post(xch, residencies);
> >  unlock_1:
> > --- 2014-02-13.orig/tools/libxc/xenctrl.h   2014-03-04 17:43:06.000000000
> +0100
> > +++ 2014-02-13/tools/libxc/xenctrl.h        2014-03-04 17:50:49.000000000 
> > +0100
> > @@ -1934,7 +1934,7 @@ int xc_pm_get_max_px(xc_interface *xch,
> >  int xc_pm_get_pxstat(xc_interface *xch, int cpuid, struct xc_px_stat
> > *pxpt);
> >  int xc_pm_reset_pxstat(xc_interface *xch, int cpuid);
> >
> > -struct xc_cx_stat {
> > +struct xc_cx_stat { /* DEPRECATED (use v2 below instead)! */
> >      uint32_t nr;    /* entry nr in triggers & residencies, including C0 */
> >      uint32_t last;         /* last Cx state */
> >      uint64_t idle_time;    /* idle time from boot */
> > @@ -1950,8 +1950,22 @@ struct xc_cx_stat {
> >  };
> >  typedef struct xc_cx_stat xc_cx_stat_t;
> >
> > +struct xc_cx_stat_v2 {
> > +    uint32_t nr;           /* entry nr in triggers[]/residencies[], incl C0
> > */
> > +    uint32_t last;         /* last Cx state */
> > +    uint64_t idle_time;    /* idle time from boot */
> > +    uint64_t *triggers;    /* Cx trigger counts */
> > +    uint64_t *residencies; /* Cx residencies */
> > +    uint32_t nr_pc;        /* entry nr in pc[] */
> > +    uint32_t nr_cc;        /* entry nr in cc[] */
> > +    uint64_t *pc;          /* 1-biased indexing (i.e. excl C0) */
> > +    uint64_t *cc;          /* 1-biased indexing (i.e. excl C0) */
> > +};
> > +typedef struct xc_cx_stat_v2 xc_cx_stat_v2_t;
> > +
> >  int xc_pm_get_max_cx(xc_interface *xch, int cpuid, int *max_cx);
> >  int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat
> > *cxpt);
> > +int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2
> > *);
> >  int xc_pm_reset_cxstat(xc_interface *xch, int cpuid);
> >
> >  int xc_cpu_online(xc_interface *xch, int cpu);
> > --- 2014-02-13.orig/xen/arch/x86/acpi/cpu_idle.c    2014-03-04
> 17:43:06.000000000 +0100
> > +++ 2014-02-13/xen/arch/x86/acpi/cpu_idle.c 2014-03-04
> 17:38:39.000000000 +0100
> > @@ -62,13 +62,17 @@
> >
> >  #define GET_HW_RES_IN_NS(msr, val) \
> >      do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
> > -#define GET_PC2_RES(val)  GET_HW_RES_IN_NS(0x60D, val) /* SNB only
> */
> > +#define GET_PC2_RES(val)  GET_HW_RES_IN_NS(0x60D, val) /* SNB
> onwards */
> >  #define GET_PC3_RES(val)  GET_HW_RES_IN_NS(0x3F8, val)
> >  #define GET_PC6_RES(val)  GET_HW_RES_IN_NS(0x3F9, val)
> >  #define GET_PC7_RES(val)  GET_HW_RES_IN_NS(0x3FA, val)
> > +#define GET_PC8_RES(val)  GET_HW_RES_IN_NS(0x630, val) /* some
> Haswells
> > only */
> > +#define GET_PC9_RES(val)  GET_HW_RES_IN_NS(0x631, val) /* some
> Haswells
> > only */
> > +#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some
> Haswells
> > only */
> > +#define GET_CC1_RES(val)  GET_HW_RES_IN_NS(0x660, val) /*
> Silvermont only
> > */
> >  #define GET_CC3_RES(val)  GET_HW_RES_IN_NS(0x3FC, val)
> >  #define GET_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FD, val)
> > -#define GET_CC7_RES(val)  GET_HW_RES_IN_NS(0x3FE, val) /* SNB only
> */
> > +#define GET_CC7_RES(val)  GET_HW_RES_IN_NS(0x3FE, val) /* SNB
> onwards */
> >
> >  static void lapic_timer_nop(void) { }
> >  void (*__read_mostly lapic_timer_off)(void);
> > @@ -111,8 +115,13 @@ struct hw_residencies
> >  {
> >      uint64_t pc2;
> >      uint64_t pc3;
> > +    uint64_t pc4;
> >      uint64_t pc6;
> >      uint64_t pc7;
> > +    uint64_t pc8;
> > +    uint64_t pc9;
> > +    uint64_t pc10;
> > +    uint64_t cc1;
> >      uint64_t cc3;
> >      uint64_t cc6;
> >      uint64_t cc7;
> > @@ -128,6 +137,12 @@ static void do_get_hw_residencies(void *
> >
> >      switch ( c->x86_model )
> >      {
> > +    /* 4th generation Intel Core (Haswell) */
> > +    case 0x45:
> > +        GET_PC8_RES(hw_res->pc8);
> > +        GET_PC9_RES(hw_res->pc9);
> > +        GET_PC10_RES(hw_res->pc10);
> > +        /* fall through */
> >      /* Sandy bridge */
> >      case 0x2A:
> >      case 0x2D:
> > @@ -137,7 +152,6 @@ static void do_get_hw_residencies(void *
> >      /* Haswell */
> >      case 0x3C:
> >      case 0x3F:
> > -    case 0x45:
> >      case 0x46:
> >      /* future */
> >      case 0x3D:
> > @@ -160,6 +174,22 @@ static void do_get_hw_residencies(void *
> >          GET_CC3_RES(hw_res->cc3);
> >          GET_CC6_RES(hw_res->cc6);
> >          break;
> > +    /* various Atoms */
> > +    case 0x27:
> > +        GET_PC3_RES(hw_res->pc2); /* abusing GET_PC3_RES */
> > +        GET_PC6_RES(hw_res->pc4); /* abusing GET_PC6_RES */
> > +        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
> > +        break;
> > +    /* Silvermont */
> > +    case 0x37:
> > +    case 0x4A:
> > +    case 0x4D:
> > +    case 0x5A:
> > +    case 0x5D:
> > +        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
> > +        GET_CC1_RES(hw_res->cc1);
> > +        GET_CC6_RES(hw_res->cc6);
> > +        break;
> >      }
> >  }
> >
> > @@ -179,10 +209,16 @@ static void print_hw_residencies(uint32_
> >
> >      get_hw_residencies(cpu, &hw_res);
> >
> > -    printk("PC2[%"PRId64"] PC3[%"PRId64"] PC6[%"PRId64"]
> PC7[%"PRId64"]\n",
> > -           hw_res.pc2, hw_res.pc3, hw_res.pc6, hw_res.pc7);
> > -    printk("CC3[%"PRId64"] CC6[%"PRId64"] CC7[%"PRId64"]\n",
> > -           hw_res.cc3, hw_res.cc6,hw_res.cc7);
> > +    printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"]
> > PC7[%"PRIu64"]\n",
> > +           hw_res.pc2,
> > +           hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3,
> > +           hw_res.pc6, hw_res.pc7);
> > +    if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 )
> > +        printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n",
> > +               hw_res.pc8, hw_res.pc9, hw_res.pc10);
> > +    printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n",
> > +           hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3,
> > +           hw_res.cc6, hw_res.cc7);
> >  }
> >
> >  static char* acpi_cstate_method_name[] =
> > @@ -1097,19 +1133,21 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
> >      struct acpi_processor_power *power = processor_powers[cpuid];
> >      uint64_t idle_usage = 0, idle_res = 0;
> >      uint64_t usage[ACPI_PROCESSOR_MAX_POWER],
> > res[ACPI_PROCESSOR_MAX_POWER];
> > -    int i;
> > -    struct hw_residencies hw_res;
> > +    unsigned int i, nr, nr_pc = 0, nr_cc = 0;
> >
> >      if ( power == NULL )
> >      {
> >          stat->last = 0;
> >          stat->nr = 0;
> >          stat->idle_time = 0;
> > +        stat->nr_pc = 0;
> > +        stat->nr_cc = 0;
> >          return 0;
> >      }
> >
> >      stat->last = power->last_state ? power->last_state->idx : 0;
> >      stat->idle_time = get_cpu_idle_time(cpuid);
> > +    nr = min(stat->nr, power->count);
> >
> >      /* mimic the stat when detail info hasn't been registered by dom0 */
> >      if ( pm_idle_save == NULL )
> > @@ -1118,14 +1156,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
> >
> >          usage[1] = idle_usage = 1;
> >          res[1] = idle_res = stat->idle_time;
> > -
> > -        memset(&hw_res, 0, sizeof(hw_res));
> >      }
> >      else
> >      {
> > +        struct hw_residencies hw_res;
> > +
> >          stat->nr = power->count;
> >
> > -        for ( i = 1; i < power->count; i++ )
> > +        for ( i = 1; i < nr; i++ )
> >          {
> >              spin_lock_irq(&power->stat_lock);
> >              usage[i] = power->states[i].usage;
> > @@ -1137,22 +1175,42 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
> >          }
> >
> >          get_hw_residencies(cpuid, &hw_res);
> > +
> > +#define PUT_xC(what, n) do { \
> > +        if ( stat->nr_##what >= n && \
> > +             copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n,
> 1) ) \
> > +            return -EFAULT; \
> > +        if ( hw_res.what##n ) \
> > +            nr_##what = n; \
> > +    } while ( 0 )
> > +#define PUT_PC(n) PUT_xC(pc, n)
> > +        PUT_PC(2);
> > +        PUT_PC(3);
> > +        PUT_PC(4);
> > +        PUT_PC(6);
> > +        PUT_PC(7);
> > +        PUT_PC(8);
> > +        PUT_PC(9);
> > +        PUT_PC(10);
> > +#undef PUT_PC
> > +#define PUT_CC(n) PUT_xC(cc, n)
> > +        PUT_CC(1);
> > +        PUT_CC(3);
> > +        PUT_CC(6);
> > +        PUT_CC(7);
> > +#undef PUT_CC
> > +#undef PUT_xC
> >      }
> >
> >      usage[0] = idle_usage;
> >      res[0] = NOW() - idle_res;
> >
> > -    if ( copy_to_guest(stat->triggers, usage, stat->nr) ||
> > -         copy_to_guest(stat->residencies, res, stat->nr) )
> > +    if ( copy_to_guest(stat->triggers, usage, nr) ||
> > +         copy_to_guest(stat->residencies, res, nr) )
> >          return -EFAULT;
> >
> > -    stat->pc2 = hw_res.pc2;
> > -    stat->pc3 = hw_res.pc3;
> > -    stat->pc6 = hw_res.pc6;
> > -    stat->pc7 = hw_res.pc7;
> > -    stat->cc3 = hw_res.cc3;
> > -    stat->cc6 = hw_res.cc6;
> > -    stat->cc7 = hw_res.cc7;
> > +    stat->nr_pc = nr_pc;
> > +    stat->nr_cc = nr_cc;
> >
> >      return 0;
> >  }
> > --- 2014-02-13.orig/xen/include/public/sysctl.h     2014-03-04
> 17:43:06.000000000 +0100
> > +++ 2014-02-13/xen/include/public/sysctl.h  2014-03-04
> 17:34:15.000000000 +0100
> > @@ -34,7 +34,7 @@
> >  #include "xen.h"
> >  #include "domctl.h"
> >
> > -#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A
> > +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B
> >
> >  /*
> >   * Read console content from Xen buffer ring.
> > @@ -226,13 +226,10 @@ struct pm_cx_stat {
> >      uint64_aligned_t idle_time;                 /* idle time from
> boot */
> >      XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
> >      XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
> > -    uint64_aligned_t pc2;
> > -    uint64_aligned_t pc3;
> > -    uint64_aligned_t pc6;
> > -    uint64_aligned_t pc7;
> > -    uint64_aligned_t cc3;
> > -    uint64_aligned_t cc6;
> > -    uint64_aligned_t cc7;
> > +    uint32_t nr_pc;                          /* entry nr in pc[] */
> > +    uint32_t nr_cc;                          /* entry nr in cc[] */
> > +    XEN_GUEST_HANDLE_64(uint64) pc;          /* 1-biased indexing
> */
> > +    XEN_GUEST_HANDLE_64(uint64) cc;          /* 1-biased indexing
> */
> >  };
> >
> >  struct xen_sysctl_get_pmstat {
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.