[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] Re: xenpm: provide core/package cstate residencies



On 12/07/2010 16:22, "Wei, Gang" <gang.wei@xxxxxxxxx> wrote:

> Resend it.
> 
> =======
> xenpm: provide core/package cstate residencies
> 
> According to the Intel 64 and IA-32 Architectures SDM, Volume 3B, Appendix B,
> Intel Nehalem/Westmere processors provide hardware MSRs that report the
> core/package cstate residencies. Extend the sysctl_get_pmstat interface to
> pass the core/package cstate residencies, and modify xenpm to output that
> information.

I applied the hypervisor component of this patch. I leave it to Ian Jackson
to deal with the tools part. In future please split patches that touch both
hypervisor and tools into a patch series in which each component patch
touches only one or the other.

 Thanks,
 Keir

> Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
> 
> diff -r 1af6303f103c tools/libxc/xc_pm.c
> --- a/tools/libxc/xc_pm.c Mon Jul 12 14:12:54 2010 +0800
> +++ b/tools/libxc/xc_pm.c Mon Jul 12 22:16:04 2010 +0800
> @@ -152,6 +152,11 @@ int xc_pm_get_cxstat(xc_interface *xch,
>      cxpt->nr = sysctl.u.get_pmstat.u.getcx.nr;
>      cxpt->last = sysctl.u.get_pmstat.u.getcx.last;
>      cxpt->idle_time = sysctl.u.get_pmstat.u.getcx.idle_time;
> +    cxpt->pc3 = sysctl.u.get_pmstat.u.getcx.pc3;
> +    cxpt->pc6 = sysctl.u.get_pmstat.u.getcx.pc6;
> +    cxpt->pc7 = sysctl.u.get_pmstat.u.getcx.pc7;
> +    cxpt->cc3 = sysctl.u.get_pmstat.u.getcx.cc3;
> +    cxpt->cc6 = sysctl.u.get_pmstat.u.getcx.cc6;
>  
>  unlock_3:
>      unlock_pages(cxpt->residencies, max_cx * sizeof(uint64_t));
> diff -r 1af6303f103c tools/libxc/xenctrl.h
> --- a/tools/libxc/xenctrl.h Mon Jul 12 14:12:54 2010 +0800
> +++ b/tools/libxc/xenctrl.h Mon Jul 12 22:16:04 2010 +0800
> @@ -1393,6 +1393,11 @@ struct xc_cx_stat {
>      uint64_t idle_time;    /* idle time from boot */
>      uint64_t *triggers;    /* Cx trigger counts */
>      uint64_t *residencies; /* Cx residencies */
> +    uint64_t pc3;
> +    uint64_t pc6;
> +    uint64_t pc7;
> +    uint64_t cc3;
> +    uint64_t cc6;
>  };
>  typedef struct xc_cx_stat xc_cx_stat_t;
>  
> diff -r 1af6303f103c tools/misc/xenpm.c
> --- a/tools/misc/xenpm.c Mon Jul 12 14:12:54 2010 +0800
> +++ b/tools/misc/xenpm.c Mon Jul 12 22:16:04 2010 +0800
> @@ -15,6 +15,7 @@
>   * this program; if not, write to the Free Software Foundation, Inc., 59
> Temple
>   * Place - Suite 330, Boston, MA 02111-1307 USA.
>   */
> +#define MAX_NR_CPU 512
>  
>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -91,6 +92,13 @@ static void print_cxstat(int cpuid, stru
>          printf("                       residency  [%020"PRIu64" ms]\n",
>                 cxstat->residencies[i]/1000000UL);
>      }
> +    printf("pc3                  : [%020"PRIu64" ms]\n"
> +           "pc6                  : [%020"PRIu64" ms]\n"
> +           "pc7                  : [%020"PRIu64" ms]\n",
> +           cxstat->pc3/1000000UL, cxstat->pc6/1000000UL,
> cxstat->pc7/1000000UL);
> +    printf("cc3                  : [%020"PRIu64" ms]\n"
> +           "cc6                  : [%020"PRIu64" ms]\n",
> +           cxstat->cc3/1000000UL, cxstat->cc6/1000000UL);
>      printf("\n");
>  }
>  
> @@ -306,9 +314,13 @@ static uint64_t *sum, *sum_cx, *sum_px;
>  
>  static void signal_int_handler(int signo)
>  {
> -    int i, j;
> +    int i, j, k, ret;
>      struct timeval tv;
>      int cx_cap = 0, px_cap = 0;
> +    uint32_t cpu_to_core[MAX_NR_CPU];
> +    uint32_t cpu_to_socket[MAX_NR_CPU];
> +    uint32_t cpu_to_node[MAX_NR_CPU];
> +    xc_topologyinfo_t info = { 0 };
>  
>      if ( gettimeofday(&tv, NULL) == -1 )
>      {
> @@ -369,6 +381,93 @@ static void signal_int_handler(int signo
>                      pxstat_start[i].pt[j].residency;
>                  printf("  P%d\t%"PRIu64"\t(%5.2f%%)\n", j,
>                          res / 1000000UL, 100UL * res / (double)sum_px[i]);
> +            }
> +        }
> +    }
> +
> +    set_xen_guest_handle(info.cpu_to_core, cpu_to_core);
> +    set_xen_guest_handle(info.cpu_to_socket, cpu_to_socket);
> +    set_xen_guest_handle(info.cpu_to_node, cpu_to_node);
> +    info.max_cpu_index = MAX_NR_CPU - 1;
> +
> +    ret = xc_topologyinfo(xc_handle, &info);
> +    if ( !ret )
> +    {
> +        uint32_t socket_ids[MAX_NR_CPU];
> +        uint32_t core_ids[MAX_NR_CPU];
> +        uint32_t socket_nr = 0;
> +        uint32_t core_nr = 0;
> +
> +        if ( info.max_cpu_index > MAX_NR_CPU - 1 )
> +            info.max_cpu_index = MAX_NR_CPU - 1;
> +        /* check validity */
> +        for ( i = 0; i <= info.max_cpu_index; i++ )
> +        {
> +            if ( cpu_to_core[i] == INVALID_TOPOLOGY_ID ||
> +                 cpu_to_socket[i] == INVALID_TOPOLOGY_ID )
> +                break;
> +        }
> +        if ( i > info.max_cpu_index )
> +        {
> +            /* find socket nr & core nr per socket */
> +            for ( i = 0; i <= info.max_cpu_index; i++ )
> +            {
> +                for ( j = 0; j < socket_nr; j++ )
> +                    if ( cpu_to_socket[i] == socket_ids[j] )
> +                        break;
> +                if ( j == socket_nr )
> +                {
> +                    socket_ids[j] = cpu_to_socket[i];
> +                    socket_nr++;
> +                }
> +
> +                for ( j = 0; j < core_nr; j++ )
> +                    if ( cpu_to_core[i] == core_ids[j] )
> +                        break;
> +                if ( j == core_nr )
> +                {
> +                    core_ids[j] = cpu_to_core[i];
> +                    core_nr++;
> +                }
> +            }
> +
> +            /* print out CC? and PC? */
> +            for ( i = 0; i < socket_nr; i++ )
> +            {
> +                uint64_t res;
> +                for ( j = 0; j <= info.max_cpu_index; j++ )
> +                {
> +                    if ( cpu_to_socket[j] == socket_ids[i] )
> +                        break;
> +                }
> +                printf("Socket %d\n", socket_ids[i]);
> +                res = cxstat_end[j].pc3 - cxstat_start[j].pc3;
> +                printf("\tPC3\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL,
> +                       100UL * res / (double)sum_cx[j]);
> +                res = cxstat_end[j].pc6 - cxstat_start[j].pc6;
> +                printf("\tPC6\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL,
> +                       100UL * res / (double)sum_cx[j]);
> +                res = cxstat_end[j].pc7 - cxstat_start[j].pc7;
> +                printf("\tPC7\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL,
> +                       100UL * res / (double)sum_cx[j]);
> +                for ( k = 0; k < core_nr; k++ )
> +                {
> +                    for ( j = 0; j <= info.max_cpu_index; j++ )
> +                    {
> +                        if ( cpu_to_socket[j] == socket_ids[i] &&
> +                             cpu_to_core[j] == core_ids[k] )
> +                            break;
> +                    }
> +                    printf("\t Core %d CPU %d\n", core_ids[k], j);
> +                    res = cxstat_end[j].cc3 - cxstat_start[j].cc3;
> +                    printf("\t\tCC3\t%"PRIu64" ms\t%.2f%%\n",  res /
> 1000000UL, 
> +                           100UL * res / (double)sum_cx[j]);
> +                    res = cxstat_end[j].cc6 - cxstat_start[j].cc6;
> +                    printf("\t\tCC6\t%"PRIu64" ms\t%.2f%%\n",  res /
> 1000000UL, 
> +                           100UL * res / (double)sum_cx[j]);
> +                    printf("\n");
> +
> +                }
>              }
>          }
>          printf("  Avg freq\t%d\tKHz\n", avgfreq[i]);
> @@ -833,8 +932,6 @@ out:
>      fprintf(stderr, "failed to set governor name\n");
>  }
>  
> -#define MAX_NR_CPU 512
> -
>  void cpu_topology_func(int argc, char *argv[])
>  {
>      uint32_t cpu_to_core[MAX_NR_CPU];
> diff -r 1af6303f103c xen/arch/x86/acpi/cpu_idle.c
> --- a/xen/arch/x86/acpi/cpu_idle.c Mon Jul 12 14:12:54 2010 +0800
> +++ b/xen/arch/x86/acpi/cpu_idle.c Mon Jul 12 22:58:25 2010 +0800
> @@ -55,6 +55,14 @@
>  
>  /*#define DEBUG_PM_CX*/
>  
> +#define GET_HW_RES_IN_NS(msr, val) \
> +    do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
> +#define GET_PC3_RES(val)  GET_HW_RES_IN_NS(0x3F8, val)
> +#define GET_PC6_RES(val)  GET_HW_RES_IN_NS(0x3F9, val)
> +#define GET_PC7_RES(val)  GET_HW_RES_IN_NS(0x3FA, val)
> +#define GET_CC3_RES(val)  GET_HW_RES_IN_NS(0x3FC, val)
> +#define GET_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FD, val)
> +
>  static void lapic_timer_nop(void) { }
>  static void (*lapic_timer_off)(void);
>  static void (*lapic_timer_on)(void);
> @@ -75,6 +83,63 @@ boolean_param("lapic_timer_c2_ok", local
>  boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
>  
>  static struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
> +
> +struct hw_residencies
> +{
> +    uint64_t pc3;
> +    uint64_t pc6;
> +    uint64_t pc7;
> +    uint64_t cc3;
> +    uint64_t cc6;
> +};
> +
> +static void do_get_hw_residencies(void *arg)
> +{
> +    struct cpuinfo_x86 *c = &current_cpu_data;
> +    struct hw_residencies *hw_res = (struct hw_residencies *)arg;
> +
> +    if ( c->x86_vendor != X86_VENDOR_INTEL || c->x86 != 6 )
> +        return;
> +
> +    switch ( c->x86_model )
> +    {
> +    /* Nehalem */
> +    case 0x1A:
> +    case 0x1E:
> +    case 0x1F:
> +    case 0x2E:
> +    /* Westmere */
> +    case 0x25:
> +    case 0x2C:
> +        GET_PC3_RES(hw_res->pc3);
> +        GET_PC6_RES(hw_res->pc6);
> +        GET_PC7_RES(hw_res->pc7);
> +        GET_CC3_RES(hw_res->cc3);
> +        GET_CC6_RES(hw_res->cc6);
> +        break;
> +    }
> +}
> +
> +static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
> +{
> +    if ( smp_processor_id() == cpu )
> +        do_get_hw_residencies((void *)hw_res);
> +    else
> +        on_selected_cpus(cpumask_of(cpu),
> +                         do_get_hw_residencies, (void *)hw_res, 1);
> +}
> +
> +static void print_hw_residencies(uint32_t cpu)
> +{
> +    struct hw_residencies hw_res = {0};
> +
> +    get_hw_residencies(cpu, &hw_res);
> +
> +    printk("PC3[%"PRId64"] PC6[%"PRId64"] PC7[%"PRId64"]\n",
> +           hw_res.pc3, hw_res.pc6, hw_res.pc7);
> +    printk("CC3[%"PRId64"] CC6[%"PRId64"]\n",
> +           hw_res.cc3, hw_res.cc6);
> +}
>  
>  static char* acpi_cstate_method_name[] =
>  {
> @@ -113,6 +178,7 @@ static void print_acpi_power(uint32_t cp
>      printk("    C0:\tusage[%08d] duration[%"PRId64"]\n",
>             idle_usage, NOW() - idle_res);
>  
> +    print_hw_residencies(cpu);
>  }
>  
>  static void dump_cx(unsigned char key)
> @@ -933,6 +999,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
>      const struct acpi_processor_power *power = processor_powers[cpuid];
>      uint64_t usage, res, idle_usage = 0, idle_res = 0;
>      int i;
> +    struct hw_residencies hw_res = {0};
>  
>      if ( power == NULL )
>      {
> @@ -965,6 +1032,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
>              return -EFAULT;
>      }
>  
> +    get_hw_residencies(cpuid, &hw_res);
> +
> +    stat->pc3 = hw_res.pc3;
> +    stat->pc6 = hw_res.pc6;
> +    stat->pc7 = hw_res.pc7;
> +    stat->cc3 = hw_res.cc3;
> +    stat->cc6 = hw_res.cc6;
> +
>      return 0;
>  }
>  
> diff -r 1af6303f103c xen/arch/x86/time.c
> --- a/xen/arch/x86/time.c Mon Jul 12 14:12:54 2010 +0800
> +++ b/xen/arch/x86/time.c Mon Jul 12 22:16:04 2010 +0800
> @@ -785,6 +785,13 @@ s_time_t get_s_time(void)
>      now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
>  
>      return now;
> +}
> +
> +uint64_t tsc_ticks2ns(uint64_t ticks)
> +{
> +    struct cpu_time *t = &this_cpu(cpu_time);
> +
> +    return scale_delta(ticks, &t->tsc_scale);
>  }
>  
>  /* Explicitly OR with 1 just in case version number gets out of sync. */
> diff -r 1af6303f103c xen/include/asm-x86/time.h
> --- a/xen/include/asm-x86/time.h Mon Jul 12 14:12:54 2010 +0800
> +++ b/xen/include/asm-x86/time.h Mon Jul 12 22:16:04 2010 +0800
> @@ -56,6 +56,8 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
>  uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
>  uint64_t ns_to_acpi_pm_tick(uint64_t ns);
>  
> +uint64_t tsc_ticks2ns(uint64_t ticks);
> +
>  void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp);
>  u64 gtime_to_gtsc(struct domain *d, u64 tsc);
>  
> diff -r 1af6303f103c xen/include/public/sysctl.h
> --- a/xen/include/public/sysctl.h Mon Jul 12 14:12:54 2010 +0800
> +++ b/xen/include/public/sysctl.h Mon Jul 12 22:16:04 2010 +0800
> @@ -223,6 +223,11 @@ struct pm_cx_stat {
>      uint64_aligned_t idle_time;                 /* idle time from boot */
>      XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
>      XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
> +    uint64_aligned_t pc3;
> +    uint64_aligned_t pc6;
> +    uint64_aligned_t pc7;
> +    uint64_aligned_t cc3;
> +    uint64_aligned_t cc6;
>  };
>  
>  struct xen_sysctl_get_pmstat {



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.