[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v6 06/15] cpufreq: Add Hardware P-State (HWP) driver


  • To: Jason Andryuk <jandryuk@xxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 24 Jul 2023 18:14:58 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=LaaEnlkpUVwgi3NGcFwJPwKkPKyvm+Dghfw08tkBYYk=; b=Sk6SiYmi6UC7Hud9561N9de+engVoG0FVOe2aW5ArRox2pc/nnc0bjTOaR/7gPDleRheIiAOWujP7TJQArgXNp4cBW/QsQ5P8bkpA9fHQs6dyLe/1x+aHUEoJhMvhiJb5Ii9g8keNCpDaO0QH7S0kefpt5/vDLACMdJ/InpDBd+kXfolEmBs76y581N+3OCiCjHrkYW7xxsI91otJAy/Yg9s3WsI5ZtJs9Im5+sGhiaY4dfFa4xBP+Ex0oh38SlPKHbE07txiVk2tJoYmjAD0crg6cEq1i5x71f8ScYEiovebS5v+aKrDTf6mGSLeCmaI9qPFd/v0UT1DnFm7qwFBA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=Nj0+PU7JiMpJCIvH1GDmjXz7x8aZOaO0Mhx+Y9S2FGCe/zS6i3B+JkUWmGhxUzoGvBHOdgffkeVfEx0lcW/BppCmsJXMTm63vWm/7WfpfsXKqQzxtf92hRcnMpLXzG4+6lr55zoF1QeZ1RS71DBSK8sOBKnt6dyThgZQdZQ7ZWymiKgT8AKi6KG36d38PzyVox5lqZfnQL4KBXdUBn8q/k3sdwwZac6wIER9M6356kOaIB9QNL8A1hcEiepajCAzEdB4gnPW5DXsT+H+epISJR0AaG84u1Gmh5Z1szDtnVkfpoRQXzDeel0FpdaWCum1k7sYUXNK9IGm/eQwIfyZ0g==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Mon, 24 Jul 2023 16:15:20 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 24.07.2023 14:58, Jason Andryuk wrote:
> --- /dev/null
> +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> @@ -0,0 +1,521 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * hwp.c cpufreq driver to run Intel Hardware P-States (HWP)
> + *
> + * Copyright (C) 2021 Jason Andryuk <jandryuk@xxxxxxxxx>
> + */
> +
> +#include <xen/cpumask.h>
> +#include <xen/init.h>
> +#include <xen/param.h>
> +#include <xen/xmalloc.h>
> +#include <asm/msr.h>
> +#include <acpi/cpufreq/cpufreq.h>
> +
> +static bool __ro_after_init feature_hwp_notification;
> +static bool __ro_after_init feature_hwp_activity_window;
> +
> +static bool __ro_after_init feature_hdc;
> +
> +static bool __ro_after_init opt_cpufreq_hdc = true;
> +
> +union hwp_request
> +{
> +    struct
> +    {
> +        unsigned int min_perf:8;
> +        unsigned int max_perf:8;
> +        unsigned int desired:8;
> +        unsigned int energy_perf:8;
> +        unsigned int activity_window:10;
> +        bool package_control:1;
> +        unsigned int :16;
> +        bool activity_window_valid:1;
> +        bool energy_perf_valid:1;
> +        bool desired_valid:1;
> +        bool max_perf_valid:1;
> +        bool min_perf_valid:1;
> +    };
> +    uint64_t raw;
> +};
> +
> +struct hwp_drv_data
> +{
> +    union
> +    {
> +        uint64_t hwp_caps;
> +        struct
> +        {
> +            unsigned int highest:8;
> +            unsigned int guaranteed:8;
> +            unsigned int most_efficient:8;
> +            unsigned int lowest:8;
> +            unsigned int :32;
> +        } hw;
> +    };
> +    union hwp_request curr_req;
> +    int ret;
> +    uint16_t activity_window;
> +    uint8_t minimum;
> +    uint8_t maximum;
> +    uint8_t desired;
> +    uint8_t energy_perf;
> +};
> +static DEFINE_PER_CPU_READ_MOSTLY(struct hwp_drv_data *, hwp_drv_data);
> +
> +#define hwp_err(cpu, fmt, ...) \
> +    printk(XENLOG_ERR "HWP: CPU%u error: " fmt, cpu, ##__VA_ARGS__)
> +#define hwp_info(fmt, ...)    printk(XENLOG_INFO "HWP: " fmt, ##__VA_ARGS__)

I'm not convinced ", ##__VA_ARGS__" is a good construct to use. I notice
we have a few instances (mainly in code inherited from elsewhere), but I
think it ought to either be plain C99 style (without the ##) or purely
the gcc extension form (not using __VA_ARGS__).

> +#define hwp_verbose(fmt, ...)                             \
> +({                                                        \
> +    if ( cpufreq_verbose )                                \
> +        printk(XENLOG_DEBUG "HWP: " fmt, ##__VA_ARGS__);  \

(One more here then.)

> +static bool hwp_handle_option(const char *s, const char *end)

__init?

> +static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
> +{
> +    uint32_t base_khz, max_khz, bus_khz, edx;
> +
> +    cpuid(0x16, &base_khz, &max_khz, &bus_khz, &edx);
> +
> +    policy->cpuinfo.perf_freq = base_khz * 1000;
> +    policy->cpuinfo.min_freq = base_khz * 1000;
> +    policy->cpuinfo.max_freq = max_khz * 1000;
> +    policy->min = base_khz * 1000;
> +    policy->max = max_khz * 1000;

Earlier on I asked what about cases where the CPUID output yields
some zero values (as I know can happen). Iirc you said this doesn't
affect functionality, but wouldn't it make sense to have a comment
to this effect here (proving the cases were thought through).

> +static void cf_check hwp_init_msrs(void *info)
> +{
> +    struct cpufreq_policy *policy = info;
> +    struct hwp_drv_data *data = this_cpu(hwp_drv_data);
> +    uint64_t val;
> +
> +    /*
> +     * Package level MSR, but we don't have a good idea of packages here, so
> +     * just do it everytime.
> +     */
> +    if ( rdmsr_safe(MSR_PM_ENABLE, val) )
> +    {
> +        hwp_err(policy->cpu, "rdmsr_safe(MSR_PM_ENABLE)\n");
> +        data->curr_req.raw = -1;
> +        return;
> +    }
> +
> +    /* Ensure we don't generate interrupts */
> +    if ( feature_hwp_notification )
> +        wrmsr_safe(MSR_HWP_INTERRUPT, 0);
> +
> +    if ( !(val & PM_ENABLE_HWP_ENABLE) )
> +    {
> +        val |= PM_ENABLE_HWP_ENABLE;
> +        if ( wrmsr_safe(MSR_PM_ENABLE, val) )
> +        {
> +            hwp_err(policy->cpu, "wrmsr_safe(MSR_PM_ENABLE, %lx)\n", val);
> +            data->curr_req.raw = -1;
> +            return;
> +        }
> +    }
> +
> +    if ( rdmsr_safe(MSR_HWP_CAPABILITIES, data->hwp_caps) )
> +    {
> +        hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_CAPABILITIES)\n");
> +        goto error;
> +    }
> +
> +    if ( rdmsr_safe(MSR_HWP_REQUEST, data->curr_req.raw) )
> +    {
> +        hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_REQUEST)\n");
> +        goto error;
> +    }
> +
> +    /*
> +     * Check for APERF/MPERF support in hardware
> +     * also check for boost/turbo support
> +     */
> +    intel_feature_detect(policy);

Nit: The comment could do with adding at least a comma or semicolon.

> +    if ( feature_hdc &&
> +         (!hdc_set_pkg_hdc_ctl(policy->cpu, opt_cpufreq_hdc) ||
> +          !hdc_set_pm_ctl1(policy->cpu, opt_cpufreq_hdc)) )
> +    {
> +            hwp_err(policy->cpu, "Disabling HDC support\n");
> +            feature_hdc = false;

Nit: Too deep indentation.

> +static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +    static union hwp_request initial_req;
> +    unsigned int cpu = policy->cpu;
> +    struct hwp_drv_data *data;
> +    bool first_run = false;
> +
> +    data = xzalloc(struct hwp_drv_data);
> +    if ( !data )
> +        return -ENOMEM;
> +
> +    policy->governor = &cpufreq_gov_hwp;
> +
> +    per_cpu(hwp_drv_data, cpu) = data;
> +
> +    on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
> +
> +    if ( data->curr_req.raw == -1 )
> +    {
> +        hwp_err(cpu, "Could not initialize HWP properly\n");
> +        per_cpu(hwp_drv_data, cpu) = NULL;
> +        xfree(data);
> +        return -ENODEV;
> +    }
> +
> +    data->minimum = data->curr_req.min_perf;
> +    data->maximum = data->curr_req.max_perf;
> +    data->desired = data->curr_req.desired;
> +    data->energy_perf = data->curr_req.energy_perf;
> +    data->activity_window = data->curr_req.activity_window;
> +
> +    if ( initial_req.raw == 0 )
> +    {
> +        hwp_verbose("CPU%u: HWP_CAPABILITIES: %016lx\n", cpu, 
> data->hwp_caps);
> +        initial_req = data->curr_req;
> +        first_run = true;
> +    }

What part of data->curr_req is guaranteed to be non-0 (for the condition
around ...

> +    if ( first_run || data->curr_req.raw != initial_req.raw )
> +        hwp_verbose("CPU%u: rdmsr HWP_REQUEST %016lx\n", cpu,
> +                    data->curr_req.raw);

... this logging to be effective)?

> +static void cf_check hwp_set_misc_turbo(void *info)
> +{
> +    const struct cpufreq_policy *policy = info;
> +    struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
> +    uint64_t msr;
> +
> +    data->ret = 0;
> +
> +    if ( rdmsr_safe(MSR_IA32_MISC_ENABLE, msr) )
> +    {
> +        hwp_verbose("CPU%u: error rdmsr_safe(MSR_IA32_MISC_ENABLE)\n",
> +                    policy->cpu);
> +        data->ret = -EINVAL;
> +
> +        return;
> +    }
> +
> +    if ( policy->turbo == CPUFREQ_TURBO_ENABLED )
> +        msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE;
> +    else
> +        msr |= MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE;
> +
> +    if ( wrmsr_safe(MSR_IA32_MISC_ENABLE, msr) )
> +    {
> +        hwp_verbose("CPU%u: error wrmsr_safe(MSR_IA32_MISC_ENABLE): 
> %016lx\n",
> +                    policy->cpu, msr);
> +        data->ret = -EINVAL;
> +    }
> +}

Neither of the two -EINVAL really indicate an invalid argument that was
passed. Maybe EACCES (or less desirably EFAULT)?

> @@ -89,15 +96,45 @@ static int __init cf_check setup_cpufreq_option(const 
> char *str)
>          return 0;
>      }
>  
> -    if ( choice > 0 || !cmdline_strcmp(str, "xen") )
> +    do
>      {
> -        xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> -        cpufreq_controller = FREQCTL_xen;
> -        if ( *arg && *(arg + 1) )
> -            return cpufreq_cmdline_parse(arg + 1);
> -    }
> +        const char *end = strchr(str, ';');
> +
> +        if ( end == NULL )
> +            end = strchr(str, '\0');
> +
> +        arg = strpbrk(str, ",:");
> +        if ( !arg || arg > end )
> +            arg = strchr(str, '\0');
> +
> +        if ( cpufreq_xen_cnt == ARRAY_SIZE(cpufreq_xen_opts) )
> +            return -E2BIG;
> +
> +        if ( choice > 0 || !cmdline_strcmp(str, "xen") )
> +        {
> +            xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> +            cpufreq_controller = FREQCTL_xen;
> +            cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_xen;
> +            ret = 0;
> +            if ( arg[0] && arg[1] )
> +                ret = cpufreq_cmdline_parse(arg + 1, end);
> +        }
> +        else if ( choice < 0 && !cmdline_strcmp(str, "hwp") )
> +        {
> +            xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> +            cpufreq_controller = FREQCTL_xen;
> +            cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_hwp;
> +            ret = 0;
> +            if ( arg[0] && arg[1] )
> +                ret = hwp_cmdline_parse(arg + 1, end);
> +        }
> +        else
> +            ret = -EINVAL;
> +
> +        str = end ? ++end : end;
> +    } while ( choice < 0 && ret == 0 && *str );

According to the earlier of the two lines, str may be NULL and hence
cannot be dereferenced without first checking to be non-NULL. Earlier
in the loop though you ensure end cannot be NULL. In that case,
however, you point end at the nul character, so the increment above
would point end at the next following one. So my guess is that you
meant

        str = *end ? ++end : end;

Jan



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.