[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v6 06/15] cpufreq: Add Hardware P-State (HWP) driver
On Mon, Jul 24, 2023 at 12:15 PM Jan Beulich <jbeulich@xxxxxxxx> wrote: > > On 24.07.2023 14:58, Jason Andryuk wrote: > > --- /dev/null > > +++ b/xen/arch/x86/acpi/cpufreq/hwp.c > > @@ -0,0 +1,521 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * hwp.c cpufreq driver to run Intel Hardware P-States (HWP) > > + * > > + * Copyright (C) 2021 Jason Andryuk <jandryuk@xxxxxxxxx> > > + */ > > + > > +#include <xen/cpumask.h> > > +#include <xen/init.h> > > +#include <xen/param.h> > > +#include <xen/xmalloc.h> > > +#include <asm/msr.h> > > +#include <acpi/cpufreq/cpufreq.h> > > + > > +static bool __ro_after_init feature_hwp_notification; > > +static bool __ro_after_init feature_hwp_activity_window; > > + > > +static bool __ro_after_init feature_hdc; > > + > > +static bool __ro_after_init opt_cpufreq_hdc = true; > > + > > +union hwp_request > > +{ > > + struct > > + { > > + unsigned int min_perf:8; > > + unsigned int max_perf:8; > > + unsigned int desired:8; > > + unsigned int energy_perf:8; > > + unsigned int activity_window:10; > > + bool package_control:1; > > + unsigned int :16; > > + bool activity_window_valid:1; > > + bool energy_perf_valid:1; > > + bool desired_valid:1; > > + bool max_perf_valid:1; > > + bool min_perf_valid:1; > > + }; > > + uint64_t raw; > > +}; > > + > > +struct hwp_drv_data > > +{ > > + union > > + { > > + uint64_t hwp_caps; > > + struct > > + { > > + unsigned int highest:8; > > + unsigned int guaranteed:8; > > + unsigned int most_efficient:8; > > + unsigned int lowest:8; > > + unsigned int :32; > > + } hw; > > + }; > > + union hwp_request curr_req; > > + int ret; > > + uint16_t activity_window; > > + uint8_t minimum; > > + uint8_t maximum; > > + uint8_t desired; > > + uint8_t energy_perf; > > +}; > > +static DEFINE_PER_CPU_READ_MOSTLY(struct hwp_drv_data *, hwp_drv_data); > > + > > +#define hwp_err(cpu, fmt, ...) \ > > + printk(XENLOG_ERR "HWP: CPU%u error: " fmt, cpu, ##__VA_ARGS__) > > +#define hwp_info(fmt, ...) printk(XENLOG_INFO "HWP: " fmt, > > ##__VA_ARGS__) > > I'm not convinced ", ##__VA_ARGS__" is a good construct to use. I notice > we have a few instances (mainly in code inherited from elsewhere), but I > think it ought to either be plain C99 style (without the ##) or purely > the gcc extension form (not using __VA_ARGS__). Using plain __VA_ARGS__ doesn't work for the cases without arguments: arch/x86/acpi/cpufreq/hwp.c:78:53: error: expected expression before ‘)’ token 78 | printk(XENLOG_DEBUG "HWP: " fmt, __VA_ARGS__); \ | ^ arch/x86/acpi/cpufreq/hwp.c:201:9: note: in expansion of macro ‘hwp_verbose’ 201 | hwp_verbose("disabled: No energy/performance preference available"); | ^~~~~~~~~~~ I can use "__VA_OPT__(,) __VA_ARGS__" though. > > +#define hwp_verbose(fmt, ...) \ > > +({ \ > > + if ( cpufreq_verbose ) \ > > + printk(XENLOG_DEBUG "HWP: " fmt, ##__VA_ARGS__); \ > > (One more here then.) > > > +static bool hwp_handle_option(const char *s, const char *end) > > __init? Yes, thanks. > > +static void hwp_get_cpu_speeds(struct cpufreq_policy *policy) > > +{ > > + uint32_t base_khz, max_khz, bus_khz, edx; > > + > > + cpuid(0x16, &base_khz, &max_khz, &bus_khz, &edx); > > + > > + policy->cpuinfo.perf_freq = base_khz * 1000; > > + policy->cpuinfo.min_freq = base_khz * 1000; > > + policy->cpuinfo.max_freq = max_khz * 1000; > > + policy->min = base_khz * 1000; > > + policy->max = max_khz * 1000; > > Earlier on I asked what about cases where the CPUID output yields > some zero values (as I know can happen). Iirc you said this doesn't > affect functionality, but wouldn't it make sense to have a comment > to this effect here (proving the cases were thought through). Sure. > > +static void cf_check hwp_init_msrs(void *info) > > +{ > > + struct cpufreq_policy *policy = info; > > + struct hwp_drv_data *data = this_cpu(hwp_drv_data); > > + uint64_t val; > > + > > + /* > > + * Package level MSR, but we don't have a good idea of packages here, > > so > > + * just do it everytime. > > + */ > > + if ( rdmsr_safe(MSR_PM_ENABLE, val) ) > > + { > > + hwp_err(policy->cpu, "rdmsr_safe(MSR_PM_ENABLE)\n"); > > + data->curr_req.raw = -1; > > + return; > > + } > > + > > + /* Ensure we don't generate interrupts */ > > + if ( feature_hwp_notification ) > > + wrmsr_safe(MSR_HWP_INTERRUPT, 0); > > + > > + if ( !(val & PM_ENABLE_HWP_ENABLE) ) > > + { > > + val |= PM_ENABLE_HWP_ENABLE; > > + if ( wrmsr_safe(MSR_PM_ENABLE, val) ) > > + { > > + hwp_err(policy->cpu, "wrmsr_safe(MSR_PM_ENABLE, %lx)\n", val); > > + data->curr_req.raw = -1; > > + return; > > + } > > + } > > + > > + if ( rdmsr_safe(MSR_HWP_CAPABILITIES, data->hwp_caps) ) > > + { > > + hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_CAPABILITIES)\n"); > > + goto error; > > + } > > + > > + if ( rdmsr_safe(MSR_HWP_REQUEST, data->curr_req.raw) ) > > + { > > + hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_REQUEST)\n"); > > + goto error; > > + } > > + > > + /* > > + * Check for APERF/MPERF support in hardware > > + * also check for boost/turbo support > > + */ > > + intel_feature_detect(policy); > > Nit: The comment could do with adding at least a comma or semicolon. Will change to "Check for turbo support." APERF/MPERF was removed from intel_feature_detect() in commit 4dd16c44152f ("x86/cpufreq: Rework APERF/MPERF handling"). > > + if ( feature_hdc && > > + (!hdc_set_pkg_hdc_ctl(policy->cpu, opt_cpufreq_hdc) || > > + !hdc_set_pm_ctl1(policy->cpu, opt_cpufreq_hdc)) ) > > + { > > + hwp_err(policy->cpu, "Disabling HDC support\n"); > > + feature_hdc = false; > > Nit: Too deep indentation. Yes, thanks. > > +static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy) > > +{ > > + static union hwp_request initial_req; > > + unsigned int cpu = policy->cpu; > > + struct hwp_drv_data *data; > > + bool first_run = false; > > + > > + data = xzalloc(struct hwp_drv_data); > > + if ( !data ) > > + return -ENOMEM; > > + > > + policy->governor = &cpufreq_gov_hwp; > > + > > + per_cpu(hwp_drv_data, cpu) = data; > > + > > + on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1); > > + > > + if ( data->curr_req.raw == -1 ) > > + { > > + hwp_err(cpu, "Could not initialize HWP properly\n"); > > + per_cpu(hwp_drv_data, cpu) = NULL; > > + xfree(data); > > + return -ENODEV; > > + } > > + > > + data->minimum = data->curr_req.min_perf; > > + data->maximum = data->curr_req.max_perf; > > + data->desired = data->curr_req.desired; > > + data->energy_perf = data->curr_req.energy_perf; > > + data->activity_window = data->curr_req.activity_window; > > + > > + if ( initial_req.raw == 0 ) > > + { > > + hwp_verbose("CPU%u: HWP_CAPABILITIES: %016lx\n", cpu, > > data->hwp_caps); > > + initial_req = data->curr_req; > > + first_run = true; > > + } > > What part of data->curr_req is guaranteed to be non-0 (for the condition > around ... > > > + if ( first_run || data->curr_req.raw != initial_req.raw ) > > + hwp_verbose("CPU%u: rdmsr HWP_REQUEST %016lx\n", cpu, > > + data->curr_req.raw); > > ... this logging to be effective)? Hmmm. I think you are correct that there is no guarantee that data->curr_req will be non-zero. i.e. a BIOS could zero the whole register. In practice, I see 0x8000ff01 - energy_perf = 0x80, maximum = 0xff and minimum = 0x01. Would you prefer that I make first_run a static variable to track if initial_req has been populated? > > +static void cf_check hwp_set_misc_turbo(void *info) > > +{ > > + const struct cpufreq_policy *policy = info; > > + struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu); > > + uint64_t msr; > > + > > + data->ret = 0; > > + > > + if ( rdmsr_safe(MSR_IA32_MISC_ENABLE, msr) ) > > + { > > + hwp_verbose("CPU%u: error rdmsr_safe(MSR_IA32_MISC_ENABLE)\n", > > + policy->cpu); > > + data->ret = -EINVAL; > > + > > + return; > > + } > > + > > + if ( policy->turbo == CPUFREQ_TURBO_ENABLED ) > > + msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE; > > + else > > + msr |= MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE; > > + > > + if ( wrmsr_safe(MSR_IA32_MISC_ENABLE, msr) ) > > + { > > + hwp_verbose("CPU%u: error wrmsr_safe(MSR_IA32_MISC_ENABLE): > > %016lx\n", > > + policy->cpu, msr); > > + data->ret = -EINVAL; > > + } > > +} > > Neither of the two -EINVAL really indicate an invalid argument that was > passed. Maybe EACCES (or less desirably EFAULT)? Ok, I'll use EACCES. > > @@ -89,15 +96,45 @@ static int __init cf_check setup_cpufreq_option(const > > char *str) > > return 0; > > } > > > > - if ( choice > 0 || !cmdline_strcmp(str, "xen") ) > > + do > > { > > - xen_processor_pmbits |= XEN_PROCESSOR_PM_PX; > > - cpufreq_controller = FREQCTL_xen; > > - if ( *arg && *(arg + 1) ) > > - return cpufreq_cmdline_parse(arg + 1); > > - } > > + const char *end = strchr(str, ';'); > > + > > + if ( end == NULL ) > > + end = strchr(str, '\0'); > > + > > + arg = strpbrk(str, ",:"); > > + if ( !arg || arg > end ) > > + arg = strchr(str, '\0'); > > + > > + if ( cpufreq_xen_cnt == ARRAY_SIZE(cpufreq_xen_opts) ) > > + return -E2BIG; > > + > > + if ( choice > 0 || !cmdline_strcmp(str, "xen") ) > > + { > > + xen_processor_pmbits |= XEN_PROCESSOR_PM_PX; > > + cpufreq_controller = FREQCTL_xen; > > + cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_xen; > > + ret = 0; > > + if ( arg[0] && arg[1] ) > > + ret = cpufreq_cmdline_parse(arg + 1, end); > > + } > > + else if ( choice < 0 && !cmdline_strcmp(str, "hwp") ) > > + { > > + xen_processor_pmbits |= XEN_PROCESSOR_PM_PX; > > + cpufreq_controller = FREQCTL_xen; > > + cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_hwp; > > + ret = 0; > > + if ( arg[0] && arg[1] ) > > + ret = hwp_cmdline_parse(arg + 1, end); > > + } > > + else > > + ret = -EINVAL; > > + > > + str = end ? ++end : end; > > + } while ( choice < 0 && ret == 0 && *str ); > > According to the earlier of the two lines, str may be NULL and hence > cannot be dereferenced without first checking to be non-NULL. Earlier > in the loop though you ensure end cannot be NULL. In that case, > however, you point end at the nul character, so the increment above > would point end at the next following one. So my guess is that you > meant > > str = *end ? ++end : end; Yes, you are correct. Thanks for catching it. Thanks, Jason
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |