|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v9 2/8] xen/cpufreq: implement amd-cppc driver for CPPC in passive mode
On 04.09.2025 08:35, Penny Zheng wrote:
> amd-cppc is the AMD CPU performance scaling driver that introduces a
> new CPU frequency control mechanism. The new mechanism is based on
> Collaborative Processor Performance Control (CPPC) which is a finer grain
> frequency management than legacy ACPI hardware P-States.
> Current AMD CPU platforms are using the ACPI P-states driver to
> manage CPU frequency and clocks with switching only in 3 P-states, while the
> new amd-cppc allows a more flexible, low-latency interface for Xen
> to directly communicate the performance hints to hardware.
>
> "amd-cppc" driver is responsible for implementing CPPC in passive mode, which
> still leverages Xen governors such as *ondemand*, *performance*, etc, to
> calculate the performance hints. In the future, we will introduce an advanced
> active mode to enable autonomous performance level selection.
>
> Field epp, energy performance preference, only has meaning when active
> mode is enabled, and will be introduced later in detail; so in passive
> mode we read the pre-defined BIOS value for it.
>
> Signed-off-by: Penny Zheng <Penny.Zheng@xxxxxxx>
> Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
With the issue I had pointed out, leading to ...
> ---
> v8 -> v9
> - embed struct amd_cppc_drv_data{} into struct cpufreq_policy{}
... this change, I think the tag would have needed to be dropped.
> +static void cf_check amd_cppc_write_request_msrs(void *info)
> +{
> + const struct amd_cppc_drv_data *data = info;
> +
> + wrmsrl(MSR_AMD_CPPC_REQ, data->req.raw);
> +}
> +
> +static void amd_cppc_write_request(unsigned int cpu,
> + struct amd_cppc_drv_data *data,
> + uint8_t min_perf, uint8_t des_perf,
> + uint8_t max_perf, uint8_t epp)
> +{
> + uint64_t prev = data->req.raw;
> +
> + data->req.min_perf = min_perf;
> + data->req.max_perf = max_perf;
> + data->req.des_perf = des_perf;
> + data->req.epp = epp;
> +
> + if ( prev == data->req.raw )
> + return;
> +
> + on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1);
With "cpu" coming from ...
> +}
> +
> +static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy,
> + unsigned int target_freq,
> + unsigned int relation)
> +{
> + struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> + uint8_t des_perf;
> + int res;
> +
> + if ( unlikely(!target_freq) )
> + return 0;
> +
> + res = amd_cppc_khz_to_perf(data, target_freq, &des_perf);
> + if ( res )
> + return res;
> +
> + /*
> + * Having a performance level lower than the lowest nonlinear
> + * performance level, such as, lowest_perf <= perf <=
> lowest_nonlinear_perf,
> + * may actually cause an efficiency penalty. So when deciding the
> min_perf
> + * value, we prefer lowest nonlinear performance over lowest performance.
> + */
> + amd_cppc_write_request(policy->cpu, data,
> data->caps.lowest_nonlinear_perf,
... here, how can this work when this particular CPU isn't online anymore?
> + des_perf, data->caps.highest_perf,
> + /* Pre-defined BIOS value for passive mode */
> + per_cpu(epp_init, policy->cpu));
> + return 0;
> +}
> +
> +static void cf_check amd_cppc_init_msrs(void *info)
> +{
> + struct cpufreq_policy *policy = info;
> + struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> + uint64_t val;
> + unsigned int min_freq = 0, nominal_freq = 0, max_freq;
> +
> + /* Package level MSR */
> + rdmsrl(MSR_AMD_CPPC_ENABLE, val);
Here you clarify the scope, yet what about ...
> + /*
> + * Only when Enable bit is on, the hardware will calculate the
> processor’s
> + * performance capabilities and initialize the performance level fields
> in
> + * the CPPC capability registers.
> + */
> + if ( !(val & AMD_CPPC_ENABLE) )
> + {
> + val |= AMD_CPPC_ENABLE;
> + wrmsrl(MSR_AMD_CPPC_ENABLE, val);
> + }
> +
> + rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw);
... this and ...
> + if ( data->caps.highest_perf == 0 || data->caps.lowest_perf == 0 ||
> + data->caps.nominal_perf == 0 || data->caps.lowest_nonlinear_perf ==
> 0 ||
> + data->caps.lowest_perf > data->caps.lowest_nonlinear_perf ||
> + data->caps.lowest_nonlinear_perf > data->caps.nominal_perf ||
> + data->caps.nominal_perf > data->caps.highest_perf )
> + {
> + amd_cppc_err(policy->cpu,
> + "Out of range values: highest(%u), lowest(%u),
> nominal(%u), lowest_nonlinear(%u)\n",
> + data->caps.highest_perf, data->caps.lowest_perf,
> + data->caps.nominal_perf,
> data->caps.lowest_nonlinear_perf);
> + goto err;
> + }
> +
> + amd_process_freq(&cpu_data[policy->cpu],
> + NULL, NULL, &this_cpu(pxfreq_mhz));
> +
> + data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.lowest_mhz,
> + data->caps.lowest_perf, &min_freq);
> + if ( data->err )
> + return;
> +
> + data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.nominal_mhz,
> + data->caps.nominal_perf, &nominal_freq);
> + if ( data->err )
> + return;
> +
> + data->err = amd_get_max_freq(data, &max_freq);
> + if ( data->err )
> + return;
> +
> + if ( min_freq > nominal_freq || nominal_freq > max_freq )
> + {
> + amd_cppc_err(policy->cpu,
> + "min(%u), or max(%u), or nominal(%u) freq value is
> incorrect\n",
> + min_freq, max_freq, nominal_freq);
> + goto err;
> + }
> +
> + policy->min = min_freq;
> + policy->max = max_freq;
> +
> + policy->cpuinfo.min_freq = min_freq;
> + policy->cpuinfo.max_freq = max_freq;
> + policy->cpuinfo.perf_freq = nominal_freq;
> + /*
> + * Set after policy->cpuinfo.perf_freq, as we are taking
> + * APERF/MPERF average frequency as current frequency.
> + */
> + policy->cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
> +
> + /* Store pre-defined BIOS value for passive mode */
> + rdmsrl(MSR_AMD_CPPC_REQ, val);
... this?
> +static int cf_check amd_cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> + unsigned int cpu = policy->cpu;
> + struct amd_cppc_drv_data *data;
> +
> + data = xvzalloc(struct amd_cppc_drv_data);
> + if ( !data )
> + return -ENOMEM;
> + policy->u.amd_cppc = data;
> +
> + data->cppc_data = &processor_pminfo[cpu]->cppc_data;
> +
> + on_selected_cpus(cpumask_of(cpu), amd_cppc_init_msrs, policy, 1);
> +
> + /*
> + * The enable bit is sticky, as we need to enable it at the very first
> + * beginning, before CPPC capability values sanity check.
> + * If the error path is taken, not only amd-cppc cpufreq core fails
> + * to initialize, but also we could not fall back to legacy P-states
> + * driver, irrespective of the command line specifying a fallback option.
> + */
> + if ( data->err )
> + {
> + amd_cppc_err(cpu, "Could not initialize cpufreq core in CPPC
> mode\n");
> + amd_cppc_cpufreq_cpu_exit(policy);
> + return data->err;
amd_cppc_cpufreq_cpu_exit() has already freed what data points to.
> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -63,6 +63,7 @@ struct perf_limits {
> };
>
> struct hwp_drv_data;
> +struct amd_cppc_drv_data;
> struct cpufreq_policy {
> cpumask_var_t cpus; /* affected CPUs */
> unsigned int shared_type; /* ANY or ALL affected CPUs
> @@ -85,6 +86,9 @@ struct cpufreq_policy {
> union {
> #ifdef CONFIG_INTEL
> struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
> +#endif
> +#ifdef CONFIG_AMD
> + struct amd_cppc_drv_data *amd_cppc; /* Driver data for AMD CPPC */
> #endif
> } u;
> };
Same comments here as for the HWP patch.
Jan
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |