|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] xen/cpufreq: introduce new sub-hypercall to propagate CPPC data
commit bceab282bf2f5a28485ba4b19a6f6353596fbca3
Author: Penny Zheng <Penny.Zheng@xxxxxxx>
AuthorDate: Thu Aug 28 14:45:33 2025 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Aug 28 14:45:33 2025 +0200
xen/cpufreq: introduce new sub-hypercall to propagate CPPC data
In order to provide backward compatibility with existing governors
that represent performance as frequency, like ondemand, the _CPC
table can optionally provide processor frequency range values, Lowest
frequency and Nominal frequency, to let OS use Lowest Frequency/
Performance and Nominal Frequency/Performance as anchor points to
create linear mapping of CPPC performance to CPU frequency.
As Xen is incapable of parsing the ACPI dynamic table, we'd like to
introduce a new sub-hypercall "XEN_PM_CPPC" to propagate required CPPC
data from dom0 kernel to Xen.
In the corresponding handler set_cppc_pminfo(), we perform _CPC and _PSD
sanity checks, as both _PSD and _CPC info are necessary for correctly
initializing cpufreq cores in CPPC mode.
Users shall be warned that if we fail at this point, there is
no fallback scheme (such as legacy P-states) that could be switched to.
A new flag "XEN_CPPC_INIT" is also introduced for cpufreq cores initialised
in CPPC mode. All .init flag checks shall then be updated to
consider "XEN_CPPC_INIT" too.
We want to bypass construction of px statistic info in
cpufreq_statistic_init() for CPPC mode, while not bypassing
cpufreq_statistic_lock initialization, for a good reason. The same check
is unnecessary for cpufreq_statistic_exit(), since it is already covered
by the check of the px statistic variable "cpufreq_statistic_data".
Signed-off-by: Penny Zheng <Penny.Zheng@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
---
xen/arch/x86/platform_hypercall.c | 5 ++
xen/arch/x86/x86_64/cpufreq.c | 19 +++++
xen/arch/x86/x86_64/platform_hypercall.c | 3 +
xen/drivers/acpi/pm-op.c | 7 +-
xen/drivers/acpi/pmstat.c | 4 +
xen/drivers/cpufreq/cpufreq.c | 124 +++++++++++++++++++++++++++++-
xen/include/acpi/cpufreq/processor_perf.h | 4 +-
xen/include/public/platform.h | 26 +++++++
xen/include/xen/pmstat.h | 5 ++
xen/include/xlat.lst | 1 +
10 files changed, 193 insertions(+), 5 deletions(-)
diff --git a/xen/arch/x86/platform_hypercall.c
b/xen/arch/x86/platform_hypercall.c
index 21f9f795c1..fafc176475 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -577,6 +577,11 @@ ret_t do_platform_op(
break;
}
+ case XEN_PM_CPPC:
+ ret = set_cppc_pminfo(op->u.set_pminfo.id,
+ &op->u.set_pminfo.u.cppc_data);
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/xen/arch/x86/x86_64/cpufreq.c b/xen/arch/x86/x86_64/cpufreq.c
index e4f3d5b436..525b29da71 100644
--- a/xen/arch/x86/x86_64/cpufreq.c
+++ b/xen/arch/x86/x86_64/cpufreq.c
@@ -54,3 +54,22 @@ int compat_set_px_pminfo(uint32_t acpi_id,
return set_px_pminfo(acpi_id, xen_perf);
}
+
+int compat_set_cppc_pminfo(unsigned int acpi_id,
+ const struct compat_processor_cppc *cppc_data)
+
+{
+ struct xen_processor_cppc *xen_cppc;
+ unsigned long xlat_page_current;
+
+ xlat_malloc_init(xlat_page_current);
+
+ xen_cppc = xlat_malloc_array(xlat_page_current,
+ struct xen_processor_cppc, 1);
+ if ( unlikely(xen_cppc == NULL) )
+ return -ENOMEM;
+
+ XLAT_processor_cppc(xen_cppc, cppc_data);
+
+ return set_cppc_pminfo(acpi_id, xen_cppc);
+}
diff --git a/xen/arch/x86/x86_64/platform_hypercall.c
b/xen/arch/x86/x86_64/platform_hypercall.c
index 9ab631c17f..0288f68df9 100644
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -14,6 +14,9 @@ EMIT_FILE;
#define efi_get_info efi_compat_get_info
#define efi_runtime_call(x) efi_compat_runtime_call(x)
+#define xen_processor_cppc compat_processor_cppc
+#define set_cppc_pminfo compat_set_cppc_pminfo
+
#define xen_processor_performance compat_processor_performance
#define set_px_pminfo compat_set_px_pminfo
diff --git a/xen/drivers/acpi/pm-op.c b/xen/drivers/acpi/pm-op.c
index 9a1970df34..e3b5c8bcaa 100644
--- a/xen/drivers/acpi/pm-op.c
+++ b/xen/drivers/acpi/pm-op.c
@@ -91,7 +91,10 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
pmpt = processor_pminfo[op->cpuid];
policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
- if ( !pmpt || !pmpt->perf.states ||
+ if ( !pmpt ||
+ ((pmpt->init & XEN_PX_INIT) && !pmpt->perf.states) ||
+ /* Avoid NULL deref of pmpt->perf.states[] */
+ ((pmpt->init & XEN_CPPC_INIT) && pmpt->perf.state_count) ||
!policy || !policy->governor )
return -EINVAL;
@@ -351,7 +354,7 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
case CPUFREQ_PARA:
if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
return -ENODEV;
- if ( !pmpt || !(pmpt->init & XEN_PX_INIT) )
+ if ( !pmpt || !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
break;
}
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 4fae0c14af..0f31736df2 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -108,6 +108,10 @@ int cpufreq_statistic_init(unsigned int cpu)
if ( !pmpt )
return -EINVAL;
+ /* Only need to initialize in Px mode */
+ if ( !(pmpt->init & XEN_PX_INIT) )
+ return 0;
+
spin_lock(cpufreq_statistic_lock);
pxpt = per_cpu(cpufreq_statistic_data, cpu);
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 267e4d3f3b..f553fbcb1c 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -40,6 +40,7 @@
#include <xen/domain.h>
#include <xen/cpu.h>
#include <xen/pmstat.h>
+#include <xen/xvmalloc.h>
#include <asm/io.h>
#include <asm/processor.h>
@@ -234,6 +235,11 @@ static int get_psd_info(unsigned int cpu, unsigned int
*shared_type,
*domain_info = &processor_pminfo[cpu]->perf.domain_info;
break;
+ case XEN_CPPC_INIT:
+ *shared_type = processor_pminfo[cpu]->cppc_data.shared_type;
+ *domain_info = &processor_pminfo[cpu]->cppc_data.domain_info;
+ break;
+
default:
ret = -EINVAL;
break;
@@ -259,7 +265,7 @@ int cpufreq_add_cpu(unsigned int cpu)
if ( !processor_pminfo[cpu] || !cpu_online(cpu) )
return -EINVAL;
- if ( !(processor_pminfo[cpu]->init & XEN_PX_INIT) )
+ if ( !(processor_pminfo[cpu]->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
if (!cpufreq_driver.init)
@@ -434,7 +440,7 @@ int cpufreq_del_cpu(unsigned int cpu)
if ( !processor_pminfo[cpu] || !cpu_online(cpu) )
return -EINVAL;
- if ( !(processor_pminfo[cpu]->init & XEN_PX_INIT) )
+ if ( !(processor_pminfo[cpu]->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
if (!per_cpu(cpufreq_cpu_policy, cpu))
@@ -693,6 +699,120 @@ int acpi_set_pdc_bits(unsigned int acpi_id,
XEN_GUEST_HANDLE(uint32) pdc)
return ret;
}
+static void print_CPPC(const struct xen_processor_cppc *cppc_data)
+{
+ printk("\t_CPC: highest_perf=%u, lowest_perf=%u, "
+ "nominal_perf=%u, lowest_nonlinear_perf=%u, "
+ "nominal_mhz=%uMHz, lowest_mhz=%uMHz\n",
+ cppc_data->cpc.highest_perf, cppc_data->cpc.lowest_perf,
+ cppc_data->cpc.nominal_perf, cppc_data->cpc.lowest_nonlinear_perf,
+ cppc_data->cpc.nominal_mhz, cppc_data->cpc.lowest_mhz);
+}
+
+int set_cppc_pminfo(unsigned int acpi_id,
+ const struct xen_processor_cppc *cppc_data)
+{
+ int ret = 0, cpu_id;
+ struct processor_pminfo *pm_info;
+
+ cpu_id = get_cpu_id(acpi_id);
+ if ( cpu_id < 0 )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ( cppc_data->pad[0] || cppc_data->pad[1] || cppc_data->pad[2] )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ( cpufreq_verbose )
+ printk("Set CPU%d (ACPI ID %u) CPPC state info:\n",
+ cpu_id, acpi_id);
+
+ pm_info = processor_pminfo[cpu_id];
+ if ( !pm_info )
+ {
+ pm_info = xvzalloc(struct processor_pminfo);
+ if ( !pm_info )
+ {
+ ret = -ENOMEM;
+ goto out;
+ }
+ processor_pminfo[cpu_id] = pm_info;
+ }
+ pm_info->acpi_id = acpi_id;
+ pm_info->id = cpu_id;
+ pm_info->cppc_data = *cppc_data;
+
+ if ( (cppc_data->flags & XEN_CPPC_PSD) &&
+ !check_psd_pminfo(cppc_data->shared_type) )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ( cppc_data->flags & XEN_CPPC_CPC )
+ {
+ if ( cppc_data->cpc.highest_perf == 0 ||
+ cppc_data->cpc.highest_perf > UINT8_MAX ||
+ cppc_data->cpc.nominal_perf == 0 ||
+ cppc_data->cpc.lowest_nonlinear_perf == 0 ||
+ cppc_data->cpc.lowest_perf == 0 ||
+ cppc_data->cpc.lowest_perf >
+ cppc_data->cpc.lowest_nonlinear_perf ||
+ cppc_data->cpc.lowest_nonlinear_perf >
+ cppc_data->cpc.nominal_perf ||
+ cppc_data->cpc.nominal_perf > cppc_data->cpc.highest_perf )
+ /*
+ * Right now, Xen doesn't actually use highest_perf/nominal_perf/
+ * lowest_nonlinear_perf/lowest_perf values read from ACPI _CPC
+ * table. Xen reads CPPC capability MSR to get these four values.
+ * So warning is enough.
+ */
+ printk_once(XENLOG_WARNING
+ "Broken CPPC perf values: lowest(%u),
nonlinear_lowest(%u), nominal(%u), highest(%u)\n",
+ cppc_data->cpc.lowest_perf,
+ cppc_data->cpc.lowest_nonlinear_perf,
+ cppc_data->cpc.nominal_perf,
+ cppc_data->cpc.highest_perf);
+
+ /* lowest_mhz and nominal_mhz are optional value */
+ if ( cppc_data->cpc.nominal_mhz &&
+ cppc_data->cpc.lowest_mhz > cppc_data->cpc.nominal_mhz )
+ {
+ printk_once(XENLOG_WARNING
+ "Broken CPPC freq values: lowest(%u), nominal(%u)\n",
+ cppc_data->cpc.lowest_mhz,
+ cppc_data->cpc.nominal_mhz);
+ /* Re-set with zero values, instead of keeping invalid values */
+ pm_info->cppc_data.cpc.nominal_mhz = 0;
+ pm_info->cppc_data.cpc.lowest_mhz = 0;
+ }
+ }
+
+ if ( cppc_data->flags == (XEN_CPPC_PSD | XEN_CPPC_CPC) )
+ {
+ if ( cpufreq_verbose )
+ {
+ print_PSD(&pm_info->cppc_data.domain_info);
+ print_CPPC(&pm_info->cppc_data);
+ }
+
+ pm_info->init = XEN_CPPC_INIT;
+ ret = cpufreq_cpu_init(cpu_id);
+ if ( ret )
+ printk_once(XENLOG_WARNING
+ "CPU%d failed amd-cppc mode init; use \"cpufreq=xen\"
instead",
+ cpu_id);
+ }
+
+ out:
+ return ret;
+}
+
static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
{
if (usr_max_freq)
diff --git a/xen/include/acpi/cpufreq/processor_perf.h
b/xen/include/acpi/cpufreq/processor_perf.h
index 4e045da983..e6576314f0 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -5,7 +5,8 @@
#include <public/sysctl.h>
#include <xen/acpi.h>
-#define XEN_PX_INIT 0x80000000U
+#define XEN_CPPC_INIT 0x40000000U
+#define XEN_PX_INIT 0x80000000U
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
@@ -43,6 +44,7 @@ struct processor_pminfo {
uint32_t acpi_id;
uint32_t id;
struct processor_performance perf;
+ struct xen_processor_cppc cppc_data;
uint32_t init;
};
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 2725b8d104..94349fc5f5 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -363,6 +363,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
#define XEN_PM_PX 1
#define XEN_PM_TX 2
#define XEN_PM_PDC 3
+#define XEN_PM_CPPC 4
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -370,6 +371,10 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
#define XEN_PX_PPC 4
#define XEN_PX_PSD 8
+/* CPPC sub info type */
+#define XEN_CPPC_PSD (1U << 0)
+#define XEN_CPPC_CPC (1U << 1)
+
struct xen_power_register {
uint32_t space_id;
uint32_t bit_width;
@@ -457,6 +462,26 @@ struct xen_processor_performance {
typedef struct xen_processor_performance xen_processor_performance_t;
DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
+struct xen_processor_cppc {
+ uint8_t flags; /* IN: XEN_CPPC_xxx */
+ uint8_t pad[3];
+ /*
+ * IN: Subset _CPC fields useful for CPPC-compatible cpufreq
+ * driver's initialization
+ */
+ struct {
+ uint32_t highest_perf;
+ uint32_t nominal_perf;
+ uint32_t lowest_nonlinear_perf;
+ uint32_t lowest_perf;
+ uint32_t lowest_mhz;
+ uint32_t nominal_mhz;
+ } cpc;
+ uint32_t shared_type; /* IN: XEN_CPUPERF_SHARED_TYPE_xxx */
+ struct xen_psd_package domain_info; /* IN: _PSD */
+};
+typedef struct xen_processor_cppc xen_processor_cppc_t;
+
struct xenpf_set_processor_pminfo {
/* IN variables */
uint32_t id; /* ACPI CPU ID */
@@ -465,6 +490,7 @@ struct xenpf_set_processor_pminfo {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
+ xen_processor_cppc_t cppc_data; /* CPPC: _CPC and _PSD
*/
} u;
};
typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
diff --git a/xen/include/xen/pmstat.h b/xen/include/xen/pmstat.h
index 8350403e95..6096560d3c 100644
--- a/xen/include/xen/pmstat.h
+++ b/xen/include/xen/pmstat.h
@@ -7,12 +7,17 @@
int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf);
long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power);
+int set_cppc_pminfo(unsigned int acpi_id,
+ const struct xen_processor_cppc *cppc_data);
#ifdef CONFIG_COMPAT
struct compat_processor_performance;
int compat_set_px_pminfo(uint32_t acpi_id, struct compat_processor_performance
*perf);
struct compat_processor_power;
long compat_set_cx_pminfo(uint32_t acpi_id, struct compat_processor_power
*power);
+struct compat_processor_cppc;
+int compat_set_cppc_pminfo(unsigned int acpi_id,
+ const struct compat_processor_cppc *cppc_data);
#endif
uint32_t pmstat_get_cx_nr(unsigned int cpu);
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 6d6c6cfab2..9d08dcc4bb 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -160,6 +160,7 @@
! pct_register platform.h
! power_register platform.h
+! processor_cppc platform.h
? processor_csd platform.h
! processor_cx platform.h
! processor_flags platform.h
--
generated by git-patchbot for /home/xen/git/xen.git#master
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |