[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [RFC PATCH 4/4] xen/arm: Implement standard PV time interface as per ARM DEN 0057A
On Sun, 22 Jun 2025, Koichiro Den wrote: > The VCPUOP_register_runstate_memory_area hypercall is still actively > used, e.g., in the Linux arm64 codebase. When KPTI is enabled, the area > was not registered from the beginning due to the VA not always being > valid. In such cases, Linux falls back to using the standard PV time > interface (ARM DEN 0057A), but this interface has not been implemented > in Xen for ARM64 (until this commit). > > The VCPUOP_register_runstate_phys_area was introduced, though it's > unclear whether this would be used in Linux arm64 and when it will be > prevalent amongst every possible downstream domain Linux variant. And of > course Linux is not an only option for the Xen arm64 domains. > > Therefore, implementing the standard way of sharing PV time is > generically beneficial, avoiding reliance on specially crafted > hypercalls, the usage of which by guest VMs is not always guaranteed. > Note that the PV_TIME_ST interface communicates with IPA (GPA), not GVA. > > Add the PV time interface according to ARM DEN 0057A. > > Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx> > --- > xen/arch/arm/domain.c | 30 +++++++++ > xen/arch/arm/domain_build.c | 87 ++++++++++++++++++++++++- > xen/arch/arm/include/asm/domain.h | 17 +++++ > xen/arch/arm/include/asm/smccc.h | 12 ++++ > xen/arch/arm/vsmc.c | 38 +++++++++++ > xen/common/device-tree/dom0less-build.c | 2 +- > xen/include/xen/fdt-domain-build.h | 2 +- > 7 files changed, 183 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index be58a23dd725..e895e4111f1b 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -277,6 +277,18 @@ static void ctxt_switch_to(struct vcpu *n) > WRITE_SYSREG(n->arch.mdcr_el2, MDCR_EL2); > } > > +static void update_stolen_time(struct vcpu *n) > +{ > + uint64_t tot_stolen; > + > + if ( is_idle_vcpu(current) ) > + return; > + > + tot_stolen = n->runstate.time[RUNSTATE_runnable] + > + n->runstate.time[RUNSTATE_offline]; > + write_atomic(&n->arch.pv_time_region->stolen_time, tot_stolen); > +} > + > static void schedule_tail(struct vcpu *prev) > { > ASSERT(prev != current); > @@ -291,6 +303,8 @@ static void schedule_tail(struct vcpu *prev) > > update_runstate_area(current); > > + update_stolen_time(current); > + > /* Ensure that the vcpu has an up-to-date time base. */ > update_vcpu_system_time(current); > } > @@ -586,6 +600,8 @@ int arch_vcpu_create(struct vcpu *v) > if ( get_ssbd_state() == ARM_SSBD_RUNTIME ) > v->arch.cpu_info->flags |= CPUINFO_WORKAROUND_2_FLAG; > > + v->arch.pv_time_region = &v->domain->arch.pv_time_regions[v->vcpu_id]; > + > return rc; > > fail: > @@ -707,6 +723,7 @@ int arch_domain_create(struct domain *d, > unsigned int flags) > { > unsigned int count = 0; > + int order; unsigned int > int rc; > > BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS); > @@ -791,6 +808,19 @@ int arch_domain_create(struct domain *d, > d->arch.sve_vl = config->arch.sve_vl; > #endif > > + /* > + * Preallocate the stolen time shared memory regions for all the > + * possible vCPUs. > + */ > + order = get_order_from_bytes(d->max_vcpus * sizeof(struct > pv_time_region)); > + d->arch.pv_time_regions_gfn = INVALID_GFN; > + d->arch.pv_time_regions = alloc_xenheap_pages(order, 0); > + if ( !d->arch.pv_time_regions ) { > + rc = -ENOMEM; > + goto fail; > + } > + memset(d->arch.pv_time_regions, 0, PAGE_SIZE << order); > + > return 0; > > fail: > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 85b6909e2b0e..1c51b53d9c6b 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -1561,8 +1561,80 @@ int __init make_chosen_node(const struct kernel_info > *kinfo) > return res; > } > > -int __init make_resv_memory_node(const struct kernel_info *kinfo, int > addrcells, > - int sizecells) > +int __init make_pv_time_resv_memory_node(struct domain *d, > + const struct kernel_info *kinfo, > + int addrcells, int sizecells) > +{ > + __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS]; > + unsigned int len = (addrcells + sizecells) * sizeof(__be32); > + struct p2m_domain *p2m = p2m_get_hostp2m(d); > + struct membanks *unused_banks = NULL; > + void *fdt = kinfo->fdt; > + unsigned regions_len; > + gfn_t pv_time_gfn; > + mfn_t pv_time_mfn; > + paddr_t aligned; > + paddr_t avail; > + __be32 *cells; > + int res; > + int i; unsigned int i > + /* Find unused regions */ > + regions_len = PAGE_ALIGN(d->max_vcpus * 64); > + unused_banks = membanks_xzalloc(NR_MEM_BANKS, MEMORY); > + if ( !unused_banks ) > + return -ENOMEM; > + > + res = find_unused_regions(d, kinfo, unused_banks); > + if ( res ) { > + printk(XENLOG_WARNING "%pd: failed to find unused regions\n", d); > + goto fail; > + } > + for ( i = 0; i < unused_banks->nr_banks; i++ ) { > + const struct membank *bank = &unused_banks->bank[i]; > + aligned = PAGE_ALIGN(bank->start); > + avail = bank->size - (aligned - bank->start); > + if ( avail >= regions_len ) > + break; > + } > + if ( i == unused_banks->nr_banks ) { > + res = -ENOSPC; > + goto fail; > + } > + > + /* Insert P2M entry */ > + pv_time_mfn = virt_to_mfn(d->arch.pv_time_regions); > + pv_time_gfn = gaddr_to_gfn(aligned); > + p2m_write_lock(p2m); > + res = p2m_set_entry(p2m, pv_time_gfn, regions_len / PAGE_SIZE, PFN_DOW > + pv_time_mfn, p2m_ram_rw, p2m_access_r); > + p2m_write_unlock(p2m); > + if ( res ) { > + printk(XENLOG_WARNING "%pd: failed to set P2M entry for PV_TIME\n", > d); > + goto fail; > + } > + d->arch.pv_time_regions_gfn = pv_time_gfn; > + > + /* Reserve the selected GFN */ > + res = domain_fdt_begin_node(fdt, "pv-time", gfn_x(pv_time_gfn)); > + if ( res ) > + goto fail; > + > + cells = reg; > + dt_child_set_range(&cells, addrcells, sizecells, gfn_x(pv_time_gfn), > regions_len); > + res = fdt_property(fdt, "reg", reg, len); > + if ( res ) > + goto fail; > + > + res = fdt_end_node(fdt); > + > + fail: > + xfree(unused_banks); > + return res; > +} > + > +int __init make_resv_memory_node(struct domain *d, const struct kernel_info > *kinfo, > + int addrcells, int sizecells) > { > const struct membanks *mem = kernel_info_get_shm_mem_const(kinfo); > void *fdt = kinfo->fdt; > @@ -1596,6 +1668,10 @@ int __init make_resv_memory_node(const struct > kernel_info *kinfo, int addrcells, > if ( res ) > return res; > > + res = make_pv_time_resv_memory_node(d, kinfo, addrcells, sizecells); > + if ( res ) > + return res; > + > res = fdt_end_node(fdt); > > return res; > @@ -1744,6 +1820,11 @@ static int __init handle_node(struct domain *d, struct > kernel_info *kinfo, > dt_n_size_cells(node)); > if ( res ) > return res; > + > + res = make_pv_time_resv_memory_node(d, kinfo, dt_n_addr_cells(node), > + dt_n_size_cells(node)); > + if ( res ) > + return res; > } > > for ( child = node->child; child != NULL; child = child->sibling ) > @@ -1791,7 +1872,7 @@ static int __init handle_node(struct domain *d, struct > kernel_info *kinfo, > > if ( !res_mem_node_found ) > { > - res = make_resv_memory_node(kinfo, addrcells, sizecells); > + res = make_resv_memory_node(d, kinfo, addrcells, sizecells); > if ( res ) > return res; > } > diff --git a/xen/arch/arm/include/asm/domain.h > b/xen/arch/arm/include/asm/domain.h > index a3487ca71303..c231c45fe40f 100644 > --- a/xen/arch/arm/include/asm/domain.h > +++ b/xen/arch/arm/include/asm/domain.h > @@ -59,6 +59,18 @@ struct paging_domain { > unsigned long p2m_total_pages; > }; > > +/* Stolen time shared memory region (ARM DEN 0057A.b) */ > +struct pv_time_region { > + /* This field must be 0 as per ARM DEN 0057A.b */ > + uint32_t revision; > + > + /* This field must be 0 as per ARM DEN 0057A.b */ > + uint32_t attribute; > + > + /* Total stolen time in nanoseconds */ > + uint64_t stolen_time; > +} __aligned(64); > + > struct arch_domain > { > #ifdef CONFIG_ARM_64 > @@ -121,6 +133,9 @@ struct arch_domain > void *tee; > #endif > > + struct pv_time_region *pv_time_regions; > + gfn_t pv_time_regions_gfn; > + > } __cacheline_aligned; > > struct arch_vcpu > @@ -243,6 +258,8 @@ struct arch_vcpu > */ > bool need_flush_to_ram; > > + struct pv_time_region *pv_time_region; > + > } __cacheline_aligned; > > void vcpu_show_registers(struct vcpu *v); > diff --git a/xen/arch/arm/include/asm/smccc.h > b/xen/arch/arm/include/asm/smccc.h > index a289c48b7ffd..6207ac74b715 100644 > --- a/xen/arch/arm/include/asm/smccc.h > +++ b/xen/arch/arm/include/asm/smccc.h > @@ -380,6 +380,18 @@ void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs > *args, > ARM_SMCCC_OWNER_ARCH, \ > 0x3FFF) > > +#define ARM_SMCCC_HYP_PV_TIME_FEATURES \ > + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ > + ARM_SMCCC_CONV_64, \ > + ARM_SMCCC_OWNER_HYPERVISOR, \ > + 0x20) > + > +#define ARM_SMCCC_HYP_PV_TIME_ST \ > + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ > + ARM_SMCCC_CONV_64, \ > + ARM_SMCCC_OWNER_HYPERVISOR, \ > + 0x21) > + > /* SMCCC error codes */ > #define ARM_SMCCC_NOT_REQUIRED (-2) > #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION (-1) > diff --git a/xen/arch/arm/vsmc.c b/xen/arch/arm/vsmc.c > index 6081f14ed0c1..1e2fbc1a62b4 100644 > --- a/xen/arch/arm/vsmc.c > +++ b/xen/arch/arm/vsmc.c > @@ -12,6 +12,7 @@ > #include <public/arch-arm/smccc.h> > #include <asm/cpuerrata.h> > #include <asm/cpufeature.h> > +#include <asm/domain.h> > #include <asm/monitor.h> > #include <asm/regs.h> > #include <asm/smccc.h> > @@ -127,6 +128,10 @@ static bool handle_arch(struct cpu_user_regs *regs) > if ( cpus_have_cap(ARM_WORKAROUND_BHB_SMCC_3) ) > ret = ARM_SMCCC_SUCCESS; > break; > + case ARM_SMCCC_HYP_PV_TIME_FEATURES: > + if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn, > INVALID_GFN) ) > + ret = ARM_SMCCC_SUCCESS; > + break; > } > > set_user_reg(regs, 0, ret); > @@ -162,6 +167,35 @@ static bool handle_arch(struct cpu_user_regs *regs) > return false; > } > > +static bool fill_pv_time_features(struct cpu_user_regs *regs) > +{ > + uint32_t arch_func_id = get_user_reg(regs, 1); > + int ret = ARM_SMCCC_NOT_SUPPORTED; > + > + if ( arch_func_id == ARM_SMCCC_HYP_PV_TIME_ST && > + !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) ) > + ret = ARM_SMCCC_SUCCESS; > + > + set_user_reg(regs, 0, ret); > + > + return true; > +} > + > +static bool fill_pv_time_st(struct cpu_user_regs *regs) > +{ > + register_t ret = ARM_SMCCC_NOT_SUPPORTED; > + paddr_t gaddr; > + > + if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) ) > + { > + gaddr = gfn_to_gaddr(current->domain->arch.pv_time_regions_gfn); > + ret = gaddr + current->vcpu_id * sizeof(struct pv_time_region); > + } > + > + set_user_reg(regs, 0, ret); > + return true; > +} > + > /* SMCCC interface for hypervisor. Tell about itself. */ > static bool handle_hypervisor(struct cpu_user_regs *regs) > { > @@ -176,6 +210,10 @@ static bool handle_hypervisor(struct cpu_user_regs *regs) > case ARM_SMCCC_REVISION_FID(HYPERVISOR): > return fill_revision(regs, XEN_SMCCC_MAJOR_REVISION, > XEN_SMCCC_MINOR_REVISION); > + case ARM_SMCCC_HYP_PV_TIME_FEATURES: > + return fill_pv_time_features(regs); > + case ARM_SMCCC_HYP_PV_TIME_ST: > + return fill_pv_time_st(regs); This file is common across 32-bit and 64-bit guests. Looking at the implementation of fill_pv_time_st, which returns a 64-bit address on a potentially 32-bit register, it looks like this interface is only meant for 64-bit guests? If so, then we should have a check here and return "not supported" for 32-bit callers. > default: > return false; > } > diff --git a/xen/common/device-tree/dom0less-build.c > b/xen/common/device-tree/dom0less-build.c > index 3d503c697337..fa31b1733388 100644 > --- a/xen/common/device-tree/dom0less-build.c > +++ b/xen/common/device-tree/dom0less-build.c > @@ -502,7 +502,7 @@ static int __init prepare_dtb_domU(struct domain *d, > struct kernel_info *kinfo) > if ( ret ) > goto err; > > - ret = make_resv_memory_node(kinfo, addrcells, sizecells); > + ret = make_resv_memory_node(d, kinfo, addrcells, sizecells); > if ( ret ) > goto err; > > diff --git a/xen/include/xen/fdt-domain-build.h > b/xen/include/xen/fdt-domain-build.h > index e9418857e386..645e7d0a54aa 100644 > --- a/xen/include/xen/fdt-domain-build.h > +++ b/xen/include/xen/fdt-domain-build.h > @@ -25,7 +25,7 @@ int construct_domain(struct domain *d, struct kernel_info > *kinfo); > int construct_hwdom(struct kernel_info *kinfo, > const struct dt_device_node *node); > int make_chosen_node(const struct kernel_info *kinfo); > -int make_resv_memory_node(const struct kernel_info *kinfo, > +int make_resv_memory_node(struct domain *d, const struct kernel_info *kinfo, > int addrcells, int sizecells); > int make_cpus_node(const struct domain *d, void *fdt); > int make_hypervisor_node(struct domain *d, const struct kernel_info *kinfo, > -- > 2.48.1 >
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |