Re: [PATCH v3 8/8] common: convert vCPU info area registration
On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
> Switch to using map_guest_area(). Noteworthy differences from
> map_vcpu_info():
> - remote vCPU-s are paused rather than checked for being down (which in
>   principle can change right after the check),
> - the domain lock is taken for a much smaller region,
> - the error code for an attempt to re-register the area is now -EBUSY,
> - we could in principle permit de-registration when no area was
>   previously registered (which would permit "probing", if necessary for
>   anything).
>
> Note that this eliminates a bug in copy_vcpu_settings(): The function
> did allocate a new page regardless of the GFN already having a mapping,
> thus in particular breaking the case of two vCPU-s having their info
> areas on the same page.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Some minor comments below:

Acked-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>

> ---
> RFC: I'm not really certain whether the preliminary check (ahead of
>      calling map_guest_area()) is worthwhile to have.
> ---
> v2: Re-base over changes earlier in the series. Properly enforce no re-
>     registration. Avoid several casts by introducing local variables.
>
> --- a/xen/arch/x86/include/asm/shared.h
> +++ b/xen/arch/x86/include/asm/shared.h
> @@ -26,17 +26,20 @@ static inline void arch_set_##field(stru
>  #define GET_SET_VCPU(type, field) \
>  static inline type arch_get_##field(const struct vcpu *v) \
>  { \
> +    const vcpu_info_t *vi = v->vcpu_info_area.map; \
> + \
>      return !has_32bit_shinfo(v->domain) ? \
> -           v->vcpu_info->native.arch.field : \
> -           v->vcpu_info->compat.arch.field; \
> +           vi->native.arch.field : vi->compat.arch.field; \
>  } \
>  static inline void arch_set_##field(struct vcpu *v, \
>                                      type val) \
>  { \
> +    vcpu_info_t *vi = v->vcpu_info_area.map; \
> + \
>      if ( !has_32bit_shinfo(v->domain) ) \
> -        v->vcpu_info->native.arch.field = val; \
> +        vi->native.arch.field = val; \
>      else \
> -        v->vcpu_info->compat.arch.field = val; \
> +        vi->compat.arch.field = val; \
>  }
>
>  #else
> @@ -57,12 +60,16 @@ static inline void arch_set_##field(stru
>  #define GET_SET_VCPU(type, field) \
>  static inline type arch_get_##field(const struct vcpu *v) \
>  { \
> -    return v->vcpu_info->arch.field; \
> +    const vcpu_info_t *vi = v->vcpu_info_area.map; \
> + \
> +    return vi->arch.field; \
>  } \
>  static inline void arch_set_##field(struct vcpu *v, \
>                                      type val) \
>  { \
> -    v->vcpu_info->arch.field = val; \
> +    vcpu_info_t *vi = v->vcpu_info_area.map; \
> + \
> +    vi->arch.field = val; \
>  }
>
>  #endif
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1749,53 +1749,24 @@ static int copy_vpmu(struct vcpu *d_vcpu
>  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
>  {
>      unsigned int i;
> -    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
>      int ret = -EINVAL;
>
>      for ( i = 0; i < cd->max_vcpus; i++ )
>      {
>          struct vcpu *d_vcpu = d->vcpu[i];
>          struct vcpu *cd_vcpu = cd->vcpu[i];
> -        mfn_t vcpu_info_mfn;
>
>          if ( !d_vcpu || !cd_vcpu )
>              continue;
>
> -        /* Copy & map in the vcpu_info page if the guest uses one */
> -        vcpu_info_mfn = d_vcpu->vcpu_info_mfn;
> -        if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) )
> -        {
> -            mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn;
> -
> -            /* Allocate & map the page for it if it hasn't been already */
> -            if ( mfn_eq(new_vcpu_info_mfn, INVALID_MFN) )
> -            {
> -                gfn_t gfn = mfn_to_gfn(d, vcpu_info_mfn);
> -                unsigned long gfn_l = gfn_x(gfn);
> -                struct page_info *page;
> -
> -                if ( !(page = alloc_domheap_page(cd, 0)) )
> -                    return -ENOMEM;
> -
> -                new_vcpu_info_mfn = page_to_mfn(page);
> -                set_gpfn_from_mfn(mfn_x(new_vcpu_info_mfn), gfn_l);
> -
> -                ret = p2m->set_entry(p2m, gfn, new_vcpu_info_mfn,
> -                                     PAGE_ORDER_4K, p2m_ram_rw,
> -                                     p2m->default_access, -1);
> -                if ( ret )
> -                    return ret;
> -
> -                ret = map_vcpu_info(cd_vcpu, gfn_l,
> -                                    PAGE_OFFSET(d_vcpu->vcpu_info));
> -                if ( ret )
> -                    return ret;
> -            }
> -
> -            copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
> -        }
> -
> -        /* Same for the (physically registered) runstate and time info
> -           areas. */
> +        /*
> +         * Copy and map the vcpu_info page and the (physically registered)
> +         * runstate and time info areas.
> +         */
> +        ret = copy_guest_area(&cd_vcpu->vcpu_info_area,
> +                              &d_vcpu->vcpu_info_area, cd_vcpu, d);
> +        if ( ret )
> +            return ret;
>          ret = copy_guest_area(&cd_vcpu->runstate_guest_area,
>                                &d_vcpu->runstate_guest_area, cd_vcpu, d);
>          if ( ret )
> --- a/xen/arch/x86/pv/shim.c
> +++ b/xen/arch/x86/pv/shim.c
> @@ -383,7 +383,7 @@ int pv_shim_shutdown(uint8_t reason)
>      for_each_vcpu ( d, v )
>      {
>          /* Unmap guest vcpu_info page and runstate/time areas. */
> -        unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->vcpu_info_area);
>          unmap_guest_area(v, &v->runstate_guest_area);
>          unmap_guest_area(v, &v->arch.time_guest_area);
>
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -1547,7 +1547,7 @@ static void __update_vcpu_system_time(st
>      struct vcpu_time_info *u = &vcpu_info(v, time), _u;
>      const struct domain *d = v->domain;
>
> -    if ( v->vcpu_info == NULL )
> +    if ( !v->vcpu_info_area.map )
>          return;
>
>      collect_time_info(v, &_u);
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -53,7 +53,7 @@ void __dummy__(void)
>
>      OFFSET(VCPU_processor, struct vcpu, processor);
>      OFFSET(VCPU_domain, struct vcpu, domain);
> -    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
> +    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
>      OFFSET(VCPU_trap_bounce, struct vcpu, arch.pv.trap_bounce);
>      OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
>      OFFSET(VCPU_event_addr, struct vcpu, arch.pv.event_callback_eip);
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -96,7 +96,7 @@ static void _show_registers(
>      if ( context == CTXT_hypervisor )
>          printk(" %pS", _p(regs->rip));
>      printk("\nRFLAGS: %016lx ", regs->rflags);
> -    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
> +    if ( (context == CTXT_pv_guest) && v && v->vcpu_info_area.map )
>          printk("EM: %d ", !!vcpu_info(v, evtchn_upcall_mask));
>      printk("CONTEXT: %s", context_names[context]);
>      if ( v && !is_idle_vcpu(v) )
> --- a/xen/common/compat/domain.c
> +++ b/xen/common/compat/domain.c
> @@ -49,7 +49,7 @@ int compat_common_vcpu_op(int cmd, struc
>      {
>      case VCPUOP_initialise:
>      {
> -        if ( v->vcpu_info == &dummy_vcpu_info )
> +        if ( v->vcpu_info_area.map == &dummy_vcpu_info )
>              return -EINVAL;
>
>  #ifdef CONFIG_HVM
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -127,10 +127,10 @@ static void vcpu_info_reset(struct vcpu
>  {
>      struct domain *d = v->domain;

d could likely be made const?

>
> -    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
> -                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
> -                    : &dummy_vcpu_info);
> -    v->vcpu_info_mfn = INVALID_MFN;
> +    v->vcpu_info_area.map =
> +        ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
> +         ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
> +         : &dummy_vcpu_info);
>  }
>
>  static void vmtrace_free_buffer(struct vcpu *v)
> @@ -964,7 +964,7 @@ int domain_kill(struct domain *d)
>              return -ERESTART;
>          for_each_vcpu ( d, v )
>          {
> -            unmap_vcpu_info(v);
> +            unmap_guest_area(v, &v->vcpu_info_area);
>              unmap_guest_area(v, &v->runstate_guest_area);
>          }
>          d->is_dying = DOMDYING_dead;
> @@ -1419,7 +1419,7 @@ int domain_soft_reset(struct domain *d,
>      for_each_vcpu ( d, v )
>      {
>          set_xen_guest_handle(runstate_guest(v), NULL);
> -        unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->vcpu_info_area);
>          unmap_guest_area(v, &v->runstate_guest_area);
>      }
>
> @@ -1467,111 +1467,6 @@ int vcpu_reset(struct vcpu *v)
>      return rc;
>  }
>
> -/*
> - * Map a guest page in and point the vcpu_info pointer at it. This
> - * makes sure that the vcpu_info is always pointing at a valid piece
> - * of memory, and it sets a pending event to make sure that a pending
> - * event doesn't get missed.
> - */
> -int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned int offset)
> -{
> -    struct domain *d = v->domain;
> -    void *mapping;
> -    vcpu_info_t *new_info;
> -    struct page_info *page;
> -    unsigned int align;
> -
> -    if ( offset > (PAGE_SIZE - sizeof(*new_info)) )
> -        return -ENXIO;
> -
> -#ifdef CONFIG_COMPAT
> -    BUILD_BUG_ON(sizeof(*new_info) != sizeof(new_info->compat));
> -    if ( has_32bit_shinfo(d) )
> -        align = alignof(new_info->compat);
> -    else
> -#endif
> -        align = alignof(*new_info);
> -    if ( offset & (align - 1) )
> -        return -ENXIO;
> -
> -    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
> -        return -EINVAL;
> -
> -    /* Run this command on yourself or on other offline VCPUS. */
> -    if ( (v != current) && !(v->pause_flags & VPF_down) )
> -        return -EINVAL;
> -
> -    page = get_page_from_gfn(d, gfn, NULL, P2M_UNSHARE);
> -    if ( !page )
> -        return -EINVAL;
> -
> -    if ( !get_page_type(page, PGT_writable_page) )
> -    {
> -        put_page(page);
> -        return -EINVAL;
> -    }
> -
> -    mapping = __map_domain_page_global(page);
> -    if ( mapping == NULL )
> -    {
> -        put_page_and_type(page);
> -        return -ENOMEM;
> -    }
> -
> -    new_info = (vcpu_info_t *)(mapping + offset);
> -
> -    if ( v->vcpu_info == &dummy_vcpu_info )
> -    {
> -        memset(new_info, 0, sizeof(*new_info));
> -#ifdef XEN_HAVE_PV_UPCALL_MASK
> -        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
> -#endif
> -    }
> -    else
> -    {
> -        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
> -    }
> -
> -    v->vcpu_info = new_info;
> -    v->vcpu_info_mfn = page_to_mfn(page);
> -
> -    /* Set new vcpu_info pointer /before/ setting pending flags. */
> -    smp_wmb();
> -
> -    /*
> -     * Mark everything as being pending just to make sure nothing gets
> -     * lost. The domain will get a spurious event, but it can cope.
> -     */
> -#ifdef CONFIG_COMPAT
> -    if ( !has_32bit_shinfo(d) )
> -        write_atomic(&new_info->native.evtchn_pending_sel, ~0);
> -    else
> -#endif
> -        write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
> -    vcpu_mark_events_pending(v);
> -
> -    return 0;
> -}
> -
> -/*
> - * Unmap the vcpu info page if the guest decided to place it somewhere
> - * else. This is used from domain_kill() and domain_soft_reset().
> - */
> -void unmap_vcpu_info(struct vcpu *v)
> -{
> -    mfn_t mfn = v->vcpu_info_mfn;
> -
> -    if ( mfn_eq(mfn, INVALID_MFN) )
> -        return;
> -
> -    unmap_domain_page_global((void *)
> -                             ((unsigned long)v->vcpu_info & PAGE_MASK));
> -
> -    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
> -
> -    put_page_and_type(mfn_to_page(mfn));
> -}
> -
>  int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
>                     struct guest_area *area,
>                     void (*populate)(void *dst, struct vcpu *v))
> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
>
>      domain_lock(d);
>
> -    if ( map )
> -        populate(map, v);
> +    /* No re-registration of the vCPU info area. */
> +    if ( area != &v->vcpu_info_area || !area->pg )

It would be nice if this check could be done earlier, to avoid having
to fetch and map the page just to discard it.  That would however
require taking the domain lock earlier.
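Just as a rough sketch of what I mean (hypothetical, not part of the patch;
it assumes the domain lock is taken before the page is looked up and mapped
so that area->pg is stable, and that no cleanup is needed yet at that point):

    domain_lock(d);

    /* Refuse re-registration of the vCPU info area before doing any work. */
    if ( area == &v->vcpu_info_area && area->pg )
    {
        domain_unlock(d);
        rc = -EBUSY;
        goto out;               /* hypothetical error path */
    }

    /* ... only then look up and map the target page ... */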
> +    {
> +        if ( map )
> +            populate(map, v);
>
> -    SWAP(area->pg, pg);
> -    SWAP(area->map, map);
> +        SWAP(area->pg, pg);
> +        SWAP(area->map, map);
> +    }
> +    else
> +        rc = -EBUSY;
>
>      domain_unlock(d);
>
> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
> +    if ( area == &v->vcpu_info_area && !rc )
> +    {
> +        /*
> +         * Mark everything as being pending just to make sure nothing gets
> +         * lost. The domain will get a spurious event, but it can cope.
> +         */
> +#ifdef CONFIG_COMPAT
> +        if ( !has_32bit_shinfo(d) )
> +        {
> +            vcpu_info_t *info = area->map;
> +
> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
> +        }
> +        else
> +#endif
> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);

Can't the setting of evtchn_pending_sel be done in vcpu_info_populate()?
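To make the question concrete, a minimal sketch (hypothetical, not part of
the patch) of what the tail of vcpu_info_populate() could look like.  It
would have to write through the passed-in pointer rather than
vcpu_info(v, ...), since the new mapping isn't installed yet when the
populate hook runs, and vcpu_mark_events_pending() plus
force_update_vcpu_system_time() would still need to happen after the swap:

    /*
     * Hypothetical addition at the end of vcpu_info_populate();
     * 'info' is the vcpu_info_t * derived from the 'map' argument,
     * as in the hunk quoted further down.
     */
#ifdef CONFIG_COMPAT
    if ( !has_32bit_shinfo(v->domain) )
        write_atomic(&info->native.evtchn_pending_sel, ~0);
    else
        write_atomic(&info->compat.evtchn_pending_sel, ~0);
#else
    /* Without CONFIG_COMPAT, vcpu_info_t is plain struct vcpu_info. */
    write_atomic(&info->evtchn_pending_sel, ~0);
#endif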
> +        vcpu_mark_events_pending(v);
> +
> +        force_update_vcpu_system_time(v);
> +    }
> +
>      if ( v != current )
>          vcpu_unpause(v);
>
> @@ -1670,7 +1595,10 @@ void unmap_guest_area(struct vcpu *v, st
>
>      domain_lock(d);
>      map = area->map;
> -    area->map = NULL;
> +    if ( area == &v->vcpu_info_area )
> +        vcpu_info_reset(v);
> +    else
> +        area->map = NULL;
>      pg = area->pg;
>      area->pg = NULL;
>      domain_unlock(d);
> @@ -1801,6 +1729,27 @@ bool update_runstate_area(struct vcpu *v
>      return rc;
>  }
>
> +/*
> + * This makes sure that the vcpu_info is always pointing at a valid piece of
> + * memory, and it sets a pending event to make sure that a pending event
> + * doesn't get missed.
> + */
> +static void cf_check
> +vcpu_info_populate(void *map, struct vcpu *v)
> +{
> +    vcpu_info_t *info = map;
> +
> +    if ( v->vcpu_info_area.map == &dummy_vcpu_info )
> +    {
> +        memset(info, 0, sizeof(*info));
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +        __vcpu_info(v, info, evtchn_upcall_mask) = 1;
> +#endif

I'm not sure about the point of those guards: this will always be 1, as
we always build the hypervisor with the headers in xen/public?  Is it
to make backports easier?

Thanks, Roger.