[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [PATCH v2 09/11] xen/memory: Fix mapping grant tables with XENMEM_acquire_resource
> -----Original Message----- > From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > Sent: 22 September 2020 19:25 > To: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx> > Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>; George Dunlap > <George.Dunlap@xxxxxxxxxxxxx>; Ian > Jackson <iwj@xxxxxxxxxxxxxx>; Jan Beulich <JBeulich@xxxxxxxx>; Stefano > Stabellini > <sstabellini@xxxxxxxxxx>; Wei Liu <wl@xxxxxxx>; Julien Grall > <julien@xxxxxxx>; Paul Durrant > <paul@xxxxxxx>; Michał Leszczyński <michal.leszczynski@xxxxxxx>; Hubert > Jasudowicz > <hubert.jasudowicz@xxxxxxx>; Tamas K Lengyel <tamas@xxxxxxxxxxxxx> > Subject: [PATCH v2 09/11] xen/memory: Fix mapping grant tables with > XENMEM_acquire_resource > > A guests default number of grant frames is 64, and XENMEM_acquire_resource Nit: s/guests/guest's > will reject an attempt to map more than 32 frames. This limit is caused by > the size of mfn_list[] on the stack. > > Fix mapping of arbitrary size requests by looping over batches of 32 in > acquire_resource(), and using hypercall continuations when necessary. > > To start with, break _acquire_resource() out of acquire_resource() to cope > with type-specific dispatching, and update the return semantics to indicate > the number of mfns returned. Update gnttab_acquire_resource() and x86's > arch_acquire_resource() to match these new semantics. > > Have do_memory_op() pass start_extent into acquire_resource() so it can pick > up where it left off after a continuation, and loop over batches of 32 until > all the work is done, or a continuation needs to occur. > > compat_memory_op() is a bit more complicated, because it also has to marshal > frame_list in the XLAT buffer. Have it account for continuation information > itself and hide details from the upper layer, so it can marshal the buffer in > chunks if necessary. > > With these fixes in place, it is now possible to map the whole grant table for > a guest. > > Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- > CC: George Dunlap <George.Dunlap@xxxxxxxxxxxxx> > CC: Ian Jackson <iwj@xxxxxxxxxxxxxx> > CC: Jan Beulich <JBeulich@xxxxxxxx> > CC: Stefano Stabellini <sstabellini@xxxxxxxxxx> > CC: Wei Liu <wl@xxxxxxx> > CC: Julien Grall <julien@xxxxxxx> > CC: Paul Durrant <paul@xxxxxxx> > CC: Michał Leszczyński <michal.leszczynski@xxxxxxx> > CC: Hubert Jasudowicz <hubert.jasudowicz@xxxxxxx> > CC: Tamas K Lengyel <tamas@xxxxxxxxxxxxx> > --- > xen/arch/x86/mm.c | 4 +- > xen/common/compat/memory.c | 96 ++++++++++++++++++++++++++++++-------- > xen/common/grant_table.c | 3 ++ > xen/common/memory.c | 114 > ++++++++++++++++++++++++++++++++++----------- > 4 files changed, 168 insertions(+), 49 deletions(-) > > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c > index e82307bdae..8628f51402 100644 > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -4632,7 +4632,6 @@ int arch_acquire_resource(struct domain *d, unsigned > int type, > if ( id != (unsigned int)ioservid ) > break; > > - rc = 0; > for ( i = 0; i < nr_frames; i++ ) > { > mfn_t mfn; > @@ -4643,6 +4642,9 @@ int arch_acquire_resource(struct domain *d, unsigned > int type, > > mfn_list[i] = mfn_x(mfn); > } > + if ( i == nr_frames ) > + /* Success. Passed nr_frames back to the caller. */ Nit: s/passed/pass > + rc = nr_frames; > break; > } > #endif > diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c > index 834c5e19d1..17619f26ed 100644 > --- a/xen/common/compat/memory.c > +++ b/xen/common/compat/memory.c > @@ -402,23 +402,10 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > case XENMEM_acquire_resource: > { > xen_pfn_t *xen_frame_list = NULL; > - unsigned int max_nr_frames; > > if ( copy_from_guest(&cmp.mar, compat, 1) ) > return -EFAULT; > > - /* > - * The number of frames handled is currently limited to a > - * small number by the underlying implementation, so the > - * scratch space should be sufficient for bouncing the > - * frame addresses. > - */ > - max_nr_frames = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.mar)) / > - sizeof(*xen_frame_list); > - > - if ( cmp.mar.nr_frames > max_nr_frames ) > - return -E2BIG; > - > /* Marshal the frame list in the remainder of the xlat space. */ > if ( !compat_handle_is_null(cmp.mar.frame_list) ) > xen_frame_list = (xen_pfn_t *)(nat.mar + 1); > @@ -432,6 +419,28 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > > if ( xen_frame_list && cmp.mar.nr_frames ) > { > + unsigned int xlat_max_frames = > + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.mar)) / > + sizeof(*xen_frame_list); > + > + if ( start_extent >= nat.mar->nr_frames ) > + return -EINVAL; > + > + /* > + * Adjust nat to account for work done on previous > + * continuations, leaving cmp pristine. Hide the > continaution > + * from the native code to prevent double accounting. > + */ > + nat.mar->nr_frames -= start_extent; > + nat.mar->frame += start_extent; > + cmd &= MEMOP_CMD_MASK; > + > + /* > + * If there are two many frames to fit within the xlat > buffer, > + * we'll need to loop to marshal them all. > + */ > + nat.mar->nr_frames = min(nat.mar->nr_frames, > xlat_max_frames); > + > /* > * frame_list is an input for translated guests, and an > output > * for untranslated guests. Only copy in for translated > guests. > @@ -444,14 +453,14 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > cmp.mar.nr_frames) || > __copy_from_compat_offset( > compat_frame_list, cmp.mar.frame_list, > - 0, cmp.mar.nr_frames) ) > + start_extent, nat.mar->nr_frames) ) > return -EFAULT; > > /* > * Iterate backwards over compat_frame_list[] expanding > * compat_pfn_t to xen_pfn_t in place. > */ > - for ( int x = cmp.mar.nr_frames - 1; x >= 0; --x ) > + for ( int x = nat.mar->nr_frames - 1; x >= 0; --x ) > xen_frame_list[x] = compat_frame_list[x]; > } > } > @@ -600,9 +609,11 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > case XENMEM_acquire_resource: > { > DEFINE_XEN_GUEST_HANDLE(compat_mem_acquire_resource_t); > + unsigned int done; > > if ( compat_handle_is_null(cmp.mar.frame_list) ) > { > + ASSERT(split == 0 && rc == 0); > if ( __copy_field_to_guest( > guest_handle_cast(compat, > compat_mem_acquire_resource_t), > @@ -611,6 +622,21 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > break; > } > > + if ( split < 0 ) > + { > + /* Contintuation occured. */ > + ASSERT(rc != XENMEM_acquire_resource); Do you mean "op !=" here? > + done = cmd >> MEMOP_EXTENT_SHIFT; > + } > + else > + { > + /* No continuation. */ > + ASSERT(rc == 0); "!rc" for consistency with other code perhaps? > + done = nat.mar->nr_frames; > + } > + > + ASSERT(done <= nat.mar->nr_frames); > + > /* > * frame_list is an input for translated guests, and an output > for > * untranslated guests. Only copy out for untranslated guests. > @@ -626,7 +652,7 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > */ > BUILD_BUG_ON(sizeof(compat_pfn_t) > sizeof(xen_pfn_t)); > > - for ( i = 0; i < cmp.mar.nr_frames; i++ ) > + for ( i = 0; i < done; i++ ) > { > compat_pfn_t frame = xen_frame_list[i]; > > @@ -636,15 +662,45 @@ int compat_memory_op(unsigned int cmd, > XEN_GUEST_HANDLE_PARAM(void) compat) > compat_frame_list[i] = frame; > } > > - if ( __copy_to_compat_offset(cmp.mar.frame_list, 0, > - compat_frame_list, > - cmp.mar.nr_frames) ) > + if ( __copy_to_compat_offset( > + cmp.mar.frame_list, start_extent, > + compat_frame_list, done) ) > return -EFAULT; > } > - break; > + > + start_extent += done; > + > + /* Completely done. */ > + if ( start_extent == cmp.mar.nr_frames ) > + break; > + > + /* > + * Done a "full" batch, but we were limited by space in the xlat > + * area. Go around the loop again without necesserily returning > + * to guest context. > + */ > + if ( done == nat.mar->nr_frames ) > + { > + split = 1; > + break; > + } > + > + /* Explicit continuation request from a higher level. */ > + if ( done < nat.mar->nr_frames ) > + return hypercall_create_continuation( > + __HYPERVISOR_memory_op, "ih", > + op | (start_extent << MEMOP_EXTENT_SHIFT), compat); > + > + /* > + * Well... Somethings gone wrong with the two levels of chunking. > + * My condolences to whomever next has to debug this mess. > + */ > + ASSERT_UNREACHABLE(); > + goto crash; > } > > default: > + crash: > domain_crash(current->domain); > split = 0; > break; > diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c > index 8c401a5540..81db47f8fd 100644 > --- a/xen/common/grant_table.c > +++ b/xen/common/grant_table.c > @@ -4105,6 +4105,9 @@ int gnttab_acquire_resource( > for ( i = 0; i < nr_frames; ++i ) > mfn_list[i] = virt_to_mfn(vaddrs[frame + i]); > > + /* Success. Passed nr_frames back to the caller. */ > + rc = nr_frames; > + > out: > grant_write_unlock(gt); > > diff --git a/xen/common/memory.c b/xen/common/memory.c > index 369154b7c0..ec276cb9b1 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -1027,17 +1027,31 @@ static unsigned int resource_max_frames(struct domain > *d, > } > } > > +/* > + * Returns -errno on error, or positive in the range [1, nr_frames] on > + * success. Returning less than nr_frames contitutes a request for a > + * continuation. > + */ > +static int _acquire_resource( > + struct domain *d, unsigned int type, unsigned int id, unsigned long > frame, > + unsigned int nr_frames, xen_pfn_t mfn_list[]) > +{ > + switch ( type ) > + { > + case XENMEM_resource_grant_table: > + return gnttab_acquire_resource(d, id, frame, nr_frames, mfn_list); > + > + default: > + return arch_acquire_resource(d, type, id, frame, nr_frames, > mfn_list); > + } > +} > + > static int acquire_resource( > - XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg) > + XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg, > + unsigned long start_extent) > { > struct domain *d, *currd = current->domain; > xen_mem_acquire_resource_t xmar; > - /* > - * The mfn_list and gfn_list (below) arrays are ok on stack for the > - * moment since they are small, but if they need to grow in future > - * use-cases then per-CPU arrays or heap allocations may be required. > - */ > - xen_pfn_t mfn_list[32]; > unsigned int max_frames; > int rc; > > @@ -1055,9 +1069,6 @@ static int acquire_resource( > if ( xmar.pad != 0 ) > return -EINVAL; > > - if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) ) > - return -E2BIG; > - > rc = rcu_lock_remote_domain_by_id(xmar.domid, &d); > if ( rc ) > return rc; > @@ -1074,7 +1085,7 @@ static int acquire_resource( > > if ( guest_handle_is_null(xmar.frame_list) ) > { > - if ( xmar.nr_frames ) > + if ( xmar.nr_frames || start_extent ) > goto out; > > xmar.nr_frames = max_frames; > @@ -1087,26 +1098,47 @@ static int acquire_resource( > goto out; > } > > + /* > + * Limiting nr_frames at (UINT_MAX >> MEMOP_EXTENT_SHIFT) isn't ideal. > If > + * it ever becomes a practical problem, we can switch to mutating > + * xmar.{frame,nr_frames,frame_list} in guest memory. > + */ > + rc = -EINVAL; > + if ( start_extent >= xmar.nr_frames || > + xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT) ) > + goto out; > + > + /* Adjust for work done on previous continuations. */ > + xmar.nr_frames -= start_extent; > + xmar.frame += start_extent; > + guest_handle_add_offset(xmar.frame_list, start_extent); > + > do { > - switch ( xmar.type ) > - { > - case XENMEM_resource_grant_table: > - rc = gnttab_acquire_resource(d, xmar.id, xmar.frame, > xmar.nr_frames, > - mfn_list); > - break; > + /* > + * Arbitrary size. Not too much stack space, and a reasonable stride > + * for continutation checks. > + */ > + xen_pfn_t mfn_list[32]; > + unsigned int todo = MIN(ARRAY_SIZE(mfn_list), xmar.nr_frames), done; > > - default: > - rc = arch_acquire_resource(d, xmar.type, xmar.id, xmar.frame, > - xmar.nr_frames, mfn_list); > - break; > - } > + rc = _acquire_resource(d, xmar.type, xmar.id, xmar.frame, > + todo, mfn_list); > + if ( rc < 0 ) > + goto out; > > - if ( rc ) > + done = rc; > + rc = 0; > + if ( done == 0 || done > todo ) > + { > + ASSERT_UNREACHABLE(); > + rc = -EINVAL; > goto out; > + } > > + /* Adjust guest frame_list appropriately. */ > if ( !paging_mode_translate(currd) ) > { > - if ( copy_to_guest(xmar.frame_list, mfn_list, xmar.nr_frames) ) > + if ( copy_to_guest(xmar.frame_list, mfn_list, done) ) > rc = -EFAULT; > } > else > @@ -1114,10 +1146,10 @@ static int acquire_resource( > xen_pfn_t gfn_list[ARRAY_SIZE(mfn_list)]; > unsigned int i; > > - if ( copy_from_guest(gfn_list, xmar.frame_list, xmar.nr_frames) ) > + if ( copy_from_guest(gfn_list, xmar.frame_list, done) ) > rc = -EFAULT; > > - for ( i = 0; !rc && i < xmar.nr_frames; i++ ) > + for ( i = 0; !rc && i < done; i++ ) > { > rc = set_foreign_p2m_entry(currd, gfn_list[i], > _mfn(mfn_list[i])); > @@ -1126,7 +1158,32 @@ static int acquire_resource( > rc = -EIO; > } > } > - } while ( 0 ); > + > + if ( rc ) > + goto out; > + > + xmar.nr_frames -= done; > + xmar.frame += done; > + guest_handle_add_offset(xmar.frame_list, done); > + start_extent += done; > + > + /* > + * Explicit contination request from _acquire_resource(), or we've > + * still got work to do other work is pending. I think "still got work to do" or " other work is pending", not both? Paul > + */ > + if ( done < todo || > + (xmar.nr_frames && hypercall_preempt_check()) ) > + { > + rc = hypercall_create_continuation( > + __HYPERVISOR_memory_op, "lh", > + XENMEM_acquire_resource | (start_extent << > MEMOP_EXTENT_SHIFT), > + arg); > + goto out; > + } > + > + } while ( xmar.nr_frames ); > + > + rc = 0; > > out: > rcu_unlock_domain(d); > @@ -1593,7 +1650,8 @@ long do_memory_op(unsigned long cmd, > XEN_GUEST_HANDLE_PARAM(void) arg) > > case XENMEM_acquire_resource: > rc = acquire_resource( > - guest_handle_cast(arg, xen_mem_acquire_resource_t)); > + guest_handle_cast(arg, xen_mem_acquire_resource_t), > + start_extent); > break; > > default: > -- > 2.11.0
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |