Xen project Mailing List

Re: [XEN PATCH v3] xen/arm: ffa: reclaim shared memory on guest destroy

Hi Bertrand, On Thu, Feb 1, 2024 at 2:57 PM Bertrand Marquis <Bertrand.Marquis@xxxxxxx> wrote: > > Hi Jens, > > > On 17 Jan 2024, at 12:06, Jens Wiklander <jens.wiklander@xxxxxxxxxx> wrote: > > > > When an FF-A enabled guest is destroyed it may leave behind memory > > shared with SPs. This memory must be reclaimed before it's reused or an > > SP may make changes to memory used by a new unrelated guest. So when the > > domain is teared down add FF-A requests to reclaim all remaining shared > > memory. > > > > SPs in the secure world are notified using VM_DESTROYED that a guest has > > been destroyed. An SP is supposed to relinquish all shared memory to allow > > reclaiming the memory. The relinquish operation may need to be delayed if > > the shared memory is for instance part of a DMA operation. > > > > The domain reference counter is increased when the first FF-A shared > > memory is registered and the counter is decreased again when the last > > shared memory is reclaimed. If FF-A shared memory registrations remain > > at the end of of ffa_domain_teardown() a timer is set to try to reclaim > > the shared memory every second until the memory is reclaimed. > > > > A few minor style fixes with a removed empty line here and an added new > > line there. > > > > Signed-off-by: Jens Wiklander <jens.wiklander@xxxxxxxxxx> > > --- > > > > v3: > > - Mentioning in the commit message that there are some style fixes > > - Addressing review comments > > - Refactor the ffa_domain_teardown() path to let > > ffa_domain_teardown_continue() do most of the work. > > > > v2: > > - Update commit message to match the new implementation > > - Using a per domain bitfield to keep track of which SPs has been notified > > with VM_DESTROYED > > - Holding a domain reference counter to keep the domain as a zombie domain > > while there still is shared memory registrations remaining to be reclaimed > > - Using a timer to retry reclaiming remaining shared memory registrations > > --- > > xen/arch/arm/tee/ffa.c | 253 +++++++++++++++++++++++++++++++++-------- > > 1 file changed, 204 insertions(+), 49 deletions(-) > > > > diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c > > index 0793c1c7585d..80ebbf4f01c6 100644 > > --- a/xen/arch/arm/tee/ffa.c > > +++ b/xen/arch/arm/tee/ffa.c > > @@ -54,6 +54,7 @@ > > #include <xen/mm.h> > > #include <xen/sched.h> > > #include <xen/sizes.h> > > +#include <xen/timer.h> > > #include <xen/types.h> > > > > #include <asm/event.h> > > @@ -144,6 +145,12 @@ > > */ > > #define FFA_MAX_SHM_COUNT 32 > > > > +/* > > + * The time we wait until trying to tear down a domain again if it was > > + * blocked initially. > > + */ > > +#define FFA_CTX_TEARDOWN_DELAY SECONDS(1) > > + > > /* FF-A-1.1-REL0 section 10.9.2 Memory region handle, page 167 */ > > #define FFA_HANDLE_HYP_FLAG BIT(63, ULL) > > #define FFA_HANDLE_INVALID 0xffffffffffffffffULL > > @@ -384,11 +391,6 @@ struct ffa_ctx { > > unsigned int page_count; > > /* FF-A version used by the guest */ > > uint32_t guest_vers; > > - /* > > - * Number of SPs that we have sent a VM created signal to, used in > > - * ffa_domain_teardown() to know which SPs need to be signalled. > > - */ > > - uint16_t create_signal_count; > > bool rx_is_free; > > /* Used shared memory objects, struct ffa_shm_mem */ > > struct list_head shm_list; > > @@ -402,6 +404,15 @@ struct ffa_ctx { > > spinlock_t tx_lock; > > spinlock_t rx_lock; > > spinlock_t lock; > > + /* Used if domain can't be torn down immediately */ > > + struct domain *teardown_d; > > + struct list_head teardown_list; > > + s_time_t teardown_expire; > > + /* > > + * Used for ffa_domain_teardown() to keep track of which SPs should be > > + * notified that this guest is being destroyed. > > + */ > > + unsigned long vm_destroy_bitmap[]; > > }; > > > > struct ffa_shm_mem { > > @@ -436,6 +447,12 @@ static void *ffa_tx __read_mostly; > > static DEFINE_SPINLOCK(ffa_rx_buffer_lock); > > static DEFINE_SPINLOCK(ffa_tx_buffer_lock); > > > > + > > +/* Used to track domains that could not be torn down immediately. */ > > +static struct timer ffa_teardown_timer; > > +static struct list_head ffa_teardown_head; > > +static DEFINE_SPINLOCK(ffa_teardown_lock); > > + > > static bool ffa_get_version(uint32_t *vers) > > { > > const struct arm_smccc_1_2_regs arg = { > > @@ -853,7 +870,6 @@ static int32_t handle_partition_info_get(uint32_t w1, > > uint32_t w2, uint32_t w3, > > goto out_rx_release; > > } > > > > - > > memcpy(ctx->rx, ffa_rx, sz); > > } > > ctx->rx_is_free = false; > > @@ -992,53 +1008,75 @@ static void put_shm_pages(struct ffa_shm_mem *shm) > > } > > } > > > > -static bool inc_ctx_shm_count(struct ffa_ctx *ctx) > > +static bool inc_ctx_shm_count(struct domain *d, struct ffa_ctx *ctx) > > { > > bool ret = true; > > > > spin_lock(&ctx->lock); > > + > > + /* > > + * If this is the first shm added, increase the domain reference > > + * counter as we need to keep domain around a bit longer to reclaim the > > + * shared memory in the teardown path. > > + */ > > + if ( !ctx->shm_count ) > > + get_knownalive_domain(d); > > + > > if (ctx->shm_count >= FFA_MAX_SHM_COUNT) > > ret = false; > > else > > ctx->shm_count++; > > + > > spin_unlock(&ctx->lock); > > > > return ret; > > } > > > > -static void dec_ctx_shm_count(struct ffa_ctx *ctx) > > +static void dec_ctx_shm_count(struct domain *d, struct ffa_ctx *ctx) > > { > > spin_lock(&ctx->lock); > > + > > ASSERT(ctx->shm_count > 0); > > ctx->shm_count--; > > + > > + /* > > + * If this was the last shm removed, let go of the domain reference we > > + * took in inc_ctx_shm_count() above. > > + */ > > + if ( !ctx->shm_count ) > > + put_domain(d); > > + > > spin_unlock(&ctx->lock); > > } > > > > -static struct ffa_shm_mem *alloc_ffa_shm_mem(struct ffa_ctx *ctx, > > +static struct ffa_shm_mem *alloc_ffa_shm_mem(struct domain *d, > > unsigned int page_count) > > { > > + struct ffa_ctx *ctx = d->arch.tee; > > struct ffa_shm_mem *shm; > > > > if ( page_count >= FFA_MAX_SHM_PAGE_COUNT ) > > return NULL; > > - if ( !inc_ctx_shm_count(ctx) ) > > + if ( !inc_ctx_shm_count(d, ctx) ) > > return NULL; > > > > shm = xzalloc_flex_struct(struct ffa_shm_mem, pages, page_count); > > if ( shm ) > > shm->page_count = page_count; > > else > > - dec_ctx_shm_count(ctx); > > + dec_ctx_shm_count(d, ctx); > > > > return shm; > > } > > > > -static void free_ffa_shm_mem(struct ffa_ctx *ctx, struct ffa_shm_mem *shm) > > +static void free_ffa_shm_mem(struct domain *d, struct ffa_shm_mem *shm) > > { > > + struct ffa_ctx *ctx = d->arch.tee; > > + > > if ( !shm ) > > return; > > > > - dec_ctx_shm_count(ctx); > > + dec_ctx_shm_count(d, ctx); > > put_shm_pages(shm); > > xfree(shm); > > } > > @@ -1306,7 +1344,7 @@ static void handle_mem_share(struct cpu_user_regs > > *regs) > > goto out_unlock; > > } > > > > - shm = alloc_ffa_shm_mem(ctx, page_count); > > + shm = alloc_ffa_shm_mem(d, page_count); > > if ( !shm ) > > { > > ret = FFA_RET_NO_MEMORY; > > @@ -1350,7 +1388,7 @@ static void handle_mem_share(struct cpu_user_regs > > *regs) > > > > out: > > if ( ret ) > > - free_ffa_shm_mem(ctx, shm); > > + free_ffa_shm_mem(d, shm); > > out_unlock: > > spin_unlock(&ctx->tx_lock); > > > > @@ -1401,7 +1439,7 @@ static int handle_mem_reclaim(uint64_t handle, > > uint32_t flags) > > } > > else > > { > > - free_ffa_shm_mem(ctx, shm); > > + free_ffa_shm_mem(d, shm); > > } > > > > return ret; > > @@ -1486,6 +1524,41 @@ static bool ffa_handle_call(struct cpu_user_regs > > *regs) > > } > > } > > > > +static bool is_in_subscr_list(const uint16_t *subscr, uint16_t start, > > + uint16_t end, uint16_t sp_id) > > +{ > > + unsigned int n; > > + > > + for ( n = start; n < end; n++ ) > > + { > > + if ( subscr[n] == sp_id ) > > + return true; > > + } > > + > > + return false; > > +} > > + > > +static void vm_destroy_bitmap_init(struct ffa_ctx *ctx, > > + unsigned int create_signal_count) > > +{ > > + unsigned int n; > > + > > + for ( n = 0; n < subscr_vm_destroyed_count; n++ ) > > + { > > + /* > > + * Skip SPs subscribed to the VM created event that never was > > + * notified of the VM creation due to an error during > > + * ffa_domain_init(). > > + */ > > + if ( is_in_subscr_list(subscr_vm_created, create_signal_count, > > + subscr_vm_created_count, > > + subscr_vm_destroyed[n]) ) > > + continue; > > + > > + set_bit(n, ctx->vm_destroy_bitmap); > > + } > > +} > > + > > static int ffa_domain_init(struct domain *d) > > { > > struct ffa_ctx *ctx; > > @@ -1501,11 +1574,14 @@ static int ffa_domain_init(struct domain *d) > > if ( d->domain_id >= UINT16_MAX) > > return -ERANGE; > > > > - ctx = xzalloc(struct ffa_ctx); > > + ctx = xzalloc_flex_struct(struct ffa_ctx, vm_destroy_bitmap, > > + BITS_TO_LONGS(subscr_vm_destroyed_count)); > > if ( !ctx ) > > return -ENOMEM; > > > > d->arch.tee = ctx; > > + ctx->teardown_d = d; > > + INIT_LIST_HEAD(&ctx->shm_list); > > > > for ( n = 0; n < subscr_vm_created_count; n++ ) > > { > > @@ -1515,65 +1591,141 @@ static int ffa_domain_init(struct domain *d) > > { > > printk(XENLOG_ERR "ffa: Failed to report creation of vm_id %u > > to %u: res %d\n", > > get_vm_id(d), subscr_vm_created[n], res); > > - ctx->create_signal_count = n; > > - return -EIO; > > + break; > > } > > } > > - ctx->create_signal_count = subscr_vm_created_count; > > - > > - INIT_LIST_HEAD(&ctx->shm_list); > > + vm_destroy_bitmap_init(ctx, n); > > + if ( n != subscr_vm_created_count ) > > + return -EIO; > > > > return 0; > > } > > > > -static bool is_in_subscr_list(const uint16_t *subscr, uint16_t start, > > - uint16_t end, uint16_t sp_id) > > +static void send_vm_destroyed(struct domain *d) > > { > > + struct ffa_ctx *ctx = d->arch.tee; > > unsigned int n; > > + int32_t res; > > > > - for ( n = start; n < end; n++ ) > > + for ( n = 0; n < subscr_vm_destroyed_count; n++ ) > > { > > - if ( subscr[n] == sp_id ) > > - return true; > > - } > > + if ( !test_bit(n, ctx->vm_destroy_bitmap) ) > > + continue; > > > > - return false; > > + res = ffa_direct_req_send_vm(subscr_vm_destroyed[n], get_vm_id(d), > > + FFA_MSG_SEND_VM_DESTROYED); > > + > > I think here we should dinstinguish cases depending on the error received: > - retry: we should keep the bit set and retry later > - other: unset the bit as there is not much we can do but log it. Makes sense. > > The interrupted case can happen, we handle that already in the direct message > request but we will get back here if that does not work after some retries so > putting it in the other category is ok i think. In the case of FFA_MSG_SEND_VM_DESTROYED is think it makes more sense to also retry FFA_RET_INTERRUPTED later. To me it seems that there will be an even better chance for an eventual non-secure interrupt to be handled if wait a little before retrying. So, if you don't mind, I'll skip clearing the bit below for both FFA_RET_INTERRUPTED and FFA_RET_RETRY. > > > + if ( res ) > > + { > > + printk(XENLOG_ERR "%pd: ffa: Failed to report destruction of > > vm_id %u to %u: res %d\n", > > + d, get_vm_id(d), subscr_vm_destroyed[n], res); > > + } > > + else > > + { > > + clear_bit(n, ctx->vm_destroy_bitmap); > > + } > > + } > > } > > > > -/* This function is supposed to undo what ffa_domain_init() has done */ > > -static int ffa_domain_teardown(struct domain *d) > > +static void reclaim_shms(struct domain *d) > > { > > struct ffa_ctx *ctx = d->arch.tee; > > - unsigned int n; > > + struct ffa_shm_mem *shm, *tmp; > > int32_t res; > > > > - if ( !ctx ) > > - return 0; > > + list_for_each_entry_safe(shm, tmp, &ctx->shm_list, list) > > + { > > + register_t handle_hi; > > + register_t handle_lo; > > > > - for ( n = 0; n < subscr_vm_destroyed_count; n++ ) > > + uint64_to_regpair(&handle_hi, &handle_lo, shm->handle); > > + res = ffa_mem_reclaim(handle_lo, handle_hi, 0); > > Same here as for the VM_DESTROYED, there are some error code that we should > not reiterate on as they will also fail next time: > - invalid parameters: stop and log an error, this will always fail > - no memory: for now I would say do the same as invalid parameter with a > comment > maybe because we could retry and memory coud be available next time > - abort: stop and log an error, this is a major fault on the other side > - denied: retry. I agree, I'll propose something like that in the v4. > > > + if ( res ) > > + { > > + printk(XENLOG_G_INFO "%pd: ffa: Failed to reclaim handle %#lx > > : %d\n", > > + d, shm->handle, res); > > + } > > + else > > + { > > + printk(XENLOG_G_DEBUG "%pd: ffa: Reclaimed handle %#lx\n", > > + d, shm->handle); > > + list_del(&shm->list); > > + free_ffa_shm_mem(d, shm); > > + } > > + } > > +} > > + > > +static void ffa_domain_teardown_continue(struct ffa_ctx *ctx, bool > > first_time) > > +{ > > + struct ffa_ctx *next_ctx = NULL; > > + > > + send_vm_destroyed(ctx->teardown_d); > > + reclaim_shms(ctx->teardown_d); > > + > > + if ( ctx->shm_count ) > > + { > > If i do understand right you only retry if there are remaining SHM to reclaim > but what > if you have destroy messages that were not sent successfully, shouldn't we > reiterate > in that case to send the destroy message again ? I agree, I'll fix. Cheers, Jens > > > + printk(XENLOG_G_INFO "%pd: ffa: Remaining unclaimed handles, > > retrying\n", ctx->teardown_d); > > + > > + ctx->teardown_expire = NOW() + FFA_CTX_TEARDOWN_DELAY; > > + > > + spin_lock(&ffa_teardown_lock); > > + list_add_tail(&ctx->teardown_list, &ffa_teardown_head); > > + /* Need to set a new timer for the next ctx in line */ > > + next_ctx = list_first_entry(&ffa_teardown_head, struct ffa_ctx, > > + teardown_list); > > + spin_unlock(&ffa_teardown_lock); > > + } > > + else > > { > > /* > > - * Skip SPs subscribed to the VM created event that never was > > - * notified of the VM creation due to an error during > > - * ffa_domain_init(). > > + * domain_destroy() might have been called (via put_domain() in > > + * reclaim_shms()), so we can't touch the domain structure anymore. > > */ > > - if ( is_in_subscr_list(subscr_vm_created, ctx->create_signal_count, > > - subscr_vm_created_count, > > - subscr_vm_destroyed[n]) ) > > - continue; > > + xfree(ctx); > > > > - res = ffa_direct_req_send_vm(subscr_vm_destroyed[n], get_vm_id(d), > > - FFA_MSG_SEND_VM_DESTROYED); > > - > > - if ( res ) > > - printk(XENLOG_ERR "ffa: Failed to report destruction of vm_id > > %u to %u: res %d\n", > > - get_vm_id(d), subscr_vm_destroyed[n], res); > > + /* Only check if there has been a change to the teardown queue */ > > + if ( !first_time ) > > + { > > + spin_lock(&ffa_teardown_lock); > > + next_ctx = list_first_entry_or_null(&ffa_teardown_head, > > + struct ffa_ctx, > > teardown_list); > > + spin_unlock(&ffa_teardown_lock); > > + } > > } > > > > + if ( next_ctx ) > > + set_timer(&ffa_teardown_timer, next_ctx->teardown_expire); > > +} > > + > > +static void ffa_teardown_timer_callback(void *arg) > > +{ > > + struct ffa_ctx *ctx; > > + > > + spin_lock(&ffa_teardown_lock); > > + ctx = list_first_entry_or_null(&ffa_teardown_head, struct ffa_ctx, > > + teardown_list); > > + if ( ctx ) > > + list_del(&ctx->teardown_list); > > + spin_unlock(&ffa_teardown_lock); > > + > > + if ( ctx ) > > + ffa_domain_teardown_continue(ctx, false /* !first_time */); > > + else > > + printk(XENLOG_G_ERR "%s: teardown list is empty\n", __func__); > > +} > > + > > +/* This function is supposed to undo what ffa_domain_init() has done */ > > +static int ffa_domain_teardown(struct domain *d) > > +{ > > + struct ffa_ctx *ctx = d->arch.tee; > > + > > + if ( !ctx ) > > + return 0; > > + > > if ( ctx->rx ) > > rxtx_unmap(ctx); > > > > - XFREE(d->arch.tee); > > + ffa_domain_teardown_continue(ctx, true /* first_time */); > > > > return 0; > > } > > @@ -1739,6 +1891,9 @@ static bool ffa_probe(void) > > if ( !init_sps() ) > > goto err_free_ffa_tx; > > > > + INIT_LIST_HEAD(&ffa_teardown_head); > > + init_timer(&ffa_teardown_timer, ffa_teardown_timer_callback, NULL, 0); > > + > > return true; > > > > err_free_ffa_tx: > > -- > > 2.34.1 > > > > Regards > Bertrand > >

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.