[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 06/11] optee: add std call handling
Hi Volodymyr, On 18/12/2018 21:11, Volodymyr Babchuk wrote: From: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx> The main way to communicate with OP-TEE is to issue standard SMCCC call. "Standard" is a SMCCC term and it means that call can be interrupted and OP-TEE can return control to NW before completing the call. In contrast with fast calls, where arguments and return values are passed in registers, standard calls use shared memory. Register pair a1,a2 holds 64-bit PA of command buffer, where all arguments are stored and which is used to return data. OP-TEE internally copies contents of this buffer into own secure memory before accessing and validating any data in command buffer. This is done to make sure that NW will not change contents of the validated parameters. Mediator needs to do the same for number of reasons: 1. To make sure that guest will not change data after validation. 2. To translate IPAs to PAs in the command buffer (this is not done in this patch). 3. To hide translated address from guest, so it will not be able to do IPA->PA translation by misusing mediator. During standard call OP-TEE can issue multiple "RPC returns", asking NW to do some work for OP-TEE. NW then issues special call OPTEE_SMC_CALL_RETURN_FROM_RPC to resume handling of the original call. Thus, mediator needs to maintain context for original standard call during multiple SMCCC calls. Standard call is considered complete, when returned value is not a RPC request. 
Signed-off-by: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx> --- Changes from v2: - renamed struct domain_ctx to struct optee_domain - fixed coding style - Now I use access_guest_memory_by_ipa() instead of mappings to read command buffer - Added tracking for in flight calls, so guest can't resume the same call from two CPUs simultaniously xen/arch/arm/tee/optee.c | 319 ++++++++++++++++++++++++++++++++++- xen/include/asm-arm/domain.h | 3 + 2 files changed, 320 insertions(+), 2 deletions(-) diff --git a/xen/arch/arm/tee/optee.c b/xen/arch/arm/tee/optee.c index 584241b03a..dc90e2ed8e 100644 --- a/xen/arch/arm/tee/optee.c +++ b/xen/arch/arm/tee/optee.c @@ -12,6 +12,8 @@ */#include <xen/device_tree.h>+#include <xen/domain_page.h> +#include <xen/guest_access.h> #include <xen/sched.h> #include <asm/smccc.h> #include <asm/tee/tee.h> @@ -22,11 +24,38 @@ /* Client ID 0 is reserved for hypervisor itself */ #define OPTEE_CLIENT_ID(domain) (domain->domain_id + 1)+/*+ * Maximal number of concurrent standard calls from one guest. This + * corresponds to OPTEE configuration option CFG_NUM_THREADS, because + * OP-TEE spawns a thread for every standard call. Looking at OP-TEE, CFG_NUM_THREADS will vary depending on the platform. Is there any way to probe that number of threads from Xen? In any case, I think we should update the comment to reflect that this seems to be the maximum CFG_NUM_THREADS supported by any upstream platform. + */ +#define MAX_STD_CALLS 16 + #define OPTEE_KNOWN_NSEC_CAPS OPTEE_SMC_NSEC_CAP_UNIPROCESSOR #define OPTEE_KNOWN_SEC_CAPS (OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM | \ OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM | \ OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)+/*+ * Call context. OP-TEE can issue multiple RPC returns during one call. + * We need to preserve context during them. 
+ */ +struct optee_std_call { + struct list_head list; + struct optee_msg_arg *xen_arg; + paddr_t guest_arg_ipa; + int optee_thread_id; + int rpc_op; + bool in_flight; +}; + +/* Domain context */ +struct optee_domain { + struct list_head call_list; + atomic_t call_count; + spinlock_t lock; +}; + static bool optee_probe(void) { struct dt_device_node *node; @@ -52,6 +81,11 @@ static bool optee_probe(void) static int optee_enable(struct domain *d) { struct arm_smccc_res resp; + struct optee_domain *ctx; + + ctx = xzalloc(struct optee_domain); + if ( !ctx ) + return -ENOMEM;/** Inform OP-TEE about a new guest. @@ -69,9 +103,16 @@ static int optee_enable(struct domain *d) { gprintk(XENLOG_WARNING, "Unable to create OPTEE client: rc = 0x%X\n", (uint32_t)resp.a0); + xfree(ctx); return -ENODEV; }+ INIT_LIST_HEAD(&ctx->call_list);+ atomic_set(&ctx->call_count, 0); + spin_lock_init(&ctx->lock); + + d->arch.tee = ctx; + return 0; }@@ -111,9 +152,86 @@ static void set_return(struct cpu_user_regs *regs, uint32_t ret)set_user_reg(regs, 7, 0); }+static struct optee_std_call *allocate_std_call(struct optee_domain *ctx)+{ + struct optee_std_call *call; + int count; + + /* Make sure that guest does not execute more than MAX_STD_CALLS */ + count = atomic_add_unless(&ctx->call_count, 1, MAX_STD_CALLS); + if ( count == MAX_STD_CALLS ) + return NULL; + + call = xzalloc(struct optee_std_call); + if ( !call ) + { + atomic_dec(&ctx->call_count); + return NULL; + } + + call->optee_thread_id = -1; + call->in_flight = true; + + spin_lock(&ctx->lock); + list_add_tail(&call->list, &ctx->call_list); + spin_unlock(&ctx->lock); + + return call; +} + +static void free_std_call(struct optee_domain *ctx, + struct optee_std_call *call) +{ + atomic_dec(&ctx->call_count); + + spin_lock(&ctx->lock); + list_del(&call->list); + spin_unlock(&ctx->lock); + + ASSERT(!call->in_flight); + xfree(call->xen_arg); + xfree(call); +} + +static struct optee_std_call *get_std_call(struct optee_domain *ctx, + int 
thread_id) +{ + struct optee_std_call *call; + + spin_lock(&ctx->lock); + list_for_each_entry( call, &ctx->call_list, list ) + { + if ( call->optee_thread_id == thread_id ) + { + if ( call->in_flight ) + { + gprintk(XENLOG_WARNING, "Guest tries to execute call which is already in flight"); + goto out; + } + call->in_flight = true; + spin_unlock(&ctx->lock); + return call; + } + } +out: + spin_unlock(&ctx->lock); + + return NULL; +} + +static void put_std_call(struct optee_domain *ctx, struct optee_std_call *call) +{ + spin_lock(&ctx->lock); + ASSERT(call->in_flight); + call->in_flight = false; + spin_unlock(&ctx->lock); +} + static void optee_domain_destroy(struct domain *d) { struct arm_smccc_res resp; + struct optee_std_call *call, *call_tmp; + struct optee_domain *ctx = d->arch.tee; /* At this time all domain VCPUs should be stopped */@@ -124,6 +242,199 @@ static void optee_domain_destroy(struct domain *d)*/ arm_smccc_smc(OPTEE_SMC_VM_DESTROYED, OPTEE_CLIENT_ID(d), 0, 0, 0, 0, 0, 0, &resp); This function can be called without enable and should be idempotent. So I would check d->arch.tee before and... + ASSERT(!spin_is_locked(&ctx->lock)); + + list_for_each_entry_safe( call, call_tmp, &ctx->call_list, list ) + free_std_call(ctx, call); + + ASSERT(!atomic_read(&ctx->call_count)); + + xfree(d->arch.tee); use XFREE here. +} + +/* + * Copy command buffer into xen memory to: + * 1) Hide translated addresses from guest + * 2) Make sure that guest wouldn't change data in command buffer during call + */ +static bool copy_std_request(struct cpu_user_regs *regs, + struct optee_std_call *call) +{ + paddr_t xen_addr; + + call->guest_arg_ipa = (paddr_t)get_user_reg(regs, 1) << 32 | + get_user_reg(regs, 2); NIT: The indentation looks weird here. + + /* + * Command buffer should start at page boundary. + * This is OP-TEE ABI requirement. 
+ */ + if ( call->guest_arg_ipa & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1) ) + return false; + + call->xen_arg = _xmalloc(OPTEE_MSG_NONCONTIG_PAGE_SIZE, + OPTEE_MSG_NONCONTIG_PAGE_SIZE); + if ( !call->xen_arg ) + return false; + + BUILD_BUG_ON(OPTEE_MSG_NONCONTIG_PAGE_SIZE > PAGE_SIZE); As you use _xmalloc, you should not need this. This is only necessary if you use alloc_xenheap_page. I am wondering whether it is wise to allocate the memory from xenheap and not domheap. While on Arm64 (for now) xenheap and domheap are the same, on Arm32 they are different. The xenheap is at most 1GB, so pretty limited. Furthermore, using domheap would have the advantage of allowing, in the future, accounting the allocation to the guest and adding more safety (there are discussions about making domheap per domain). + + access_guest_memory_by_ipa(current->domain, call->guest_arg_ipa, + call->xen_arg, OPTEE_MSG_NONCONTIG_PAGE_SIZE, + false); You need to check the return of access_guest_memory_by_ipa as this function can fail. + + xen_addr = virt_to_maddr(call->xen_arg); + + set_user_reg(regs, 1, xen_addr >> 32); + set_user_reg(regs, 2, xen_addr & 0xFFFFFFFF); + + return true; +} + +static void copy_std_request_back(struct optee_domain *ctx, + struct cpu_user_regs *regs, + struct optee_std_call *call) Can you add a comment on top of the function explaining what it does? +{ + struct optee_msg_arg *guest_arg; + struct page_info *page; + unsigned int i; + uint32_t attr; + + /* copy_std_request() validated IPA for us */ Not really, the guest is free to modify the stage-2 mapping on another vCPU while this is happening. I agree that the guest will shoot himself, but we at least need to not have weird behavior happening. In that case, I would check that the type is p2m_ram_rw as you don't want to write in read-only or foreign mapping. Also, as copy_std_request() and copy_std_request_back may not be called in the same "thread", it would be useful if you described the interaction in a bit more detail. 
+ page = get_page_from_gfn(current->domain, paddr_to_pfn(call->guest_arg_ipa), Please use gfn_x(gaddr_to_gfn(...)) to clarify this is a gfn. The gfn_x will be unnecessary soon with a cleanup that is currently under review. + NULL, P2M_ALLOC); + if ( !page ) + return; + + guest_arg = map_domain_page(page_to_mfn(page)); So here you assume that PAGE_SIZE == OPTEE_MSG_NONCONTIG_PAGE_SIZE. Can you add a BUILD_BUG_ON just above (with a comment) so we don't get some nasty surprise with 64K support. Also, you should be able to use __map_domain_page(page) here. + + guest_arg->ret = call->xen_arg->ret; + guest_arg->ret_origin = call->xen_arg->ret_origin; + guest_arg->session = call->xen_arg->session; NIT: newline here please. + for ( i = 0; i < call->xen_arg->num_params; i++ ) + { + attr = call->xen_arg->params[i].attr; + + switch ( attr & OPTEE_MSG_ATTR_TYPE_MASK ) + { + case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT: + guest_arg->params[i].u.tmem.size = + call->xen_arg->params[i].u.tmem.size; + continue; + case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT: + guest_arg->params[i].u.value.a = + call->xen_arg->params[i].u.value.a; + guest_arg->params[i].u.value.b = + call->xen_arg->params[i].u.value.b; + continue; + case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT: + guest_arg->params[i].u.rmem.size = + call->xen_arg->params[i].u.rmem.size; + continue; + case OPTEE_MSG_ATTR_TYPE_NONE: + case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT: + continue; + } + } + + unmap_domain_page(guest_arg); + put_page(page); +} + +static void execute_std_call(struct optee_domain *ctx, + struct cpu_user_regs *regs, + struct optee_std_call *call) +{ + register_t optee_ret; + + forward_call(regs); + + optee_ret = get_user_reg(regs, 0); + if ( OPTEE_SMC_RETURN_IS_RPC(optee_ret) ) + { + call->optee_thread_id = get_user_reg(regs, 3); + call->rpc_op = OPTEE_SMC_RETURN_GET_RPC_FUNC(optee_ret); 
+ put_std_call(ctx, call); + return; + } + + copy_std_request_back(ctx, regs, call); + + put_std_call(ctx, call); + free_std_call(ctx, call); +} Most of the code in this patch is self-explaining, which is quite nice :). However, I think this function would require explaining a bit the logic. For instance in which case the call will be freed. + +static bool handle_std_call(struct optee_domain *ctx, + struct cpu_user_regs *regs) +{ + struct optee_std_call *call = allocate_std_call(ctx); + + if ( !call ) + return false; + + if ( !copy_std_request(regs, call) ) + goto err; + + /* Now we can safely examine contents of command buffer */ + if ( OPTEE_MSG_GET_ARG_SIZE(call->xen_arg->num_params) > + OPTEE_MSG_NONCONTIG_PAGE_SIZE ) + goto err; + + switch ( call->xen_arg->cmd ) + { + case OPTEE_MSG_CMD_OPEN_SESSION: + case OPTEE_MSG_CMD_CLOSE_SESSION: + case OPTEE_MSG_CMD_INVOKE_COMMAND: + case OPTEE_MSG_CMD_CANCEL: + case OPTEE_MSG_CMD_REGISTER_SHM: + case OPTEE_MSG_CMD_UNREGISTER_SHM: + break; + default: + goto err; + } + + execute_std_call(ctx, regs, call); + + return true; This function is a bit odd to read. I think it would be more clear if you move this code before the break. 
+ +err: + put_std_call(ctx, call); + free_std_call(ctx, call); + + return false; +} + +static bool handle_rpc(struct optee_domain *ctx, struct cpu_user_regs *regs) +{ + struct optee_std_call *call; + int optee_thread_id = get_user_reg(regs, 3); + + call = get_std_call(ctx, optee_thread_id); + + if ( !call ) + return false; + + switch ( call->rpc_op ) + { + case OPTEE_SMC_RPC_FUNC_ALLOC: + /* TODO: Add handling */ + break; + case OPTEE_SMC_RPC_FUNC_FREE: + /* TODO: Add handling */ + break; + case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR: + break; + case OPTEE_SMC_RPC_FUNC_CMD: + /* TODO: Add handling */ + break; + } + + execute_std_call(ctx, regs, call); + return true; }static bool handle_exchange_capabilities(struct cpu_user_regs *regs)@@ -161,6 +472,8 @@ static bool handle_exchange_capabilities(struct cpu_user_regs *regs)static bool optee_handle_call(struct cpu_user_regs *regs){ + struct optee_domain *ctx = current->domain->arch.tee; + switch ( get_user_reg(regs, 0) ) { case OPTEE_SMC_CALLS_COUNT: @@ -170,8 +483,6 @@ static bool optee_handle_call(struct cpu_user_regs *regs) case OPTEE_SMC_FUNCID_GET_OS_REVISION: case OPTEE_SMC_ENABLE_SHM_CACHE: case OPTEE_SMC_DISABLE_SHM_CACHE: - case OPTEE_SMC_CALL_WITH_ARG: - case OPTEE_SMC_CALL_RETURN_FROM_RPC: forward_call(regs); return true; case OPTEE_SMC_GET_SHM_CONFIG: @@ -180,6 +491,10 @@ static bool optee_handle_call(struct cpu_user_regs *regs) return true; case OPTEE_SMC_EXCHANGE_CAPABILITIES: return handle_exchange_capabilities(regs); + case OPTEE_SMC_CALL_WITH_ARG: + return handle_std_call(ctx, regs); + case OPTEE_SMC_CALL_RETURN_FROM_RPC: + return handle_rpc(ctx, regs); default: return false; } diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h index 175de44927..88b48697bd 100644 --- a/xen/include/asm-arm/domain.h +++ b/xen/include/asm-arm/domain.h @@ -97,6 +97,9 @@ struct arch_domain struct vpl011 vpl011; #endif+#ifdef CONFIG_TEE+ void *tee; +#endif Did you look whether there are any hole in arch_domain 
that could be re-used? } __cacheline_aligned;struct arch_vcpu Cheers, -- Julien Grall _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |