[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3 06/11] optee: add std call handling

To: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
From: Julien Grall <julien.grall@xxxxxxx>
Date: Wed, 16 Jan 2019 18:45:41 +0000
Cc: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx>, "tee-dev@xxxxxxxxxxxxxxxx" <tee-dev@xxxxxxxxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>
Delivery-date: Wed, 16 Jan 2019 18:46:22 +0000
List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Hi Volodymyr,

On 18/12/2018 21:11, Volodymyr Babchuk wrote:

From: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx>

The main way to communicate with OP-TEE is to issue standard SMCCC
call. "Standard" is a SMCCC term and it means that call can be
interrupted and OP-TEE can return control to NW before completing
the call.

In contrast with fast calls, where arguments and return values
are passed in registers, standard calls use shared memory. Register
pair a1,a2 holds 64-bit PA of command buffer, where all arguments
are stored and which is used to return data. OP-TEE internally
copies contents of this buffer into own secure memory before accessing
and validating any data in command buffer. This is done to make sure
that NW will not change contents of the validated parameters.

Mediator needs to do the same for number of reasons:

1. To make sure that guest will not change data after validation.
2. To translate IPAs to PAs in the command buffer (this is not done
    in this patch).
3. To hide translated address from guest, so it will not be able
    to do IPA->PA translation by misusing mediator.

During standard call OP-TEE can issue multiple "RPC returns", asking
NW to do some work for OP-TEE. NW then issues special call
OPTEE_SMC_CALL_RETURN_FROM_RPC to resume handling of the original call.
Thus, mediator needs to maintain context for original standard call
during multiple SMCCC calls.

Standard call is considered complete, when returned value is
not a RPC request.

Signed-off-by: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx>
---

  Changes from v2:
   - renamed struct domain_ctx to struct optee_domain
   - fixed coding style
   - Now I use access_guest_memory_by_ipa() instead of mappings
     to read command buffer
   - Added tracking for in flight calls, so guest can't resume
     the same call from two CPUs simultaniously

  xen/arch/arm/tee/optee.c     | 319 ++++++++++++++++++++++++++++++++++-
  xen/include/asm-arm/domain.h |   3 +
  2 files changed, 320 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/tee/optee.c b/xen/arch/arm/tee/optee.c
index 584241b03a..dc90e2ed8e 100644
--- a/xen/arch/arm/tee/optee.c
+++ b/xen/arch/arm/tee/optee.c
@@ -12,6 +12,8 @@
   */

#include <xen/device_tree.h>

+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
  #include <xen/sched.h>
  #include <asm/smccc.h>
  #include <asm/tee/tee.h>
@@ -22,11 +24,38 @@
  /* Client ID 0 is reserved for hypervisor itself */
  #define OPTEE_CLIENT_ID(domain) (domain->domain_id + 1)

+/*

+ * Maximal number of concurrent standard calls from one guest. This
+ * corresponds to OPTEE configuration option CFG_NUM_THREADS, because
+ * OP-TEE spawns a thread for every standard call.

Looking at OP-TEE, CFG_NUM_THREADS will vary depending on the platform. Is thereany way to probe that number of threads from Xen?

In any case, I think we should update the comment to reflect that this seems tobe the maximum CFG_NUM_THREADS supported by any upstream platform.

+ */
+#define MAX_STD_CALLS   16
+
  #define OPTEE_KNOWN_NSEC_CAPS OPTEE_SMC_NSEC_CAP_UNIPROCESSOR
  #define OPTEE_KNOWN_SEC_CAPS (OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM | \
                                OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM |  \
                                OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)

+/*

+ * Call context. OP-TEE can issue multiple RPC returns during one call.
+ * We need to preserve context during them.
+ */
+struct optee_std_call {
+    struct list_head list;
+    struct optee_msg_arg *xen_arg;
+    paddr_t guest_arg_ipa;
+    int optee_thread_id;
+    int rpc_op;
+    bool in_flight;
+};
+
+/* Domain context */
+struct optee_domain {
+    struct list_head call_list;
+    atomic_t call_count;
+    spinlock_t lock;
+};
+
  static bool optee_probe(void)
  {
      struct dt_device_node *node;
@@ -52,6 +81,11 @@ static bool optee_probe(void)
  static int optee_enable(struct domain *d)
  {
      struct arm_smccc_res resp;
+    struct optee_domain *ctx;
+
+    ctx = xzalloc(struct optee_domain);
+    if ( !ctx )
+        return -ENOMEM;

/*

       * Inform OP-TEE about a new guest.
@@ -69,9 +103,16 @@ static int optee_enable(struct domain *d)
      {
          gprintk(XENLOG_WARNING, "Unable to create OPTEE client: rc = 0x%X\n",
                  (uint32_t)resp.a0);
+        xfree(ctx);
          return -ENODEV;
      }

+ INIT_LIST_HEAD(&ctx->call_list);

+    atomic_set(&ctx->call_count, 0);
+    spin_lock_init(&ctx->lock);
+
+    d->arch.tee = ctx;
+
      return 0;
  }

@@ -111,9 +152,86 @@ static void set_return(struct cpu_user_regs *regs, uint32_t ret)

      set_user_reg(regs, 7, 0);
  }

+static struct optee_std_call *allocate_std_call(struct optee_domain *ctx)

+{
+    struct optee_std_call *call;
+    int count;
+
+    /* Make sure that guest does not execute more than MAX_STD_CALLS */
+    count = atomic_add_unless(&ctx->call_count, 1, MAX_STD_CALLS);
+    if ( count == MAX_STD_CALLS )
+        return NULL;
+
+    call = xzalloc(struct optee_std_call);
+    if ( !call )
+    {
+        atomic_dec(&ctx->call_count);
+        return NULL;
+    }
+
+    call->optee_thread_id = -1;
+    call->in_flight = true;
+
+    spin_lock(&ctx->lock);
+    list_add_tail(&call->list, &ctx->call_list);
+    spin_unlock(&ctx->lock);
+
+    return call;
+}
+
+static void free_std_call(struct optee_domain *ctx,
+                          struct optee_std_call *call)
+{
+    atomic_dec(&ctx->call_count);
+
+    spin_lock(&ctx->lock);
+    list_del(&call->list);
+    spin_unlock(&ctx->lock);
+
+    ASSERT(!call->in_flight);
+    xfree(call->xen_arg);
+    xfree(call);
+}
+
+static struct optee_std_call *get_std_call(struct optee_domain *ctx,
+                                           int thread_id)
+{
+    struct optee_std_call *call;
+
+    spin_lock(&ctx->lock);
+    list_for_each_entry( call, &ctx->call_list, list )
+    {
+        if ( call->optee_thread_id == thread_id )
+        {
+            if ( call->in_flight )
+            {
+                gprintk(XENLOG_WARNING, "Guest tries to execute call which is 
already in flight");
+                goto out;
+            }
+            call->in_flight = true;
+            spin_unlock(&ctx->lock);
+            return call;
+        }
+    }
+out:
+    spin_unlock(&ctx->lock);
+
+    return NULL;
+}
+
+static void put_std_call(struct optee_domain *ctx, struct optee_std_call *call)
+{
+    spin_lock(&ctx->lock);
+    ASSERT(call->in_flight);
+    call->in_flight = false;
+    spin_unlock(&ctx->lock);
+}
+
  static void optee_domain_destroy(struct domain *d)
  {
      struct arm_smccc_res resp;
+    struct optee_std_call *call, *call_tmp;
+    struct optee_domain *ctx = d->arch.tee;
        
      /* At this time all domain VCPUs should be stopped */

@@ -124,6 +242,199 @@ static void optee_domain_destroy(struct domain *d)

       */
      arm_smccc_smc(OPTEE_SMC_VM_DESTROYED, OPTEE_CLIENT_ID(d), 0, 0, 0, 0, 0, 
0,
                    &resp);

This function can be called without enable and should be idempotent. So I woudldcheck d->arch.tee before and...

+    ASSERT(!spin_is_locked(&ctx->lock));
+
+    list_for_each_entry_safe( call, call_tmp, &ctx->call_list, list )
+        free_std_call(ctx, call);
+
+    ASSERT(!atomic_read(&ctx->call_count));
+
+    xfree(d->arch.tee);


use XFREE here.

+}
+
+/*
+ * Copy command buffer into xen memory to:
+ * 1) Hide translated addresses from guest
+ * 2) Make sure that guest wouldn't change data in command buffer during call
+ */
+static bool copy_std_request(struct cpu_user_regs *regs,
+                             struct optee_std_call *call)
+{
+    paddr_t xen_addr;
+
+    call->guest_arg_ipa = (paddr_t)get_user_reg(regs, 1) << 32 |
+                            get_user_reg(regs, 2);


NIT: The indentation looks weird here.

+
+    /*
+     * Command buffer should start at page boundary.
+     * This is OP-TEE ABI requirement.
+     */
+    if ( call->guest_arg_ipa & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1) )
+        return false;
+
+    call->xen_arg = _xmalloc(OPTEE_MSG_NONCONTIG_PAGE_SIZE,
+                             OPTEE_MSG_NONCONTIG_PAGE_SIZE);
+    if ( !call->xen_arg )
+        return false;
+
+    BUILD_BUG_ON(OPTEE_MSG_NONCONTIG_PAGE_SIZE > PAGE_SIZE);

As you use _xmalloc, you should not need this. This is only necessary if you usealloc_xenheap_page.

I am wondering whether it is wise to allocate the memory from xenheap and notdomheap. While on Arm64 (for now) xenheap and domheap are the same, on Arm32they are different. The xenheap is at most 1GB, so pretty limited.

Furthermore, using domheap would have the advantage to allow in the futureaccounting the allocation to the guest and add more safety (there are discussionto make domheap per domain).

+
+    access_guest_memory_by_ipa(current->domain, call->guest_arg_ipa,
+                               call->xen_arg, OPTEE_MSG_NONCONTIG_PAGE_SIZE,
+                               false);

You need to check the return of access_guest_memory_by_ipa as this function canfail.

+
+    xen_addr = virt_to_maddr(call->xen_arg);
+
+    set_user_reg(regs, 1, xen_addr >> 32);
+    set_user_reg(regs, 2, xen_addr & 0xFFFFFFFF);
+
+    return true;
+}
+
+static void copy_std_request_back(struct optee_domain *ctx,
+                                  struct cpu_user_regs *regs,
+                                  struct optee_std_call *call)


Can you add a comment on top of the function explaining what it does?

+{
+    struct optee_msg_arg *guest_arg;
+    struct page_info *page;
+    unsigned int i;
+    uint32_t attr;
+
+    /* copy_std_request() validated IPA for us */

Not really, the guest is free to modify the stage-2 mapping on another vCPUwhile this is happening. I agree that the guest will shoot himself, but we atleast need to not have weird behavior happening.

In that case, I would check that the type is p2m_ram_rw as you don't want towrite in read-only or foreign mapping.

Also, as copy_std_request() and copy_std_request_back may not be called in thesame "thread" it would be useful if you specify a bit more the interaction.

+    page = get_page_from_gfn(current->domain, 
paddr_to_pfn(call->guest_arg_ipa),

Please use gfn_x(gaddr_to_gfn(...)) to clarify this is a gfn. The gfn_x will beunnecessary soon with a cleanup that is currently under review.

+                             NULL, P2M_ALLOC);
+    if ( !page )
+        return;
+
+    guest_arg = map_domain_page(page_to_mfn(page));

So here you assume that PAGE_SIZE == OPTEE_MSG_NONCONTIG_PAGE_SIZE. Can you adda BUILD_BUG_ON just above (with a comment) so we don't get some nasty surprisewith 64K support.


Also, you should be able to use __map_domain_page(page) here.

+
+    guest_arg->ret = call->xen_arg->ret;
+    guest_arg->ret_origin = call->xen_arg->ret_origin;
+    guest_arg->session = call->xen_arg->session;


NIT: newline here please.

+    for ( i = 0; i < call->xen_arg->num_params; i++ )
+    {
+        attr = call->xen_arg->params[i].attr;
+
+        switch ( attr & OPTEE_MSG_ATTR_TYPE_MASK )
+        {
+        case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
+        case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
+            guest_arg->params[i].u.tmem.size =
+                call->xen_arg->params[i].u.tmem.size;
+            continue;
+        case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
+        case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
+            guest_arg->params[i].u.value.a =
+                call->xen_arg->params[i].u.value.a;
+            guest_arg->params[i].u.value.b =
+                call->xen_arg->params[i].u.value.b;
+            continue;
+        case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT:
+        case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT:
+            guest_arg->params[i].u.rmem.size =
+                call->xen_arg->params[i].u.rmem.size;
+            continue;
+        case OPTEE_MSG_ATTR_TYPE_NONE:
+        case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT:
+        case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
+            continue;
+        }
+    }
+
+    unmap_domain_page(guest_arg);
+    put_page(page);
+}
+
+static void execute_std_call(struct optee_domain *ctx,
+                             struct cpu_user_regs *regs,
+                             struct optee_std_call *call)
+{
+    register_t optee_ret;
+
+    forward_call(regs);
+
+    optee_ret = get_user_reg(regs, 0);
+    if ( OPTEE_SMC_RETURN_IS_RPC(optee_ret) )
+    {
+        call->optee_thread_id = get_user_reg(regs, 3);
+        call->rpc_op = OPTEE_SMC_RETURN_GET_RPC_FUNC(optee_ret);
+        put_std_call(ctx, call);
+        return;
+    }
+
+    copy_std_request_back(ctx, regs, call);
+
+    put_std_call(ctx, call);
+    free_std_call(ctx, call);
+}

Most of the code in this patch is self-explaining, which is quite nice :).However, I think this function would require explaining a bit the logic. Forinstance in which case the call will be freed.

+
+static bool handle_std_call(struct optee_domain *ctx,
+                            struct cpu_user_regs *regs)
+{
+    struct optee_std_call *call = allocate_std_call(ctx);
+
+    if ( !call )
+        return false;
+
+    if ( !copy_std_request(regs, call) )
+        goto err;
+
+    /* Now we can safely examine contents of command buffer */
+    if ( OPTEE_MSG_GET_ARG_SIZE(call->xen_arg->num_params) >
+         OPTEE_MSG_NONCONTIG_PAGE_SIZE )
+        goto err;
+
+    switch ( call->xen_arg->cmd )
+    {
+    case OPTEE_MSG_CMD_OPEN_SESSION:
+    case OPTEE_MSG_CMD_CLOSE_SESSION:
+    case OPTEE_MSG_CMD_INVOKE_COMMAND:
+    case OPTEE_MSG_CMD_CANCEL:
+    case OPTEE_MSG_CMD_REGISTER_SHM:
+    case OPTEE_MSG_CMD_UNREGISTER_SHM:
+        break;
+    default:
+        goto err;
+    }
+
+    execute_std_call(ctx, regs, call);
+
+    return true;

This function is a bit odd to read. I think it would be more clear if you movethis code before the break.

+
+err:
+    put_std_call(ctx, call);
+    free_std_call(ctx, call);
+
+    return false;
+}
+
+static bool handle_rpc(struct optee_domain *ctx, struct cpu_user_regs *regs)
+{
+    struct optee_std_call *call;
+    int optee_thread_id = get_user_reg(regs, 3);
+
+    call = get_std_call(ctx, optee_thread_id);
+
+    if ( !call )
+        return false;
+
+    switch ( call->rpc_op )
+    {
+    case OPTEE_SMC_RPC_FUNC_ALLOC:
+        /* TODO: Add handling */
+        break;
+    case OPTEE_SMC_RPC_FUNC_FREE:
+        /* TODO: Add handling */
+        break;
+    case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
+        break;
+    case OPTEE_SMC_RPC_FUNC_CMD:
+        /* TODO: Add handling */
+        break;
+    }
+
+    execute_std_call(ctx, regs, call);
+    return true;
  }

static bool handle_exchange_capabilities(struct cpu_user_regs *regs)

@@ -161,6 +472,8 @@ static bool handle_exchange_capabilities(struct 
cpu_user_regs *regs)

static bool optee_handle_call(struct cpu_user_regs *regs)

  {
+    struct optee_domain *ctx = current->domain->arch.tee;
+
      switch ( get_user_reg(regs, 0) )
      {
      case OPTEE_SMC_CALLS_COUNT:
@@ -170,8 +483,6 @@ static bool optee_handle_call(struct cpu_user_regs *regs)
      case OPTEE_SMC_FUNCID_GET_OS_REVISION:
      case OPTEE_SMC_ENABLE_SHM_CACHE:
      case OPTEE_SMC_DISABLE_SHM_CACHE:
-    case OPTEE_SMC_CALL_WITH_ARG:
-    case OPTEE_SMC_CALL_RETURN_FROM_RPC:
          forward_call(regs);
          return true;
      case OPTEE_SMC_GET_SHM_CONFIG:
@@ -180,6 +491,10 @@ static bool optee_handle_call(struct cpu_user_regs *regs)
          return true;
      case OPTEE_SMC_EXCHANGE_CAPABILITIES:
          return handle_exchange_capabilities(regs);
+    case OPTEE_SMC_CALL_WITH_ARG:
+        return handle_std_call(ctx, regs);
+    case OPTEE_SMC_CALL_RETURN_FROM_RPC:
+        return handle_rpc(ctx, regs);
      default:
          return false;
      }
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 175de44927..88b48697bd 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -97,6 +97,9 @@ struct arch_domain
      struct vpl011 vpl011;
  #endif

+#ifdef CONFIG_TEE

+    void *tee;
+#endif


Did you look whether there are any hole in arch_domain that could be re-used?

  }  __cacheline_aligned;
struct arch_vcpu


Cheers,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

Follow-Ups:
- Re: [Xen-devel] [PATCH v3 06/11] optee: add std call handling
  - From: Volodymyr Babchuk

Prev by Date: Re: [Xen-devel] [PATCH v3] xen: Fix x86 sched_clock() interface for xen
Next by Date: Re: [Xen-devel] [PATCH v3 1/7] docs: Improve documentation for dom0= and dom0-iommu=
Previous by thread: [Xen-devel] Fail to boot Dom0 on Xen Arm64 after "dma-mapping: bypass indirect calls for dma-direct"
Next by thread: Re: [Xen-devel] [PATCH v3 06/11] optee: add std call handling
Index(es):
- Date
- Thread

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.