[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [UNIKRAFT PATCH v4 4/7] plat/arm: Support fp/simd save/restore during context switch



Hello Justin,

Please find the comment inline

Thanks & Regards
Sharan

On 4/7/20 9:13 AM, Jia He wrote:
Floating point feature is useful for some applications. We should
save/restore fp registers during context switch.

Signed-off-by: Jia He <justin.he@xxxxxxx>
---
  plat/common/arm/fp_arm64.c          | 82 +++++++++++++++++++++++++++++
  plat/common/arm/thread_start64.S    |  1 +
  plat/common/include/arm/arm64/cpu.h | 46 +++++++++++++++-
  plat/kvm/Makefile.uk                |  3 +-
  4 files changed, 129 insertions(+), 3 deletions(-)
  create mode 100644 plat/common/arm/fp_arm64.c

diff --git a/plat/common/arm/fp_arm64.c b/plat/common/arm/fp_arm64.c
new file mode 100644
index 0000000..d624063
--- /dev/null
+++ b/plat/common/arm/fp_arm64.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: ISC */
+/*
+ * Authors: Jia He <justin.he@xxxxxxx>
+ *
+ * Copyright (c) 2020 Arm Ltd.
+ *
+ * Permission to use, copy, modify, and/or distribute this software
+ * for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear
+ * in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <kvm/config.h>
 Do we need the kvm/config.h here shouldn't you provide uk/config.h?

+#include <uk/plat/common/cpu.h>
+
+#ifdef CONFIG_FPSIMD
If we use this restrict the compilation in Makefile.uk using CONFIG_FP_SIMD
why do we use it here as well?

+void fpsimd_save_state(uintptr_t ptr)
+{
+       __u32 fpcr, fpsr;
+
+       __asm__ __volatile__(
+               "mrs       %0, fpcr\n"
+               "mrs       %1, fpsr\n"
+               "stp       q0,  q1,  [%2, #16 *  0]\n"
+               "stp       q2,  q3,  [%2, #16 *  2]\n"
+               "stp       q4,  q5,  [%2, #16 *  4]\n"
+               "stp       q6,  q7,  [%2, #16 *  6]\n"
+               "stp       q8,  q9,  [%2, #16 *  8]\n"
+               "stp       q10, q11, [%2, #16 * 10]\n"
+               "stp       q12, q13, [%2, #16 * 12]\n"
+               "stp       q14, q15, [%2, #16 * 14]\n"
+               "stp       q16, q17, [%2, #16 * 16]\n"
+               "stp       q18, q19, [%2, #16 * 18]\n"
+               "stp       q20, q21, [%2, #16 * 20]\n"
+               "stp       q22, q23, [%2, #16 * 22]\n"
+               "stp       q24, q25, [%2, #16 * 24]\n"
+               "stp       q26, q27, [%2, #16 * 26]\n"
+               "stp       q28, q29, [%2, #16 * 28]\n"
+               "stp       q30, q31, [%2, #16 * 30]\n"
+               : "=&r"(fpcr), "=&r"(fpsr) : "r"(ptr));
+
+       ((struct fpsimd_state *)ptr)->fpcr = fpcr;
+       ((struct fpsimd_state *)ptr)->fpsr = fpsr;
+}
+
+void fpsimd_restore_state(uintptr_t ptr)
+{
+       __u32 fpcr, fpsr;
+
+       fpcr = ((struct fpsimd_state *)ptr)->fpcr;
+       fpsr = ((struct fpsimd_state *)ptr)->fpsr;
+
+       __asm__ __volatile__(
+               "ldp       q0,  q1,  [%2, #16 *  0]\n"
+               "ldp       q2,  q3,  [%2, #16 *  2]\n"
+               "ldp       q4,  q5,  [%2, #16 *  4]\n"
+               "ldp       q6,  q7,  [%2, #16 *  6]\n"
+               "ldp       q8,  q9,  [%2, #16 *  8]\n"
+               "ldp       q10, q11, [%2, #16 * 10]\n"
+               "ldp       q12, q13, [%2, #16 * 12]\n"
+               "ldp       q14, q15, [%2, #16 * 14]\n"
+               "ldp       q16, q17, [%2, #16 * 16]\n"
+               "ldp       q18, q19, [%2, #16 * 18]\n"
+               "ldp       q20, q21, [%2, #16 * 20]\n"
+               "ldp       q22, q23, [%2, #16 * 22]\n"
+               "ldp       q24, q25, [%2, #16 * 24]\n"
+               "ldp       q26, q27, [%2, #16 * 26]\n"
+               "ldp       q28, q29, [%2, #16 * 28]\n"
+               "ldp       q30, q31, [%2, #16 * 30]\n"
+               "msr       fpcr, %0\n"
+               "msr       fpsr, %1\n"
+               : : "r"(fpcr), "r"(fpsr), "r"(ptr));
+}
+#endif
diff --git a/plat/common/arm/thread_start64.S b/plat/common/arm/thread_start64.S
index 9a80f62..d79fd19 100644
--- a/plat/common/arm/thread_start64.S
+++ b/plat/common/arm/thread_start64.S
@@ -34,6 +34,7 @@
  #include <uk/plat/common/sw_ctx.h>
  #include <uk/arch/lcpu.h>
  #include <uk/asm.h>
+#include <uk/config.h>
Why do we need this header here?
/*
   * Thread stack memory layout:
diff --git a/plat/common/include/arm/arm64/cpu.h 
b/plat/common/include/arm/arm64/cpu.h
index 122727a..93ad13b 100644
--- a/plat/common/include/arm/arm64/cpu.h
+++ b/plat/common/include/arm/arm64/cpu.h
@@ -116,6 +116,47 @@ void halt(void);
  void reset(void);
  void system_off(void);
+#ifdef CONFIG_FPSIMD
+struct fpsimd_state {
+       __u64           regs[32 * 2];
+       __u32           fpsr;
+       __u32           fpcr;
+};
+
+extern void fpsimd_save_state(uintptr_t ptr);
+extern void fpsimd_restore_state(uintptr_t ptr);
+
+static inline void save_extregs(struct sw_ctx *ctx)
+{
+       fpsimd_save_state(ctx->extregs);
+}
+
+static inline void restore_extregs(struct sw_ctx *ctx)
+{
+       fpsimd_restore_state(ctx->extregs);
+}
+
+static inline struct sw_ctx *arch_alloc_sw_ctx(struct uk_alloc *allocator)
+{
+       struct sw_ctx *ctx;
+
+       ctx = (struct sw_ctx *)uk_malloc(allocator,
+                       sizeof(struct sw_ctx) + sizeof(struct fpsimd_state));
+       if (ctx)
+               ctx->extregs = (uintptr_t)((void *)ctx + sizeof(struct sw_ctx));
+
+       uk_pr_debug("Allocating %lu + %lu bytes for sw ctx at %p, extregs at 
%p\n",
+                       sizeof(struct sw_ctx), sizeof(struct fpsimd_state),
+                       ctx, (void *)ctx->extregs);
+
+       return ctx;
+}
+
+static inline void arch_init_extregs(struct sw_ctx *ctx __unused)
+{
+}
+
+#else /* !CONFIG_FPSIMD */
  static inline void save_extregs(struct sw_ctx *ctx __unused)
  {
  }
@@ -128,9 +169,9 @@ static inline struct sw_ctx *arch_alloc_sw_ctx(struct 
uk_alloc *allocator)
  {
        struct sw_ctx *ctx;
- ctx = uk_malloc(allocator, sizeof(struct sw_ctx));
+       ctx = (struct sw_ctx *)uk_malloc(allocator, sizeof(struct sw_ctx));
        uk_pr_debug("Allocating %lu bytes for sw ctx at %p\n",
-                  sizeof(struct sw_ctx), ctx);
+               sizeof(struct sw_ctx), ctx);
return ctx;
  }
@@ -140,4 +181,5 @@ static inline void arch_init_extregs(struct sw_ctx *ctx)
        ctx->extregs = (uintptr_t)ctx + sizeof(struct sw_ctx);
  }
+#endif /* CONFIG_FPSIMD */
  #endif /* __PLAT_COMMON_ARM64_CPU_H__ */
diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk
index a6d6f5e..9354d64 100644
--- a/plat/kvm/Makefile.uk
+++ b/plat/kvm/Makefile.uk
@@ -82,7 +82,8 @@ LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/cache64.S|co
  LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/psci_arm64.S|common
  LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/time.c|common
  LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/generic_timer.c|common
-LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/traps.c|common
+LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(UK_PLAT_COMMON_BASE)/arm/traps.c|isr
+LIBKVMPLAT_SRCS-$(CONFIG_FPSIMD)      += 
$(UK_PLAT_COMMON_BASE)/arm/fp_arm64.c|isr
  ifeq ($(CONFIG_HAVE_SCHED),y)
  LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/thread_start64.S|common
  LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/thread.c|common



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.