[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Minios-devel] [UNIKRAFT PATCH v4 4/7] plat/arm: Support fp/simd save/restore during context switch



The floating point feature is useful for some applications, so we
should save/restore the FP/SIMD registers during context switch.

Signed-off-by: Jia He <justin.he@xxxxxxx>
---
 plat/common/arm/fp_arm64.c          | 82 +++++++++++++++++++++++++++++
 plat/common/arm/thread_start64.S    |  1 +
 plat/common/include/arm/arm64/cpu.h | 46 +++++++++++++++-
 plat/kvm/Makefile.uk                |  3 +-
 4 files changed, 129 insertions(+), 3 deletions(-)
 create mode 100644 plat/common/arm/fp_arm64.c

diff --git a/plat/common/arm/fp_arm64.c b/plat/common/arm/fp_arm64.c
new file mode 100644
index 0000000..d624063
--- /dev/null
+++ b/plat/common/arm/fp_arm64.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: ISC */
+/*
+ * Authors: Jia He <justin.he@xxxxxxx>
+ *
+ * Copyright (c) 2020 Arm Ltd.
+ *
+ * Permission to use, copy, modify, and/or distribute this software
+ * for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear
+ * in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <kvm/config.h>
+#include <uk/plat/common/cpu.h>
+
+#ifdef CONFIG_FPSIMD
+void fpsimd_save_state(uintptr_t ptr)
+{
+       __u32 fpcr, fpsr;
+
+       __asm__ __volatile__(
+               "mrs    %0, fpcr\n"
+               "mrs    %1, fpsr\n"
+               "stp    q0,  q1,  [%2, #16 *  0]\n"
+               "stp    q2,  q3,  [%2, #16 *  2]\n"
+               "stp    q4,  q5,  [%2, #16 *  4]\n"
+               "stp    q6,  q7,  [%2, #16 *  6]\n"
+               "stp    q8,  q9,  [%2, #16 *  8]\n"
+               "stp    q10, q11, [%2, #16 * 10]\n"
+               "stp    q12, q13, [%2, #16 * 12]\n"
+               "stp    q14, q15, [%2, #16 * 14]\n"
+               "stp    q16, q17, [%2, #16 * 16]\n"
+               "stp    q18, q19, [%2, #16 * 18]\n"
+               "stp    q20, q21, [%2, #16 * 20]\n"
+               "stp    q22, q23, [%2, #16 * 22]\n"
+               "stp    q24, q25, [%2, #16 * 24]\n"
+               "stp    q26, q27, [%2, #16 * 26]\n"
+               "stp    q28, q29, [%2, #16 * 28]\n"
+               "stp    q30, q31, [%2, #16 * 30]\n"
+               : "=&r"(fpcr), "=&r"(fpsr) : "r"(ptr));
+
+       ((struct fpsimd_state *)ptr)->fpcr = fpcr;
+       ((struct fpsimd_state *)ptr)->fpsr = fpsr;
+}
+
+void fpsimd_restore_state(uintptr_t ptr)
+{
+       __u32 fpcr, fpsr;
+
+       fpcr = ((struct fpsimd_state *)ptr)->fpcr;
+       fpsr = ((struct fpsimd_state *)ptr)->fpsr;
+
+       __asm__ __volatile__(
+               "ldp    q0,  q1,  [%2, #16 *  0]\n"
+               "ldp    q2,  q3,  [%2, #16 *  2]\n"
+               "ldp    q4,  q5,  [%2, #16 *  4]\n"
+               "ldp    q6,  q7,  [%2, #16 *  6]\n"
+               "ldp    q8,  q9,  [%2, #16 *  8]\n"
+               "ldp    q10, q11, [%2, #16 * 10]\n"
+               "ldp    q12, q13, [%2, #16 * 12]\n"
+               "ldp    q14, q15, [%2, #16 * 14]\n"
+               "ldp    q16, q17, [%2, #16 * 16]\n"
+               "ldp    q18, q19, [%2, #16 * 18]\n"
+               "ldp    q20, q21, [%2, #16 * 20]\n"
+               "ldp    q22, q23, [%2, #16 * 22]\n"
+               "ldp    q24, q25, [%2, #16 * 24]\n"
+               "ldp    q26, q27, [%2, #16 * 26]\n"
+               "ldp    q28, q29, [%2, #16 * 28]\n"
+               "ldp    q30, q31, [%2, #16 * 30]\n"
+               "msr    fpcr, %0\n"
+               "msr    fpsr, %1\n"
+               : : "r"(fpcr), "r"(fpsr), "r"(ptr));
+}
+#endif
diff --git a/plat/common/arm/thread_start64.S b/plat/common/arm/thread_start64.S
index 9a80f62..d79fd19 100644
--- a/plat/common/arm/thread_start64.S
+++ b/plat/common/arm/thread_start64.S
@@ -34,6 +34,7 @@
 #include <uk/plat/common/sw_ctx.h>
 #include <uk/arch/lcpu.h>
 #include <uk/asm.h>
+#include <uk/config.h>
 
 /*
  * Thread stack memory layout:
diff --git a/plat/common/include/arm/arm64/cpu.h 
b/plat/common/include/arm/arm64/cpu.h
index 122727a..93ad13b 100644
--- a/plat/common/include/arm/arm64/cpu.h
+++ b/plat/common/include/arm/arm64/cpu.h
@@ -116,6 +116,47 @@ void halt(void);
 void reset(void);
 void system_off(void);
 
+#ifdef CONFIG_FPSIMD
+struct fpsimd_state {
+       __u64           regs[32 * 2];
+       __u32           fpsr;
+       __u32           fpcr;
+};
+
+extern void fpsimd_save_state(uintptr_t ptr);
+extern void fpsimd_restore_state(uintptr_t ptr);
+
+static inline void save_extregs(struct sw_ctx *ctx)
+{
+       fpsimd_save_state(ctx->extregs);
+}
+
+static inline void restore_extregs(struct sw_ctx *ctx)
+{
+       fpsimd_restore_state(ctx->extregs);
+}
+
+static inline struct sw_ctx *arch_alloc_sw_ctx(struct uk_alloc *allocator)
+{
+       struct sw_ctx *ctx;
+
+       ctx = (struct sw_ctx *)uk_malloc(allocator,
+                       sizeof(struct sw_ctx) + sizeof(struct fpsimd_state));
+       if (ctx)
+               ctx->extregs = (uintptr_t)((void *)ctx + sizeof(struct sw_ctx));
+
+       uk_pr_debug("Allocating %lu + %lu bytes for sw ctx at %p, extregs at 
%p\n",
+                       sizeof(struct sw_ctx), sizeof(struct fpsimd_state),
+                       ctx, (void *)ctx->extregs);
+
+       return ctx;
+}
+
+static inline void arch_init_extregs(struct sw_ctx *ctx __unused)
+{
+}
+
+#else /* !CONFIG_FPSIMD */
 static inline void save_extregs(struct sw_ctx *ctx __unused)
 {
 }
@@ -128,9 +169,9 @@ static inline struct sw_ctx *arch_alloc_sw_ctx(struct 
uk_alloc *allocator)
 {
        struct sw_ctx *ctx;
 
-       ctx = uk_malloc(allocator, sizeof(struct sw_ctx));
+       ctx = (struct sw_ctx *)uk_malloc(allocator, sizeof(struct sw_ctx));
        uk_pr_debug("Allocating %lu bytes for sw ctx at %p\n",
-                  sizeof(struct sw_ctx), ctx);
+               sizeof(struct sw_ctx), ctx);
 
        return ctx;
 }
@@ -140,4 +181,5 @@ static inline void arch_init_extregs(struct sw_ctx *ctx)
        ctx->extregs = (uintptr_t)ctx + sizeof(struct sw_ctx);
 }
 
+#endif /* CONFIG_FPSIMD */
 #endif /* __PLAT_COMMON_ARM64_CPU_H__ */
diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk
index a6d6f5e..9354d64 100644
--- a/plat/kvm/Makefile.uk
+++ b/plat/kvm/Makefile.uk
@@ -82,7 +82,8 @@ LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/cache64.S|co
 LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/psci_arm64.S|common
 LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/time.c|common
 LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/generic_timer.c|common
-LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/traps.c|common
+LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(UK_PLAT_COMMON_BASE)/arm/traps.c|isr
+LIBKVMPLAT_SRCS-$(CONFIG_FPSIMD)      += 
$(UK_PLAT_COMMON_BASE)/arm/fp_arm64.c|isr
 ifeq ($(CONFIG_HAVE_SCHED),y)
 LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += 
$(UK_PLAT_COMMON_BASE)/arm/thread_start64.S|common
 LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(UK_PLAT_COMMON_BASE)/thread.c|common
-- 
2.17.1


_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.