[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [UNIKRAFT PATCHv5 31/46] plat/kvm: Create page tables for Arm64





On 12.09.2018 10:03, Wei Chen (Arm Technology China) wrote:
-----Original Message-----
From: Simon Kuenzer <simon.kuenzer@xxxxxxxxx>
Sent: 2018年9月12日 15:58
To: Wei Chen (Arm Technology China) <Wei.Chen@xxxxxxx>; minios-
devel@xxxxxxxxxxxxxxxxxxxx
Cc: Kaly Xin (Arm Technology China) <Kaly.Xin@xxxxxxx>; nd <nd@xxxxxxx>
Subject: Re: [Minios-devel] [UNIKRAFT PATCHv5 31/46] plat/kvm: Create page
tables for Arm64

Hey,

here I have to trust you. ;-) The only thing I saw is that '#ifdef SMP'
could be replaced with "#if UKPLAT_LCPU_MULTICORE". We anyway do not
have SMP support yet.

Did you use or port this code from somewhere else?


Yes, it seems I have used some code from FreeBSD (#ifdef SMP)

Can you add a note somewhere stating where in FreeBSD you got the code from: source path, repository URL, version, or maybe commit ID. We need to make sure that authorship is fully traceable. Please also check this for the other files that you ported from FreeBSD.

Actually, I prefer to remove it until we want to support SMP : )

As you prefer. I would keep it since we want to have SMP support at some point.



Thanks,

Simon

On 10.08.2018 09:08, Wei Chen wrote:
From: Wei Chen <Wei.Chen@xxxxxxx>

We've designed a direct mapping of physical memory to virtual
memory. In order to improve security, we use the MMU to control
the attributes of the image sections. The minimal granularity is 4K.
If we mapped the entire physical address space with 4K pages, the
page tables would be very big. So, we only use 4K pages to map the
area where the image has been placed, and use 1G or 2M blocks to map
the other memory areas. In this case, the page tables only
occupy 20KB of memory.

Signed-off-by: Wei Chen <Wei.Chen@xxxxxxx>
---
   plat/common/include/arm/arm64/cpu_defs.h | 116 +++++++
   plat/kvm/Makefile.uk                     |   1 +
   plat/kvm/arm/pagetable.S                 | 382 +++++++++++++++++++++++
   3 files changed, 499 insertions(+)
   create mode 100644 plat/kvm/arm/pagetable.S

diff --git a/plat/common/include/arm/arm64/cpu_defs.h
b/plat/common/include/arm/arm64/cpu_defs.h
index 56082e3..b8f207f 100644
--- a/plat/common/include/arm/arm64/cpu_defs.h
+++ b/plat/common/include/arm/arm64/cpu_defs.h
@@ -57,4 +57,120 @@
   #define DCCISW                       0x1
   #define DCCSW                        0x2

+/*
+ * Memory types. These values are the indexes of the attributes
+ * that are defined in MAIR_EL1. They are meant to be passed to
+ * ATTR_IDX() when a block or page descriptor is built.
+ */
+#define DEVICE_nGnRnE  0
+#define DEVICE_nGnRE   1
+#define DEVICE_GRE     2
+#define NORMAL_NC      3
+#define NORMAL_WT      4
+#define NORMAL_WB      5
+
+/*
+ * Definitions for Block and Page descriptor attributes
+ *
+ * For every translation level x:
+ *   Lx_SHIFT  - position of the lowest virtual address bit that indexes
+ *               the level-x table
+ *   Lx_SIZE   - amount of address space covered by one level-x entry
+ *   Lx_OFFSET - mask for the offset inside one level-x entry
+ *   Lx_INVAL / Lx_BLOCK / Lx_TABLE / L3_PAGE - descriptor type (bits 1:0)
+ *
+ * The level 0 size does not fit in 32 bits. It is built with _AC(), like
+ * the other 64-bit constants in this header, instead of a C-only "ul"
+ * suffix, because this header is also included from assembly sources.
+ */
+/* Level 0 table, 512GiB per entry */
+#define L0_SHIFT       39
+#define L0_SIZE        (_AC(1, UL) << L0_SHIFT)
+#define L0_OFFSET      (L0_SIZE - _AC(1, UL))
+#define L0_INVAL       0x0 /* An invalid address */
+       /* 0x1 Level 0 doesn't support block translation */
+       /* 0x2 also marks an invalid address */
+#define L0_TABLE       0x3 /* A next-level table */
+
+/* Level 1 table, 1GiB per entry */
+#define L1_SHIFT       30
+#define L1_SIZE        (1 << L1_SHIFT)
+#define L1_OFFSET      (L1_SIZE - 1)
+#define L1_INVAL       L0_INVAL
+#define L1_BLOCK       0x1
+#define L1_TABLE       L0_TABLE
+
+/* Level 2 table, 2MiB per entry */
+#define L2_SHIFT       21
+#define L2_SIZE        (1 << L2_SHIFT)
+#define L2_OFFSET      (L2_SIZE - 1)
+#define L2_INVAL       L1_INVAL
+#define L2_BLOCK       L1_BLOCK
+#define L2_TABLE       L1_TABLE
+
+/* Bits 47:21, i.e. the 2MiB-aligned output address of an L2 block entry */
+#define L2_BLOCK_MASK  _AC(0xffffffe00000, UL)
+
+/* Level 3 table, 4KiB per entry */
+#define L3_SHIFT       12
+#define L3_SIZE        (1 << L3_SHIFT)
+#define L3_OFFSET      (L3_SIZE - 1)
+#define L3_INVAL       0x0
+       /* 0x1 is reserved */
+       /* 0x2 also marks an invalid address */
+#define L3_PAGE        0x3
+
+/* Number of entries in the level 0 table and the matching index mask */
+#define L0_ENTRIES_SHIFT 9
+#define L0_ENTRIES     (1 << L0_ENTRIES_SHIFT)
+#define L0_ADDR_MASK   (L0_ENTRIES - 1)
+
+/* Number of entries in a level 1, 2 or 3 table and the matching index mask */
+#define Ln_ENTRIES_SHIFT 9
+#define Ln_ENTRIES     (1 << Ln_ENTRIES_SHIFT)
+#define Ln_ADDR_MASK   (Ln_ENTRIES - 1)
+#define Ln_TABLE_MASK  ((1 << 12) - 1) /* Low 12 bits of a descriptor */
+#define Ln_TABLE       0x3
+#define Ln_BLOCK       0x1
+
+/*
+ * Hardware page table definitions.
+ *
+ * ATTR_MASK_H and ATTR_MASK_L select the upper (bits 63:52) and lower
+ * (bits 11:0) attribute fields of a descriptor; the bits in between are
+ * left for the output address. The remaining macros name single bits or
+ * small fields inside those two ranges.
+ */
+/* TODO: Add the upper attributes */
+#define ATTR_MASK_H    _AC(0xfff0000000000000, UL)
+#define ATTR_MASK_L    _AC(0x0000000000000fff, UL)
+#define ATTR_MASK      (ATTR_MASK_H | ATTR_MASK_L)
+/* Bits 58:55 are reserved for software */
+#define ATTR_SW_MANAGED        (_AC(1, UL) << 56)
+#define ATTR_SW_WIRED  (_AC(1, UL) << 55)
+#define ATTR_UXN       (_AC(1, UL) << 54)
+#define ATTR_PXN       (_AC(1, UL) << 53)
+#define ATTR_XN                (ATTR_PXN | ATTR_UXN) /* Both execute-never bits */
+#define ATTR_CONTIGUOUS        (_AC(1, UL) << 52)
+#define ATTR_DBM       (_AC(1, UL) << 51)
+#define ATTR_nG                (1 << 11)
+#define ATTR_AF                (1 << 10)
+#define ATTR_SH(x)     ((x) << 8) /* x is one of the ATTR_SH_* values below */
+#define ATTR_SH_MASK   ATTR_SH(3)
+#define ATTR_SH_NS     0               /* Non-shareable */
+#define ATTR_SH_OS     2               /* Outer-shareable */
+#define ATTR_SH_IS     3               /* Inner-shareable */
+#define ATTR_AP_RW_BIT (1 << 7) /* Same bit as ATTR_AP(ATTR_AP_RO) */
+#define ATTR_AP(x)     ((x) << 6) /* x is built from the ATTR_AP_* values below */
+#define ATTR_AP_MASK   ATTR_AP(3)
+#define ATTR_AP_RW     (0 << 1)
+#define ATTR_AP_RO     (1 << 1)
+#define ATTR_AP_USER   (1 << 0)
+#define ATTR_NS                (1 << 5)
+#define ATTR_IDX(x)    ((x) << 2) /* x is a memory type index, 0 - 7 */
+#define ATTR_IDX_MASK  (7 << 2)
+
+#define ATTR_DEFAULT   (ATTR_AF | ATTR_SH(ATTR_SH_IS))
+
+#define ATTR_DESCR_MASK        3 /* Descriptor type field, bits 1:0 */
+
+/*
+ * Define the attributes of pagetable descriptors
+ *
+ * Every variant starts from SECT_ATTR_DEFAULT (a block descriptor with
+ * ATTR_DEFAULT) and adds:
+ *   SECT_ATTR_NORMAL       - ATTR_XN, memory type NORMAL_WB
+ *   SECT_ATTR_NORMAL_RO    - ATTR_XN, ATTR_AP_RW_BIT, memory type NORMAL_WB
+ *   SECT_ATTR_NORMAL_EXEC  - ATTR_UXN only (ATTR_PXN stays clear),
+ *                            ATTR_AP_RW_BIT, memory type NORMAL_WB
+ *   SECT_ATTR_DEVICE_nGnRE - ATTR_XN, memory type DEVICE_nGnRnE
+ *
+ * NOTE(review): SECT_ATTR_DEVICE_nGnRE selects the DEVICE_nGnRnE index,
+ * not DEVICE_nGnRE as its name suggests. Confirm against the MAIR_EL1
+ * setup which of the two is intended.
+ */
+#define SECT_ATTR_DEFAULT      \
+               (Ln_BLOCK | ATTR_DEFAULT)
+#define SECT_ATTR_NORMAL       \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_NORMAL_RO    \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_AP_RW_BIT | ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_NORMAL_EXEC  \
+               (SECT_ATTR_DEFAULT | ATTR_UXN | \
+               ATTR_AP_RW_BIT | ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_DEVICE_nGnRE \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_IDX(DEVICE_nGnRnE))
+
   #endif /* __CPU_ARM_64_DEFS_H__ */
diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk
index a43cdbe..a54bddf 100644
--- a/plat/kvm/Makefile.uk
+++ b/plat/kvm/Makefile.uk
@@ -58,6 +58,7 @@ LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) +=
$(UK_PLAT_COMMON_BASE)/arm/time.c|commo
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) +=
$(UK_PLAT_COMMON_BASE)/arm/traps.c|common
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBKVMPLAT_BASE)/arm/entry64.S
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) +=
$(LIBKVMPLAT_BASE)/arm/exceptions.S
+LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBKVMPLAT_BASE)/arm/pagetable.S
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBKVMPLAT_BASE)/arm/setup.c
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBKVMPLAT_BASE)/arm/lcpu.c
   LIBKVMPLAT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBKVMPLAT_BASE)/arm/intctrl.c
diff --git a/plat/kvm/arm/pagetable.S b/plat/kvm/arm/pagetable.S
new file mode 100644
index 0000000..4c8419f
--- /dev/null
+++ b/plat/kvm/arm/pagetable.S
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*-
+ * Copyright (c) 2012-2014 Andrew Turner. All rights reserved.
+ * Copyright (c) 2018 Arm Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <uk/arch/limits.h>
+#include <asm.h>
+#include <mm.h>
+#include <arm/cpu_defs.h>
+/*
+ * The following is the Memory Layout of AArch64 Virtual Machine
+ * | 0 - 0x3FFFFFFF |   0x40000000 - 0x7FFFFFFFFF           |  512GB - 1TB   |
+ * --------------------------------------------------------------------------
+ * |  DEVICES MMIO  | DTB|TEXT|DATA|BSS|PAGETABLE|BOOTSTACK | PCI-e High Mem |
+ * --------------------------------------------------------------------------
+ *
+ * The constants below describe these three regions. 0x40000000 is 1GiB,
+ * so the device region is one 1GiB unit, the RAM region is 255 of them
+ * and the PCIe region (0x8000000000 bytes) is 512 of them.
+ *
+ * NOTE(review): despite the "L2" in their names, RAM_L2_ENTRIES and
+ * PCIE_L2_ENTRIES equal the numbers of 1GiB entries that
+ * create_pagetables writes into the L1 tables for RAM and PCIe.
+ */
+#define DEVICE_ADDR_START 0
+#define DEVICE_ADDR_SIZE  0x40000000
+#define RAM_ADDR_START    0x40000000
+#define RAM_L2_ENTRIES    255
+#define RAM_ADDR_SIZE     (0x40000000 * RAM_L2_ENTRIES)
+#define PCIE_ADDR_START   0x8000000000
+#define PCIE_L2_ENTRIES   512
+#define PCIE_ADDR_SIZE    0x8000000000
+
+/*
+ * create_pagetables - build the boot-time translation tables
+ *
+ * We will use a direct map for physical address and virtual address
+ * (PA = VA 1:1 mapping).
+ *
+ * The tables are placed at _end. PAGE_TABLE_SIZE and the
+ * L1/L2/L3_TABLE_OFFSET values are not defined in this file (presumably
+ * they come from <mm.h>).
+ *
+ * Takes no register arguments and returns no value.
+ *  x5       = copy of the link register, because the helpers are
+ *             called with bl
+ *  x14      = base address of the page table area
+ *  x6 - x10 = arguments of the helper routines
+ *  x11, x12 and x13 are trashed by the helper routines
+ */
+ENTRY(create_pagetables)
+       /* Save link address */
+       mov x5, x30
+
+       /* Pagetable starts from _end */
+       ldr x14, =_end
+
+       /*
+        * Clean the page table, 64 bytes per iteration.
+        * x6 = current position, x13 = end of the page table area.
+        */
+       mov x6, x14
+       add x13, x14, #PAGE_TABLE_SIZE
+1:
+       stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       cmp  x6, x13
+       b.lo 1b
+
+       /*
+        * We have 2 L1 tables to map 0 ~ 512GB, 512 GB ~ 1TB.
+        * Link these two tables to L0 entries#0, #1
+        */
+       mov x6, x14
+       mov x8, #0
+       add x9, x14, #L1_TABLE_OFFSET
+       mov x10, #2
+       bl  link_l0_pagetable
+
+       /*
+        * Using 1GiB block to map device address space (0x0 ~ 0x3fffffff)
+        */
+       add x6, x14, #L1_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_DEVICE_nGnRE
+       mov x8, #DEVICE_ADDR_START
+       mov x9, x8
+       mov x10, #1
+       bl  build_l1_block_pagetable
+
+       /*
+        * Using 1GiB blocks to map RAM address space
+        * (0x40000000 ~ (256GiB -1)). The RAM areas that contain kernel
+        * sections will be updated later.
+        */
+       add x6, x14, #L1_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #RAM_L2_ENTRIES
+       bl  build_l1_block_pagetable
+
+       /*
+        * Using 1GiB blocks to map high PCIe address space
+        * (512GiB ~ (1TiB -1)). This region uses the second L1 table,
+        * which follows the first one after PAGE_SIZE bytes.
+        */
+       add x6, x14, #L1_TABLE_OFFSET
+       add x6, x6, #PAGE_SIZE
+       ldr x7, =SECT_ATTR_DEVICE_nGnRE
+       mov x8, #PCIE_ADDR_START
+       mov x9, x8
+       mov x10, #PCIE_L2_ENTRIES
+       bl  build_l1_block_pagetable
+
+       /*
+        * The text, bss, data and other sections are placed at
+        * 0x40000000 ~ 0x7fffffff. We use 4k granularity to manage
+        * their attributes to improve security.
+        * L1 -> L2 -> L3 (4K)
+        */
+       /* 1st: Build a L2 block pagetable that covers the first 1GiB of RAM */
+       add x6, x14, #L2_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #Ln_ENTRIES
+       bl  build_l2_block_pagetable
+
+       /* 2nd: Link this L2 block pagetable to L1 entry */
+       add x6, x14, #L1_TABLE_OFFSET
+       mov x8, #RAM_ADDR_START
+       add x9, x14, #L2_TABLE_OFFSET
+       bl  link_l1_pagetable
+
+       /* 3rd: Build a L3 pagetable for the first 2MB of RAM */
+       add x6, x14, #L3_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #Ln_ENTRIES
+       bl  build_l3_pagetable
+
+       /* 4th: Link this L3 pagetable to L2 entry */
+       add x6, x14, #L2_TABLE_OFFSET
+       mov x8, #RAM_ADDR_START
+       add x9, x14, #L3_TABLE_OFFSET
+       bl  link_l2_pagetable
+
+       /*
+        * Steps 5 to 7 rewrite entries of the single L3 table built in
+        * step 3. Each page count is the section size shifted right by
+        * L3_SHIFT.
+        */
+       /* 5th: Update dtb section to readonly */
+       ldr x8, =_dtb
+       ldr x9, =_text
+       sub x10, x9, x8
+       add x6, x14, #L3_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_RO
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* 6th: Update text section to Readonly & EXEC */
+       ldr x8, =_text
+       ldr x9, =_etext
+       sub x10, x9, x8
+       add x6, x14, #L3_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_EXEC
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* 7th: Update rodata and Constructor tables sections to Readonly */
+       ldr x8, =_rodata
+       ldr x9, =_ectors
+       sub x10, x9, x8
+       add x6, x14, #L3_TABLE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_RO
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* Restore the Link register */
+       mov x30, x5
+
+       ret
+END(create_pagetables)
+
+/*
+ * Builds count L0 -> L1 table descriptors
+ *
+ * Each descriptor is a link for a 512GiB block of memory with up to 1GiB
+ * regions mapped within it by build_l1_block_pagetable. Consecutive L0
+ * entries are pointed at consecutive L1 tables, one page apart.
+ *
+ *  x6  = L0 table
+ *  x8  = Virtual Address
+ *  x9  = L1 PA (trashed)
+ *  x10 = Entry count, must not be 0 because it is only tested after the
+ *        first entry has been written (trashed, 0 on return)
+ *  x11, x12 and x13 are trashed
+ */
+link_l0_pagetable:
+       /* Find the table index */
+       lsr x11, x8, #L0_SHIFT
+       and x11, x11, #L0_ADDR_MASK
+
+       /* Build the L0 table entry */
+       mov x12, #L0_TABLE
+
+       /* Only use the output address bits: x9 becomes a page number */
+       lsr x9, x9, #PAGE_SHIFT
+1:     orr x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1          /* next L1 table, one page further */
+       cbnz x10, 1b
+
+       ret
+
+/*
+ * Builds an L1 -> L2 table descriptor
+ *
+ * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
+ * within it by build_l2_block_pagetable. Exactly one L1 entry is written.
+ *
+ *  x6  = L1 table
+ *  x8  = Virtual Address
+ *  x9  = L2 PA (trashed)
+ *  x11, x12 and x13 are trashed
+ */
+link_l1_pagetable:
+       /* Find the table index */
+       lsr x11, x8, #L1_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L1 table entry */
+       mov x12, #L1_TABLE
+
+       /* Only use the output address bits: drop the offset within the page */
+       lsr x9, x9, #PAGE_SHIFT
+       orr x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       ret
+
+/*
+ * Builds count 1 GiB block entries in an L1 table
+ *
+ * Starting at the L1 index of the VA, consecutive entries are filled with
+ * consecutive 1GiB physical blocks that all carry the same attributes.
+ *
+ *  x6  = L1 table
+ *  x7  = Memory attributes (preserved)
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count, must not be 0 because it is only tested after the
+ *        first entry has been written (trashed, 0 on return)
+ *  x11, x12 and x13 are trashed
+ */
+build_l1_block_pagetable:
+       /* Find the table index */
+       lsr x11, x8, #L1_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L1 block entry */
+       mov x12, x7
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits: x9 becomes a 1GiB block number */
+       lsr x9, x9, #L1_SHIFT
+
+       /* Set the physical address for this virtual address */
+1:     orr x13, x12, x9, lsl #L1_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1          /* next 1GiB block */
+       cbnz x10, 1b
+
+       ret
+
+/*
+ * Builds count 2 MiB block entries in an L2 table
+ *
+ * Starting at the L2 index of the VA, consecutive entries are filled with
+ * consecutive 2MiB physical blocks that all carry the same attributes.
+ *
+ *  x6  = L2 table
+ *  x7  = Memory attributes (only read here, not modified)
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count, must not be 0 because it is only tested after the
+ *        first entry has been written (trashed, 0 on return)
+ *  x11, x12 and x13 are trashed
+ */
+build_l2_block_pagetable:
+       /* Find the table index */
+       lsr x11, x8, #L2_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L2 block entry */
+       mov x12, x7
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits: x9 becomes a 2MiB block number */
+       lsr x9, x9, #L2_SHIFT
+
+       /* Set the physical address for this virtual address */
+1:     orr x13, x12, x9, lsl #L2_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1          /* next 2MiB block */
+       cbnz x10, 1b
+
+       ret
+
+/*
+ * Builds an L2 -> L3 table descriptor
+ *
+ * This is a link for a 2MiB block of memory with up to 4KiB regions mapped
+ * within it by build_l3_pagetable. Exactly one L2 entry is written.
+ *
+ *  x6  = L2 table
+ *  x8  = Virtual Address
+ *  x9  = L3 table PA (trashed)
+ *  x11, x12 and x13 are trashed
+ */
+link_l2_pagetable:
+       /* Find the table index */
+       lsr x11, x8, #L2_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L2 table entry */
+       mov x12, #L2_TABLE
+
+       /* Only use the output address bits: drop the offset within the page */
+       lsr x9, x9, #PAGE_SHIFT
+       orr x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       ret
+
+/*
+ * Builds count 4 KiB page table entries in an L3 table
+ *
+ * Starting at the L3 index of the VA, consecutive entries are filled with
+ * consecutive 4KiB physical pages that all carry the same attributes.
+ * The descriptor type is forced to L3_PAGE, so block attribute sets such
+ * as SECT_ATTR_* can be passed in x7. The index is not checked against
+ * the end of the table: the caller must make sure that index + count
+ * does not exceed the number of entries in the table.
+ *
+ *  x6  = L3 table
+ *  x7  = Memory attributes (preserved)
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count, 0 is allowed and writes nothing
+ *        (trashed, 0 on return)
+ *  x11, x12 and x13 are trashed
+ */
+build_l3_pagetable:
+       /*
+        * Callers derive the count from linker symbols, so it can be 0
+        * for an empty section. The loop below tests the count only after
+        * writing an entry and would wrap around, so bail out early.
+        */
+       cbz x10, 2f
+
+       /* Find the table index */
+       lsr x11, x8, #L3_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L3 entry */
+       mov x12, x7
+       orr x12, x12, #L3_PAGE
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits: x9 becomes a page number */
+       lsr x9, x9, #L3_SHIFT
+
+       /* Set the physical address for this virtual address */
+1:     orr x13, x12, x9, lsl #L3_SHIFT
+
+       /* Store the entry, entries are 8 bytes wide */
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1
+       cbnz x10, 1b
+
+2:     ret


_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.