[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Minios-devel] [UNIKRAFT PATCHv4 28/43] plat/kvm: Create page tables for Arm64


  • To: <minios-devel@xxxxxxxxxxxxxxxxxxxx>, <simon.kuenzer@xxxxxxxxx>
  • From: Wei Chen <Wei.Chen@xxxxxxx>
  • Date: Fri, 6 Jul 2018 09:03:41 +0000
  • Authentication-results: spf=fail (sender IP is 40.67.248.234) smtp.mailfrom=arm.com; lists.xenproject.org; dkim=none (message not signed) header.d=none;lists.xenproject.org; dmarc=none action=none header.from=arm.com;
  • Cc: Kaly.Xin@xxxxxxx, nd@xxxxxxx, wei.chen@xxxxxxx
  • Delivery-date: Fri, 06 Jul 2018 09:05:51 +0000
  • List-id: Mini-os development list <minios-devel.lists.xenproject.org>
  • Nodisclaimer: True
  • Spamdiagnosticmetadata: NSPM
  • Spamdiagnosticoutput: 1:99

We design a direct mapping of physical memory to virtual
memory. To improve security, we use the MMU to control
the attributes of the image sections. The minimal granularity is 4K.
If we mapped the entire physical address space with 4K pages, the
page tables would be very big. So we only use 4K pages to map the
area where the image has been placed, and use 1G or 2M blocks to map
the other memory areas. In this case, the page tables only
occupy 20KB of memory.

Signed-off-by: Wei Chen <Wei.Chen@xxxxxxx>
---
 plat/common/include/arm/arm64/cpu_defs.h | 134 ++++++++
 plat/kvm/arm/pagetable.S                 | 381 +++++++++++++++++++++++
 2 files changed, 515 insertions(+)
 create mode 100644 plat/kvm/arm/pagetable.S

diff --git a/plat/common/include/arm/arm64/cpu_defs.h 
b/plat/common/include/arm/arm64/cpu_defs.h
index f5c64ce..f33ce35 100644
--- a/plat/common/include/arm/arm64/cpu_defs.h
+++ b/plat/common/include/arm/arm64/cpu_defs.h
@@ -116,6 +116,140 @@ END(name)
 /* Any access in EL0 or EL1 will not be trapped */
 #define CPACR_FPEN_TRAP_NONE    (0x3 << 20)
 
+/* Registers and Bits definitions for MMU */
+/* MAIR_EL1 - Memory Attribute Indirection Register, 8 bits per index */
+#define MAIR_ATTR_MASK(idx)    (_AC(0xff, UL) << ((idx) * 8))
+#define MAIR_ATTR(attr, idx)   ((attr) << ((idx) * 8))
+
+/* Device-nGnRnE memory */
+#define MAIR_DEVICE_nGnRnE     0x00
+/* Device-nGnRE memory */
+#define MAIR_DEVICE_nGnRE      0x04
+/* Device-GRE memory */
+#define MAIR_DEVICE_GRE                0x0C
+/* Normal memory, Outer Non-cacheable + Inner Non-cacheable */
+#define MAIR_NORMAL_NC         0x44
+/* Normal memory, Outer + Inner Write-through non-transient */
+#define MAIR_NORMAL_WT         0xbb
+/* Normal memory, Outer + Inner Write-back non-transient */
+#define MAIR_NORMAL_WB         0xff
+
+/*
+ * Memory types: indexes of the attribute slots in MAIR_EL1 (8 bits each,
+ * see MAIR_ATTR). Descriptors select a slot through ATTR_IDX().
+ */
+#define DEVICE_nGnRnE  0
+#define DEVICE_nGnRE   1
+#define DEVICE_GRE     2
+#define NORMAL_NC      3
+#define NORMAL_WT      4
+#define NORMAL_WB      5
+
+/*
+ * Definitions for Block and Page descriptor attributes
+ */
+/* Level 0 table, 512GiB per entry */
+#define L0_SHIFT       39
+#define L0_SIZE                (_AC(1, UL) << L0_SHIFT)
+#define L0_OFFSET      (L0_SIZE - _AC(1, UL))
+#define L0_INVAL       0x0 /* An invalid address */
+       /* 0x1 Level 0 doesn't support block translation */
+       /* 0x2 also marks an invalid address */
+#define L0_TABLE       0x3 /* A next-level table */
+
+/* Level 1 table, 1GiB per entry */
+#define L1_SHIFT       30
+#define L1_SIZE        (1 << L1_SHIFT)
+#define L1_OFFSET      (L1_SIZE - 1)
+#define L1_INVAL       L0_INVAL
+#define L1_BLOCK       0x1
+#define L1_TABLE       L0_TABLE
+
+/* Level 2 table, 2MiB per entry */
+#define L2_SHIFT       21
+#define L2_SIZE        (1 << L2_SHIFT)
+#define L2_OFFSET      (L2_SIZE - 1)
+#define L2_INVAL       L1_INVAL
+#define L2_BLOCK       L1_BLOCK
+#define L2_TABLE       L1_TABLE
+
+#define L2_BLOCK_MASK  _AC(0xffffffe00000, UL)
+
+/* Level 3 table, 4KiB per entry */
+#define L3_SHIFT       12
+#define L3_SIZE        (1 << L3_SHIFT)
+#define L3_OFFSET      (L3_SIZE - 1)
+#define L3_INVAL       0x0
+       /* 0x1 is reserved */
+       /* 0x2 also marks an invalid address */
+#define L3_PAGE                0x3
+
+/* Number of descriptors per table and the matching index masks */
+#define L0_ENTRIES_SHIFT 9
+#define L0_ENTRIES     (1 << L0_ENTRIES_SHIFT)
+#define L0_ADDR_MASK   (L0_ENTRIES - 1)
+
+#define Ln_ENTRIES_SHIFT 9
+#define Ln_ENTRIES     (1 << Ln_ENTRIES_SHIFT)
+#define Ln_ADDR_MASK   (Ln_ENTRIES - 1)
+#define Ln_TABLE_MASK  ((1 << 12) - 1)
+#define Ln_TABLE       0x3
+#define Ln_BLOCK       0x1
+
+/*
+ * Hardware page table definitions.
+ */
+/* TODO: Add the upper attributes */
+#define ATTR_MASK_H    _AC(0xfff0000000000000, UL) /* bits 63:52 */
+#define ATTR_MASK_L    _AC(0x0000000000000fff, UL) /* bits 11:0 */
+#define ATTR_MASK      (ATTR_MASK_H | ATTR_MASK_L)
+/* Bits 58:55 are reserved for software */
+#define ATTR_SW_MANAGED        (_AC(1, UL) << 56)
+#define ATTR_SW_WIRED  (_AC(1, UL) << 55)
+#define ATTR_UXN       (_AC(1, UL) << 54)
+#define ATTR_PXN       (_AC(1, UL) << 53)
+#define ATTR_XN                (ATTR_PXN | ATTR_UXN)
+#define ATTR_CONTIGUOUS        (_AC(1, UL) << 52)
+#define ATTR_DBM       (_AC(1, UL) << 51)
+#define ATTR_nG                (1 << 11)
+#define ATTR_AF                (1 << 10)
+#define ATTR_SH(x)     ((x) << 8)
+#define ATTR_SH_MASK   ATTR_SH(3)
+#define ATTR_SH_NS     0               /* Non-shareable */
+#define ATTR_SH_OS     2               /* Outer-shareable */
+#define ATTR_SH_IS     3               /* Inner-shareable */
+#define ATTR_AP_RW_BIT (1 << 7) /* set by SECT_ATTR_NORMAL_RO and _EXEC */
+#define ATTR_AP(x)     ((x) << 6) /* NOTE(review): x presumably ATTR_AP_* */
+#define ATTR_AP_MASK   ATTR_AP(3)
+#define ATTR_AP_RW     (0 << 1)
+#define ATTR_AP_RO     (1 << 1)
+#define ATTR_AP_USER   (1 << 0)
+#define ATTR_NS                (1 << 5)
+#define ATTR_IDX(x)    ((x) << 2) /* x = memory type index, e.g. NORMAL_WB */
+#define ATTR_IDX_MASK  (7 << 2)
+
+#define ATTR_DEFAULT   (ATTR_AF | ATTR_SH(ATTR_SH_IS))
+
+#define ATTR_DESCR_MASK        3 /* low two bits: Ln_BLOCK/Ln_TABLE/L3_PAGE */
+
+/*
+ * Block descriptor attribute sets; ATTR_AP_RW_BIT set means read-only
+ */
+#define SECT_ATTR_DEFAULT      \
+               (Ln_BLOCK | ATTR_DEFAULT)
+#define SECT_ATTR_NORMAL       \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_NORMAL_RO    \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_AP_RW_BIT | ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_NORMAL_EXEC  \
+               (SECT_ATTR_DEFAULT | ATTR_UXN | \
+               ATTR_AP_RW_BIT | ATTR_IDX(NORMAL_WB))
+#define SECT_ATTR_DEVICE_nGnRE \
+               (SECT_ATTR_DEFAULT | ATTR_XN | \
+               ATTR_IDX(DEVICE_nGnRE))
+
 /* Define the address offset of boot stack and pagetable */
 #define PAGE_SIZE      __PAGE_SIZE
 #define PAGE_SHIFT     __PAGE_SHIFT
diff --git a/plat/kvm/arm/pagetable.S b/plat/kvm/arm/pagetable.S
new file mode 100644
index 0000000..8de6305
--- /dev/null
+++ b/plat/kvm/arm/pagetable.S
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*-
+ * Copyright (c) 2012-2014 Andrew Turner. All rights reserved.
+ * Copyright (c) 2018 Arm Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <uk/arch/limits.h>
+#include <arm/cpu_defs.h>
+
+/*
+ * The following is the Memory Layout of AArch64 Virtual Machine
+ * | 0 - 0x3FFFFFFF |   0x40000000 - 0x7FFFFFFFFF           |  512GB - 1TB   |
+ * --------------------------------------------------------------------------
+ * |  DEVICES MMIO  | DTB|TEXT|DATA|BSS|PAGETABLE|BOOTSTACK | PCI-e High Mem |
+ * --------------------------------------------------------------------------
+ */
+#define DEVICE_ADDR_START 0
+#define DEVICE_ADDR_SIZE  0x40000000 /* 1GiB */
+#define RAM_ADDR_START    0x40000000 /* 1GiB */
+#define RAM_L2_ENTRIES    255 /* 1GiB units; RAM window ends at 256GiB */
+#define RAM_ADDR_SIZE     (0x40000000 * RAM_L2_ENTRIES)
+#define PCIE_ADDR_START   0x8000000000 /* 512GiB */
+#define PCIE_L2_ENTRIES   512 /* 1GiB units; 512 x 1GiB = PCIE_ADDR_SIZE */
+#define PCIE_ADDR_SIZE    0x8000000000
+
+/*
+ * Builds the boot page tables at _end for a direct (PA = VA 1:1) mapping.
+ * Takes no arguments. x5 keeps the link register and x14 the page table
+ * base for the whole routine; x6-x13 are scratch and are reloaded before
+ * every helper call.
+ */
+ENTRY(create_pagetables)
+       /* Save link address, the helpers below are called with bl */
+       mov x5, x30
+
+       /* Pagetable starts from _end */
+       ldr x14, =_end
+
+       /* Clean the page table, 64 bytes per iteration */
+       mov x6, x14
+       add x13, x14, #PAGE_TABLE_SIZE
+1:     stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       stp  xzr, xzr, [x6], #16
+       cmp  x6, x13
+       b.lo 1b
+
+       /*
+        * We have 2 PUD tables to map 0 ~ 512GB, 512 GB ~ 1TB.
+        * Link these two tables to PGD entries#0, #1
+        */
+       mov x6, x14
+       mov x8, #0
+       add x9, x14, #PUD_PAGE_OFFSET
+       mov x10, #2
+       bl  link_l0_pagetable
+
+       /*
+        * Using 1GiB block to map device address space (0x0 ~ 0x3fffffff)
+        */
+       add x6, x14, #PUD_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_DEVICE_nGnRE
+       mov x8, #DEVICE_ADDR_START
+       mov x9, x8
+       mov x10, #1
+       bl  build_l1_block_pagetable
+
+       /*
+        * Using 1GiB block to map RAM address space
+        * (0x40000000 ~ (256GiB -1)). The RAM areas that contain kernel
+        * sections will be updated later.
+        */
+       add x6, x14, #PUD_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #RAM_L2_ENTRIES
+       bl  build_l1_block_pagetable
+
+       /*
+        * Using 1GiB block to map high PCIe address space
+        * (512GiB ~ (1TiB -1)). This range lives in the second PUD table.
+        */
+       add x6, x14, #PUD_PAGE_OFFSET
+       add x6, x6, #PAGE_SIZE
+       ldr x7, =SECT_ATTR_DEVICE_nGnRE
+       mov x8, #PCIE_ADDR_START
+       mov x9, x8
+       mov x10, #PCIE_L2_ENTRIES
+       bl  build_l1_block_pagetable
+
+       /*
+        * The text, bss, data and other sections are placed at
+        * 0x40000000 ~ 0x7fffffff. We use 4k granularity to manage
+        * their attributes to improve security.
+        * L1(PUD) -> L2(PMD) -> L3(PTE 4K)
+        */
+       /* 1st: Build a L2 block pagetable */
+       add x6, x14, #PMD_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #Ln_ENTRIES
+       bl  build_l2_block_pagetable
+
+       /* 2nd: Link this L2 block pagetable to L1 entry */
+       add x6, x14, #PUD_PAGE_OFFSET
+       mov x8, #RAM_ADDR_START
+       add x9, x14, #PMD_PAGE_OFFSET
+       bl  link_l1_pagetable
+
+       /* 3rd: Build a L3 PTE pagetable for the first 2MiB of RAM */
+       add x6, x14, #PTE_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL
+       mov x8, #RAM_ADDR_START
+       mov x9, x8
+       mov x10, #Ln_ENTRIES
+       bl  build_l3_pagetable
+
+       /* 4th: Link this L3 PTE pagetable to L2 entry */
+       add x6, x14, #PMD_PAGE_OFFSET
+       mov x8, #RAM_ADDR_START
+       add x9, x14, #PTE_PAGE_OFFSET
+       bl  link_l2_pagetable
+
+       /* 5th: Update dtb section to readonly */
+       ldr x8, =_dtb
+       ldr x9, =_text
+       sub x10, x9, x8
+       add x6, x14, #PTE_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_RO
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* 6th: Update text section to Readonly & EXEC */
+       ldr x8, =_text
+       ldr x9, =_etext
+       sub x10, x9, x8
+       add x6, x14, #PTE_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_EXEC
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* 7th: Update rodata and Constructor tables sections to Readonly */
+       ldr x8, =_rodata
+       ldr x9, =_ectors
+       sub x10, x9, x8
+       add x6, x14, #PTE_PAGE_OFFSET
+       ldr x7, =SECT_ATTR_NORMAL_RO
+       mov x9, x8
+       lsr x10, x10, #L3_SHIFT
+       bl  build_l3_pagetable
+
+       /* Restore the Link register */
+       mov x30, x5
+       ret
+END(create_pagetables)
+
+/*
+ * Builds x10 consecutive L0 -> L1 table descriptors
+ *
+ * Each descriptor links a 512GiB region to one L1 table; the 1GiB regions
+ * within it are mapped by build_l1_block_pagetable.
+ *
+ *  x6  = L0 table
+ *  x8  = Virtual Address
+ *  x9  = PA of the first L1 table, the next ones follow page by page (trashed)
+ *  x10 = Entry count (trashed), 0 writes nothing
+ *  x11, x12 and x13 are trashed
+ */
+link_l0_pagetable:
+       /* A zero count would wrap in the count-down loop below */
+       cbz x10, 2f
+
+       /* Find the table index */
+       lsr x11, x8, #L0_SHIFT
+       and x11, x11, #L0_ADDR_MASK
+
+       /* Level 0 has no block entries, always build a table entry */
+       mov x12, #L0_TABLE
+
+       /* Only use the output address bits, then store one entry per pass */
+       lsr x9, x9, #PAGE_SHIFT
+1:     orr x13, x12, x9, lsl #PAGE_SHIFT
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1
+       cbnz x10, 1b
+2:     ret
+
+/*
+ * Installs a single L1 -> L2 table descriptor
+ *
+ * The descriptor covers the 1GiB region containing x8; the 2MiB entries
+ * below it are filled by build_l2_block_pagetable.
+ *
+ *  x6  = L1 table
+ *  x8  = Virtual Address
+ *  x9  = L2 PA (trashed)
+ *  x11, x12 and x13 are trashed
+ */
+link_l1_pagetable:
+       /* x11 = L1 slot: the Ln_ENTRIES_SHIFT VA bits starting at L1_SHIFT */
+       ubfx x11, x8, #L1_SHIFT, #Ln_ENTRIES_SHIFT
+
+       /* x9 = L2 table address with the in-page bits dropped */
+       lsr x9, x9, #PAGE_SHIFT
+
+       /* x12 = descriptor type: next-level table */
+       mov x12, #L1_TABLE
+
+       /* x13 = type bits | page-aligned L2 table address */
+       orr x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Write the descriptor into its slot (8 bytes per entry) */
+       str x13, [x6, x11, lsl #3]
+       ret
+
+/*
+ * Fills x10 consecutive 1GiB block descriptors in an L1 table
+ *  x6  = L1 table
+ *  x7  = Memory attributes
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count (trashed), 0 writes nothing
+ *  x11, x12 and x13 are trashed
+ */
+build_l1_block_pagetable:
+       /* A zero count would wrap in the count-down loop below */
+       cbz x10, 2f
+
+       /* Find the table index */
+       lsr x11, x8, #L1_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L1 block entry */
+       mov x12, x7
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits */
+       lsr x9, x9, #L1_SHIFT
+
+       /* Merge attributes with the output address and store the entry */
+1:     orr x13, x12, x9, lsl #L1_SHIFT
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1
+       cbnz x10, 1b
+2:     ret
+
+/*
+ * Fills x10 consecutive 2MiB block descriptors in an L2 table
+ *  x6  = L2 table
+ *  x7  = Memory attributes
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count (trashed), 0 writes nothing
+ *  x11, x12 and x13 are trashed
+ */
+build_l2_block_pagetable:
+       /* A zero count would wrap in the count-down loop below */
+       cbz x10, 2f
+
+       /* Find the table index */
+       lsr x11, x8, #L2_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L2 block entry */
+       mov x12, x7
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits */
+       lsr x9, x9, #L2_SHIFT
+
+       /* Merge attributes with the output address and store the entry */
+1:     orr x13, x12, x9, lsl #L2_SHIFT
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1
+       cbnz x10, 1b
+2:     ret
+
+/*
+ * Installs a single L2(PMD) -> L3(PTE) table descriptor
+ *
+ * The descriptor covers the 2MiB region containing x8; the 4KiB entries
+ * below it are filled by build_l3_pagetable.
+ *
+ *  x6  = L2 PMD table
+ *  x8  = Virtual Address
+ *  x9  = PTE table PA (trashed)
+ *  x11, x12 and x13 are trashed
+ */
+link_l2_pagetable:
+       /* x11 = L2 slot: the Ln_ENTRIES_SHIFT VA bits starting at L2_SHIFT */
+       ubfx x11, x8, #L2_SHIFT, #Ln_ENTRIES_SHIFT
+
+       /* x9 = PTE table address with the in-page bits dropped */
+       lsr x9, x9, #PAGE_SHIFT
+
+       /* x12 = descriptor type: next-level table */
+       mov x12, #L2_TABLE
+
+       /* x13 = type bits | page-aligned PTE table address */
+       orr x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Write the descriptor into its slot (8 bytes per entry) */
+       str x13, [x6, x11, lsl #3]
+       ret
+
+/*
+ * Fills x10 consecutive 4KiB page descriptors in an L3 table
+ *  x6  = L3 table
+ *  x7  = Memory attributes
+ *  x8  = VA start
+ *  x9  = PA start (trashed)
+ *  x10 = Entry count (trashed), 0 writes nothing
+ *  x11, x12 and x13 are trashed
+ */
+build_l3_pagetable:
+       /* A zero count would wrap in the count-down loop below */
+       cbz x10, 2f
+
+       /* Find the table index */
+       lsr x11, x8, #L3_SHIFT
+       and x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L3 PTE entry */
+       mov x12, x7
+       orr x12, x12, #L3_PAGE
+#ifdef SMP
+       orr x12, x12, ATTR_SH(ATTR_SH_IS)
+#endif
+
+       /* Only use the output address bits */
+       lsr x9, x9, #L3_SHIFT
+
+       /* Merge attributes with the output address and store the entry */
+1:     orr x13, x12, x9, lsl #L3_SHIFT
+       str x13, [x6, x11, lsl #3]
+
+       sub x10, x10, #1
+       add x11, x11, #1
+       add x9, x9, #1
+       cbnz x10, 1b
+2:     ret
-- 
2.17.1


_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.