[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 3/3] xen/ppc: Implement initial Radix MMU support



Add code to construct early identity-mapped page tables as well as the
required process and partition tables to enable the MMU.

Signed-off-by: Shawn Anastasio <sanastasio@xxxxxxxxxxxxxxxxxxxxx>
---
v4:
  - (page.h) Drop *_SHIFT constants in page.h and include shift directly in
    corresponding *_MASK constant instead.
  - (regs.h) Drop full license text header
  - (mm-radix.c) Mark *_pd_pool_used counters as __initdata
  - (mm-radix.c) Drop unnecessary pointer casts of __va()'s return value
  - (mm-radix.c) Add newline separator between declarations and statements
    in setup_{partition,process}_table()
v3:
  - Add XEN_PT_SIZE_LVL() macro for calculating the size in bytes
    of a page directory/page table structure.
  - Add alignment to struct lvl$N_{pd,pt} types.
  - Don't require aligned `paddr` in paddr_to_{pde,pte}
  - Fix RTS_FIELD comment bit numbering

v2:
  - Add missing ')' to mm.h/maddr_to_virt definition
  - Add missing parens around `va` argument in XEN_PT_INDEX_LVL
  - Drop '&' asm constraint modifier in processor.h/mfmsr()
  - Fix asm constraint formatting in processor.h
  - Change mtspr,mfspr to functions in processor.h
  - Drop unnecessary _Static_assert in mm-radix.c
  - Drop unnecessary cast in setup_initial_mapping
  - Use __va() to obtain pde/pte addresses in setup_initial_mapping
  - Wrap >80col lines in mm-radix.c
  - Drop unnecessary parameters in setup_{process,partition}_table
  - Add __init to setup_{process,partition}_table
  - Drop unnecessary opal.c changes
  - Drop manual prototype of setup_initial_pagetables in setup.c in
    favor of using the one already in asm/mm.h
  - Fix formatting of tlbiel asm block in tlb-radix.c

 xen/arch/ppc/Makefile                |   2 +
 xen/arch/ppc/include/asm/bitops.h    |   9 +
 xen/arch/ppc/include/asm/mm.h        |  18 ++
 xen/arch/ppc/include/asm/page-bits.h |   3 +-
 xen/arch/ppc/include/asm/page.h      | 180 ++++++++++++++++++
 xen/arch/ppc/include/asm/processor.h |  34 ++++
 xen/arch/ppc/include/asm/regs.h      | 124 +++++++++++++
 xen/arch/ppc/include/asm/types.h     |   1 +
 xen/arch/ppc/mm-radix.c              | 267 +++++++++++++++++++++++++++
 xen/arch/ppc/ppc64/head.S            |  15 ++
 xen/arch/ppc/setup.c                 |   3 +
 xen/arch/ppc/tlb-radix.c             |  96 ++++++++++
 12 files changed, 751 insertions(+), 1 deletion(-)
 create mode 100644 xen/arch/ppc/include/asm/bitops.h
 create mode 100644 xen/arch/ppc/include/asm/mm.h
 create mode 100644 xen/arch/ppc/include/asm/page.h
 create mode 100644 xen/arch/ppc/include/asm/regs.h
 create mode 100644 xen/arch/ppc/mm-radix.c
 create mode 100644 xen/arch/ppc/tlb-radix.c

diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
index 0c0a7884a1..a059ac4c0a 100644
--- a/xen/arch/ppc/Makefile
+++ b/xen/arch/ppc/Makefile
@@ -2,8 +2,10 @@ obj-$(CONFIG_PPC64) += ppc64/

 obj-y += boot-of.init.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.init.o
+obj-y += mm-radix.o
 obj-y += opal.o
 obj-y += setup.o
+obj-y += tlb-radix.o

 $(TARGET): $(TARGET)-syms
        cp -f $< $@
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
new file mode 100644
index 0000000000..548e97b414
--- /dev/null
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_PPC_BITOPS_H
+#define _ASM_PPC_BITOPS_H
+
+/* PPC bit number conversion */
+#define PPC_BITLSHIFT(be)      (BITS_PER_LONG - 1 - (be))
+#define PPC_BIT(bit)           (1UL << PPC_BITLSHIFT(bit))
+#define PPC_BITMASK(bs, be)    ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
+#endif /* _ASM_PPC_BITOPS_H */
diff --git a/xen/arch/ppc/include/asm/mm.h b/xen/arch/ppc/include/asm/mm.h
new file mode 100644
index 0000000000..c85a7ed686
--- /dev/null
+++ b/xen/arch/ppc/include/asm/mm.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_PPC_MM_H
+#define _ASM_PPC_MM_H
+
+#include <asm/page-bits.h>
+
+#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
+#define paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
+
+#define virt_to_maddr(va) ((paddr_t)((vaddr_t)(va) & PADDR_MASK))
+#define maddr_to_virt(pa) ((void *)((paddr_t)(pa) | XEN_VIRT_START))
+
+/* Convert between Xen-heap virtual addresses and machine addresses. */
+#define __pa(x)             (virt_to_maddr(x))
+#define __va(x)             (maddr_to_virt(x))
+
+void setup_initial_pagetables(void);
+
+#endif /* _ASM_PPC_MM_H */
diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
index 4c01bf9716..0286177520 100644
--- a/xen/arch/ppc/include/asm/page-bits.h
+++ b/xen/arch/ppc/include/asm/page-bits.h
@@ -2,6 +2,7 @@
 #define __PPC_PAGE_BITS_H__

 #define PAGE_SHIFT              16 /* 64 KiB Pages */
-#define PADDR_BITS              48
+#define PADDR_BITS              53
+#define VADDR_BITS              52

 #endif /* __PPC_PAGE_BITS_H__ */
diff --git a/xen/arch/ppc/include/asm/page.h b/xen/arch/ppc/include/asm/page.h
new file mode 100644
index 0000000000..3c90e8bf19
--- /dev/null
+++ b/xen/arch/ppc/include/asm/page.h
@@ -0,0 +1,180 @@
+#ifndef _ASM_PPC_PAGE_H
+#define _ASM_PPC_PAGE_H
+
+#include <xen/types.h>
+
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#define PDE_VALID     PPC_BIT(0)
+#define PDE_NLB_MASK  0x1ffffffffffffe0UL
+#define PDE_NLS_MASK  0x1f
+
+#define PTE_VALID     PPC_BIT(0)
+#define PTE_LEAF      PPC_BIT(1)
+#define PTE_REFERENCE PPC_BIT(55)
+#define PTE_CHANGE    PPC_BIT(56)
+
+/* PTE Attributes */
+#define PTE_ATT_SAO            PPC_BIT(59) /* Strong Access Ordering */
+#define PTE_ATT_NON_IDEMPOTENT PPC_BIT(58)
+#define PTE_ATT_TOLERANT       (PPC_BIT(58) | PPC_BIT(59))
+
+/* PTE Encoded Access Authority */
+#define PTE_EAA_PRIVILEGED PPC_BIT(60)
+#define PTE_EAA_READ       PPC_BIT(61)
+#define PTE_EAA_WRITE      PPC_BIT(62)
+#define PTE_EAA_EXECUTE    PPC_BIT(63)
+
+/* Field shifts/masks */
+#define PTE_RPN_MASK  0x1fffffffffff000UL
+#define PTE_ATT_MASK  0x30UL
+#define PTE_EAA_MASK  0xfUL
+
+#define PTE_XEN_BASE (PTE_VALID | PTE_EAA_PRIVILEGED | PTE_REFERENCE)
+#define PTE_XEN_RW   (PTE_XEN_BASE | PTE_EAA_READ | PTE_EAA_WRITE | PTE_CHANGE)
+#define PTE_XEN_RO   (PTE_XEN_BASE | PTE_EAA_READ)
+#define PTE_XEN_RX   (PTE_XEN_BASE | PTE_EAA_READ | PTE_EAA_EXECUTE)
+
+/*
+ * Radix Tree layout for 64KB pages:
+ *
+ * [ L1 (ROOT) PAGE DIRECTORY (8192 * sizeof(pde_t)) ]
+ *                     |
+ *                     |
+ *                     v
+ *    [ L2 PAGE DIRECTORY (512 * sizeof(pde_t)) ]
+ *                     |
+ *                     |
+ *                     v
+ *    [ L3 PAGE DIRECTORY (512 * sizeof(pde_t)) ]
+ *                     |
+ *                     |
+ *                     v
+ *      [ L4 PAGE TABLE (32 * sizeof(pte_t)) ]
+ *                     |
+ *                     |
+ *                     v
+ *            [ PAGE TABLE ENTRY ]
+ */
+
+#define XEN_PT_ENTRIES_LOG2_LVL_1 13 /* 2**13 entries, maps 2**13 * 512GB = 4PB */
+#define XEN_PT_ENTRIES_LOG2_LVL_2 9  /* 2**9  entries, maps 2**9  * 1GB = 512GB */
+#define XEN_PT_ENTRIES_LOG2_LVL_3 9  /* 2**9  entries, maps 2**9  * 2MB = 1GB */
+#define XEN_PT_ENTRIES_LOG2_LVL_4 5  /* 2**5  entries, maps 2**5  * 64K = 2MB */
+
+#define XEN_PT_SHIFT_LVL_1    (XEN_PT_SHIFT_LVL_2 + XEN_PT_ENTRIES_LOG2_LVL_2)
+#define XEN_PT_SHIFT_LVL_2    (XEN_PT_SHIFT_LVL_3 + XEN_PT_ENTRIES_LOG2_LVL_3)
+#define XEN_PT_SHIFT_LVL_3    (XEN_PT_SHIFT_LVL_4 + XEN_PT_ENTRIES_LOG2_LVL_4)
+#define XEN_PT_SHIFT_LVL_4    PAGE_SHIFT
+
+#define XEN_PT_ENTRIES_LOG2_LVL(lvl) (XEN_PT_ENTRIES_LOG2_LVL_##lvl)
+#define XEN_PT_SHIFT_LVL(lvl)        (XEN_PT_SHIFT_LVL_##lvl)
+#define XEN_PT_ENTRIES_LVL(lvl)      (1UL << XEN_PT_ENTRIES_LOG2_LVL(lvl))
+#define XEN_PT_SIZE_LVL(lvl)         (sizeof(uint64_t) * XEN_PT_ENTRIES_LVL(lvl))
+#define XEN_PT_MASK_LVL(lvl)         (XEN_PT_ENTRIES_LVL(lvl) - 1)
+#define XEN_PT_INDEX_LVL(lvl, va)    (((va) >> XEN_PT_SHIFT_LVL(lvl)) & XEN_PT_MASK_LVL(lvl))
+
+/*
+ * Calculate the index of the provided virtual address in the provided
+ * page table struct
+ */
+#define pt_index(pt, va) _Generic((pt), \
+    struct lvl1_pd * : XEN_PT_INDEX_LVL(1, (va)), \
+    struct lvl2_pd * : XEN_PT_INDEX_LVL(2, (va)), \
+    struct lvl3_pd * : XEN_PT_INDEX_LVL(3, (va)), \
+    struct lvl4_pt * : XEN_PT_INDEX_LVL(4, (va)))
+
+#define pt_entry(pt, va) (&((pt)->entries[pt_index((pt), (va))]))
+
+typedef struct
+{
+    __be64 pde;
+} pde_t;
+
+typedef struct
+{
+    __be64 pte;
+} pte_t;
+
+struct lvl1_pd
+{
+    pde_t entries[XEN_PT_ENTRIES_LVL(1)];
+} __aligned(XEN_PT_SIZE_LVL(1));
+
+struct lvl2_pd
+{
+    pde_t entries[XEN_PT_ENTRIES_LVL(2)];
+} __aligned(XEN_PT_SIZE_LVL(2));
+
+struct lvl3_pd
+{
+    pde_t entries[XEN_PT_ENTRIES_LVL(3)];
+} __aligned(XEN_PT_SIZE_LVL(3));
+
+struct lvl4_pt
+{
+    pte_t entries[XEN_PT_ENTRIES_LVL(4)];
+} __aligned(XEN_PT_SIZE_LVL(4));
+
+static inline pte_t paddr_to_pte(paddr_t paddr, unsigned long flags)
+{
+    paddr_t paddr_aligned = paddr & PTE_RPN_MASK;
+
+    return (pte_t){ .pte = cpu_to_be64(paddr_aligned | flags | PTE_LEAF) };
+}
+
+static inline pde_t paddr_to_pde(paddr_t paddr, unsigned long flags, unsigned long nls)
+{
+    paddr_t paddr_aligned = paddr & PDE_NLB_MASK;
+
+    return (pde_t){ .pde = cpu_to_be64(paddr_aligned | flags | nls) };
+}
+
+static inline paddr_t pte_to_paddr(pte_t pte)
+{
+    return be64_to_cpu(pte.pte) & PTE_RPN_MASK;
+}
+
+static inline paddr_t pde_to_paddr(pde_t pde)
+{
+    return be64_to_cpu(pde.pde) & PDE_NLB_MASK;
+}
+
+static inline bool pte_is_valid(pte_t pte)
+{
+    return pte.pte & be64_to_cpu(PTE_VALID);
+}
+
+static inline bool pde_is_valid(pde_t pde)
+{
+    return pde.pde & be64_to_cpu(PDE_VALID);
+}
+
+/*
+ * ISA 3.0 partition and process table entry format
+ */
+struct patb_entry {
+       __be64 patb0;
+       __be64 patb1;
+};
+#define PATB0_HR PPC_BIT(0) /* host uses radix */
+#define PATB1_GR PPC_BIT(0) /* guest uses radix; must match HR */
+
+struct prtb_entry {
+       __be64 prtb0;
+       __be64 reserved;
+};
+
+/*
+ * We support 52 bits, hence:
+ * bits 52 - 31 = 21, 0b10101
+ * RTS encoding details
+ * bits 0 - 2 of rts -> bits 5 - 7 of unsigned long
+ * bits 3 - 4 of rts -> bits 61 - 62 of unsigned long
+ */
+#define RTS_FIELD ((0x2UL << 61) | (0x5UL << 5))
+
+void tlbie_all(void);
+
+#endif /* _ASM_PPC_PAGE_H */
diff --git a/xen/arch/ppc/include/asm/processor.h b/xen/arch/ppc/include/asm/processor.h
index 2300640787..6b70569eb8 100644
--- a/xen/arch/ppc/include/asm/processor.h
+++ b/xen/arch/ppc/include/asm/processor.h
@@ -133,6 +133,40 @@ struct cpu_user_regs
     uint32_t entry_vector;
 };

+static __inline__ void sync(void)
+{
+    asm volatile ( "sync" );
+}
+
+static __inline__ void isync(void)
+{
+    asm volatile ( "isync" );
+}
+
+static inline unsigned long mfmsr(void)
+{
+    unsigned long msr;
+    asm volatile ( "mfmsr %0" : "=r" (msr) );
+    return msr;
+}
+
+static inline void mtmsrd(unsigned long msr)
+{
+    asm volatile ( "mtmsrd %0" : : "r" (msr) );
+}
+
+static inline void mtspr(uint16_t spr, unsigned long val)
+{
+    asm volatile ( "mtspr %0, %1" : : "K" (spr), "r" (val) );
+}
+
+static inline unsigned long mfspr(uint16_t spr)
+{
+    unsigned long val;
+    asm volatile ( "mfspr %0, %1" : "=r" (val) : "K" (spr) );
+    return val;
+}
+
 /*
  * panic() isn't available at the moment so an infinite loop will be
  * used temporarily.
diff --git a/xen/arch/ppc/include/asm/regs.h b/xen/arch/ppc/include/asm/regs.h
new file mode 100644
index 0000000000..cbf1a9c8e4
--- /dev/null
+++ b/xen/arch/ppc/include/asm/regs.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright IBM Corp. 2005, 2007
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ *          Christian Ehrhardt <ehrhardt@xxxxxxxxxxxxxxxxxx>
+ */
+
+#ifndef _ASM_REG_DEFS_H_
+#define _ASM_REG_DEFS_H_
+
+/* Special Purpose Registers */
+#define SPRN_VRSAVE 256
+#define SPRN_DSISR  18
+#define SPRN_DAR    19
+#define SPRN_DEC    22
+#define SPRN_SRR0   26
+#define SPRN_SRR1   27
+#define SPRN_TBRL   268
+#define SPRN_TBRU   269
+#define SPRN_SPRG0  272
+#define SPRN_SPRG1  273
+#define SPRN_SPRG2  274
+#define SPRN_SPRG3  275
+#define SPRN_TBWL   284
+#define SPRN_TBWU   285
+
+#define SPRN_HSPRG0 304
+#define SPRN_HSPRG1 305
+#define SPRN_HDEC   310
+#define SPRN_HIOR   311
+#define SPRN_RMOR   312
+#define SPRN_HRMOR  313
+#define SPRN_HSRR0  314
+#define SPRN_HSRR1  315
+#define SPRN_LPIDR  319
+
+#define SPRN_PTCR      0x1D0   /* Partition table control Register */
+#define SPRN_PID       0x030   /* Process ID */
+
+/* Performance monitor spr encodings */
+#define SPRN_MMCRA  786
+#define   MMCRA_SAMPHV    _AC(0x10000000, UL) /* state of MSR HV when SIAR set 
*/
+#define   MMCRA_SAMPPR    _AC(0x08000000, UL) /* state of MSR PR when SIAR set 
*/
+#define   MMCRA_SAMPLE_ENABLE _AC(0x00000001, UL) /* enable sampling */
+#define NUM_PMCS 8
+#define SPRN_PMC1   787
+#define SPRN_PMC2   788
+#define SPRN_PMC3   789
+#define SPRN_PMC4   790
+#define SPRN_PMC5   791
+#define SPRN_PMC6   792
+#define SPRN_PMC7   793
+#define SPRN_PMC8   794
+#define SPRN_MMCR0  795
+#define   MMCR0_FC      _AC(0x80000000, UL) /* freeze counters */
+#define   MMCR0_FCS     _AC(0x40000000, UL) /* freeze in supervisor state */
+#define   MMCR0_FCP     _AC(0x20000000, UL) /* freeze in problem state */
+#define   MMCR0_FCM1    _AC(0x10000000, UL) /* freeze counters while MSR mark 
= 1 */
+#define   MMCR0_FCM0    _AC(0x08000000, UL) /* freeze counters while MSR mark 
= 0 */
+#define   MMCR0_PMAE    _AC(0x04000000, UL) /* performance monitor alert 
enabled */
+#define   MMCR0_PMAO    _AC(0x00000080, UL) /* performance monitor alert 
occurred */
+#define   MMCR0_FCH     _AC(0x00000001, UL) /* freeze conditions in hypervisor 
*/
+#define SPRN_SIAR   796
+#define SPRN_SDAR   797
+#define SPRN_MMCR1  798
+
+/* As defined for PU G4 */
+#define SPRN_HID0   1008
+#define SPRN_HID1   1009
+#define SPRN_HID4   1012
+
+#define SPRN_DABR   1013
+#define SPRN_HID5   1014
+#define SPRN_DABRX  1015
+#define SPRN_HID6   1017
+#define SPRN_HID7   1018
+#define SPRN_HID8   1019
+#define SPRN_PIR    1023
+
+#define SPRN_LPCR      0x13E   /* LPAR Control Register */
+#define   LPCR_VPM0            _AC(0x8000000000000000, UL)
+#define   LPCR_VPM1            _AC(0x4000000000000000, UL)
+#define   LPCR_ISL             _AC(0x2000000000000000, UL)
+#define   LPCR_VC_SH           61
+#define   LPCR_DPFD_SH         52
+#define   LPCR_DPFD            (_AC(7, UL) << LPCR_DPFD_SH)
+#define   LPCR_VRMASD_SH       47
+#define   LPCR_VRMASD          (_AC(0x1f, UL) << LPCR_VRMASD_SH)
+#define   LPCR_VRMA_L          _AC(0x0008000000000000, UL)
+#define   LPCR_VRMA_LP0                _AC(0x0001000000000000, UL)
+#define   LPCR_VRMA_LP1                _AC(0x0000800000000000, UL)
+#define   LPCR_RMLS            0x1C000000      /* Implementation dependent RMO 
limit sel */
+#define   LPCR_RMLS_SH         26
+#define   LPCR_HAIL            _AC(0x0000000004000000, UL)   /* HV AIL 
(ISAv3.1) */
+#define   LPCR_ILE             _AC(0x0000000002000000, UL)   /* !HV irqs set 
MSR:LE */
+#define   LPCR_AIL             _AC(0x0000000001800000, UL)     /* Alternate 
interrupt location */
+#define   LPCR_AIL_0           _AC(0x0000000000000000, UL)     /* MMU off 
exception offset 0x0 */
+#define   LPCR_AIL_3           _AC(0x0000000001800000, UL)   /* MMU on 
exception offset 0xc00...4xxx */
+#define   LPCR_ONL             _AC(0x0000000000040000, UL)     /* online - 
PURR/SPURR count */
+#define   LPCR_LD              _AC(0x0000000000020000, UL)     /* large decrementer */
+#define   LPCR_PECE            _AC(0x000000000001f000, UL)     /* powersave 
exit cause enable */
+#define     LPCR_PECEDP        _AC(0x0000000000010000, UL)     /* directed 
priv dbells cause exit */
+#define     LPCR_PECEDH        _AC(0x0000000000008000, UL)     /* directed hyp 
dbells cause exit */
+#define     LPCR_PECE0         _AC(0x0000000000004000, UL)     /* ext. 
exceptions can cause exit */
+#define     LPCR_PECE1         _AC(0x0000000000002000, UL)     /* decrementer 
can cause exit */
+#define     LPCR_PECE2         _AC(0x0000000000001000, UL)     /* machine 
check etc can cause exit */
+#define     LPCR_PECE_HVEE     _AC(0x0000400000000000, UL)     /* P9 Wakeup on 
HV interrupts */
+#define   LPCR_MER             _AC(0x0000000000000800, UL)     /* Mediated 
External Exception */
+#define   LPCR_MER_SH          11
+#define          LPCR_GTSE             _AC(0x0000000000000400, UL)     /* 
Guest Translation Shootdown Enable */
+#define   LPCR_TC              _AC(0x0000000000000200, UL)     /* Translation 
control */
+#define   LPCR_HEIC            _AC(0x0000000000000010, UL)   /* Hypervisor 
External Interrupt Control */
+#define   LPCR_LPES            0x0000000c
+#define   LPCR_LPES0           _AC(0x0000000000000008, UL)      /* LPAR Env 
selector 0 */
+#define   LPCR_LPES1           _AC(0x0000000000000004, UL)      /* LPAR Env 
selector 1 */
+#define   LPCR_LPES_SH         2
+#define   LPCR_RMI             _AC(0x0000000000000002, UL)      /* real mode 
is cache inhibit */
+#define   LPCR_HVICE           _AC(0x0000000000000002, UL)      /* P9: HV 
interrupt enable */
+#define   LPCR_HDICE           _AC(0x0000000000000001, UL)      /* Hyp Decr 
enable (HV,PR,EE) */
+#define   LPCR_UPRT            _AC(0x0000000000400000, UL)      /* Use Process 
Table (ISA 3) */
+#define   LPCR_HR              _AC(0x0000000000100000, UL)
+
+#endif /* _ASM_REG_DEFS_H_ */
diff --git a/xen/arch/ppc/include/asm/types.h b/xen/arch/ppc/include/asm/types.h
index b62ae0e7ae..ffaf378a4d 100644
--- a/xen/arch/ppc/include/asm/types.h
+++ b/xen/arch/ppc/include/asm/types.h
@@ -4,6 +4,7 @@
 #define _ASM_PPC_TYPES_H

 typedef unsigned long paddr_t;
+typedef unsigned long vaddr_t;
 #define INVALID_PADDR (~0UL)
 #define PRIpaddr "016lx"

diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c
new file mode 100644
index 0000000000..3feeb90ebc
--- /dev/null
+++ b/xen/arch/ppc/mm-radix.c
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+#include <asm/early_printk.h>
+#include <asm/mm.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/regs.h>
+#include <asm/msr.h>
+
+void enable_mmu(void);
+
+#define INITIAL_LVL1_PD_COUNT      1
+#define INITIAL_LVL2_LVL3_PD_COUNT 2
+#define INITIAL_LVL4_PT_COUNT      256
+
+static size_t __initdata initial_lvl1_pd_pool_used;
+static struct lvl1_pd initial_lvl1_pd_pool[INITIAL_LVL1_PD_COUNT];
+
+static size_t __initdata initial_lvl2_lvl3_pd_pool_used;
+static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT];
+
+static size_t __initdata initial_lvl4_pt_pool_used;
+static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT];
+
+/* Only reserve minimum Partition and Process tables  */
+#define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */
+#define PATB_SIZE      (1UL << PATB_SIZE_LOG2)
+#define PRTB_SIZE_LOG2 12
+#define PRTB_SIZE      (1UL << PRTB_SIZE_LOG2)
+
+static struct patb_entry
+    __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)];
+
+static struct prtb_entry
+    __aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)];
+
+static __init struct lvl1_pd *lvl1_pd_pool_alloc(void)
+{
+    if ( initial_lvl1_pd_pool_used >= INITIAL_LVL1_PD_COUNT )
+    {
+        early_printk("Ran out of space for LVL1 PD!\n");
+        die();
+    }
+
+    return &initial_lvl1_pd_pool[initial_lvl1_pd_pool_used++];
+}
+
+static __init struct lvl2_pd *lvl2_pd_pool_alloc(void)
+{
+    if ( initial_lvl2_lvl3_pd_pool_used >= INITIAL_LVL2_LVL3_PD_COUNT )
+    {
+        early_printk("Ran out of space for LVL2/3 PD!\n");
+        die();
+    }
+
+    return &initial_lvl2_lvl3_pd_pool[initial_lvl2_lvl3_pd_pool_used++];
+}
+
+static __init struct lvl3_pd *lvl3_pd_pool_alloc(void)
+{
+    BUILD_BUG_ON(sizeof(struct lvl3_pd) != sizeof(struct lvl2_pd));
+
+    return (struct lvl3_pd *) lvl2_pd_pool_alloc();
+}
+
+static __init struct lvl4_pt *lvl4_pt_pool_alloc(void)
+{
+    if ( initial_lvl4_pt_pool_used >= INITIAL_LVL4_PT_COUNT )
+    {
+        early_printk("Ran out of space for LVL4 PT!\n");
+        die();
+    }
+
+    return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++];
+}
+
+#ifndef NDEBUG
+/* TODO: Remove once we get common/ building */
+static char *__init itoa64_hex(uint64_t val, char *out_buf, size_t buf_len)
+{
+    uint64_t cur;
+    size_t i = buf_len - 1;
+
+    /* Null terminate buffer */
+    out_buf[i] = '\0';
+
+    /* Add digits in reverse */
+    cur = val;
+    while ( cur && i > 0 )
+    {
+        out_buf[--i] = "0123456789ABCDEF"[cur % 16];
+        cur /= 16;
+    }
+
+    /* Pad to 16 digits */
+    while ( i > 0 )
+        out_buf[--i] = '0';
+
+    return out_buf + i;
+}
+#endif
+
+static void __init radix_dprint(uint64_t addr, const char *msg)
+{
+#ifndef NDEBUG
+    char buf[sizeof("DEADBEEFCAFEBABA")];
+    char *addr_s = itoa64_hex(addr, buf, sizeof(buf));
+
+    early_printk("(0x");
+    early_printk(addr_s);
+    early_printk(") ");
+    early_printk(msg);
+#endif
+}
+
+static void __init setup_initial_mapping(struct lvl1_pd *lvl1,
+                                         vaddr_t map_start,
+                                         vaddr_t map_end,
+                                         paddr_t phys_base)
+{
+    uint64_t page_addr;
+
+    if ( map_start & ~PAGE_MASK )
+    {
+        early_printk("Xen _start be aligned to 64k (PAGE_SIZE) boundary\n");
+        die();
+    }
+
+    if ( phys_base & ~PAGE_MASK )
+    {
+        early_printk("Xen should be loaded at 64k (PAGE_SIZE) boundary\n");
+        die();
+    }
+
+    for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE )
+    {
+        struct lvl2_pd *lvl2;
+        struct lvl3_pd *lvl3;
+        struct lvl4_pt *lvl4;
+        pde_t *pde;
+        pte_t *pte;
+
+        /* Allocate LVL 2 PD if necessary */
+        pde = pt_entry(lvl1, page_addr);
+        if ( !pde_is_valid(*pde) )
+        {
+            lvl2 = lvl2_pd_pool_alloc();
+            *pde = paddr_to_pde(__pa(lvl2), PDE_VALID,
+                                XEN_PT_ENTRIES_LOG2_LVL_2);
+        }
+        else
+            lvl2 = __va(pde_to_paddr(*pde));
+
+        /* Allocate LVL 3 PD if necessary */
+        pde = pt_entry(lvl2, page_addr);
+        if ( !pde_is_valid(*pde) )
+        {
+            lvl3 = lvl3_pd_pool_alloc();
+            *pde = paddr_to_pde(__pa(lvl3), PDE_VALID,
+                                XEN_PT_ENTRIES_LOG2_LVL_3);
+        }
+        else
+            lvl3 = __va(pde_to_paddr(*pde));
+
+        /* Allocate LVL 4 PT if necessary */
+        pde = pt_entry(lvl3, page_addr);
+        if ( !pde_is_valid(*pde) )
+        {
+            lvl4 = lvl4_pt_pool_alloc();
+            *pde = paddr_to_pde(__pa(lvl4), PDE_VALID,
+                                XEN_PT_ENTRIES_LOG2_LVL_4);
+        }
+        else
+            lvl4 = __va(pde_to_paddr(*pde));
+
+        /* Finally, create PTE in LVL 4 PT */
+        pte = pt_entry(lvl4, page_addr);
+        if ( !pte_is_valid(*pte) )
+        {
+            unsigned long paddr = (page_addr - map_start) + phys_base;
+            unsigned long flags;
+
+            radix_dprint(paddr, "being mapped to ");
+            radix_dprint(page_addr, "!\n");
+            if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) )
+            {
+                radix_dprint(page_addr, "being marked as TEXT (RX)\n");
+                flags = PTE_XEN_RX;
+            }
+            else if ( is_kernel_rodata(page_addr) )
+            {
+                radix_dprint(page_addr, "being marked as RODATA (RO)\n");
+                flags = PTE_XEN_RO;
+            }
+            else
+            {
+                radix_dprint(page_addr, "being marked as DEFAULT (RW)\n");
+                flags = PTE_XEN_RW;
+            }
+
+            *pte = paddr_to_pte(paddr, flags);
+            radix_dprint(paddr_to_pte(paddr, flags).pte,
+                             "is result of PTE map!\n");
+        }
+        else
+        {
+            early_printk("BUG: Tried to create PTE for already-mapped page!");
+            die();
+        }
+    }
+}
+
+static void __init setup_partition_table(struct lvl1_pd *root)
+{
+    unsigned long ptcr;
+
+    /* Configure entry for LPID 0 to enable Radix and point to root PD */
+    uint64_t patb0 = RTS_FIELD | __pa(root) | XEN_PT_ENTRIES_LOG2_LVL_1 |
+                     PATB0_HR;
+    uint64_t patb1 = __pa(initial_prtb) | (PRTB_SIZE_LOG2 - 12) | PATB1_GR;
+
+    initial_patb[0].patb0 = cpu_to_be64(patb0);
+    initial_patb[0].patb1 = cpu_to_be64(patb1);
+
+    ptcr = __pa(initial_patb) | (PATB_SIZE_LOG2 - 12);
+    mtspr(SPRN_PTCR, ptcr);
+}
+
+static void __init setup_process_table(struct lvl1_pd *root)
+{
+    /* Configure entry for PID 0 to point to root PD */
+    uint64_t prtb0 = RTS_FIELD | __pa(root) | XEN_PT_ENTRIES_LOG2_LVL_1;
+
+    initial_prtb[0].prtb0 = cpu_to_be64(prtb0);
+}
+
+void __init setup_initial_pagetables(void)
+{
+    struct lvl1_pd *root = lvl1_pd_pool_alloc();
+    unsigned long lpcr;
+
+    setup_initial_mapping(root, (vaddr_t)_start, (vaddr_t)_end, __pa(_start));
+
+    /* Enable Radix mode in LPCR */
+    lpcr = mfspr(SPRN_LPCR);
+    mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+    early_printk("Enabled radix in LPCR\n");
+
+    /* Set up initial process table */
+    setup_process_table(root);
+
+    /* Set up initial partition table */
+    setup_partition_table(root);
+
+    /* Flush TLB */
+    tlbie_all();
+    early_printk("Flushed TLB\n");
+
+    /* Turn on the MMU */
+    enable_mmu();
+}
diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
index a149339ad0..e600ddb05f 100644
--- a/xen/arch/ppc/ppc64/head.S
+++ b/xen/arch/ppc/ppc64/head.S
@@ -2,6 +2,7 @@

 #include <asm/asm-defns.h>
 #include <asm/asm-offsets.h>
+#include <asm/msr.h>

     .section .text.header, "ax", %progbits

@@ -66,3 +67,17 @@ ENTRY(start)

     .size start, . - start
     .type start, %function
+
+ENTRY(enable_mmu)
+    mflr %r3
+    mfmsr %r4
+
+    /* enable instruction and data relocation (MMU) */
+    ori %r4, %r4, (MSR_IR | MSR_DR)
+
+    mtsrr0 %r3 /* return to caller after MMU enable */
+    mtsrr1 %r4
+    rfid
+
+    .size enable_mmu, . - enable_mmu
+    .type enable_mmu, %function
diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
index 8ab7bc19ee..0106e9c9da 100644
--- a/xen/arch/ppc/setup.c
+++ b/xen/arch/ppc/setup.c
@@ -2,6 +2,7 @@
 #include <xen/init.h>
 #include <asm/boot.h>
 #include <asm/early_printk.h>
+#include <asm/mm.h>
 #include <asm/processor.h>

 /* Xen stack for bringing up the first CPU. */
@@ -27,6 +28,8 @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4,
         boot_opal_init((void *)r3);
     }

+    setup_initial_pagetables();
+
     early_printk("Hello, ppc64le!\n");

     for ( ; ; )
diff --git a/xen/arch/ppc/tlb-radix.c b/xen/arch/ppc/tlb-radix.c
new file mode 100644
index 0000000000..3dde102c62
--- /dev/null
+++ b/xen/arch/ppc/tlb-radix.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Based on arch/powerpc/mm/book3s64/radix_tlb.c from Linux with the following
+ * copyright notice:
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+#include <xen/stringify.h>
+
+#include <asm/bitops.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+/* TLB flush actions. Used as argument to radix__tlbiel_all() */
+enum
+{
+    TLB_INVAL_SCOPE_LPID,   /* invalidate TLBs for current LPID */
+    TLB_INVAL_SCOPE_GLOBAL, /* invalidate all TLBs */
+};
+
+#define POWER9_TLB_SETS_RADIX 128 /* # sets in POWER9 TLB Radix mode */
+
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+static void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+                                    unsigned int pid, unsigned int ric,
+                                    unsigned int prs)
+{
+    unsigned long rb;
+    unsigned long rs;
+
+    rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+    rs = ((unsigned long) pid << PPC_BITLSHIFT(31));
+
+    asm volatile ( "tlbiel %0, %1, %2, %3, 1"
+                   :: "r" (rb), "r" (rs), "i" (ric), "i" (prs)
+                   : "memory" );
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+    unsigned int set;
+
+    asm volatile ( "ptesync" : : : "memory" );
+
+    /*
+     * Flush the first set of the TLB, and the entire Page Walk Cache
+     * and partition table entries. Then flush the remaining sets of the
+     * TLB.
+     */
+
+    if ( mfmsr() & MSR_HV )
+    {
+        /* MSR[HV] should flush partition scope translations first. */
+        tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+
+        for ( set = 1; set < num_sets; set++ )
+            tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+    }
+
+    /* Flush process scoped entries. */
+    tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+
+    for ( set = 1; set < num_sets; set++ )
+        tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+
+    asm volatile ( "ptesync" : : : "memory" );
+}
+
+void radix__tlbiel_all(unsigned int action)
+{
+    unsigned int is;
+
+    switch ( action )
+    {
+    case TLB_INVAL_SCOPE_GLOBAL:
+        is = 3;
+        break;
+    case TLB_INVAL_SCOPE_LPID:
+        is = 2;
+        break;
+    default:
+        die();
+    }
+
+    tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+
+    asm volatile ( "slbia 7; isync" : : : "memory" );
+}
+
+void tlbie_all(void)
+{
+    radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
--
2.30.2




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.