[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/head: Refactor 32-bit pgtable setup



The new Xen PVH entry point requires page tables to be setup by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S so that mk_early_pgtbl_32 can be
invoked from both the new Xen entry point and the existing startup_32
code.

Convert resulting common code to C.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
This is replacement for https://lkml.org/lkml/2016/10/14/434, with
assembly code re-written in C as requested by Ingo.


 arch/x86/include/asm/pgtable_32.h |  32 ++++++++++
 arch/x86/kernel/head32.c          |  62 +++++++++++++++++++
 arch/x86/kernel/head_32.S         | 122 +++-----------------------------------
 3 files changed, 101 insertions(+), 115 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_32.h 
b/arch/x86/include/asm/pgtable_32.h
index b6c0b40..fbc7336 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ static inline void check_pgt_cache(void) { }
 #define kern_addr_valid(kaddr) (0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55b..e5fb436 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
        start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+       pte_t pte, *ptep;
+       int i;
+       unsigned long *ptr;
+       /* Enough space to fit pagetables for the low memory linear map */
+       const unsigned long limit = __pa(_end) +
+               (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+       pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)    { (pl2).pmd = (val); }
+#else
+       pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+       ptep = (pte_t *)__pa(__brk_base);
+       pte.pte = PTE_IDENT_ATTR;
+
+       while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+               SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+               *pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+               /* Kernel PDE entry */
+               *(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+               for (i = 0; i < PTRS_PER_PTE; i++) {
+                       *ptep = pte;
+                       pte.pte += PAGE_SIZE;
+                       ptep++;
+               }
+
+               pl2p++;
+       }
+
+       ptr = (unsigned long *)__pa(&max_pfn_mapped);
+       /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+       *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+       ptr = (unsigned long *)__pa(&_brk_end);
+       *ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
+
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2dabea4..dc6b030 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -42,41 +43,6 @@
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-
-/*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
  * to map all of lowmem.
@@ -158,90 +124,15 @@ ENTRY(startup_32)
        call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-       /*
-        * In PAE mode initial_page_table is statically defined to contain
-        * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-        * entries). The identity mapping is handled by pointing two PGD entries
-        * to the first kernel PMD.
-        *
-        * Note the upper half of each PMD or PTE are always zero at this stage.
-        */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-       xorl %ebx,%ebx                          /* %ebx is kept at zero */
-
-       movl $pa(__brk_base), %edi
-       movl $pa(initial_pg_pmd), %edx
-       movl $PTE_IDENT_ATTR, %eax
-10:
-       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PMD entry */
-       movl %ecx,(%edx)                        /* Store PMD entry */
-                                               /* Upper half already zero */
-       addl $8,%edx
-       movl $512,%ecx
-11:
-       stosl
-       xchgl %eax,%ebx
-       stosl
-       xchgl %eax,%ebx
-       addl $0x1000,%eax
-       loop 11b
-
-       /*
-        * End condition: we must map up to the end + MAPPING_BEYOND_END.
-        */
-       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-       cmpl %ebp,%eax
-       jb 10b
-1:
-       addl $__PAGE_OFFSET, %edi
-       movl %edi, pa(_brk_end)
-       shrl $12, %eax
-       movl %eax, pa(max_pfn_mapped)
+       /* Create early pagetables. */
+       call  mk_early_pgtbl_32
 
        /* Do early initialization of the fixmap area */
        movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef  CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
        movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else  /* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-       movl $pa(__brk_base), %edi
-       movl $pa(initial_page_table), %edx
-       movl $PTE_IDENT_ATTR, %eax
-10:
-       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PDE entry */
-       movl %ecx,(%edx)                        /* Store identity PDE entry */
-       movl %ecx,page_pde_offset(%edx)         /* Store kernel PDE entry */
-       addl $4,%edx
-       movl $1024, %ecx
-11:
-       stosl
-       addl $0x1000,%eax
-       loop 11b
-       /*
-        * End condition: we must map up to the end + MAPPING_BEYOND_END.
-        */
-       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-       cmpl %ebp,%eax
-       jb 10b
-       addl $__PAGE_OFFSET, %edi
-       movl %edi, pa(_brk_end)
-       shrl $12, %eax
-       movl %eax, pa(max_pfn_mapped)
-
-       /* Do early initialization of the fixmap area */
-       movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
        movl %eax,pa(initial_page_table+0xffc)
 #endif
 
@@ -662,6 +553,7 @@ ENTRY(setup_once_ref)
 __PAGE_ALIGNED_BSS
        .align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
        .fill 1024*KPMDS,4,0
 #else
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.