[Xen-changelog] PAE support
ChangeSet 1.1616, 2005/05/31 16:39:28+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

Subject: PAE support

This patch adds initial support for PAE paging to Xen. It does two things:

 * boot Xen itself with PAE paging enabled.
 * add PAE support to the dom0 domain builder.

Some notes on the design and the changes:

 * There are two new config options: CONFIG_X86_PAE (boolean; the same
   name Linux uses, to simplify things) and CONFIG_PAGING_LEVELS (int;
   possible values are 2, 3 and 4). I've used #if CONFIG_PAGING_LEVELS
   for stuff which simply depends on the number of paging levels in the
   code common to x86-32/64, and CONFIG_X86_PAE for special PAE quirks
   or i386-only stuff. I've tried to avoid ifdefs where possible,
   though; often I rearranged code to make it work in both the PAE and
   non-PAE cases instead.

 * idle_pg_table: the 3rd level is statically initialized; the 2nd
   level is contiguous in physical and virtual memory, so it can be
   addressed linearly (the dom0 builder uses the same trick to simplify
   things a bit, btw). There are two new symbols, idle_pg_table_l3 and
   idle_pg_table_l2, for the two tables. idle_pg_table is aliased to
   the top-level page table, i.e. idle_pg_table_l3 in PAE mode and
   idle_pg_table_l2 in non-PAE mode. The idle l3 table is never touched
   after boot; the l2 table is accessed via idle_pg_table_l2 and
   addressed linearly in both PAE and non-PAE mode.

 * I've added an "intpte_t" type and a PRIpte define, modeled after the
   C99 inttypes.h header, for page table entries.

Signed-off-by: Gerd Knorr <kraxel@xxxxxxxxxxx>
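To make the linear-addressing trick concrete, here is a minimal C sketch
(illustrative only, not the actual Xen code: the shift value and the
l2_linear_offset() helper mirror the patch, while the array declaration
and the l2_entry_for() helper are simplified assumptions). Because the
four PAE L2 pages sit back to back in both physical and virtual memory,
they can be indexed as one flat 2048-entry array, with no L3 lookup:

    #include <stdint.h>

    typedef uint64_t intpte_t;              /* PAE entries are 64 bits wide */
    typedef struct { intpte_t l2; } l2_pgentry_t;

    #define L2_PAGETABLE_SHIFT 21           /* each PAE L2 entry maps 2MB */
    #define l2_linear_offset(va) ((va) >> L2_PAGETABLE_SHIFT)

    /* Four physically and virtually contiguous pages: 4 x 512 = 2048
     * entries, covering the whole 4GB virtual address space. */
    extern l2_pgentry_t idle_pg_table_l2[2048];

    /* Find the L2 entry covering a virtual address without consulting
     * the (boot-time-only) L3 table. */
    static l2_pgentry_t *l2_entry_for(unsigned long va)
    {
        return &idle_pg_table_l2[l2_linear_offset(va)];
    }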
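Likewise, the intpte_t/PRIpte pair follows the same convention as
uint64_t/PRIx64 from C99 <inttypes.h>: the format macro tracks the
typedef, so one format string prints page table entries correctly
whether they are 32 or 64 bits wide. A minimal sketch of the idea (the
exact definitions in the page-2level.h/page-3level.h headers differ in
detail):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #if CONFIG_PAGING_LEVELS == 2
    typedef uint32_t intpte_t;              /* non-PAE: 32-bit entries */
    #define PRIpte PRIx32
    #else
    typedef uint64_t intpte_t;              /* PAE / x86-64: 64-bit entries */
    #define PRIpte PRIx64
    #endif

    int main(void)
    {
        intpte_t pte = 0xe3;                /* PRESENT+RW+A+D+superpage */
        /* The same format string works for both entry widths. */
        printf("pte = 0x%" PRIpte "\n", pte);
        return 0;
    }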
 arch/x86/audit.c                     |    4 
 arch/x86/boot/x86_32.S               |   43 ++++
 arch/x86/dom0_ops.c                  |    2 
 arch/x86/domain.c                    |    8 
 arch/x86/domain_build.c              |   92 +++++++--
 arch/x86/idle0_task.c                |    3 
 arch/x86/mm.c                        |  348 ++++++++++++++++++++++++-----------
 arch/x86/setup.c                     |    2 
 arch/x86/shadow.c                    |   51 +++--
 arch/x86/traps.c                     |    2 
 arch/x86/vmx.c                       |   12 -
 arch/x86/vmx_io.c                    |    6 
 arch/x86/vmx_vmcs.c                  |    4 
 arch/x86/x86_32/domain_page.c        |    2 
 arch/x86/x86_32/mm.c                 |  140 +++++++-----
 arch/x86/x86_32/traps.c              |   17 +
 arch/x86/x86_64/mm.c                 |    2 
 include/asm-x86/config.h             |   33 ++-
 include/asm-x86/domain.h             |    6 
 include/asm-x86/mm.h                 |   14 -
 include/asm-x86/page.h               |  151 +++++++++++--
 include/asm-x86/shadow.h             |    6 
 include/asm-x86/smp.h                |    7 
 include/asm-x86/types.h              |    6 
 include/asm-x86/x86_32/page-2level.h |   49 ++++
 include/asm-x86/x86_32/page-3level.h |   56 +++++
 include/asm-x86/x86_32/page.h        |  127 ------------
 include/asm-x86/x86_64/page.h        |  188 ++----------------
 include/public/arch-x86_32.h         |    6 
 29 files changed, 830 insertions(+), 557 deletions(-)


diff -Nru a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
@@ -408,9 +408,9 @@
 
         for_each_exec_domain(d, ed)
         {
-            if ( pagetable_val(ed->arch.guest_table) )
+            if ( pagetable_get_phys(ed->arch.guest_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
-            if ( pagetable_val(ed->arch.shadow_table) )
+            if ( pagetable_get_phys(ed->arch.shadow_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
             if ( ed->arch.monitor_shadow_ref )
                 adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
diff -Nru a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
@@ -101,6 +101,22 @@
         xor     %eax,%eax
         rep     stosb
 
+#ifdef CONFIG_X86_PAE
+        /* Initialize low and high mappings of all memory with 2MB pages */
+        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
+        mov     $0xe3,%eax                   /* PRESENT+RW+A+D+2MB */
+1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
+        jne     1b
+1:      stosl   /* low mappings cover as much physmem as possible */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        jne     1b
+#else
         /* Initialize low and high mappings of all memory with 4MB pages */
         mov     $idle_pg_table-__PAGE_OFFSET,%edi
         mov     $0xe3,%eax                   /* PRESENT+RW+A+D+4MB */
@@ -113,6 +129,7 @@
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
         cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
+#endif
 
         /* Initialise IDT with simple error defaults. */
         lea     ignore_int,%edx
@@ -204,10 +221,17 @@
         .quad 0x0000000000000000     /* unused */
         .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+        .quad 0x00cfba00000067ff
+        .quad 0x00cfb200000067ff
+        .quad 0x00cffa00000067ff
+        .quad 0x00cff200000067ff
+#else
         .quad 0x00cfba000000c3ff     /* 0xe019 ring 1 3.95GB code at 0x0 */
         .quad 0x00cfb2000000c3ff     /* 0xe021 ring 1 3.95GB data at 0x0 */
         .quad 0x00cffa000000c3ff     /* 0xe02b ring 3 3.95GB code at 0x0 */
         .quad 0x00cff2000000c3ff     /* 0xe033 ring 3 3.95GB data at 0x0 */
+#endif
         .quad 0x0000000000000000     /* unused */
         .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU */
 
@@ -215,10 +239,27 @@
 /* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
 /* CPU0 stack is aligned on an even page boundary!                        */
 ENTRY(cpu0_stack)
         .org 0x2000 + STACK_SIZE
+
+#ifdef CONFIG_X86_PAE
+
 ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
+        .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+ENTRY(idle_pg_table_l2)
+        .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
+
+#else /* CONFIG_X86_PAE */
+
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2)  # Initial page directory is 4kB
         .org 0x2000 + STACK_SIZE + PAGE_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
 ENTRY(stext)
 ENTRY(_stext)
diff -Nru a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
@@ -405,7 +405,7 @@
         c->flags |= VGCF_VMX_GUEST;
 #endif
 
-    c->pt_base = pagetable_val(ed->arch.guest_table);
+    c->pt_base = pagetable_get_phys(ed->arch.guest_table);
 
     c->vm_assist = ed->domain->vm_assist;
 }
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
+++ b/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
@@ -460,7 +460,7 @@
         // trust the VMX domain builder.  Xen should validate this
         // page table, and/or build the table itself, or ???
         //
-        if ( !pagetable_val(d->arch.phys_table) )
+        if ( !pagetable_get_phys(d->arch.phys_table) )
             d->arch.phys_table = ed->arch.guest_table;
 
         if ( (error = vmx_final_setup_guest(ed, c)) )
@@ -660,7 +660,7 @@
     struct exec_domain *ed = current;
 
     if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
-         unlikely(pagetable_val(ed->arch.guest_table_user) == 0) )
+         unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) )
         return -EFAULT;
 
     toggle_guest_mode(ed);
@@ -978,7 +978,7 @@
 
     /* Drop the in-use references to page-table bases. */
     for_each_exec_domain ( d, ed )
     {
-        if ( pagetable_val(ed->arch.guest_table) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
@@ -988,7 +988,7 @@
             ed->arch.guest_table = mk_pagetable(0);
         }
 
-        if ( pagetable_val(ed->arch.guest_table_user) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
diff -Nru a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
@@ -44,15 +44,15 @@
 #if defined(__i386__)
 /* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
 #elif defined(__x86_64__)
 /* Allow ring-3 access in long mode as guest cannot use ring 1. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-/* Don't change these: Linux expects just these bits to be set. */
-/* (And that includes the bogus _PAGE_DIRTY!) */
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,7 +91,11 @@
 #elif defined(__x86_64__)
     char *image_start  = __va(_image_start);
     char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
     l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
 #endif
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
@@ -143,7 +147,7 @@
         panic("Not enough RAM for DOM0 reservation.\n");
     alloc_start = page_to_phys(page);
     alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
-    
+
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
 
@@ -172,10 +176,15 @@
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
         if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
                L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
             break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+        /* 5 pages: 1x 3rd + 4x 2nd level */
+        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+               L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+            break;
 #elif defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
@@ -249,6 +258,24 @@
     }
 
     /* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+    memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+    for (i = 0; i < 4; i++) {
+        l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+        l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+            l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+    }
+    unsigned long v;
+    for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+         v += (1 << L2_PAGETABLE_SHIFT)) {
+        l2tab[v >> L2_PAGETABLE_SHIFT] =
+            l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START),
+                            __PAGE_HYPERVISOR);
+    }
+    ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
@@ -256,8 +283,9 @@
     l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
     ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
 
-    l2tab += l2_table_offset(dsi.v_start);
+    l2tab += l2_linear_offset(dsi.v_start);
     mfn = alloc_start >> PAGE_SHIFT;
     for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
     {
@@ -282,8 +310,8 @@
     }
 
     /* Pages that are part of page tables must be read only. */
-    l2tab = l2start + l2_table_offset(vpt_start);
-    l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
+    l2tab = l2start + l2_linear_offset(vpt_start);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog