[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 05/10] kexec: extend hypercall with improved load/unload ops
>>> On 24.06.13 at 19:42, David Vrabel <david.vrabel@xxxxxxxxxx> wrote: > +static void init_level2_page(l2_pgentry_t *l2, unsigned long addr) > +{ > + unsigned long end_addr; > + > + addr &= PAGE_MASK; > + end_addr = addr + L2_PAGETABLE_ENTRIES * (1ul << L2_PAGETABLE_SHIFT); For one, with the code below, this can't be right: "addr" is getting only page aligned, but the rest of the code assumes it is suitable as a super-page. Further, you're risking mapping non-memory here, as the caller doesn't pass the true end of the memory block to be mapped. And then the expression is odd - why not just end_addr = addr + (L2_PAGETABLE_ENTRIES << L2_PAGETABLE_SHIFT); > +static int init_level4_page(struct kexec_image *image, l4_pgentry_t *l4, > + unsigned long addr, unsigned long last_addr) > +{ > + unsigned long end_addr; > + int result; > + > + addr &= PAGE_MASK; > + end_addr = addr + L4_PAGETABLE_ENTRIES * (1ul << L4_PAGETABLE_SHIFT); > + > + while ( (addr < last_addr) && (addr < end_addr) ) > + { > + struct page_info *l3_page; > + l3_pgentry_t *l3; > + > + l3_page = kimage_alloc_control_page(image, 0); > + if ( !l3_page ) > + return -ENOMEM; > + l3 = __map_domain_page(l3_page); > + result = init_level3_page(image, l3, addr, last_addr); > + unmap_domain_page(l3); > + if (result) Coding style. 
> +static int init_transition_pgtable(struct kexec_image *image, l4_pgentry_t > *l4) > +{ > + struct page_info *l3_page; > + struct page_info *l2_page; > + struct page_info *l1_page; > + unsigned long vaddr, paddr; > + l3_pgentry_t *l3 = NULL; > + l2_pgentry_t *l2 = NULL; > + l1_pgentry_t *l1 = NULL; > + int ret = -ENOMEM; > + > + vaddr = (unsigned long)kexec_reloc; > + paddr = page_to_maddr(image->control_code_page); > + > + l4 += l4_table_offset(vaddr); > + if ( !(l4e_get_flags(*l4) & _PAGE_PRESENT) ) > + { > + l3_page = kimage_alloc_control_page(image, 0); > + if ( !l3_page ) > + goto out; > + l4e_write(l4, l4e_from_page(l3_page, __PAGE_HYPERVISOR)); > + } > + else > + l3_page = l4e_get_page(*l4); > + > + l3 = __map_domain_page(l3_page); > + l3 += l3_table_offset(vaddr); > + if ( !(l3e_get_flags(*l3) & _PAGE_PRESENT) ) > + { > + l2_page = kimage_alloc_control_page(image, 0); > + if ( !l2_page ) > + goto out; > + l3e_write(l3, l3e_from_page(l2_page, __PAGE_HYPERVISOR)); > + } > + else > + l2_page = l3e_get_page(*l3); Afaict you're done using "l3" here, so you should unmap it in order to reduce the pressure on the domain page mapping resources. > + > + l2 = __map_domain_page(l2_page); > + l2 += l2_table_offset(vaddr); > + if ( !(l2e_get_flags(*l2) & _PAGE_PRESENT) ) > + { > + l1_page = kimage_alloc_control_page(image, 0); > + if ( !l1_page ) > + goto out; > + l2e_write(l2, l2e_from_page(l1_page, __PAGE_HYPERVISOR)); > + } > + else > + l1_page = l2e_get_page(*l2); Same for "l2" at this point. 
> +static int build_reloc_page_table(struct kexec_image *image) > +{ > + struct page_info *l4_page; > + l4_pgentry_t *l4; > + int result; > + > + l4_page = kimage_alloc_control_page(image, 0); > + if ( !l4_page ) > + return -ENOMEM; > + l4 = __map_domain_page(l4_page); > + > + result = init_level4_page(image, l4, 0, max_page << PAGE_SHIFT); What about holes in the physical address space - not just the MMIO hole below 4Gb is a problem here, but also discontiguous physical memory. > --- /dev/null > +++ b/xen/arch/x86/x86_64/kexec_reloc.S > @@ -0,0 +1,211 @@ > +/* > + * Relocate a kexec_image to its destination and call it. > + * > + * Copyright (C) 2013 Citrix Systems R&D Ltd. > + * > + * Portions derived from Linux's arch/x86/kernel/relocate_kernel_64.S. > + * > + * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xxxxxxxxxxxx> > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2. See the file COPYING for more details. > + */ > +#include <xen/config.h> > + > +#include <asm/asm_defns.h> > +#include <asm/msr.h> > +#include <asm/page.h> > +#include <asm/machine_kexec.h> > + > + .text > + .align PAGE_SIZE > + .code64 > + > +ENTRY(kexec_reloc) > + /* %rdi - code page maddr */ > + /* %rsi - page table maddr */ > + /* %rdx - indirection page maddr */ > + /* %rcx - entry maddr */ > + /* %r8 - flags */ > + > + movq %rdx, %rbx > + > + /* Setup stack. */ > + leaq (reloc_stack - kexec_reloc)(%rdi), %rsp > + > + /* Load reloc page table. */ > + movq %rsi, %cr3 > + > + /* Jump to identity mapped code. */ > + leaq (identity_mapped - kexec_reloc)(%rdi), %rax > + jmpq *%rax > + > +identity_mapped: > + pushq %rcx > + pushq %rbx > + pushq %rsi > + pushq %rdi > + > + /* > + * Set cr0 to a known state: > + * - Paging enabled > + * - Alignment check disabled > + * - Write protect disabled > + * - No task switch > + * - Don't do FP software emulation. 
> + * - Proctected mode enabled > + */ > + movq %cr0, %rax > + andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax > + orl $(X86_CR0_PG | X86_CR0_PE), %eax Either "andq" and "orq" or "andl" and "orl". > + movq %rax, %cr0 > + > + /* > + * Set cr4 to a known state: > + * - physical address extension enabled > + */ > + movq $X86_CR4_PAE, %rax "movl" suffices here. > + movq %rax, %cr4 > + > + movq %rbx, %rdi > + call relocate_pages > + > + popq %rdi > + popq %rsi > + popq %rbx > + popq %rcx > + > + /* Need to switch to 32-bit mode? */ > + testq $KEXEC_RELOC_FLAG_COMPAT, %r8 > + jnz call_32_bit > + > +call_64_bit: > + /* Call the image entry point. This should never return. */ > + call *%rcx > + ud2 > + > +call_32_bit: > + /* Setup IDT. */ > + lidt compat_mode_idt(%rip) > + > + /* Load compat GDT. */ > + leaq (compat_mode_gdt - kexec_reloc)(%rdi), %rax > + movq %rax, (compat_mode_gdt_desc + 2)(%rip) > + lgdt compat_mode_gdt_desc(%rip) > + > + /* Relocate compatibility mode entry point address. */ > + leal (compatibility_mode - kexec_reloc)(%edi), %eax > + movl %eax, compatibility_mode_far(%rip) > + > + /* Enter compatibility mode. */ > + ljmp *compatibility_mode_far(%rip) As you're using mnemonic suffixes elsewhere even when not strictly needed, for consistency I'd recommend using one on this instruction (and perhaps also on the lidt/lgdt above) too. > + > +relocate_pages: > + /* %rdi - indirection page maddr */ > + cld > + movq %rdi, %rcx > + xorq %rdi, %rdi > + xorq %rsi, %rsi I guess performance and code size aren't of highest importance here, but "xorl" would suffice in both of the above lines. > + jmp 1f > + > +0: /* top, read another word for the indirection page */ > + > + movq (%rbx), %rcx > + addq $8, %rbx > +1: > + testq $0x1, %rcx /* is it a destination page? */ And "testl" (or even "testb") would be sufficient here. There are more pointless uses of the "q" suffix further down. 
In any event the 0x1 here and the other flags tested for below will want to become manifest constants. > + jz 2f > + movq %rcx, %rdi > + andq $0xfffffffffffff000, %rdi The number of "f"-s here being correct can't be seen without actually counting them - either use a manifest constant like PAGE_MASK, or at least write "$~0xfff". > + jmp 0b > +2: > + testq $0x2, %rcx /* is it an indirection page? */ > + jz 2f > + movq %rcx, %rbx > + andq $0xfffffffffffff000, %rbx > + jmp 0b > +2: > + testq $0x4, %rcx /* is it the done indicator? */ > + jz 2f > + jmp 3f Just a single (inverse) conditional branch please. And there are too many "2:" labels here in close succession. > +2: > + testq $0x8, %rcx /* is it the source indicator? */ > + jz 2f > + movq %rcx, %rsi /* For ever source page do a copy */ "every"? > + andq $0xfffffffffffff000, %rsi > + movq $512, %rcx > + rep movsq > + jmp 0b > +2: > + testq $0x10, %rcx /* is it the zero indicator? */ > + jz 0b /* Ignore it otherwise */ > + movq $512, %rcx /* Zero the destination page. */ > + xorq %rax, %rax > + rep stosq > + jmp 0b > +3: > + ret > + > + .code32 > + > +compatibility_mode: > + /* Setup some sane segments. */ > + movl $0x0008, %eax > + movl %eax, %ds > + movl %eax, %es > + movl %eax, %fs > + movl %eax, %gs > + movl %eax, %ss > + > + movl %ecx, %ebp > + > + /* Disable paging and therefore leave 64 bit mode. */ > + movl %cr0, %eax > + andl $~X86_CR0_PG, %eax > + movl %eax, %cr0 > + > + /* Disable long mode */ > + movl $MSR_EFER, %ecx > + rdmsr > + andl $~EFER_LME, %eax > + wrmsr > + > + /* Clear cr4 to disable PAE. */ > + movl $0, %eax xorl %eax, %eax > + movl %eax, %cr4 > + > + /* Call the image entry point. This should never return. */ > + call *%ebp > + ud2 > + > + .align 16 Why? 4 is all you need. > +compatibility_mode_far: > + .long 0x00000000 /* set in call_32_bit above */ > + .word 0x0010 > + > + .align 16 Even more so here. If you care for alignment, you want 2 mod 8 here. 
> +compat_mode_gdt_desc: > + .word (3*8)-1 > + .quad 0x0000000000000000 /* set in call_32_bit above */ > + > + .align 16 And just 8 here. > +compat_mode_gdt: > + .quad 0x0000000000000000 /* null */ > + .quad 0x00cf92000000ffff /* 0x0008 ring 0 data */ > + .quad 0x00cf9a000000ffff /* 0x0010 ring 0 code, compatibility */ > + > +compat_mode_idt: compat_mode_idt_desc: And if you care for alignment, 2 mod 8 again. > + .word 0 /* limit */ > + .long 0 /* base */ > + > + /* > + * 16 words of stack are more than enough. > + */ > + .fill 16,8,0 > +reloc_stack: And now you don't care for the stack being mis-aligned? Jan _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |