[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 5/6] xen: modify page table construction
On 11/02/16 13:47, Daniel Kiper wrote: > On Thu, Feb 11, 2016 at 08:53:25AM +0100, Juergen Gross wrote: >> Modify the page table construction to allow multiple virtual regions >> to be mapped. This is done as preparation for removing the p2m list >> from the initial kernel mapping in order to support huge pv domains. >> >> This allows a cleaner approach for mapping the relocator page by >> using this capability. >> >> The interface to the assembler level of the relocator has to be changed >> in order to be able to process multiple page table areas. > > I hope that older kernels could be loaded as is and everything works as > expected. See Patch 0/6. I have tested old kernels, too. > >> Signed-off-by: Juergen Gross <jgross@xxxxxxxx> >> --- >> grub-core/lib/i386/xen/relocator.S | 47 +++--- >> grub-core/lib/x86_64/xen/relocator.S | 44 +++-- >> grub-core/lib/xen/relocator.c | 22 ++- >> grub-core/loader/i386/xen.c | 313 >> +++++++++++++++++++++++------------ >> include/grub/xen/relocator.h | 6 +- >> 5 files changed, 277 insertions(+), 155 deletions(-) >> >> diff --git a/grub-core/lib/i386/xen/relocator.S >> b/grub-core/lib/i386/xen/relocator.S >> index 694a54c..c23b405 100644 >> --- a/grub-core/lib/i386/xen/relocator.S >> +++ b/grub-core/lib/i386/xen/relocator.S >> @@ -50,41 +50,45 @@ VARIABLE(grub_relocator_xen_remapper_map_high) >> jmp *%ebx >> >> LOCAL(cont): >> - xorl %eax, %eax >> - movl %eax, %ebp >> + /* mov imm32, %eax */ >> + .byte 0xb8 >> +VARIABLE(grub_relocator_xen_paging_areas_addr) >> + .long 0 >> + movl %eax, %ebx >> 1: >> - >> + movl 0(%ebx), %ebp >> + movl 4(%ebx), %ecx > > Oh... No, please use constants not plain numbers and > explain in comments what is going on. Otherwise it > is unreadable. Hmm, yes, some comments wouldn't hurt. :-) Regarding constants: do you really think I should introduce their first usage in this file with my patch? > >> + testl %ecx, %ecx >> + jz 3f >> + addl $8, %ebx >> + movl %ebx, %esp >> + >> +2: >> + movl %ecx, %edi >> /* mov imm32, %eax */ >> .byte 0xb8 >> VARIABLE(grub_relocator_xen_mfn_list) >> .long 0 >> - movl %eax, %edi >> - movl %ebp, %eax >> - movl 0(%edi, %eax, 4), %ecx >> - >> - /* mov imm32, %ebx */ >> - .byte 0xbb >> -VARIABLE(grub_relocator_xen_paging_start) >> - .long 0 >> - shll $12, %eax >> - addl %eax, %ebx >> + movl 0(%eax, %ebp, 4), %ecx >> + movl %ebp, %ebx >> + shll $12, %ebx > > Ditto. Look at the removed line above: I just switched register usage. > >> movl %ecx, %edx >> shll $12, %ecx >> shrl $20, %edx >> orl $5, %ecx >> movl $2, %esi >> movl $__HYPERVISOR_update_va_mapping, %eax >> - int $0x82 >> + int $0x82 /* parameters: eax, ebx, ecx, edx, esi */ > > Please use more verbose comments. :-) > >> >> incl %ebp >> - /* mov imm32, %ecx */ >> - .byte 0xb9 >> -VARIABLE(grub_relocator_xen_paging_size) >> - .long 0 >> - cmpl %ebp, %ecx >> + movl %edi, %ecx >> + >> + loop 2b >> >> - ja 1b >> + mov %esp, %ebx >> + jmp 1b >> >> +3: >> /* mov imm32, %ebx */ >> .byte 0xbb >> VARIABLE(grub_relocator_xen_mmu_op_addr) >> @@ -102,6 +106,9 @@ VARIABLE(grub_relocator_xen_remap_continue) >> >> jmp *%eax >> >> +VARIABLE(grub_relocator_xen_paging_areas) >> + .long 0, 0, 0, 0, 0, 0, 0, 0 >> + >> VARIABLE(grub_relocator_xen_mmu_op) >> .space 256 >> >> diff --git a/grub-core/lib/x86_64/xen/relocator.S >> b/grub-core/lib/x86_64/xen/relocator.S >> index 92e9e72..dbb90c7 100644 >> --- a/grub-core/lib/x86_64/xen/relocator.S >> +++ b/grub-core/lib/x86_64/xen/relocator.S > > Is to possible to split this patch to i386 and x86-64 stuff patches? I don't think so. The C-part is common and assembler sources must both match. > >> @@ -50,31 +50,24 @@ VARIABLE(grub_relocator_xen_remapper_map) >> >> LOCAL(cont): >> >> - /* mov imm64, %rcx */ >> - .byte 0x48 >> - .byte 0xb9 >> -VARIABLE(grub_relocator_xen_paging_size) >> - .quad 0 >> - >> - /* mov imm64, %rax */ >> - .byte 0x48 >> - .byte 0xb8 >> -VARIABLE(grub_relocator_xen_paging_start) >> - .quad 0 >> - >> - movq %rax, %r12 >> - >> /* mov imm64, %rax */ >> .byte 0x48 >> .byte 0xb8 >> VARIABLE(grub_relocator_xen_mfn_list) >> .quad 0 >> >> - movq %rax, %rsi >> + movq %rax, %rbx >> + leaq EXT_C(grub_relocator_xen_paging_areas) (%rip), %r8 >> + >> 1: >> + movq 0(%r8), %r12 >> + movq 8(%r8), %rcx > > Ditto. > >> + testq %rcx, %rcx >> + jz 3f >> +2: >> movq %r12, %rdi >> - movq %rsi, %rbx >> - movq 0(%rsi), %rsi >> + shlq $12, %rdi >> + movq (%rbx, %r12, 8), %rsi > > Ditto. > >> shlq $12, %rsi >> orq $5, %rsi >> movq $2, %rdx >> @@ -83,13 +76,15 @@ VARIABLE(grub_relocator_xen_mfn_list) >> syscall >> >> movq %r9, %rcx >> - addq $8, %rbx >> - addq $4096, %r12 >> - movq %rbx, %rsi >> + incq %r12 >> + >> + loop 2b >> >> - loop 1b >> + addq $16, %r8 > > Ditto. > >> + jmp 1b >> >> - leaq LOCAL(mmu_op) (%rip), %rdi >> +3: >> + leaq EXT_C(grub_relocator_xen_mmu_op) (%rip), %rdi >> movq $3, %rsi >> movq $0, %rdx >> movq $0x7FF0, %r10 > > Ugh... Please fix this too. Probably in separate patch. I don't mind cleaning the assembler sources by using more comments and constants. I just don't think this should be part of this series. > >> @@ -104,7 +99,10 @@ VARIABLE(grub_relocator_xen_remap_continue) >> >> jmp *%rax >> >> -LOCAL(mmu_op): >> +VARIABLE(grub_relocator_xen_paging_areas) >> + /* array of start, size pairs, size 0 is end marker */ >> + .quad 0, 0, 0, 0, 0, 0, 0, 0 >> + >> VARIABLE(grub_relocator_xen_mmu_op) >> .space 256 >> >> diff --git a/grub-core/lib/xen/relocator.c b/grub-core/lib/xen/relocator.c >> index 8f427d3..bc29055 100644 >> --- a/grub-core/lib/xen/relocator.c >> +++ b/grub-core/lib/xen/relocator.c >> @@ -36,15 +36,15 @@ extern grub_uint8_t grub_relocator_xen_remap_end; >> extern grub_xen_reg_t grub_relocator_xen_stack; >> extern grub_xen_reg_t grub_relocator_xen_start_info; >> extern grub_xen_reg_t grub_relocator_xen_entry_point; >> -extern grub_xen_reg_t grub_relocator_xen_paging_start; >> -extern grub_xen_reg_t grub_relocator_xen_paging_size; >> extern grub_xen_reg_t grub_relocator_xen_remapper_virt; >> extern grub_xen_reg_t grub_relocator_xen_remapper_virt2; >> extern grub_xen_reg_t grub_relocator_xen_remapper_map; >> extern grub_xen_reg_t grub_relocator_xen_mfn_list; >> +extern grub_xen_reg_t grub_relocator_xen_paging_areas[2 * XEN_MAX_MAPPINGS]; >> extern grub_xen_reg_t grub_relocator_xen_remap_continue; >> #ifdef __i386__ >> extern grub_xen_reg_t grub_relocator_xen_mmu_op_addr; >> +extern grub_xen_reg_t grub_relocator_xen_paging_areas_addr; >> extern grub_xen_reg_t grub_relocator_xen_remapper_map_high; >> #endif >> extern mmuext_op_t grub_relocator_xen_mmu_op[3]; >> @@ -61,6 +61,7 @@ grub_relocator_xen_boot (struct grub_relocator *rel, >> { >> grub_err_t err; >> void *relst; >> + int i; >> grub_relocator_chunk_t ch, ch_tramp; >> grub_xen_mfn_t *mfn_list = >> (grub_xen_mfn_t *) grub_xen_start_page_addr->mfn_list; >> @@ -77,8 +78,11 @@ grub_relocator_xen_boot (struct grub_relocator *rel, >> grub_relocator_xen_stack = state.stack; >> grub_relocator_xen_start_info = state.start_info; >> grub_relocator_xen_entry_point = state.entry_point; >> - grub_relocator_xen_paging_start = state.paging_start << 12; >> - grub_relocator_xen_paging_size = state.paging_size; >> + for (i = 0; i < XEN_MAX_MAPPINGS; i++) >> + { >> + grub_relocator_xen_paging_areas[2 * i] = state.paging_start[i]; >> + grub_relocator_xen_paging_areas[2 * i + 1] = state.paging_size[i]; > > Why 2 and 1? Constants? I guess I should probably define grub_relocator_xen_paging_areas as an array of struct { grub_xen_reg_t start; grub_xen_reg_t size; }; > >> + } >> grub_relocator_xen_remapper_virt = remapper_virt; >> grub_relocator_xen_remapper_virt2 = remapper_virt; >> grub_relocator_xen_remap_continue = trampoline_virt; >> @@ -88,10 +92,12 @@ grub_relocator_xen_boot (struct grub_relocator *rel, >> grub_relocator_xen_remapper_map_high = (mfn_list[remapper_pfn] >> 20); >> grub_relocator_xen_mmu_op_addr = (char *) &grub_relocator_xen_mmu_op >> - (char *) &grub_relocator_xen_remap_start + remapper_virt; >> + grub_relocator_xen_paging_areas_addr = >> + (char *) &grub_relocator_xen_paging_areas >> + - (char *) &grub_relocator_xen_remap_start + remapper_virt; >> #endif >> >> - grub_relocator_xen_mfn_list = state.mfn_list >> - + state.paging_start * sizeof (grub_addr_t); >> + grub_relocator_xen_mfn_list = state.mfn_list; >> >> grub_memset (grub_relocator_xen_mmu_op, 0, >> sizeof (grub_relocator_xen_mmu_op)); >> @@ -100,9 +106,9 @@ grub_relocator_xen_boot (struct grub_relocator *rel, >> #else >> grub_relocator_xen_mmu_op[0].cmd = MMUEXT_PIN_L4_TABLE; >> #endif >> - grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start]; >> + grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start[0]]; >> grub_relocator_xen_mmu_op[1].cmd = MMUEXT_NEW_BASEPTR; >> - grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start]; >> + grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start[0]]; >> grub_relocator_xen_mmu_op[2].cmd = MMUEXT_UNPIN_TABLE; >> grub_relocator_xen_mmu_op[2].arg1.mfn = >> mfn_list[grub_xen_start_page_addr->pt_base >> 12]; >> diff --git a/grub-core/loader/i386/xen.c b/grub-core/loader/i386/xen.c >> index 0f41048..5e10420 100644 >> --- a/grub-core/loader/i386/xen.c >> +++ b/grub-core/loader/i386/xen.c >> @@ -42,6 +42,29 @@ >> >> GRUB_MOD_LICENSE ("GPLv3+"); >> >> +#ifdef __x86_64__ >> +#define NUMBER_OF_LEVELS 4 >> +#define INTERMEDIATE_OR 7 >> +#define VIRT_MASK 0x0000ffffffffffffULL >> +#else >> +#define NUMBER_OF_LEVELS 3 >> +#define INTERMEDIATE_OR 3 >> +#define VIRT_MASK 0x00000000ffffffffULL > > Long expected constants... Nice! Just to please you. :-) They were just moved up some lines as they are used in the new structure definitions. Using constants was unavoidable here. ;-) > >> +#endif >> + >> +struct grub_xen_mapping_lvl { >> + grub_uint64_t virt_start; >> + grub_uint64_t virt_end; >> + grub_uint64_t pfn_start; >> + grub_uint64_t n_pt_pages; >> +}; >> + >> +struct grub_xen_mapping { >> + grub_uint64_t *where; >> + struct grub_xen_mapping_lvl area; >> + struct grub_xen_mapping_lvl lvls[NUMBER_OF_LEVELS]; >> +}; >> + >> static struct grub_relocator *relocator = NULL; >> static grub_uint64_t max_addr; >> static grub_dl_t my_mod; >> @@ -56,9 +79,10 @@ static struct grub_relocator_xen_state state; >> static grub_xen_mfn_t *virt_mfn_list; >> static struct start_info *virt_start_info; >> static grub_xen_mfn_t console_pfn; >> -static grub_uint64_t *virt_pgtable; >> -static grub_uint64_t pgtbl_start; >> static grub_uint64_t pgtbl_end; >> +static struct grub_xen_mapping mappings[XEN_MAX_MAPPINGS]; >> +static int n_mappings; >> +static struct grub_xen_mapping *map_reloc; >> >> #define PAGE_SIZE 4096 >> #define MAX_MODULES (PAGE_SIZE / sizeof (struct xen_multiboot_mod_list)) >> @@ -75,100 +99,162 @@ page2offset (grub_uint64_t page) >> return page << PAGE_SHIFT; >> } >> >> -#ifdef __x86_64__ >> -#define NUMBER_OF_LEVELS 4 >> -#define INTERMEDIATE_OR 7 >> -#else >> -#define NUMBER_OF_LEVELS 3 >> -#define INTERMEDIATE_OR 3 >> -#endif >> - >> -static grub_uint64_t >> -get_pgtable_size (grub_uint64_t total_pages, grub_uint64_t virt_base) >> +static grub_err_t >> +get_pgtable_size (grub_uint64_t from, grub_uint64_t to, grub_uint64_t pfn) >> { >> - if (!virt_base) >> - total_pages++; >> - grub_uint64_t ret = 0; >> - grub_uint64_t ll = total_pages; >> - int i; >> - for (i = 0; i < NUMBER_OF_LEVELS; i++) >> - { >> - ll = (ll + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE; >> - /* PAE wants all 4 root directories present. */ >> -#ifdef __i386__ >> - if (i == 1) >> - ll = 4; >> -#endif >> - ret += ll; >> - } >> - for (i = 1; i < NUMBER_OF_LEVELS; i++) >> - if (virt_base >> (PAGE_SHIFT + i * LOG_POINTERS_PER_PAGE)) >> - ret++; >> - return ret; >> -} >> + struct grub_xen_mapping *map, *map_cmp; >> + grub_uint64_t mask, bits; >> + int i, m; >> >> -static void >> -generate_page_table (grub_uint64_t *where, grub_uint64_t paging_start, >> - grub_uint64_t paging_end, grub_uint64_t total_pages, >> - grub_uint64_t virt_base, grub_xen_mfn_t *mfn_list) >> -{ >> - if (!virt_base) >> - paging_end++; >> + if (n_mappings == XEN_MAX_MAPPINGS) >> + return grub_error (GRUB_ERR_BUG, "too many mapped areas"); >> + >> + grub_dprintf ("xen", "get_pgtable_size %d from=%llx, to=%llx, pfn=%llx\n", >> + n_mappings, (unsigned long long) from, (unsigned long long) to, >> + (unsigned long long) pfn); >> >> - grub_uint64_t lx[NUMBER_OF_LEVELS], lxs[NUMBER_OF_LEVELS]; >> - grub_uint64_t nlx, nls, sz = 0; >> - int l; >> + map = mappings + n_mappings; >> + grub_memset (map, 0, sizeof (*map)); >> >> - nlx = paging_end; >> - nls = virt_base >> PAGE_SHIFT; >> - for (l = 0; l < NUMBER_OF_LEVELS; l++) >> + map->area.virt_start = from & VIRT_MASK; >> + map->area.virt_end = (to - 1) & VIRT_MASK; >> + map->area.n_pt_pages = 0; >> + >> + for (i = NUMBER_OF_LEVELS - 1; i >= 0; i--) >> { >> - nlx = (nlx + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE; >> - /* PAE wants all 4 root directories present. */ >> + map->lvls[i].pfn_start = pfn + map->area.n_pt_pages; >> + if (i == NUMBER_OF_LEVELS - 1) >> + { >> + if (n_mappings == 0) >> + { >> + map->lvls[i].virt_start = 0; >> + map->lvls[i].virt_end = VIRT_MASK; >> + map->lvls[i].n_pt_pages = 1; >> + map->area.n_pt_pages++; >> + } >> + continue; >> + } >> + >> + bits = PAGE_SHIFT + (i + 1) * LOG_POINTERS_PER_PAGE; >> + mask = (1ULL << bits) - 1; >> + map->lvls[i].virt_start = map->area.virt_start & ~mask; >> + map->lvls[i].virt_end = map->area.virt_end | mask; >> #ifdef __i386__ >> - if (l == 1) >> - nlx = 4; >> + /* PAE wants last root directory present. */ >> + if (i == 1 && to <= 0xc0000000 && n_mappings == 0) > > Ditto. > >> + map->lvls[i].virt_end = VIRT_MASK; >> #endif >> - lx[l] = nlx; >> - sz += lx[l]; >> - lxs[l] = nls & (POINTERS_PER_PAGE - 1); >> - if (nls && l != 0) >> - sz++; >> - nls >>= LOG_POINTERS_PER_PAGE; >> + for (m = 0; m < n_mappings; m++) >> + { >> + map_cmp = mappings + m; >> + if (map_cmp->lvls[i].virt_start == map_cmp->lvls[i].virt_end) >> + continue; >> + if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start && >> + map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end) >> + { >> + map->lvls[i].virt_start = 0; >> + map->lvls[i].virt_end = 0; >> + break; >> + } >> + if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start && >> + map->lvls[i].virt_start <= map_cmp->lvls[i].virt_end) >> + map->lvls[i].virt_start = map_cmp->lvls[i].virt_end + 1; >> + if (map->lvls[i].virt_end >= map_cmp->lvls[i].virt_start && >> + map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end) >> + map->lvls[i].virt_end = map_cmp->lvls[i].virt_start - 1; >> + } >> + if (map->lvls[i].virt_start < map->lvls[i].virt_end) >> + map->lvls[i].n_pt_pages = >> + ((map->lvls[i].virt_end - map->lvls[i].virt_start) >> bits) + 1; >> + map->area.n_pt_pages += map->lvls[i].n_pt_pages; >> + grub_dprintf ("xen", "get_pgtable_size level %d: virt %llx-%llx %d >> pts\n", >> + i, (unsigned long long) map->lvls[i].virt_start, >> + (unsigned long long) map->lvls[i].virt_end, >> + (int) map->lvls[i].n_pt_pages); >> } >> >> - grub_uint64_t lp; >> - grub_uint64_t j; >> - grub_uint64_t *pg = (grub_uint64_t *) where; >> - int pr = 0; >> + grub_dprintf ("xen", "get_pgtable_size return: %d page tables\n", >> + (int) map->area.n_pt_pages); >> + >> + state.paging_start[n_mappings] = pfn; >> + state.paging_size[n_mappings] = map->area.n_pt_pages; >> + >> + return GRUB_ERR_NONE; >> +} >> + >> +static grub_uint64_t * >> +get_pg_table_virt (int mapping, int level) >> +{ >> + grub_uint64_t pfn; >> + struct grub_xen_mapping *map; >> + >> + map = mappings + mapping; >> + pfn = map->lvls[level].pfn_start - map->lvls[NUMBER_OF_LEVELS - >> 1].pfn_start; >> + return map->where + pfn * POINTERS_PER_PAGE; >> +} >> >> - grub_memset (pg, 0, sz * PAGE_SIZE); >> +static grub_uint64_t >> +get_pg_table_prot (int level, grub_uint64_t pfn) >> +{ >> + int m; >> + grub_uint64_t pfn_s, pfn_e; >> >> - lp = paging_start + lx[NUMBER_OF_LEVELS - 1]; >> - for (l = NUMBER_OF_LEVELS - 1; l >= 1; l--) >> + if (level > 0) >> + return INTERMEDIATE_OR; >> + for (m = 0; m < n_mappings; m++) >> { >> - if (lxs[l] || pr) >> - pg[0] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR; >> - if (pr) >> - pg += POINTERS_PER_PAGE; >> - for (j = 0; j < lx[l - 1]; j++) >> - pg[j + lxs[l]] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR; >> - pg += lx[l] * POINTERS_PER_PAGE; >> - if (lxs[l]) >> - pr = 1; >> + pfn_s = mappings[m].lvls[NUMBER_OF_LEVELS - 1].pfn_start; >> + pfn_e = mappings[m].area.n_pt_pages + pfn_s; >> + if (pfn >= pfn_s && pfn < pfn_e) >> + return 5; >> } >> + return 7; > > What 7 and 5 means? Page table protection bits. > I am exhausted. Please fix above stuff and I will review this patch > again after fixes. I'd really like to have the maintainer's opinion on usage of constants vs. pure numbers. Up to now I have the impression that constants are used only to abstract i386 vs. x86-64. I wouldn't mind changing that, but I don't like modifying all my patches which are then rejected due to that change. Thanks for doing the review, Juergen _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |