[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 5/6] xen: modify page table construction



On Thu, Feb 11, 2016 at 08:53:25AM +0100, Juergen Gross wrote:
> Modify the page table construction to allow multiple virtual regions
> to be mapped. This is done as preparation for removing the p2m list
> from the initial kernel mapping in order to support huge pv domains.
>
> This allows a cleaner approach for mapping the relocator page by
> using this capability.
>
> The interface to the assembler level of the relocator has to be changed
> in order to be able to process multiple page table areas.

I hope that older kernels could be loaded as is and everything works as 
expected.

> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
> ---
>  grub-core/lib/i386/xen/relocator.S   |  47 +++---
>  grub-core/lib/x86_64/xen/relocator.S |  44 +++--
>  grub-core/lib/xen/relocator.c        |  22 ++-
>  grub-core/loader/i386/xen.c          | 313 
> +++++++++++++++++++++++------------
>  include/grub/xen/relocator.h         |   6 +-
>  5 files changed, 277 insertions(+), 155 deletions(-)
>
> diff --git a/grub-core/lib/i386/xen/relocator.S 
> b/grub-core/lib/i386/xen/relocator.S
> index 694a54c..c23b405 100644
> --- a/grub-core/lib/i386/xen/relocator.S
> +++ b/grub-core/lib/i386/xen/relocator.S
> @@ -50,41 +50,45 @@ VARIABLE(grub_relocator_xen_remapper_map_high)
>       jmp *%ebx
>
>  LOCAL(cont):
> -     xorl    %eax, %eax
> -     movl    %eax, %ebp
> +     /* mov imm32, %eax */
> +     .byte   0xb8
> +VARIABLE(grub_relocator_xen_paging_areas_addr)
> +     .long   0
> +     movl    %eax, %ebx
>  1:
> -
> +     movl    0(%ebx), %ebp
> +     movl    4(%ebx), %ecx

Oh... No, please use constants not plain numbers and
explain in comments what is going on. Otherwise it
is unreadable.

> +     testl   %ecx, %ecx
> +     jz      3f
> +     addl    $8, %ebx
> +     movl    %ebx, %esp
> +
> +2:
> +     movl    %ecx, %edi
>       /* mov imm32, %eax */
>       .byte   0xb8
>  VARIABLE(grub_relocator_xen_mfn_list)
>       .long   0
> -     movl    %eax, %edi
> -     movl    %ebp, %eax
> -     movl    0(%edi, %eax, 4), %ecx
> -
> -     /* mov imm32, %ebx */
> -     .byte   0xbb
> -VARIABLE(grub_relocator_xen_paging_start)
> -     .long   0
> -     shll    $12, %eax
> -     addl    %eax, %ebx
> +     movl    0(%eax, %ebp, 4), %ecx
> +     movl    %ebp, %ebx
> +     shll    $12, %ebx

Ditto.

>       movl    %ecx, %edx
>       shll    $12,  %ecx
>       shrl    $20,  %edx
>       orl     $5, %ecx
>       movl    $2, %esi
>       movl    $__HYPERVISOR_update_va_mapping, %eax
> -     int     $0x82
> +     int     $0x82   /* parameters: eax, ebx, ecx, edx, esi */

Please use more verbose comments.

>
>       incl    %ebp
> -     /* mov imm32, %ecx */
> -     .byte   0xb9
> -VARIABLE(grub_relocator_xen_paging_size)
> -     .long   0
> -     cmpl    %ebp, %ecx
> +     movl    %edi, %ecx
> +
> +     loop    2b
>
> -     ja      1b
> +     mov     %esp, %ebx
> +     jmp     1b
>
> +3:
>       /* mov imm32, %ebx */
>       .byte   0xbb
>  VARIABLE(grub_relocator_xen_mmu_op_addr)
> @@ -102,6 +106,9 @@ VARIABLE(grub_relocator_xen_remap_continue)
>
>       jmp *%eax
>
> +VARIABLE(grub_relocator_xen_paging_areas)
> +     .long   0, 0, 0, 0, 0, 0, 0, 0
> +
>  VARIABLE(grub_relocator_xen_mmu_op)
>       .space 256
>
> diff --git a/grub-core/lib/x86_64/xen/relocator.S 
> b/grub-core/lib/x86_64/xen/relocator.S
> index 92e9e72..dbb90c7 100644
> --- a/grub-core/lib/x86_64/xen/relocator.S
> +++ b/grub-core/lib/x86_64/xen/relocator.S

Is to possible to split this patch to i386 and x86-64 stuff patches?

> @@ -50,31 +50,24 @@ VARIABLE(grub_relocator_xen_remapper_map)
>
>  LOCAL(cont):
>
> -     /* mov imm64, %rcx */
> -     .byte   0x48
> -     .byte   0xb9
> -VARIABLE(grub_relocator_xen_paging_size)
> -     .quad   0
> -
> -     /* mov imm64, %rax */
> -     .byte   0x48
> -     .byte   0xb8
> -VARIABLE(grub_relocator_xen_paging_start)
> -     .quad   0
> -
> -     movq    %rax, %r12
> -
>       /* mov imm64, %rax */
>       .byte   0x48
>       .byte   0xb8
>  VARIABLE(grub_relocator_xen_mfn_list)
>       .quad   0
>
> -     movq    %rax, %rsi
> +     movq    %rax, %rbx
> +     leaq    EXT_C(grub_relocator_xen_paging_areas) (%rip), %r8
> +
>  1:
> +     movq    0(%r8), %r12
> +     movq    8(%r8), %rcx

Ditto.

> +     testq   %rcx, %rcx
> +     jz      3f
> +2:
>       movq    %r12, %rdi
> -     movq    %rsi, %rbx
> -     movq    0(%rsi), %rsi
> +     shlq    $12, %rdi
> +     movq    (%rbx, %r12, 8), %rsi

Ditto.

>       shlq    $12,  %rsi
>       orq     $5, %rsi
>       movq    $2, %rdx
> @@ -83,13 +76,15 @@ VARIABLE(grub_relocator_xen_mfn_list)
>       syscall
>
>       movq    %r9, %rcx
> -     addq    $8, %rbx
> -     addq    $4096, %r12
> -     movq    %rbx, %rsi
> +     incq    %r12
> +
> +     loop 2b
>
> -     loop 1b
> +     addq    $16, %r8

Ditto.

> +     jmp     1b
>
> -     leaq   LOCAL(mmu_op) (%rip), %rdi
> +3:
> +     leaq   EXT_C(grub_relocator_xen_mmu_op) (%rip), %rdi
>       movq   $3, %rsi
>       movq   $0, %rdx
>       movq   $0x7FF0, %r10

Ugh... Please fix this too. Probably in separate patch.

> @@ -104,7 +99,10 @@ VARIABLE(grub_relocator_xen_remap_continue)
>
>       jmp *%rax
>
> -LOCAL(mmu_op):
> +VARIABLE(grub_relocator_xen_paging_areas)
> +     /* array of start, size pairs, size 0 is end marker */
> +     .quad   0, 0, 0, 0, 0, 0, 0, 0
> +
>  VARIABLE(grub_relocator_xen_mmu_op)
>       .space 256
>
> diff --git a/grub-core/lib/xen/relocator.c b/grub-core/lib/xen/relocator.c
> index 8f427d3..bc29055 100644
> --- a/grub-core/lib/xen/relocator.c
> +++ b/grub-core/lib/xen/relocator.c
> @@ -36,15 +36,15 @@ extern grub_uint8_t grub_relocator_xen_remap_end;
>  extern grub_xen_reg_t grub_relocator_xen_stack;
>  extern grub_xen_reg_t grub_relocator_xen_start_info;
>  extern grub_xen_reg_t grub_relocator_xen_entry_point;
> -extern grub_xen_reg_t grub_relocator_xen_paging_start;
> -extern grub_xen_reg_t grub_relocator_xen_paging_size;
>  extern grub_xen_reg_t grub_relocator_xen_remapper_virt;
>  extern grub_xen_reg_t grub_relocator_xen_remapper_virt2;
>  extern grub_xen_reg_t grub_relocator_xen_remapper_map;
>  extern grub_xen_reg_t grub_relocator_xen_mfn_list;
> +extern grub_xen_reg_t grub_relocator_xen_paging_areas[2 * XEN_MAX_MAPPINGS];
>  extern grub_xen_reg_t grub_relocator_xen_remap_continue;
>  #ifdef __i386__
>  extern grub_xen_reg_t grub_relocator_xen_mmu_op_addr;
> +extern grub_xen_reg_t grub_relocator_xen_paging_areas_addr;
>  extern grub_xen_reg_t grub_relocator_xen_remapper_map_high;
>  #endif
>  extern mmuext_op_t grub_relocator_xen_mmu_op[3];
> @@ -61,6 +61,7 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>  {
>    grub_err_t err;
>    void *relst;
> +  int i;
>    grub_relocator_chunk_t ch, ch_tramp;
>    grub_xen_mfn_t *mfn_list =
>      (grub_xen_mfn_t *) grub_xen_start_page_addr->mfn_list;
> @@ -77,8 +78,11 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>    grub_relocator_xen_stack = state.stack;
>    grub_relocator_xen_start_info = state.start_info;
>    grub_relocator_xen_entry_point = state.entry_point;
> -  grub_relocator_xen_paging_start = state.paging_start << 12;
> -  grub_relocator_xen_paging_size = state.paging_size;
> +  for (i = 0; i < XEN_MAX_MAPPINGS; i++)
> +    {
> +      grub_relocator_xen_paging_areas[2 * i] = state.paging_start[i];
> +      grub_relocator_xen_paging_areas[2 * i + 1] = state.paging_size[i];

Why 2 and 1? Constants?

> +    }
>    grub_relocator_xen_remapper_virt = remapper_virt;
>    grub_relocator_xen_remapper_virt2 = remapper_virt;
>    grub_relocator_xen_remap_continue = trampoline_virt;
> @@ -88,10 +92,12 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>    grub_relocator_xen_remapper_map_high = (mfn_list[remapper_pfn] >> 20);
>    grub_relocator_xen_mmu_op_addr = (char *) &grub_relocator_xen_mmu_op
>      - (char *) &grub_relocator_xen_remap_start + remapper_virt;
> +  grub_relocator_xen_paging_areas_addr =
> +    (char *) &grub_relocator_xen_paging_areas
> +    - (char *) &grub_relocator_xen_remap_start + remapper_virt;
>  #endif
>
> -  grub_relocator_xen_mfn_list = state.mfn_list
> -    + state.paging_start * sizeof (grub_addr_t);
> +  grub_relocator_xen_mfn_list = state.mfn_list;
>
>    grub_memset (grub_relocator_xen_mmu_op, 0,
>              sizeof (grub_relocator_xen_mmu_op));
> @@ -100,9 +106,9 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>  #else
>    grub_relocator_xen_mmu_op[0].cmd = MMUEXT_PIN_L4_TABLE;
>  #endif
> -  grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start];
> +  grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start[0]];
>    grub_relocator_xen_mmu_op[1].cmd = MMUEXT_NEW_BASEPTR;
> -  grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start];
> +  grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start[0]];
>    grub_relocator_xen_mmu_op[2].cmd = MMUEXT_UNPIN_TABLE;
>    grub_relocator_xen_mmu_op[2].arg1.mfn =
>      mfn_list[grub_xen_start_page_addr->pt_base >> 12];
> diff --git a/grub-core/loader/i386/xen.c b/grub-core/loader/i386/xen.c
> index 0f41048..5e10420 100644
> --- a/grub-core/loader/i386/xen.c
> +++ b/grub-core/loader/i386/xen.c
> @@ -42,6 +42,29 @@
>
>  GRUB_MOD_LICENSE ("GPLv3+");
>
> +#ifdef __x86_64__
> +#define NUMBER_OF_LEVELS 4
> +#define INTERMEDIATE_OR 7
> +#define VIRT_MASK 0x0000ffffffffffffULL
> +#else
> +#define NUMBER_OF_LEVELS 3
> +#define INTERMEDIATE_OR 3
> +#define VIRT_MASK 0x00000000ffffffffULL

Long expected constants... Nice!

> +#endif
> +
> +struct grub_xen_mapping_lvl {
> +  grub_uint64_t virt_start;
> +  grub_uint64_t virt_end;
> +  grub_uint64_t pfn_start;
> +  grub_uint64_t n_pt_pages;
> +};
> +
> +struct grub_xen_mapping {
> +  grub_uint64_t *where;
> +  struct grub_xen_mapping_lvl area;
> +  struct grub_xen_mapping_lvl lvls[NUMBER_OF_LEVELS];
> +};
> +
>  static struct grub_relocator *relocator = NULL;
>  static grub_uint64_t max_addr;
>  static grub_dl_t my_mod;
> @@ -56,9 +79,10 @@ static struct grub_relocator_xen_state state;
>  static grub_xen_mfn_t *virt_mfn_list;
>  static struct start_info *virt_start_info;
>  static grub_xen_mfn_t console_pfn;
> -static grub_uint64_t *virt_pgtable;
> -static grub_uint64_t pgtbl_start;
>  static grub_uint64_t pgtbl_end;
> +static struct grub_xen_mapping mappings[XEN_MAX_MAPPINGS];
> +static int n_mappings;
> +static struct grub_xen_mapping *map_reloc;
>
>  #define PAGE_SIZE 4096
>  #define MAX_MODULES (PAGE_SIZE / sizeof (struct xen_multiboot_mod_list))
> @@ -75,100 +99,162 @@ page2offset (grub_uint64_t page)
>    return page << PAGE_SHIFT;
>  }
>
> -#ifdef __x86_64__
> -#define NUMBER_OF_LEVELS 4
> -#define INTERMEDIATE_OR 7
> -#else
> -#define NUMBER_OF_LEVELS 3
> -#define INTERMEDIATE_OR 3
> -#endif
> -
> -static grub_uint64_t
> -get_pgtable_size (grub_uint64_t total_pages, grub_uint64_t virt_base)
> +static grub_err_t
> +get_pgtable_size (grub_uint64_t from, grub_uint64_t to, grub_uint64_t pfn)
>  {
> -  if (!virt_base)
> -    total_pages++;
> -  grub_uint64_t ret = 0;
> -  grub_uint64_t ll = total_pages;
> -  int i;
> -  for (i = 0; i < NUMBER_OF_LEVELS; i++)
> -    {
> -      ll = (ll + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE;
> -      /* PAE wants all 4 root directories present.  */
> -#ifdef __i386__
> -      if (i == 1)
> -     ll = 4;
> -#endif
> -      ret += ll;
> -    }
> -  for (i = 1; i < NUMBER_OF_LEVELS; i++)
> -    if (virt_base >> (PAGE_SHIFT + i * LOG_POINTERS_PER_PAGE))
> -      ret++;
> -  return ret;
> -}
> +  struct grub_xen_mapping *map, *map_cmp;
> +  grub_uint64_t mask, bits;
> +  int i, m;
>
> -static void
> -generate_page_table (grub_uint64_t *where, grub_uint64_t paging_start,
> -                  grub_uint64_t paging_end, grub_uint64_t total_pages,
> -                  grub_uint64_t virt_base, grub_xen_mfn_t *mfn_list)
> -{
> -  if (!virt_base)
> -    paging_end++;
> +  if (n_mappings == XEN_MAX_MAPPINGS)
> +    return grub_error (GRUB_ERR_BUG, "too many mapped areas");
> +
> +  grub_dprintf ("xen", "get_pgtable_size %d from=%llx, to=%llx, pfn=%llx\n",
> +             n_mappings, (unsigned long long) from, (unsigned long long) to,
> +             (unsigned long long) pfn);
>
> -  grub_uint64_t lx[NUMBER_OF_LEVELS], lxs[NUMBER_OF_LEVELS];
> -  grub_uint64_t nlx, nls, sz = 0;
> -  int l;
> +  map = mappings + n_mappings;
> +  grub_memset (map, 0, sizeof (*map));
>
> -  nlx = paging_end;
> -  nls = virt_base >> PAGE_SHIFT;
> -  for (l = 0; l < NUMBER_OF_LEVELS; l++)
> +  map->area.virt_start = from & VIRT_MASK;
> +  map->area.virt_end = (to - 1) & VIRT_MASK;
> +  map->area.n_pt_pages = 0;
> +
> +  for (i = NUMBER_OF_LEVELS - 1; i >= 0; i--)
>      {
> -      nlx = (nlx + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE;
> -      /* PAE wants all 4 root directories present.  */
> +      map->lvls[i].pfn_start = pfn + map->area.n_pt_pages;
> +      if (i == NUMBER_OF_LEVELS - 1)
> +     {
> +       if (n_mappings == 0)
> +         {
> +           map->lvls[i].virt_start = 0;
> +           map->lvls[i].virt_end = VIRT_MASK;
> +           map->lvls[i].n_pt_pages = 1;
> +           map->area.n_pt_pages++;
> +         }
> +       continue;
> +     }
> +
> +      bits = PAGE_SHIFT + (i + 1) * LOG_POINTERS_PER_PAGE;
> +      mask = (1ULL << bits) - 1;
> +      map->lvls[i].virt_start = map->area.virt_start & ~mask;
> +      map->lvls[i].virt_end = map->area.virt_end | mask;
>  #ifdef __i386__
> -      if (l == 1)
> -     nlx = 4;
> +      /* PAE wants last root directory present. */
> +      if (i == 1 && to <= 0xc0000000 && n_mappings == 0)

Ditto.

> +     map->lvls[i].virt_end = VIRT_MASK;
>  #endif
> -      lx[l] = nlx;
> -      sz += lx[l];
> -      lxs[l] = nls & (POINTERS_PER_PAGE - 1);
> -      if (nls && l != 0)
> -     sz++;
> -      nls >>= LOG_POINTERS_PER_PAGE;
> +      for (m = 0; m < n_mappings; m++)
> +     {
> +       map_cmp = mappings + m;
> +       if (map_cmp->lvls[i].virt_start == map_cmp->lvls[i].virt_end)
> +         continue;
> +       if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start &&
> +           map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end)
> +        {
> +          map->lvls[i].virt_start = 0;
> +          map->lvls[i].virt_end = 0;
> +          break;
> +        }
> +        if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start &&
> +            map->lvls[i].virt_start <= map_cmp->lvls[i].virt_end)
> +          map->lvls[i].virt_start = map_cmp->lvls[i].virt_end + 1;
> +        if (map->lvls[i].virt_end >= map_cmp->lvls[i].virt_start &&
> +            map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end)
> +          map->lvls[i].virt_end = map_cmp->lvls[i].virt_start - 1;
> +     }
> +      if (map->lvls[i].virt_start < map->lvls[i].virt_end)
> +     map->lvls[i].n_pt_pages =
> +       ((map->lvls[i].virt_end - map->lvls[i].virt_start) >> bits) + 1;
> +      map->area.n_pt_pages += map->lvls[i].n_pt_pages;
> +      grub_dprintf ("xen", "get_pgtable_size level %d: virt %llx-%llx %d 
> pts\n",
> +                 i, (unsigned long long)  map->lvls[i].virt_start,
> +                 (unsigned long long)  map->lvls[i].virt_end,
> +                 (int) map->lvls[i].n_pt_pages);
>      }
>
> -  grub_uint64_t lp;
> -  grub_uint64_t j;
> -  grub_uint64_t *pg = (grub_uint64_t *) where;
> -  int pr = 0;
> +  grub_dprintf ("xen", "get_pgtable_size return: %d page tables\n",
> +             (int) map->area.n_pt_pages);
> +
> +  state.paging_start[n_mappings] = pfn;
> +  state.paging_size[n_mappings] = map->area.n_pt_pages;
> +
> +  return GRUB_ERR_NONE;
> +}
> +
> +static grub_uint64_t *
> +get_pg_table_virt (int mapping, int level)
> +{
> +  grub_uint64_t pfn;
> +  struct grub_xen_mapping *map;
> +
> +  map = mappings + mapping;
> +  pfn = map->lvls[level].pfn_start - map->lvls[NUMBER_OF_LEVELS - 
> 1].pfn_start;
> +  return map->where + pfn * POINTERS_PER_PAGE;
> +}
>
> -  grub_memset (pg, 0, sz * PAGE_SIZE);
> +static grub_uint64_t
> +get_pg_table_prot (int level, grub_uint64_t pfn)
> +{
> +  int m;
> +  grub_uint64_t pfn_s, pfn_e;
>
> -  lp = paging_start + lx[NUMBER_OF_LEVELS - 1];
> -  for (l = NUMBER_OF_LEVELS - 1; l >= 1; l--)
> +  if (level > 0)
> +    return INTERMEDIATE_OR;
> +  for (m = 0; m < n_mappings; m++)
>      {
> -      if (lxs[l] || pr)
> -     pg[0] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR;
> -      if (pr)
> -     pg += POINTERS_PER_PAGE;
> -      for (j = 0; j < lx[l - 1]; j++)
> -     pg[j + lxs[l]] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR;
> -      pg += lx[l] * POINTERS_PER_PAGE;
> -      if (lxs[l])
> -     pr = 1;
> +      pfn_s = mappings[m].lvls[NUMBER_OF_LEVELS - 1].pfn_start;
> +      pfn_e = mappings[m].area.n_pt_pages + pfn_s;
> +      if (pfn >= pfn_s && pfn < pfn_e)
> +     return 5;
>      }
> +  return 7;

What 7 and 5 means?

I am exhausted. Please fix above stuff and I will review this patch
again after fixes.

Daniel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.