[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC 0 PATCH 3/3] PVH dom0: construct_dom0 changes



>>> On 25.09.13 at 23:03, Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> wrote:
> @@ -307,6 +308,136 @@ static void __init process_dom0_ioports_disable(void)
>      }
>  }
>  
> +/*
> + * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have
> + * the entire io region mapped in the EPT/NPT.
> + *
> + * PVH FIXME: The following doesn't map MMIO ranges when they sit above the
> + *            highest E820 covered address.

This absolutely needs fixing before this can go in.

> + */
> +static __init void pvh_map_all_iomem(struct domain *d)
> +{
> +    unsigned long start_pfn, end_pfn, end = 0, start = 0;
> +    const struct e820entry *entry;
> +    unsigned int i, nump;
> +    int rc;
> +
> +    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
> +    {
> +        end = entry->addr + entry->size;
> +
> +        if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ||

The inclusion of E820_UNUSABLE here clearly needs an explanation
(ideally in the shape of a code comment).

> +             i == e820.nr_map - 1 )
> +        {
> +            start_pfn = PFN_DOWN(start);
> +            end_pfn = PFN_UP(end);
> +
> +            if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE )
> +                end_pfn = PFN_UP(entry->addr);

Better coded as if/else construct (making more obvious what the
intention here is - it took me quite some time to see that what
you're doing here is correct, because the general case really is
what sits in the if() body, and the exception is what gets done
before the if(), which is counter intuitive).

> +
> +            if ( start_pfn < end_pfn )
> +            {
> +                nump = end_pfn - start_pfn;
> +                /* Add pages to the mapping */
> +                rc = domctl_memory_mapping(d, start_pfn, start_pfn, nump, 1);
> +                BUG_ON(rc);
> +            }
> +            start = end;
> +        }
> +    }
> +
> +    /* If the e820 ended under 4GB, we must map the remaining space upto 4GB 
> */

This is arbitrary (related to the FIXME above).

> +static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
> +                                                 unsigned long v_start)
> +{
> +    int i, j, k;
> +    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
> +    l3_pgentry_t *l3tab = NULL;
> +    l2_pgentry_t *l2tab = NULL;
> +    l1_pgentry_t *l1tab = NULL;
> +    l4_pgentry_t sav_guest_l4;
> +    unsigned long cr3_pfn;
> +
> +    ASSERT(paging_mode_enabled(v->domain));
> +
> +    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
> +    l4tab = l4start + l4_table_offset(v_start);

Many of the comments on patch 2 apply here too.

> +    sav_guest_l4 = *l4tab;
> +
> +    /* Give guest a clean slate to start with */
> +    clear_page(l4start);
> +    *l4tab = sav_guest_l4;
> +    BUG_ON(!l4e_get_pfn(sav_guest_l4));

This assumes that the initial mapping is covered by a single L4
entry. While true for Linux, this is not generic, and hence needs
fixing (the problem continues further down).

> +
> +    l3tab = map_l3t_from_l4e(*l4tab);
> +    for (i=0; i < PAGE_SIZE / sizeof(l3_pgentry_t); i++, l3tab++)

Coding style (not going to make further notes on violations).

> +    {
> +        if ( l3e_get_pfn(*l3tab) == 0 )

Is that really a good check? Shouldn't that rather look at
_PAGE_PRESENT?

> +            continue;
> +
> +        l2tab = map_l2t_from_l3e(*l3tab);
> +        for (j=0; j < PAGE_SIZE / sizeof(l2_pgentry_t); j++, l2tab++)
> +        {
> +            if ( l2e_get_pfn(*l2tab) == 0 )
> +                continue;
> +
> +            l1tab = map_l1t_from_l2e(*l2tab);
> +
> +            for (k=0; k < PAGE_SIZE / sizeof(l2_pgentry_t); k++, l1tab++)

l2_pgentry_t? You be better off using sizeof(*l1tab) here anyway
(and similarly in the other loops).

> @@ -513,7 +644,7 @@ int __init construct_dom0(
>      unsigned long alloc_spfn;
>      unsigned long alloc_epfn;
>      unsigned long initrd_pfn = -1, initrd_mfn = 0;
> -    unsigned long count;
> +    unsigned long count, shared_info_paddr = 0;

Even if benign for x86-64, please used paddr_t for physical
addresses.

> @@ -603,12 +735,20 @@ int __init construct_dom0(
>          goto out;
>      }
>  
> -    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != 
> XEN_ENT_NONE &&
> -         !test_bit(XENFEAT_dom0, parms.f_supported) )
> +    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != 
> XEN_ENT_NONE )
>      {
> -        printk("Kernel does not support Dom0 operation\n");
> -        rc = -EINVAL;
> -        goto out;
> +        if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
> +        {
> +            printk("Kernel does not support Dom0 operation\n");
> +            rc = -EINVAL;
> +            goto out;
> +        }
> +        if ( is_pvh_domain(d) &&
> +             !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
> +        {
> +            printk("Kernel does not support PVH mode\n");
> +            return -EINVAL;

Inconsistent: Above you see "goto out", and here you use "return".

> @@ -673,6 +813,14 @@ int __init construct_dom0(
>      vstartinfo_end   = (vstartinfo_start +
>                          sizeof(struct start_info) +
>                          sizeof(struct dom0_vga_console_info));
> +
> +    if ( is_pvh_domain(d) )
> +    {
> +        /* note, following is paddr and not maddr */
> +        shared_info_paddr = round_pgup(vstartinfo_end) - v_start;

Hardly worth the comment considering the variable name.

> @@ -868,6 +1016,9 @@ int __init construct_dom0(
>                                      L1_PROT : COMPAT_L1_PROT));
>          l1tab++;
>  
> +        if ( is_pvh_domain(d) )
> +            continue;
> +
>          page = mfn_to_page(mfn);
>          if ( (page->u.inuse.type_info == 0) &&
>               !get_page_and_type(page, d, PGT_writable_page) )

So why is the remaining part of this loop not applicable to PVH?

> @@ -912,6 +1063,16 @@ int __init construct_dom0(
>          (void)alloc_vcpu(d, i, cpu);
>      }
>  
> +    /*
> +     * pvh: we temporarily disable paging mode so that we can build cr3 
> needed
> +     * to run on dom0's page tables.
> +     */
> +    if ( is_pvh_domain(d) )
> +    {
> +        save_pvh_pg_mode = d->arch.paging.mode;
> +        d->arch.paging.mode = 0;
> +    }

Does this really need to be in a conditional?

> @@ -1089,11 +1262,18 @@ int __init construct_dom0(
>      regs->eip = parms.virt_entry;
>      regs->esp = vstack_end;
>      regs->esi = vstartinfo_start;
> -    regs->eflags = X86_EFLAGS_IF;
> +    regs->eflags = X86_EFLAGS_IF | 0x2;

Unrelated change?

> +void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages)

__init

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.