
Re: [Xen-devel] [PATCH v5 9/9] xen/x86: setup PVHv2 Dom0 ACPI tables



>>> On 19.01.17 at 18:29, <roger.pau@xxxxxxxxxx> wrote:
> +static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
> +{
> +    struct acpi_table_madt *madt;
> +    struct acpi_table_header *table;
> +    struct acpi_madt_io_apic *io_apic;
> +    struct acpi_madt_local_apic *local_apic;
> +    struct acpi_madt_local_x2apic *x2apic;
> +    acpi_status status;
> +    unsigned long size;
> +    unsigned int i, max_vcpus;
> +    int rc;
> +
> +    /* Count number of interrupt overrides in the MADT. */
> +    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
> +                          acpi_count_intr_ovr, UINT_MAX);
> +
> +    /* Count number of NMI sources in the MADT. */
> +    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_count_nmi_src,
> +                          UINT_MAX);
> +
> +    /* Check if there are x2APIC entries. */
> +    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, acpi_check_x2apic, 1);
> +
> +    max_vcpus = dom0_max_vcpus();
> +    /* Calculate the size of the crafted MADT. */
> +    size = sizeof(*madt);
> +    size += sizeof(*io_apic);

Still just a single IO-APIC, and no FIXME note here. I see you have
one below, but this line will need to remain in lock step with it. Or
perhaps you could multiply by nr_ioapics here, accepting the transient
waste of space.
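
I.e. just (untested)

    size += sizeof(*io_apic) * nr_ioapics;

which then also wouldn't need touching when the code below learns to
emit more than one IO-APIC entry.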

> +    /*
> +     * The APIC ID field of the local APIC struct is only 1byte, so we need
> +     * to limit the number of local APIC entries to 128 because we only use
> +     * even numbers as APIC IDs.
> +     */
> +    size += sizeof(*local_apic) *
> +            min(max_vcpus, 1U << (sizeof(local_apic->id) * 8));

This caps at 256 now, while the comment just above still says 128.
Perhaps it would anyway be better to use HVM_MAX_VCPUS here, or maybe
the minimum of both?
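
E.g. (just a sketch, untested; nr_lapics would be a new local declared
next to max_vcpus, and the "- 1" accounts for only even APIC IDs being
used, per the comment above):

    nr_lapics = min(max_vcpus,
                    min(HVM_MAX_VCPUS + 0u,
                        1u << (sizeof(local_apic->id) * 8 - 1)));
    size += sizeof(*local_apic) * nr_lapics;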

> +    size += sizeof(*intsrcovr) * acpi_intr_overrides;
> +    size += sizeof(*nmisrc) * acpi_nmi_sources;
> +    size += acpi_x2apic ? sizeof(*x2apic) * dom0_max_vcpus() : 0;

You have the dom0_max_vcpus() result already latched in the local
variable max_vcpus; please use it here. Plus, when you cap the number
of LAPIC entries, you should also provide x2APIC ones (to be able to
represent all vCPU-s).
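
Along the lines of (again untested, re-using the nr_lapics from the
sketch above):

    if ( acpi_x2apic || max_vcpus > nr_lapics )
        size += sizeof(*x2apic) * max_vcpus;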

> +    madt = xzalloc_bytes(size);
> +    if ( !madt )
> +    {
> +        printk("Unable to allocate memory for MADT table\n");
> +        return -ENOMEM;
> +    }
> +
> +    /* Copy the native MADT table header. */
> +    status = acpi_get_table(ACPI_SIG_MADT, 0, &table);
> +    if ( !ACPI_SUCCESS(status) )
> +    {
> +        printk("Failed to get MADT ACPI table, aborting.\n");
> +        return -EINVAL;
> +    }
> +    madt->header = *table;
> +    madt->address = APIC_DEFAULT_PHYS_BASE;
> +    /*
> +     * NB: this is currently set to 4, which is the revision in the ACPI
> +     * spec 6.1. Sadly ACPICA doesn't provide revision numbers for the
> +     * tables described in the headers.
> +     */
> +    madt->header.revision = min_t(unsigned char, table->revision, 4);
> +
> +    /*
> +     * Setup the IO APIC entry.
> +     * FIXME: the current vIO-APIC code just supports one IO-APIC instance
> +     * per domain. This must be fixed in order to provide the same amount of
> +     * IO APICs as available on bare metal, and with the same IDs as found in
> +     * the native IO APIC MADT entries.
> +     */
> +    if ( nr_ioapics > 1 )
> +        printk("WARNING: found %d IO APICs, Dom0 will only have access to 1 
> emulated IO APIC\n",
> +               nr_ioapics);
> +    io_apic = (struct acpi_madt_io_apic *)(madt + 1);

To aid readability you may want to use void in all such casts.
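
I.e.

    io_apic = (void *)(madt + 1);

and likewise for the other entry pointers derived below.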

> +static bool __init pvh_acpi_table_allowed(const char *sig)
> +{
> +    static const char __init banned_tables[][ACPI_NAME_SIZE] = {

__initconst
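
i.e.

    static const char __initconst banned_tables[][ACPI_NAME_SIZE] = {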

> +        ACPI_SIG_HPET, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_MPST,
> +        ACPI_SIG_PMTT, ACPI_SIG_MADT, ACPI_SIG_DMAR};
> +    int i;

unsigned int

> +static int __init pvh_setup_acpi_xsdt(struct domain *d, paddr_t madt_addr,
> +                                      paddr_t *addr)
> +{
> +    struct acpi_table_xsdt *xsdt;
> +    struct acpi_table_header *table;
> +    struct acpi_table_rsdp *rsdp;
> +    unsigned long size = sizeof(*xsdt);
> +    unsigned int i, j, num_tables = 0;
> +    paddr_t xsdt_paddr;
> +    int rc;
> +
> +    /*
> +     * Restore original DMAR table signature, we are going to filter it
> +     * from the new XSDT that is presented to the guest, so it no longer
> +     * makes sense to have it's signature zapped.

"... so it is no longer necessary to ..."?

> +static int __init pvh_setup_acpi(struct domain *d, paddr_t start_info)
> +{
> +    struct acpi_table_rsdp rsdp, *native_rsdp;
> +    unsigned long pfn, nr_pages;
> +    paddr_t madt_paddr, xsdt_paddr, rsdp_paddr;
> +    unsigned int i;
> +    int rc;
> +
> +    /* Scan top-level tables and add their regions to the guest memory map. */
> +    for( i = 0; i < acpi_gbl_root_table_list.count; i++ )
> +    {
> +        const char *sig = acpi_gbl_root_table_list.tables[i].signature.ascii;
> +        unsigned long addr = acpi_gbl_root_table_list.tables[i].address;
> +        unsigned long size = acpi_gbl_root_table_list.tables[i].length;
> +
> +        /*
> +         * Make sure the original MADT is also mapped, so that Dom0 can
> +         * properly access the data returned by _MAT methods in case it's
> +         * re-using MADT memory.
> +         */
> +        if ( pvh_acpi_table_allowed(sig) ||
> +             (strncmp(sig, ACPI_SIG_MADT, ACPI_NAME_SIZE) == 0 &&
> +             !acpi_memory_banned(addr, size)) )

Indentation. But to me this would be clearer as

        if ( strncmp(sig, ACPI_SIG_MADT, ACPI_NAME_SIZE)
             ? pvh_acpi_table_allowed(sig)
             : !acpi_memory_banned(addr, size) )

anyway.

> +             pvh_add_mem_range(d, addr, addr + size, E820_ACPI);
> +    }
> +
> +    /* Identity map ACPI e820 regions. */
> +    for ( i = 0; i < d->arch.nr_e820; i++ )
> +    {
> +        if ( d->arch.e820[i].type != E820_ACPI &&
> +             d->arch.e820[i].type != E820_NVS )
> +            continue;
> +
> +        pfn = PFN_DOWN(d->arch.e820[i].addr);
> +        nr_pages = PFN_UP((d->arch.e820[i].addr & ~PAGE_MASK) +
> +                          d->arch.e820[i].size);
> +
> +        rc = modify_identity_mmio(d, pfn, nr_pages, true);
> +        if ( rc )
> +        {
> +            printk("Failed to map ACPI region [%#lx, %#lx) into Dom0 memory 
> map\n",
> +                   pfn, pfn + nr_pages);
> +            return rc;
> +        }
> +    }
> +
> +    rc = pvh_setup_acpi_madt(d, &madt_paddr);
> +    if ( rc )
> +        return rc;
> +
> +    rc = pvh_setup_acpi_xsdt(d, madt_paddr, &xsdt_paddr);
> +    if ( rc )
> +        return rc;
> +
> +    /* Craft a custom RSDP. */
> +    memset(&rsdp, 0, sizeof(rsdp));

Perhaps worth using an initializer here again, with as many as
possible of the assignments below folded into it?
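
Something like (untested; a designated initializer also zero-fills the
remaining fields, allowing the memset() to go away):

    struct acpi_table_rsdp rsdp = {
        .signature = ACPI_SIG_RSDP, /* no NUL - exactly fills the field */
        .revision = 2,
        .length = sizeof(rsdp),
    };

leaving just oem_id, xsdt_physical_address, and the two checksums to
be filled in afterwards.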

> +    memcpy(rsdp.signature, ACPI_SIG_RSDP, sizeof(rsdp.signature));
> +    native_rsdp = acpi_os_map_memory(acpi_os_get_root_pointer(),
> +                                     sizeof(rsdp));
> +    memcpy(rsdp.oem_id, native_rsdp->oem_id, sizeof(rsdp.oem_id));
> +    acpi_os_unmap_memory(native_rsdp, sizeof(rsdp));
> +    rsdp.revision = 2;
> +    rsdp.xsdt_physical_address = xsdt_paddr;
> +    rsdp.rsdt_physical_address = 0;

This is redundant in any event.

> +    rsdp.length = sizeof(rsdp);
> +    /*
> +     * Calling acpi_tb_checksum here is a layering violation, but
> +     * introducing a wrapper for such simple usage seems overkill.
> +     */
> +    rsdp.checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
> +                                      ACPI_RSDP_REV0_SIZE);
> +    rsdp.extended_checksum -= acpi_tb_checksum(ACPI_CAST_PTR(u8, &rsdp),
> +                                               sizeof(rsdp));
> +
> +    /*
> +     * Place the new RSDP in guest memory space.
> +     *
> +     * NB: this RSDP is not going to replace the original RSDP, which
> +     * should still be accessible to the guest. However that RSDP is
> +     * going to point to the native RSDT, and should not be used.

... for the Dom0 kernel's boot purposes (we keep it visible after all
for post boot access).

Jan
