[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 12/18] x86: convert dom0 creation to domain builder


  • To: "Daniel P. Smith" <dpsmith@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 27 Jul 2022 14:25:04 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=CtzhcsiHCHjulapMScTLDR09bCy43he6C9mapoEDS/s=; b=a9+wmjZNrUSyM2cLPsVp04mlFeqMyF0365ey+02lEpJJnmQd4HrKXV6EH/Yc+oU7JSUVkuZyR9+2vSSOLUuMYTKRRDOONl37Hre8F84qLxNJnqlwpRyBksMtEq+7k7GJ7N+f+hJ6yC60VN5JipXca1cgwscBkq8KUT3HMskjjGlPTxCOXZNjInYcOHxMvBqBx2rpCXW6+B1mHTGtFPXDueGOBXg/56a5I2NBUtd3sBOPeB51NEgjXIZH3vzJd888m1gOJe0K6pOF+Z/iv9OqCfTCNpivz/9EjyLG5UqegZBva4/3hE4DdfLH9b6yIf8JQgBxapLGU4wiBo9dsjZVZQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=ejrFcOLO/H53pOQqR8RUumwpImCplgOAnp7br21jN21WC/pfL8aIIbF8ISAcmhyGyZCsez1BZeQ7sArj2fIHzpYQ4dOrNzCZpTlIf25VBJGbeb3c+JJpMrEnTkBF5UaklXaEE1+LqjBLu3c5Lw+p1YqzN+r1VEPsmvhPFG38jFgZCz9Toi5WsuCXCfPmqyav+4IR8jawOxMZ/acpJD3pPHgSjKIzToWIgnz6Mo8fUhteV1iCauae+/ypPvXc51NvL5kdwP8mY+2IHXxrQNjQHVKZmeSrKFp9BPGfGHUmFiyeoODQ8SRRPK+QUGvhFF8zMw4e9cG1PLFYJjBU4QUISA==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: scott.davis@xxxxxxxxxx, christopher.clark@xxxxxxxxxx, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx, Wei Liu <wl@xxxxxxx>
  • Delivery-date: Wed, 27 Jul 2022 12:25:19 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 06.07.2022 23:04, Daniel P. Smith wrote:
> --- /dev/null
> +++ b/xen/arch/x86/domain_builder.c
> @@ -0,0 +1,128 @@
> +#include <xen/bootdomain.h>
> +#include <xen/bootinfo.h>
> +#include <xen/domain.h>
> +#include <xen/domain_builder.h>
> +#include <xen/err.h>
> +#include <xen/grant_table.h>
> +#include <xen/iommu.h>
> +#include <xen/sched.h>
> +
> +#include <asm/pv/shim.h>
> +#include <asm/setup.h>
> +
> +extern unsigned long cr4_pv32_mask;

Such declarations need to go in a header which both producer and
consumer(s) include.

> +static unsigned int __init dom_max_vcpus(struct boot_domain *bd)
> +{
> +    unsigned int limit;
> +
> +    if ( builder_is_initdom(bd) )
> +        return dom0_max_vcpus();
> +
> +    limit = bd->mode & BUILD_MODE_PARAVIRTUALIZED ?
> +                MAX_VIRT_CPUS : HVM_MAX_VCPUS;

Nit: Indentation.

> +    if ( bd->ncpus > limit )
> +        return limit;
> +    else
> +        return bd->ncpus;

    return min(bd->ncpus, limit);

> +}
> +
> +void __init arch_create_dom(
> +    const struct boot_info *bi, struct boot_domain *bd)
> +{
> +    struct xen_domctl_createdomain dom_cfg = {
> +        .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
> +        .max_evtchn_port = -1,
> +        .max_grant_frames = -1,
> +        .max_maptrack_frames = -1,
> +        .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
> +        .max_vcpus = dom_max_vcpus(bd),
> +        .arch = {
> +            .misc_flags = bd->functions & BUILD_FUNCTION_INITIAL_DOM &&
> +                           opt_dom0_msr_relaxed ? XEN_X86_MSR_RELAXED : 0,
> +        },
> +    };
> +    unsigned int is_privileged = 0;

Either this is bool and retains its name, or it remains unsigned int
and changes its name (to e.g. "cdf").

> +    char *cmdline;
> +
> +    if ( bd->kernel == NULL )
> +        panic("Error creating d%uv0\n", bd->domid);

This gives too little information and, by mentioning vCPU 0, is imo
actively misleading.

> +    /* mask out PV and device model bits, if 0 then the domain is PVH */
> +    if ( !(bd->mode &
> +           (BUILD_MODE_PARAVIRTUALIZED|BUILD_MODE_ENABLE_DEVICE_MODEL)) )

Shouldn't you outright reject BUILD_MODE_ENABLE_DEVICE_MODEL, since
you can't fulfill that request?

> +    {
> +        dom_cfg.flags |= (XEN_DOMCTL_CDF_hvm |
> +                         (hvm_hap_supported() ? XEN_DOMCTL_CDF_hap : 0));
> +
> +        /*
> +         * If shadow paging is enabled for the initial domain, mask out
> +         * HAP if it was just enabled.
> +         */
> +        if ( builder_is_initdom(bd) )
> +            if ( opt_dom0_shadow )
> +                dom_cfg.flags |= ~XEN_DOMCTL_CDF_hap;

Please combine such if()s into a single one using &&. And I suppose
you mean &= ? Furthermore - how would a DomU be started without using
HAP when HAP is available?

> +        /* TODO: review which flags should be present */
> +        dom_cfg.arch.emulation_flags |=
> +            XEN_X86_EMU_LAPIC | XEN_X86_EMU_IOAPIC | XEN_X86_EMU_VPCI;
> +    }
> +
> +    if ( iommu_enabled && builder_is_hwdom(bd) )
> +        dom_cfg.flags |= XEN_DOMCTL_CDF_iommu;

Why would only hwdom get an IOMMU?

> +    if ( !pv_shim && builder_is_ctldom(bd) )
> +        is_privileged = CDF_privileged;
> +
> +    /* Create initial domain.  Not d0 for pvshim. */

Up to here I was assuming this function would deal with more than just
Dom0, based on conditionals seen. What's the intention? Mixing things
is at best confusing.

> +    bd->domid = get_initial_domain_id();

Higher up in the panic() invocation you did use bd->domid already.

> +    bd->domain = domain_create(bd->domid, &dom_cfg, is_privileged);
> +    if ( IS_ERR(bd->domain) )
> +        panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(bd->domain));
> +
> +    init_dom0_cpuid_policy(bd->domain);
> +
> +    if ( alloc_dom0_vcpu0(bd->domain) == NULL )
> +        panic("Error creating d%uv0\n", bd->domid);
> +
> +    /* Grab the DOM0 command line. */
> +    cmdline = (bd->kernel->string.kind == BOOTSTR_CMDLINE) ?
> +              bd->kernel->string.bytes : NULL;
> +    if ( cmdline || bi->arch->kextra )
> +    {
> +        char dom_cmdline[MAX_GUEST_CMDLINE];
> +
> +        cmdline = arch_prepare_cmdline(cmdline, bi->arch);
> +        strlcpy(dom_cmdline, cmdline, MAX_GUEST_CMDLINE);
> +
> +        if ( bi->arch->kextra )
> +            /* kextra always includes exactly one leading space. */
> +            strlcat(dom_cmdline, bi->arch->kextra, MAX_GUEST_CMDLINE);

I don't think the comment belongs here - there's no insertion of a blank
in sight anywhere.

> +        apply_xen_cmdline(dom_cmdline);
> +
> +        strlcpy(bd->kernel->string.bytes, dom_cmdline, MAX_GUEST_CMDLINE);

Further up using MAX_GUEST_CMDLINE is acceptable, because it's easy to see
that this is the array's size. But here this isn't the case - you want to
use ARRAY_SIZE() at least in this one case (ideally everywhere).

> +    }
> +
> +    /*
> +     * Temporarily clear SMAP in CR4 to allow user-accesses in 
> construct_dom0().

dom0 here, but ...

> +     * This saves a large number of corner cases interactions with
> +     * copy_from_user().
> +     */
> +    if ( cpu_has_smap )
> +    {
> +        cr4_pv32_mask &= ~X86_CR4_SMAP;
> +        write_cr4(read_cr4() & ~X86_CR4_SMAP);
> +    }
> +
> +    if ( construct_domain(bd) != 0 )

... domain here, yet then ...

> +        panic("Could not construct domain 0\n");

... domain 0 again here.

> @@ -745,109 +746,21 @@ static unsigned int __init copy_bios_e820(struct 
> e820entry *map, unsigned int li
>      return n;
>  }
>  
> -static struct domain *__init create_dom0(
> -    const struct boot_info *bi, struct boot_domain *bd)
> +void __init apply_xen_cmdline(char *cmdline)
>  {
> -    struct xen_domctl_createdomain dom0_cfg = {
> -        .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
> -        .max_evtchn_port = -1,
> -        .max_grant_frames = -1,
> -        .max_maptrack_frames = -1,
> -        .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
> -        .max_vcpus = dom0_max_vcpus(),
> -        .arch = {
> -            .misc_flags = opt_dom0_msr_relaxed ? XEN_X86_MSR_RELAXED : 0,
> -        },
> -    };
> -    char *cmdline;
> -
> -    if ( bd->kernel == NULL )
> -        panic("Error creating d%uv0\n", bd->domid);
> -
> -    if ( opt_dom0_pvh )
> -    {
> -        dom0_cfg.flags |= (XEN_DOMCTL_CDF_hvm |
> -                           ((hvm_hap_supported() && !opt_dom0_shadow) ?
> -                            XEN_DOMCTL_CDF_hap : 0));
> -
> -        dom0_cfg.arch.emulation_flags |=
> -            XEN_X86_EMU_LAPIC | XEN_X86_EMU_IOAPIC | XEN_X86_EMU_VPCI;
> -    }
> -
> -    if ( iommu_enabled )
> -        dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> -
> -    /* Create initial domain.  Not d0 for pvshim. */
> -    bd->domid = get_initial_domain_id();
> -    bd->domain = domain_create(bd->domid, &dom0_cfg, pv_shim ?
> -                               0 : CDF_privileged);
> -    if ( IS_ERR(bd->domain) )
> -        panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(bd->domain));
> -
> -    init_dom0_cpuid_policy(bd->domain);
> -
> -    if ( alloc_dom0_vcpu0(bd->domain) == NULL )
> -        panic("Error creating d%uv0\n", bd->domid);
> -
> -    /* Grab the DOM0 command line. */
> -    cmdline = (bd->kernel->string.kind == BOOTSTR_CMDLINE) ?
> -              bd->kernel->string.bytes : NULL;
> -    if ( cmdline || bi->arch->kextra )
> -    {
> -        char dom0_cmdline[MAX_GUEST_CMDLINE];
> -
> -        cmdline = arch_prepare_cmdline(cmdline, bi->arch);
> -        strlcpy(dom0_cmdline, cmdline, MAX_GUEST_CMDLINE);
> -
> -        if ( bi->arch->kextra )
> -            /* kextra always includes exactly one leading space. */
> -            strlcat(dom0_cmdline, bi->arch->kextra, MAX_GUEST_CMDLINE);
> -
> -        /* Append any extra parameters. */
> -        if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
> -            strlcat(dom0_cmdline, " noapic", MAX_GUEST_CMDLINE);
> -        if ( (strlen(acpi_param) == 0) && acpi_disabled )
> -        {
> -            printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
> -            strlcpy(acpi_param, "off", sizeof(acpi_param));
> -        }
> -        if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
> -        {
> -            strlcat(dom0_cmdline, " acpi=", MAX_GUEST_CMDLINE);
> -            strlcat(dom0_cmdline, acpi_param, MAX_GUEST_CMDLINE);
> -        }
> -
> -        strlcpy(bd->kernel->string.bytes, dom0_cmdline, MAX_GUEST_CMDLINE);
> -    }
> -
> -    /*
> -     * Temporarily clear SMAP in CR4 to allow user-accesses in 
> construct_dom0().
> -     * This saves a large number of corner cases interactions with
> -     * copy_from_user().
> -     */
> -    if ( cpu_has_smap )
> +    if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
> +        strlcat(cmdline, " noapic", MAX_GUEST_CMDLINE);
> +    if ( (strlen(acpi_param) == 0) && acpi_disabled )
>      {
> -        cr4_pv32_mask &= ~X86_CR4_SMAP;
> -        write_cr4(read_cr4() & ~X86_CR4_SMAP);
> +        printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
> +        strlcpy(acpi_param, "off", sizeof(acpi_param));
>      }
> -
> -    if ( construct_domain(bd) != 0 )
> -        panic("Could not construct domain 0\n");
> -
> -    if ( cpu_has_smap )
> +    if ( (strlen(acpi_param) != 0) &&
> +         !strstr(cmdline, "acpi=") )
>      {
> -        write_cr4(read_cr4() | X86_CR4_SMAP);
> -        cr4_pv32_mask |= X86_CR4_SMAP;
> +        strlcat(cmdline, " acpi=", MAX_GUEST_CMDLINE);
> +        strlcat(cmdline, acpi_param, MAX_GUEST_CMDLINE);
>      }
> -
> -    return bd->domain;
> -}
> -
> -void __init arch_create_dom(
> -    const struct boot_info *bi, struct boot_domain *bd)
> -{
> -    if ( builder_is_initdom(bd) )
> -        create_dom0(bi, bd);
>  }

Earlier on a function was introduced to deal with this cmdline handling.
And now you introduce a 2nd such function?

Jan



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.