[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 04/34] HYPERCALL_version_op. New hypercall mirroring XENVER_ but sane.



> Subject: [PATCH v4 05/35] HYPERCALL_version_op. New hypercall mirroring
>  XENVER_ but sane.
> 
> This hypercall mirrors the XENVER_ in that it has similar functionality.
> However it is designed differently:
>  - No compat layer. The data structures are the same size on 32
>    as on 64-bit.
>  - The hypercall accepts three arguments - the command, pointer to
>    an buffer, and the length of the buffer.
>  - Each sub-ops can be "probed" for size by returning the size of
>    buffer that will be needed - if the buffer is NULL.
>  - Subops can complete even if the buffer is too slow - truncated
>    data will be filled and hypercall will return -ENOBUFS.

s/too slow/too small/ ?

>  - VERSION_OP_commandline, VERSION_OP_changeset are privileged.

Aiui this is no difference to the old one anymore if we assume
patches get committed in the order they're being presented in
this series.

> --- a/xen/common/kernel.c
> +++ b/xen/common/kernel.c
> @@ -221,6 +221,47 @@ void __init do_initcalls(void)
>  
>  #endif
>  
> +static int get_features(struct domain *d, xen_feature_info_t *fi)
> +{
> +    switch ( fi->submap_idx )
> +    {
> +    case 0:
> +        fi->submap = (1U << XENFEAT_memory_op_vnode_supported);
> +        if ( VM_ASSIST(d, pae_extended_cr3) )
> +            fi->submap |= (1U << XENFEAT_pae_pgdir_above_4gb);

Since you already move this code, I think the two lines above
would better go into the x86-specific section below.

> @@ -302,50 +343,16 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) 
> arg)
>      case XENVER_get_features:
>      {
>          xen_feature_info_t fi;
> -        struct domain *d = current->domain;
>  
>          if ( copy_from_guest(&fi, arg, 1) )
>              return -EFAULT;
>  
> -        switch ( fi.submap_idx )
> +        if ( !deny )
>          {
> -        case 0:
> -            if ( deny )
> -                break;
> -            fi.submap = (1U << XENFEAT_memory_op_vnode_supported);
> -            if ( VM_ASSIST(d, pae_extended_cr3) )
> -                fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
> -            if ( paging_mode_translate(d) )
> -                fi.submap |= 
> -                    (1U << XENFEAT_writable_page_tables) |
> -                    (1U << XENFEAT_auto_translated_physmap);
> -            if ( is_hardware_domain(d) )
> -                fi.submap |= 1U << XENFEAT_dom0;
> -#ifdef CONFIG_X86
> -            switch ( d->guest_type )
> -            {
> -            case guest_type_pv:
> -                fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
> -                             (1U << XENFEAT_highmem_assist) |
> -                             (1U << XENFEAT_gnttab_map_avail_bits);
> -                break;
> -            case guest_type_pvh:
> -                fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) |
> -                             (1U << XENFEAT_supervisor_mode_kernel) |
> -                             (1U << XENFEAT_hvm_callback_vector);
> -                break;
> -            case guest_type_hvm:
> -                fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) |
> -                             (1U << XENFEAT_hvm_callback_vector) |
> -                             (1U << XENFEAT_hvm_pirqs);
> -                break;
> -            }
> -#endif
> -            break;
> -        default:
> -            return -EINVAL;
> +            int rc = get_features(current->domain, &fi);
> +            if ( rc )

Blank line between declaration(s) and statement(s) please.

> @@ -388,6 +395,182 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) 
> arg)
>      return -ENOSYS;
>  }
>  
> +static const char *capabilities_info(ssize_t *len)

Why ssize_t?

> +{
> +    static xen_capabilities_info_t cached_cap;

__read_mostly?

> +static int size_of_subops_data(unsigned int cmd, ssize_t *sz)
> +{
> +    int rc = 0;
> +    /* Compute size. */
> +    switch ( cmd )
> +    {
> +    case XEN_VERSION_OP_version:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;
> +
> +    case XEN_VERSION_OP_extraversion:
> +        *sz = strlen(xen_extra_version()) + 1;
> +        break;
> +
> +    case XEN_VERSION_OP_capabilities:
> +        capabilities_info(sz);
> +        break;
> +
> +    case XEN_VERSION_OP_platform_parameters:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;
> +
> +    case XEN_VERSION_OP_changeset:
> +        *sz = strlen(xen_changeset()) + 1;
> +        break;
> +
> +    case XEN_VERSION_OP_get_features:
> +        *sz = sizeof(xen_feature_info_t);
> +        break;
> +
> +    case XEN_VERSION_OP_pagesize:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;

Please combine all the cases producing this value.

> +    case XEN_VERSION_OP_guest_handle:
> +        *sz = ARRAY_SIZE(current->domain->handle);
> +        break;
> +
> +    case XEN_VERSION_OP_commandline:
> +        *sz = ARRAY_SIZE(saved_cmdline);

strlen()?

> +DO(version_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg,
> +               unsigned int len)
> +{
> +    union {
> +        xen_version_op_val_t n;

Would "v" or "val" be the more natural name here?

> +        xen_feature_info_t fi;
> +    } u = {};
> +    ssize_t sz = 0;
> +    const void *ptr = NULL;
> +    int rc = xsm_version_op(XSM_OTHER, cmd);
> +
> +    /* We can safely return -EPERM! */
> +    if ( rc )
> +        return rc;
> +
> +    rc = size_of_subops_data(cmd, &sz);
> +    if ( rc )
> +        return rc;
> +
> +    ASSERT(sz);
> +    /*
> +     * This hypercall also allows the client to probe. If it provides
> +     * a NULL arg we will return the size of the space it has to
> +     * allocate for the specific sub-op.
> +     */
> +    if ( guest_handle_is_null(arg) )
> +        return sz;
> +
> +    /*
> +     * The HYPERVISOR_xen_version differs in that some return the value,
> +     * and some copy it on back on argument. We follow the same rule for all
> +     * sub-ops: return 0 on success, positive value of bytes returned, and
> +     * always copy the result in arg. Yeey sanity!
> +     */
> +
> +    switch ( cmd )
> +    {
> +    case XEN_VERSION_OP_version:
> +        u.n = (xen_major_version() << 16) | xen_minor_version();
> +        break;
> +
> +    case XEN_VERSION_OP_extraversion:
> +        ptr = xen_extra_version();
> +        break;
> +
> +    case XEN_VERSION_OP_capabilities:
> +        ptr = capabilities_info(&sz);
> +        break;
> +
> +    case XEN_VERSION_OP_platform_parameters:
> +        u.n = HYPERVISOR_VIRT_START;
> +        break;
> +
> +    case XEN_VERSION_OP_changeset:
> +        ptr = xen_changeset();
> +        break;
> +
> +    case XEN_VERSION_OP_get_features:
> +        if ( copy_from_guest(&u.fi, arg, 1) )
> +        {
> +            rc = -EFAULT;
> +            break;
> +        }
> +        rc = get_features(current->domain, &u.fi);
> +        break;
> +
> +    case XEN_VERSION_OP_pagesize:
> +        u.n = PAGE_SIZE;
> +        break;
> +
> +    case XEN_VERSION_OP_guest_handle:
> +        ptr = current->domain->handle;
> +        break;
> +
> +    case XEN_VERSION_OP_commandline:
> +        ptr = saved_cmdline;
> +        break;
> +
> +    default:
> +        rc = -ENOSYS;
> +    }

Seeing this long switch() a second time I wonder why
size_of_subops_data() doesn't just get folded here, with the null
handle being taken care of below instead of above.

> +    if ( !rc )
> +    {
> +        ssize_t bytes;
> +
> +        if ( sz > len )
> +            bytes = len;
> +        else
> +            bytes = sz;

min() (for which sz and bytes being "unsigned int" would help)?

> +        if ( copy_to_guest(arg, ptr ? : &u, bytes) )
> +            rc = -EFAULT;
> +    }
> +    if ( !rc )
> +    {
> +        /*
> +         * We return len (truncate) worth of data even if we fail.
> +         */
> +        if ( sz > len )
> +            rc = -ENOBUFS;

Perhaps worth moving this up into the previous if(), such that
-EFAULT would also take precedence over -ENOBUFS?

> @@ -87,6 +95,68 @@ typedef struct xen_feature_info xen_feature_info_t;
>  #define XENVER_commandline 9
>  typedef char xen_commandline_t[1024];
>  
> +
> +
> +/*
> + * The HYPERCALL_version_op has a set of sub-ops which mirror the
> + * sub-ops of HYPERCALL_xen_version. However this hypercall differs
> + * radically from the former:
> + *  - It returns the amount of bytes returned.
> + *  - It will return -XEN_EPERM if the guest is not permitted.
> + *  - It will return the requested data in arg.
> + *  - It requires an third argument (len) for the length of the
> + *    arg. Naturally the arg has to fit the requested data otherwise
> + *    -XEN_ENOBUFS is returned.
> + *
> + * It also offers an mechanism to probe for the amount of bytes an
> + * sub-op will require. Having the arg have an NULL pointer will

s/pointer/handle/

> + * return the number of bytes requested for the operation. Or an
> + * negative value if an error is encountered.
> + */
> +
> +typedef uint64_t xen_version_op_val_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_version_op_val_t);
> +
> +typedef void xen_version_op_buf_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_version_op_buf_t);

Are these actually useful for anything? And for the various strings,
wouldn't a "char" handle be more natural?

> +/* arg == version_op_val_t. Encoded as major:minor (31..16:15..0) */

Please make explicit that 63...32 are zero (or whatever they really
are).

> +#define XEN_VERSION_OP_version      0
> +
> +/* arg == version_op_buf. Contains NUL terminated utf-8 string. */
> +#define XEN_VERSION_OP_extraversion 1
> +
> +/* arg == version_op_buf. Contains NUL terminated utf-8 string. */
> +#define XEN_VERSION_OP_capabilities 3
> +
> +/* arg == version_op_buf. Contains NUL terminated utf-8 string. */
> +#define XEN_VERSION_OP_changeset 4
> +
> +/*
> + * arg == xen_version_op_val_t. Contains the virtual address
> + * of the hypervisor encoded as [63..0].

I'd say the encoding info here is unnecessary and could - just like
you already do for pagesize below - be omitted.

> + */
> +#define XEN_VERSION_OP_platform_parameters 5
> +
> +/*
> + * arg = xen_feature_info_t - shares the same structure
> + * as the XENVER_get_features.
> + */
> +#define XEN_VERSION_OP_get_features 6
> +
> +/* arg == xen_version_op_val_t. */
> +#define XEN_VERSION_OP_pagesize 7
> +
> +/* arg == version_op_buf.
> + *
> + * The toolstack fills it out for guest consumption. It is intended to hold
> + * the UUID of the guest.
> + */
> +#define XEN_VERSION_OP_guest_handle 8
> +
> +/* arg = version_op_buf. Contains NUL terminated utf-8 string. */
> +#define XEN_VERSION_OP_commandline 9
> +
>  #endif /* __XEN_PUBLIC_VERSION_H__ */

Would leaving out the _OP and _op everywhere result in any
name collisions with the old one?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.