[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v5 6/9] ioreq-server: add support for multiple servers



>>> On 01.05.14 at 14:08, <paul.durrant@xxxxxxxxxx> wrote:
> NOTE: To prevent emulators running in non-privileged guests from
>       potentially allocating very large amounts of xen heap, the core
>       rangeset code has been modified to introduce a hard limit of 256
>       ranges per set.

This is definitely not acceptable - there's no reason to impose any
such limit on e.g. the hardware domain, and we're (or may in the
future want to be) using range sets also for non-domain purposes.
What I'd consider acceptable would be a new range set operation
setting a limit (with the default being unlimited).

>  bool_t hvm_io_pending(struct vcpu *v)
>  {
> -    struct hvm_ioreq_server *s = v->domain->arch.hvm_domain.ioreq_server;
> -    ioreq_t *p;
> -
> -    if ( !s )
> -        return 0;
> +    struct domain *d = v->domain;
> +    struct hvm_ioreq_server *s;
> + 
> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server_list,
> +                          list_entry )
> +    {
> +        ioreq_t *p = get_ioreq(s, v);
> + 
> +        p = get_ioreq(s, v);

Why twice?

> +static int hvm_access_cf8(
> +    int dir, uint32_t port, uint32_t bytes, uint32_t *val)
> +{
> +    struct vcpu *curr = current;
> +    struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
> +    int rc;
> +
> +    BUG_ON(port < 0xcf8);
> +    port -= 0xcf8;
> +
> +    spin_lock(&hd->pci_lock);

Is there really any danger in not having this lock at all? On real
hardware, if the OS doesn't properly serialize accesses, the
result is going to be undefined too. All I think you need to make
sure is that ->pci_cf8 never gets updated non-atomically.

> +    if ( dir == IOREQ_WRITE )
> +    {
> +        switch ( bytes )
> +        {
> +        case 4:
> +            hd->pci_cf8 = *val;
> +            break;
> +
> +        case 2:

I don't think handling other than 4-byte accesses at the precise
address 0xCF8 is necessary or even correct here. Port 0xCF9,
when accessed as a byte, commonly has another meaning for
example.

> +static int hvm_access_cfc(
> +    int dir, uint32_t port, uint32_t bytes, uint32_t *val)
> +{
> +    struct vcpu *curr = current;
> +    struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
> +    int rc;
> +
> +    BUG_ON(port < 0xcfc);
> +    port -= 0xcfc;
> +
> +    spin_lock(&hd->pci_lock);
> +
> +    if ( hd->pci_cf8 & (1 << 31) ) {
> +        /* Fall through to an emulator */
> +        rc = X86EMUL_UNHANDLEABLE;
> +    } else {
> +        /* Config access disabled */

Why does this not also get passed through to an emulator?

> +static int hvm_ioreq_server_alloc_rangesets(struct hvm_ioreq_server *s, 
> +                                            bool_t is_default)
> +{
> +    int i;

Please try to avoid using variables of signed types for things that
can never be negative.

> +    int rc;
> +
> +    if ( is_default )
> +        goto done;
> +
> +    for ( i = 0; i < MAX_IO_RANGE_TYPE; i++ ) {
> +        char *name;
> +
> +        rc = asprintf(&name, "ioreq_server %d:%d %s", s->domid, s->id,

Is it really useful to include the domain ID in that name?

> @@ -764,52 +1019,270 @@ static int hvm_create_ioreq_server(struct domain *d, 
> domid_t domid)
>      domain_pause(d);
>      spin_lock(&d->arch.hvm_domain.ioreq_server_lock);
>  
> -    rc = -EEXIST;
> -    if ( d->arch.hvm_domain.ioreq_server != NULL )
> -        goto fail2;
> +    rc = -EEXIST;
> +    if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
> +        goto fail2;
> +
> +    rc = hvm_ioreq_server_init(s, d, domid, is_default,
> +                               d->arch.hvm_domain.ioreq_server_id++);

What about wraparound here?

> +static void hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
> +{
> +    struct hvm_ioreq_server *s;
> +
> +    spin_lock(&d->arch.hvm_domain.ioreq_server_lock);
> +
> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server_list,
> +                          list_entry )
> +    {
> +        bool_t is_default = ( s == d->arch.hvm_domain.default_ioreq_server);
> +
> +        if ( s->id != id )
> +            continue;
> +
> +        domain_pause(d);
> +
> +        if ( is_default )
> +            d->arch.hvm_domain.default_ioreq_server = NULL;
> +
> +        --d->arch.hvm_domain.ioreq_server_count;
> +        list_del_init(&s->list_entry);

list_del() again, as s gets freed below.

> @@ -1744,11 +2206,94 @@ void hvm_vcpu_down(struct vcpu *v)
>      }
>  }
>  
> +static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> +                                                        ioreq_t *p)
> +{
> +#define CF8_BDF(cf8)  (((cf8) & 0x00ffff00) >> 8)
> +#define CF8_ADDR(cf8) ((cf8) & 0x000000ff)

Are you aware that AMD has this port-based extended config space
access mechanism with the high 4 address bits being (iirc) at bits
24-27 of the port 0xCF8 value? You shouldn't be unconditionally
discarding these here I think (the server ought to know whether it
can handle such).

> +
> +    struct hvm_ioreq_server *s;
> +    uint8_t type;
> +    uint64_t addr;
> +
> +    if ( d->arch.hvm_domain.ioreq_server_count == 1 &&
> +         d->arch.hvm_domain.default_ioreq_server )
> +        goto done;
> +
> +    if ( p->type == IOREQ_TYPE_PIO &&
> +         (p->addr & ~3) == 0xcfc )
> +    {
> +        uint32_t cf8;
> +        uint32_t sbdf;
> +
> +        /* PCI config data cycle */
> +        type = IOREQ_TYPE_PCI_CONFIG;
> +
> +        spin_lock(&d->arch.hvm_domain.pci_lock);
> +        cf8 = d->arch.hvm_domain.pci_cf8;
> +        sbdf = HVMOP_PCI_SBDF(0,
> +                              PCI_BUS(CF8_BDF(cf8)),
> +                              PCI_SLOT(CF8_BDF(cf8)),
> +                              PCI_FUNC(CF8_BDF(cf8)));
> +        addr = ((uint64_t)sbdf << 32) | (CF8_ADDR(cf8) + (p->addr & 3));
> +        spin_unlock(&d->arch.hvm_domain.pci_lock);
> +    }
> +    else
> +    {
> +        type = p->type;
> +        addr = p->addr;
> +    }
> +
> +    switch ( type )
> +    {
> +    case IOREQ_TYPE_COPY:
> +    case IOREQ_TYPE_PIO:
> +    case IOREQ_TYPE_PCI_CONFIG:
> +        break;
> +    default:
> +        goto done;
> +    }

This switch would better go into the "else" above. And with that the
question arises whether an input of IOREQ_TYPE_PCI_CONFIG is
actually valid here.

> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server_list,
> +                          list_entry )
> +    {
> +        if ( s == d->arch.hvm_domain.default_ioreq_server )
> +            continue;
> +
> +        switch ( type )
> +        {
> +        case IOREQ_TYPE_COPY:
> +        case IOREQ_TYPE_PIO:
> +            if ( rangeset_contains_singleton(s->range[type], addr) )
> +                goto found;
> +
> +            break;
> +        case IOREQ_TYPE_PCI_CONFIG:
> +            if ( rangeset_contains_singleton(s->range[type], addr >> 32) ) {
> +                p->type = type;
> +                p->addr = addr;
> +                goto found;
> +            }
> +
> +            break;
> +        }
> +    }
> +
> + done:
> +    s = d->arch.hvm_domain.default_ioreq_server;
> +
> + found:
> +    return s;

The actions at these labels being rather simple, I'm once again having
a hard time seeing why the various goto-s up there can't be return-s,
at once making it much more obvious to the reader what is happening
in the respective cases.

> -bool_t hvm_send_assist_req(ioreq_t *proto_p)
> +bool_t hvm_send_assist_req_to_ioreq_server(struct hvm_ioreq_server *s,
> +                                           struct vcpu *v,

Both callers effectively pass "current" here - if you really want to
retain the parameter, please name it "curr" to document that fact.

> +void hvm_broadcast_assist_req(ioreq_t *p)
> +{
> +    struct vcpu *v = current;
> +    struct domain *d = v->domain;
> +    struct hvm_ioreq_server *s;
> +
> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server_list,
> +                          list_entry )
> +        (void) hvm_send_assist_req_to_ioreq_server(s, v, p);
> +}

Is there possibly any way to make sure only operations not having
any results and not affecting guest visible state changes can make
it here?

> +    rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0, &op.id);
> +    if ( rc != 0 )
> +        goto out;
> ...
> +    if ( (rc = hvm_get_ioreq_server_info(d, op.id,
> +                                         &op.ioreq_pfn,
> +                                         &op.bufioreq_pfn, 
> +                                         &op.bufioreq_port)) < 0 )
> +        goto out;

May I ask that you use consistent style for similar operations?

> +static int hvmop_destroy_ioreq_server(
> +    XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
> +{
> +    xen_hvm_destroy_ioreq_server_t op;
> +    struct domain *d;
> +    int rc;
> +
> +    if ( copy_from_guest(&op, uop, 1) )
> +        return -EFAULT;
> +
> +    rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> +    if ( rc != 0 )
> +        return rc;
> +
> +    rc = -EINVAL;
> +    if ( !is_hvm_domain(d) )
> +        goto out;
> +
> +    hvm_destroy_ioreq_server(d, op.id);
> +    rc = 0;

Shouldn't this minimally return -ENOENT for an invalid or not-in-use ID?

> @@ -4484,6 +5189,31 @@ long do_hvm_op(unsigned long op, 
> XEN_GUEST_HANDLE_PARAM(void) arg)
>  
>      switch ( op &= HVMOP_op_mask )
>      {
> +    case HVMOP_create_ioreq_server:
> +        rc = hvmop_create_ioreq_server(
> +            guest_handle_cast(arg, xen_hvm_create_ioreq_server_t));
> +        break;
> +    
> +    case HVMOP_get_ioreq_server_info:
> +        rc = hvmop_get_ioreq_server_info(
> +            guest_handle_cast(arg, xen_hvm_get_ioreq_server_info_t));
> +        break;
> +    
> +    case HVMOP_map_io_range_to_ioreq_server:
> +        rc = hvmop_map_io_range_to_ioreq_server(
> +            guest_handle_cast(arg, xen_hvm_io_range_t));
> +        break;
> +    
> +    case HVMOP_unmap_io_range_from_ioreq_server:
> +        rc = hvmop_unmap_io_range_from_ioreq_server(
> +            guest_handle_cast(arg, xen_hvm_io_range_t));
> +        break;
> +    
> +    case HVMOP_destroy_ioreq_server:
> +        rc = hvmop_destroy_ioreq_server(
> +            guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> +        break;

Neither here nor in the individual functions are any XSM checks - I
don't think you want to permit any domain to issue any of these on
an arbitrary other domain?

> +int vasprintf(char **bufp, const char *fmt, va_list args)
> +{
> +    va_list args_copy;
> +    size_t size;
> +    char *buf, dummy[1];
> +
> +    va_copy(args_copy, args);
> +    size = vsnprintf(dummy, 0, fmt, args_copy);
> +    va_end(args_copy);
> +
> +    buf = _xmalloc(++size, 1);

xmalloc_array(char, ++size)

> +EXPORT_SYMBOL(vasprintf);

I realize that there are more of these in that file, but since they're
useless to us, let's at least not add any new ones.

> @@ -46,7 +48,10 @@ struct hvm_ioreq_vcpu {
>      evtchn_port_t    ioreq_evtchn;
>  };
>  
> +#define MAX_IO_RANGE_TYPE (HVMOP_IO_RANGE_PCI + 1)

A common misconception: You mean NR_ here, not MAX_ (or else
you'd have to drop the " + 1").

>  struct hvm_domain {
> -    struct hvm_ioreq_server *ioreq_server;
> +    /* Guest page range used for non-default ioreq servers */
> +    unsigned long           ioreq_gmfn_base;
> +    unsigned long           ioreq_gmfn_mask;
> +    unsigned int            ioreq_gmfn_count;
> +
> +    /* Lock protects all other values in the following block */
>      spinlock_t              ioreq_server_lock;
> +    ioservid_t              ioreq_server_id;
> +    unsigned int            ioreq_server_count;
> +    struct list_head        ioreq_server_list;

Rather than using all these redundant prefixes, could I talk you into
using sub-structures instead:

    struct {
        struct {
        } gmfn;
        struct {
        } server;
    } ioreq;

?

> +typedef uint16_t ioservid_t;
> +DEFINE_XEN_GUEST_HANDLE(ioservid_t);

I don't think you really need this handle type.

> +struct xen_hvm_get_ioreq_server_info {
> +    domid_t domid;               /* IN - domain to be serviced */
> +    ioservid_t id;               /* IN - server id */
> +    xen_pfn_t ioreq_pfn;         /* OUT - sync ioreq pfn */
> +    xen_pfn_t bufioreq_pfn;      /* OUT - buffered ioreq pfn */
> +    evtchn_port_t bufioreq_port; /* OUT - buffered ioreq port */
> +};

Afaict this structure's layout will differ between 32- and 64-bit
tool stack domains, i.e. you need to add some explicit padding/
alignment. Or even better yet, shuffle the fields - bufioreq_port
fits in the 32-bit hole after id.

> +struct xen_hvm_destroy_ioreq_server {
> +    domid_t domid; /* IN - domain to be serviced */
> +    ioservid_t id; /* IN - server id */
> +};

That's the same structure as for create, just that the last field is an
output there. Please have just one structure for creation/destruction.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.