x86/HVM: avoid pointer wraparound in bufioreq handling The number of slots per page being 511 (i.e. not a power of two) means that the (32-bit) read and write indexes going beyond 2^32 will likely disturb operation. Extend I/O req server creation so the caller can indicate that it is using suitable atomic accesses where needed (not all accesses to the two pointers really need to be atomic), allowing the hypervisor to atomically canonicalize both pointers when both have gone through at least one cycle. Signed-off-by: Jan Beulich Acked-by: Ian Campbell --- v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations. Adjust xc_hvm_create_ioreq_server() documentation. --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl * * @parm xch a handle to an open hypervisor interface. * @parm domid the domain id to be serviced - * @parm handle_bufioreq should the IOREQ Server handle buffered requests? + * @parm handle_bufioreq how should the IOREQ Server handle buffered requests + * (HVM_IOREQSRV_BUFIOREQ_*)? * @parm id pointer to an ioservid_t to receive the IOREQ Server id. * @return 0 on success, -1 on failure. */ --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); arg->domid = domid; - arg->handle_bufioreq = !!handle_bufioreq; + arg->handle_bufioreq = handle_bufioreq; rc = do_xen_hypercall(xch, &hypercall); --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d, domid_t domid, bool_t is_default, - bool_t handle_bufioreq, ioservid_t id) + int bufioreq_handling, ioservid_t id) { struct vcpu *v; int rc; @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct if ( rc ) return rc; - rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq); + if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC ) + s->bufioreq_atomic = 1; + + rc = hvm_ioreq_server_setup_pages( + s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF); if ( rc ) goto fail_map; @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d } static int hvm_create_ioreq_server(struct domain *d, domid_t domid, - bool_t is_default, bool_t handle_bufioreq, + bool_t is_default, int bufioreq_handling, ioservid_t *id) { struct hvm_ioreq_server *s; int rc; + if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC ) + return -EINVAL; + rc = -ENOMEM; s = xzalloc(struct hvm_ioreq_server); if ( !s ) @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL ) goto fail2; - rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq, + rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling, next_ioservid(d)); if ( rc ) goto fail3; @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p) spin_lock(&s->bufioreq_lock); - if ( (pg->write_pointer - pg->read_pointer) >= + if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >= (IOREQ_BUFFER_SLOT_NUM - qw) ) { /* The queue is full: send the iopacket through the normal path. */ @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p) return 0; } - pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp; + pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp; if ( qw ) { bp.data = p->data >> 32; - pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp; + pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp; } /* Make the ioreq_t visible /before/ write_pointer. */ wmb(); - pg->write_pointer += qw ? 2 : 1; + pg->ptrs.write_pointer += qw ? 2 : 1; + + /* Canonicalize read/write pointers to prevent their overflow. */ + while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM && + pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM ) + { + union bufioreq_pointers old = pg->ptrs, new; + unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM; + + new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM; + new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM; + cmpxchg(&pg->ptrs.full, old.full, new.full); + } notify_via_xen_event_channel(d, s->bufioreq_evtchn); spin_unlock(&s->bufioreq_lock); @@ -5446,7 +5465,7 @@ static int hvmop_create_ioreq_server( goto out; rc = hvm_create_ioreq_server(d, curr_d->domain_id, 0, - !!op.handle_bufioreq, &op.id); + op.handle_bufioreq, &op.id); if ( rc != 0 ) goto out; @@ -5928,7 +5947,8 @@ static int hvmop_get_param( /* May need to create server. */ domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN]; - rc = hvm_create_ioreq_server(d, domid, 1, 1, NULL); + rc = hvm_create_ioreq_server(d, domid, 1, + HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL); if ( rc != 0 && rc != -EEXIST ) goto out; } --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -70,6 +70,7 @@ struct hvm_ioreq_server { evtchn_port_t bufioreq_evtchn; struct rangeset *range[NR_IO_RANGE_TYPES]; bool_t enabled; + bool_t bufioreq_atomic; }; struct hvm_domain { --- a/xen/include/public/hvm/hvm_op.h +++ b/xen/include/public/hvm/hvm_op.h @@ -266,6 +266,13 @@ typedef uint16_t ioservid_t; #define HVMOP_create_ioreq_server 17 struct xen_hvm_create_ioreq_server { domid_t domid; /* IN - domain to be serviced */ +#define HVM_IOREQSRV_BUFIOREQ_OFF 0 +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1 +/* + * Use this when read_pointer gets updated atomically and + * the pointer pair gets read atomically: + */ +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */ ioservid_t id; /* OUT - server id */ }; --- a/xen/include/public/hvm/ioreq.h +++ b/xen/include/public/hvm/ioreq.h @@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t; #define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ struct buffered_iopage { - unsigned int read_pointer; - unsigned int write_pointer; +#ifdef __XEN__ + union bufioreq_pointers { + struct { +#endif + uint32_t read_pointer; + uint32_t write_pointer; +#ifdef __XEN__ + }; + uint64_t full; + } ptrs; +#endif buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; }; /* NB. Size of this structure must be no greater than one page. */ typedef struct buffered_iopage buffered_iopage_t;