
Re: [Xen-devel] [PATCH v2 2/2] ioreq-server: Support scatter page forwarding




> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@xxxxxxxxxx]
> Sent: Friday, August 22, 2014 6:35 PM
> To: Ye, Wei; xen-devel@xxxxxxxxxxxxx
> Cc: Tian, Kevin; keir@xxxxxxx; ian.campbell@xxxxxxxxxx;
> stefano.stabellini@xxxxxxxxxxxxx; tim@xxxxxxx; ian.jackson@xxxxxxxxxxxxx;
> Dugger, Donald D; Paul.Durrant@xxxxxxxxxx; Lv, Zhiyuan; JBeulich@xxxxxxxx;
> Zhang, Yang Z
> Subject: Re: [Xen-devel] [PATCH v2 2/2] ioreq-server: Support scatter page
> forwarding
> 
> On 22/08/14 20:18, Wei Ye wrote:
> > Extend the interface to support add/remove scatter page list to be
> > forwarded by a dedicated ioreq-server instance. Check and select a
> > right ioreq-server instance for forwarding the write operation for a
> > write protected page.
> >
> > Signed-off-by: Wei Ye <wei.ye@xxxxxxxxx>
> > ---
> >  tools/libxc/xc_domain.c          |   32 ++++++
> >  tools/libxc/xenctrl.h            |   18 ++++
> >  xen/arch/x86/hvm/hvm.c           |  209 ++++++++++++++++++++++++++++++++++++++
> >  xen/include/asm-x86/hvm/domain.h |    9 ++
> >  xen/include/public/hvm/hvm_op.h  |   12 +++
> >  5 files changed, 280 insertions(+)
> >
> > diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> > index 37ed141..36e4e59 100644
> > --- a/tools/libxc/xc_domain.c
> > +++ b/tools/libxc/xc_domain.c
> > @@ -1485,6 +1485,38 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t domid,
> >      return rc;
> >  }
> >
> > +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t domid,
> > +                                     ioservid_t id, uint16_t set,
> > +                                     uint16_t nr_pages, uint64_t *gpfn)
> > +{
> > +    DECLARE_HYPERCALL;
> > +    DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t, arg);
> > +    int pg, rc = -1;
> > +
> > +    if ( arg == NULL )
> > +        return -1;
> 
> You must set errno before exiting -1.
> 
I'm not sure which errno should be set here. I just followed similar existing
functions like "xc_hvm_map_io_range_to_ioreq_server...", which also don't set
errno before exiting. What's your suggestion for the errno?
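
If it helps, here is a minimal sketch of what I understood (the errno values
are my assumption, not taken from the existing code):

    if ( arg == NULL )
    {
        errno = ENOMEM;    /* hypercall buffer allocation failed */
        return -1;
    }

    if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
    {
        errno = EINVAL;    /* request exceeds the batch limit */
        goto out;          /* rc is still -1 here */
    }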

> > +
> > +    if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
> > +        goto out;
> > +
> > +    hypercall.op     = __HYPERVISOR_hvm_op;
> > +    hypercall.arg[0] = HVMOP_map_pages_to_ioreq_server;
> > +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> > +
> > +    arg->domid = domid;
> > +    arg->id = id;
> > +    arg->set = set;
> > +    arg->nr_pages = nr_pages;
> > +    for ( pg = 0; pg < nr_pages; pg++ )
> > +        arg->page_list[pg] = gpfn[pg];
> 
> memcpy()
> 
Ok.
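I assume something along these lines is what you mean:

    memcpy(arg->page_list, gpfn, nr_pages * sizeof(*gpfn));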

> > +
> > +    rc = do_xen_hypercall(xch, &hypercall);
> > +
> > +out:
> > +    xc_hypercall_buffer_free(xch, arg);
> > +    return rc;
> > +}
> > +
> >  int xc_hvm_destroy_ioreq_server(xc_interface *xch,
> >                                  domid_t domid,
> >                                  ioservid_t id)
> > diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> > index b55d857..84e8465 100644
> > --- a/tools/libxc/xenctrl.h
> > +++ b/tools/libxc/xenctrl.h
> > @@ -1943,6 +1943,24 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch,
> >                                            uint8_t function);
> >
> >  /**
> > + * This function registers/deregisters a set of pages to be write protected.
> > + *
> > + * @parm xch a handle to an open hypervisor interface.
> > + * @parm domid the domain id to be serviced
> > + * @parm id the IOREQ Server id.
> > + * @parm set whether registers or deregisters: 1 for register, 0 for deregister
> > + * @parm nr_pages the count of pages
> > + * @parm pgfn the guest page frame number list
> > + * @return 0 on success, -1 on failure.
> > + */
> > +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
> > +                                          domid_t domid,
> > +                                          ioservid_t id,
> > +                                          uint16_t set,
> > +                                          uint16_t nr_pages,
> > +                                          uint64_t *gpfn);
> 
> Alignment of parameters.
Ok.
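Will fix, i.e. the declaration becomes:

    int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
                                         domid_t domid,
                                         ioservid_t id,
                                         uint16_t set,
                                         uint16_t nr_pages,
                                         uint64_t *gpfn);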

> 
> > +
> > +/**
> >   * This function destroys an IOREQ Server.
> >   *
> >   * @parm xch a handle to an open hypervisor interface.
> > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> > index 4984149..bbbacc3 100644
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -68,6 +68,12 @@
> >  #include <public/mem_event.h>
> >  #include <xen/rangeset.h>
> 
> This hash chain code belongs in its own patch.  Please split it out, to allow
> the hypercall interface changes to be in a smaller patch.
> 
Will split the hash chain code into a separate patch, thanks.

> >
> > +#define PGLIST_HASH_SIZE_SHIFT    8
> > +#define PGLIST_HASH_SIZE    (1 << PGLIST_HASH_SIZE_SHIFT)
> > +#define pglist_hash(x)      ((x) % PGLIST_HASH_SIZE)
> > +#define PGLIST_INVALID_GPFN       0
> 
> Use INVALID_MFN.  GFN 0 is perfectly legitimate as the target of scatter
> forwarding.
Ok.
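Will switch to INVALID_MFN. One note: since the hash array is zero-filled at
allocation, each entry's gpfn then needs an explicit init once 0 becomes a
valid value. Roughly (untested sketch):

    #define PGLIST_INVALID_GPFN    INVALID_MFN

    /* in hvm_ioreq_server_init(), after allocating the array */
    for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
        s->pglist_hash_base[i].gpfn = PGLIST_INVALID_GPFN;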


> 
> > +#define PGLIST_HASH_ENTRY_SIZE    sizeof(struct pglist_hash_table)
> > +
> >  bool_t __read_mostly hvm_enabled;
> >
> >  unsigned int opt_hvm_debug_level __read_mostly;
> > @@ -744,6 +750,98 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
> >      spin_unlock(&s->lock);
> >  }
> >
> > +static struct pglist_hash_table *hvm_ioreq_server_lookup_pglist_hash_table(
> > +                          struct pglist_hash_table *pglist_ht, uint64_t gpfn)
> > +{
> > +    unsigned int index = pglist_hash(gpfn);
> > +    struct pglist_hash_table *entry;
> > +
> > +    for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> > +        if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
> > +            break;
> > +
> > +    return entry;
> > +}
> > +
> > +static void hvm_ioreq_server_free_hash_chain(struct pglist_hash_table *chain)
> > +{
> > +    struct pglist_hash_table *p = chain;
> > +    struct pglist_hash_table *n;
> > +
> > +    while ( p )
> > +    {
> > +        n = p->next;
> > +        xfree(p);
> > +        p = n;
> > +    }
> > +}
> > +
> > +static void hvm_ioreq_server_free_pglist_hash(struct pglist_hash_table *pglist_ht)
> > +{
> > +    unsigned int i;
> > +
> > +    for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
> > +        if ( pglist_ht[i].next != NULL )
> > +            hvm_ioreq_server_free_hash_chain(pglist_ht[i].next);
> > +}
> > +
> > +static int hvm_ioreq_server_pglist_hash_add(struct pglist_hash_table *pglist_ht,
> > +                                            uint64_t gpfn)
> > +{
> > +    unsigned int index = pglist_hash(gpfn);
> > +    struct pglist_hash_table *ne;
> > +
> > +    if ( hvm_ioreq_server_lookup_pglist_hash_table(pglist_ht, gpfn) != NULL )
> > +        return -EINVAL;
> > +
> > +    if ( pglist_ht[index].gpfn == PGLIST_INVALID_GPFN )
> > +        pglist_ht[index].gpfn = gpfn;
> > +    else
> > +    {
> > +        ne = xmalloc(struct pglist_hash_table);
> > +        if ( !ne )
> > +            return -ENOMEM;
> > +        ne->next = pglist_ht[index].next;
> > +        ne->gpfn = gpfn;
> > +        pglist_ht[index].next = ne;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int hvm_ioreq_server_pglist_hash_rem(struct pglist_hash_table *pglist_ht,
> > +                                            uint64_t gpfn)
> > +{
> > +    unsigned int index = pglist_hash(gpfn);
> > +    struct pglist_hash_table *next, *prev;
> > +
> > +    if ( pglist_ht[index].gpfn == gpfn )
> > +        pglist_ht[index].gpfn = PGLIST_INVALID_GPFN;
> > +    else
> > +    {
> > +        prev = &pglist_ht[index];
> > +        while ( 1 )
> > +        {
> > +            next = prev->next;
> > +            if ( !next )
> > +            {
> > +                printk(XENLOG_G_WARNING "hvm_ioreq_server_pglist_hash_rem hash_table %p remove"
> > +                       " %lx not found\n", pglist_ht, gpfn);
> > +                return -EINVAL;
> > +            }
> > +            if ( next->gpfn == gpfn )
> > +            {
> > +                prev->next = next->next;
> > +                xfree(next);
> > +                break;
> > +            }
> > +            prev = next;
> > +         }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static int hvm_ioreq_server_map_pages(struct hvm_ioreq_server *s,
> >                                        bool_t is_default, bool_t handle_bufioreq)
> >  {
> > @@ -948,7 +1046,14 @@ static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> >      spin_lock_init(&s->lock);
> >      INIT_LIST_HEAD(&s->ioreq_vcpu_list);
> >      spin_lock_init(&s->bufioreq_lock);
> > +    spin_lock_init(&s->pglist_hash_lock);
> >
> > +    rc = -ENOMEM;
> > +    s->pglist_hash_base = xmalloc_array(struct pglist_hash_table, PGLIST_HASH_SIZE);
> > +    if ( s->pglist_hash_base == NULL )
> > +        goto fail1;
> > +    memset(s->pglist_hash_base, 0, PGLIST_HASH_ENTRY_SIZE * PGLIST_HASH_SIZE);
> 
> xzalloc_array()
Ok.
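I.e. roughly:

    s->pglist_hash_base = xzalloc_array(struct pglist_hash_table,
                                        PGLIST_HASH_SIZE);
    if ( s->pglist_hash_base == NULL )
        goto fail1;

which also drops the separate memset().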


> 
> > +
> >      rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
> >      if ( rc )
> >          goto fail1;
> > @@ -1087,6 +1192,9 @@ static int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
> >
> >          hvm_ioreq_server_deinit(s, 0);
> >
> > +        hvm_ioreq_server_free_pglist_hash(s->pglist_hash_base);
> > +        xfree(s->pglist_hash_base);
> > +
> >          domain_unpause(d);
> >
> >          xfree(s);
> > @@ -1279,6 +1387,63 @@ static int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
> >      return rc;
> >  }
> >
> > +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
> > +                                         uint16_t set, uint16_t nr_pages,
> > +                                         uint64_t *gpfn)
> > +{
> > +    struct hvm_ioreq_server *s;
> > +    int rc;
> > +    unsigned int i;
> > +    p2m_type_t ot, nt;
> > +
> > +    if ( set )
> > +    {
> > +        ot = p2m_ram_rw;
> > +        nt = p2m_mmio_write_dm;
> > +    }
> > +    else
> > +    {
> > +        ot = p2m_mmio_write_dm;
> > +        nt = p2m_ram_rw;
> > +    }
> > +
> > +    spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> > +
> > +    rc = -ENOENT;
> > +    list_for_each_entry ( s,
> > +                          &d->arch.hvm_domain.ioreq_server.list,
> > +                          list_entry )
> > +    {
> > +        if ( s != d->arch.hvm_domain.default_ioreq_server &&
> > +             s->id == id )
> > +        {
> > +            spin_lock(&s->pglist_hash_lock);
> > +
> > +            for ( i = 0; i < nr_pages; i++ )
> > +            {
> > +                rc = p2m_change_type_one(d, gpfn[i], ot, nt);
> > +                if ( !rc )
> > +                {
> > +                    if ( set )
> > +                        rc = hvm_ioreq_server_pglist_hash_add(s->pglist_hash_base, gpfn[i]);
> > +                    else
> > +                        rc = hvm_ioreq_server_pglist_hash_rem(s->pglist_hash_base, gpfn[i]);
> > +                }
> > +
> > +                if ( rc )
> > +                    break;
> > +            }
> > +
> > +            spin_unlock(&s->pglist_hash_lock);
> > +            break;
> > +        }
> > +    }
> > +
> > +    spin_unlock(&d->arch.hvm_domain.ioreq_server.lock);
> > +
> > +    return rc;
> > +}
> > +
> >  static int hvm_all_ioreq_servers_add_vcpu(struct domain *d, struct vcpu *v)
> >  {
> >      struct hvm_ioreq_server *s;
> > @@ -2349,6 +2514,8 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> >          switch ( type )
> >          {
> >              unsigned long end;
> > +            uint64_t gpfn;
> > +            struct pglist_hash_table *he;
> >
> >          case IOREQ_TYPE_PIO:
> >              end = addr + p->size - 1;
> > @@ -2361,6 +2528,14 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> >              if ( rangeset_contains_range(r, addr, end) )
> >                  return s;
> >
> > +            gpfn = addr >> PAGE_SHIFT;
> > +            spin_lock(&s->pglist_hash_lock);
> > +            he = hvm_ioreq_server_lookup_pglist_hash_table(s->pglist_hash_base, gpfn);
> > +            spin_unlock(&s->pglist_hash_lock);
> > +
> > +            if ( he )
> > +                return s;
> > +
> >              break;
> >          case IOREQ_TYPE_PCI_CONFIG:
> >              if ( rangeset_contains_singleton(r, addr >> 32) )
> > @@ -5309,6 +5484,35 @@ static int hvmop_set_ioreq_server_state(
> >      return rc;
> >  }
> >
> > +static int hvmop_map_pages_to_ioreq_server(
> > +    XEN_GUEST_HANDLE_PARAM(xen_hvm_map_pages_to_ioreq_server_t) uop)
> > +{
> > +    xen_hvm_map_pages_to_ioreq_server_t op;
> > +    struct domain *d;
> > +    int rc;
> > +
> > +    if ( copy_from_guest(&op, uop, 1) )
> > +        return -EFAULT;
> > +
> > +    rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> > +    if ( rc != 0 )
> > +        return rc;
> > +
> > +    rc = -EINVAL;
> > +    if ( !is_hvm_domain(d) )
> > +        goto out;
> > +
> > +    rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d, HVMOP_map_pages_to_ioreq_server);
> > +    if ( rc != 0 )
> > +        goto out;
> > +
> > +    rc = hvm_map_pages_to_ioreq_server(d, op.id, op.set, op.nr_pages,
> > +                                       op.page_list);
> > +
> > +out:
> > +    rcu_unlock_domain(d);
> > +    return rc;
> > +}
> > +
> >  static int hvmop_destroy_ioreq_server(
> >      XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
> >  {
> > @@ -5374,6 +5578,11 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
> >              guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t));
> >          break;
> >
> > +    case HVMOP_map_pages_to_ioreq_server:
> > +        rc = hvmop_map_pages_to_ioreq_server(
> > +            guest_handle_cast(arg, xen_hvm_map_pages_to_ioreq_server_t));
> > +        break;
> > +
> >      case HVMOP_destroy_ioreq_server:
> >          rc = hvmop_destroy_ioreq_server(
> >              guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> > diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> > index 291a2e0..c28fbbe 100644
> > --- a/xen/include/asm-x86/hvm/domain.h
> > +++ b/xen/include/asm-x86/hvm/domain.h
> > @@ -48,6 +48,11 @@ struct hvm_ioreq_vcpu {
> >      evtchn_port_t    ioreq_evtchn;
> >  };
> >
> > +struct pglist_hash_table {
> > +    struct pglist_hash_table *next;
> > +    uint64_t gpfn;
> > +};
> > +
> >  #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
> >  #define MAX_NR_IO_RANGES  256
> >
> > @@ -70,6 +75,10 @@ struct hvm_ioreq_server {
> >      evtchn_port_t          bufioreq_evtchn;
> >      struct rangeset        *range[NR_IO_RANGE_TYPES];
> >      bool_t                 enabled;
> > +
> > +    /* scatter page list need write protection */
> > +    struct pglist_hash_table   *pglist_hash_base;
> > +    spinlock_t             pglist_hash_lock;
> >  };
> >
> >  struct hvm_domain {
> > diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
> > index eeb0a60..f7c989a 100644
> > --- a/xen/include/public/hvm/hvm_op.h
> > +++ b/xen/include/public/hvm/hvm_op.h
> > @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state {
> >  typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
> >
> > +#define HVMOP_map_pages_to_ioreq_server 23
> > +#define XEN_IOREQSVR_MAX_MAP_BATCH      128
> > +struct xen_hvm_map_pages_to_ioreq_server {
> > +    domid_t domid;   /* IN - domain to be serviced */
> > +    ioservid_t id;   /* IN - server id */
> > +    uint16_t set;    /* IN - 1: map pages, 0: unmap pages */
> > +    uint16_t nr_pages;  /* IN - page count */
> > +    uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list */
> 
> No arbitrary limits like this in the API please.  Use a GUEST_HANDLE_64
> instead.
Ok.
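Will rework the public structure roughly as below in the next version (the
exact handle type still to be confirmed):

    struct xen_hvm_map_pages_to_ioreq_server {
        domid_t domid;      /* IN - domain to be serviced */
        ioservid_t id;      /* IN - server id */
        uint16_t set;       /* IN - 1: map pages, 0: unmap pages */
        uint16_t nr_pages;  /* IN - page count */
        XEN_GUEST_HANDLE_64(uint64) page_list; /* IN - gpfn list */
    };

The hypervisor side would then copy_from_guest() the gpfn list instead of
reading it from an embedded fixed-size array.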


> 
> ~Andrew
> 
> > +};
> > +typedef struct xen_hvm_map_pages_to_ioreq_server xen_hvm_map_pages_to_ioreq_server_t;
> > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_pages_to_ioreq_server_t);
> > +
> >  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> >
> >  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

