
Re: [Xen-devel] [RFC XEN PATCH 04/16] xen/x86: add XENMEM_populate_pmemmap to map host pmem pages to guest



On Mon, Oct 10, 2016 at 08:32:23AM +0800, Haozhong Zhang wrote:
> XENMEM_populate_pmemmap is used by the toolstack to map given host pmem pages
> to given guest pages. Only pages in the data area of a pmem region are
> allowed to be mapped to a guest.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
> ---
> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
>  tools/libxc/include/xenctrl.h |   8 +++
>  tools/libxc/xc_domain.c       |  14 +++++
>  xen/arch/x86/pmem.c           | 123 ++++++++++++++++++++++++++++++++++++++++++
>  xen/common/domain.c           |   3 ++
>  xen/common/memory.c           |  31 +++++++++++
>  xen/include/public/memory.h   |  14 ++++-
>  xen/include/xen/pmem.h        |  10 ++++
>  xen/include/xen/sched.h       |   3 ++
>  8 files changed, 205 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index 2c83544..46c71fc 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2710,6 +2710,14 @@ int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
>  int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
>  
> +/**
> + * Map host pmem pages at PFNs @mfn ~ (@mfn + @nr_mfns - 1) to
> + * guest physical pages at guest PFNs @gpfn ~ (@gpfn + @nr_mfns - 1)
> + */
> +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
> +                               xen_pfn_t mfn, xen_pfn_t gpfn,
> +                               unsigned int nr_mfns);
> +
>  /* Compat shims */
>  #include "xenctrl_compat.h"
>  
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 296b852..81a90a1 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2520,6 +2520,20 @@ int xc_domain_soft_reset(xc_interface *xch,
>      domctl.domain = (domid_t)domid;
>      return do_domctl(xch, &domctl);
>  }
> +
> +int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
> +                               xen_pfn_t mfn, xen_pfn_t gpfn,
> +                               unsigned int nr_mfns)
> +{
> +    struct xen_pmemmap pmemmap = {
> +        .domid   = domid,
> +        .mfn     = mfn,
> +        .gpfn    = gpfn,
> +        .nr_mfns = nr_mfns,
> +    };
> +    return do_memory_op(xch, XENMEM_populate_pmemmap, &pmemmap, sizeof(pmemmap));
> +}
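
Just to check I understand the intended usage from the toolstack side: I
assume a caller would end up doing something along these lines (hypothetical
numbers, purely illustrative)?

    /* Map 256 host pmem pages starting at MFN 0x480000 into the guest,
     * starting at guest frame 0x100000. */
    int rc = xc_domain_populate_pmemmap(xch, domid, 0x480000, 0x100000, 256);

    if ( rc )
        fprintf(stderr, "cannot map pmem pages: %d\n", rc);
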
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
> index 70358ed..e4dc685 100644
> --- a/xen/arch/x86/pmem.c
> +++ b/xen/arch/x86/pmem.c
> @@ -24,6 +24,9 @@
>  #include <xen/spinlock.h>
>  #include <xen/pmem.h>
>  #include <xen/iocap.h>
> +#include <xen/sched.h>
> +#include <xen/event.h>
> +#include <xen/paging.h>
>  #include <asm-x86/mm.h>
>  
>  /*
> @@ -63,6 +66,48 @@ static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
>          ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
>  }
>  
> +static int is_data_mfn(unsigned long mfn)

This should return bool.
> +{
> +    struct list_head *cur;
> +    int data = 0;
> +
> +    ASSERT(spin_is_locked(&pmem_list_lock));
> +
> +    list_for_each(cur, &pmem_list)
> +    {
> +        struct pmem *pmem = list_entry(cur, struct pmem, link);
> +
> +        if ( pmem->data_spfn <= mfn && mfn < pmem->data_epfn )

You may want to change the first conditional to have 'mfn' on the left
side. And perhaps change 'mfn' to 'pfn' as that is what your structure
is called?

But ... maybe the #3 patch that introduces XENPF_pmem_add should
use 'data_smfn', 'data_emfn' and so on?
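
Either way, I'd picture the helper ending up roughly like the below
(untested sketch, reusing the list and fields from this patch and renaming
the parameter to 'pfn'):

    static bool is_data_pfn(unsigned long pfn)
    {
        struct pmem *pmem;

        ASSERT(spin_is_locked(&pmem_list_lock));

        /* Only the data area of a registered pmem region qualifies. */
        list_for_each_entry(pmem, &pmem_list, link)
            if ( pfn >= pmem->data_spfn && pfn < pmem->data_epfn )
                return true;

        return false;
    }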

> +        {
> +            data = 1;
> +            break;
> +        }
> +    }
> +
> +    return data;
> +}
> +
> +static int pmem_page_valid(struct page_info *page, struct domain *d)

bool here as well.
> +{
> +    /* only data area can be mapped to guest */
> +    if ( !is_data_mfn(page_to_mfn(page)) )
> +    {
> +        dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx is not a pmem data page\n",
> +                page_to_mfn(page));
> +        return 0;
> +    }
> +
> +    /* inuse/offlined/offlining pmem page cannot be mapped to guest */
> +    if ( !page_state_is(page, free) )
> +    {
> +        dprintk(XENLOG_DEBUG, "pmem: invalid page state of mfn 0x%lx: 0x%lx\n",
> +                page_to_mfn(page), page->count_info & PGC_state);
> +        return 0;
> +    }
> +
> +    return 1;
> +}
> +
>  static int pmem_add_check(unsigned long spfn, unsigned long epfn,
>                            unsigned long rsv_spfn, unsigned long rsv_epfn,
>                            unsigned long data_spfn, unsigned long data_epfn)
> @@ -159,3 +204,81 @@ int pmem_add(unsigned long spfn, unsigned long epfn,
>   out:
>      return ret;
>  }
> +
> +static int pmem_assign_pages(struct domain *d,
> +                             struct page_info *pg, unsigned int order)
> +{
> +    int rc = 0;
> +    unsigned long i;
> +
> +    spin_lock(&d->pmem_lock);
> +
> +    if ( unlikely(d->is_dying) )
> +    {
> +        rc = -EINVAL;
> +        goto out;
> +    }
> +
> +    for ( i = 0; i < (1 << order); i++ )
> +    {
> +        ASSERT(page_get_owner(&pg[i]) == NULL);
> +        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
> +        page_set_owner(&pg[i], d);
> +        smp_wmb();

Why here? Why not after the count_info is set?
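
That is, just to spell the question out (only what I'd have expected at
first glance, not a claim that it is the right placement):

    page_set_owner(&pg[i], d);
    pg[i].count_info = PGC_allocated | 1;
    smp_wmb();   /* owner and refcount visible before the page is linked */
    page_list_add_tail(&pg[i], &d->pmem_page_list);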

> +        pg[i].count_info = PGC_allocated | 1;
> +        page_list_add_tail(&pg[i], &d->pmem_page_list);
> +    }
> +
> + out:
> +    spin_unlock(&d->pmem_lock);
> +    return rc;
> +}
> +
> +int pmem_populate(struct xen_pmemmap_args *args)
> +{
> +    struct domain *d = args->domain;
> +    unsigned long i, mfn, gpfn;
> +    struct page_info *page;
> +    int rc = 0;
> +
> +    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
> +        return -EINVAL;
> +
> +    for ( i = args->nr_done, mfn = args->mfn + i, gpfn = args->gpfn + i;
> +          i < args->nr_mfns;
> +          i++, mfn++, gpfn++ )
> +    {
> +        if ( i != args->nr_done && hypercall_preempt_check() )
> +        {
> +            args->preempted = 1;
> +            goto out;
> +        }
> +
> +        page = mfn_to_page(mfn);
> +
> +        spin_lock(&pmem_list_lock);
> +        if ( !pmem_page_valid(page, d) )
> +        {
> +            dprintk(XENLOG_DEBUG, "pmem: MFN 0x%lx not a valid pmem page\n", mfn);
> +            spin_unlock(&pmem_list_lock);
> +            rc = -EINVAL;
> +            goto out;
> +        }
> +        page->count_info = PGC_state_inuse;

No test_and_set_bit?
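
(PGC_state is a multi-bit field, so perhaps not a literal test_and_set_bit,
but some atomic claim of the free -> inuse transition. A rough sketch, only
to illustrate the idea:)

    unsigned long cnt = page->count_info;

    if ( (cnt & PGC_state) != PGC_state_free ||
         cmpxchg(&page->count_info, cnt,
                 (cnt & ~PGC_state) | PGC_state_inuse) != cnt )
    {
        spin_unlock(&pmem_list_lock);
        rc = -EBUSY;
        goto out;
    }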

> +        spin_unlock(&pmem_list_lock);
> +
> +        page->u.inuse.type_info = 0;
> +
> +        guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), 0);
> +        if ( pmem_assign_pages(d, page, 0) )
> +        {
> +            guest_physmap_remove_page(d, _gfn(gpfn), _mfn(mfn), 0);

Don't you also need to do something about PGC_state_inuse on this error path?
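
E.g. put the page state back before bailing out (sketch only; the exact
cleanup depends on how the free -> inuse claim above ends up being done):

    spin_lock(&pmem_list_lock);
    page->count_info = (page->count_info & ~PGC_state) | PGC_state_free;
    spin_unlock(&pmem_list_lock);
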
> +            rc = -EFAULT;
> +            goto out;
> +        }
> +    }
> +
> + out:
> +    args->nr_done = i;
> +    return rc;
> +}
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 3abaca9..8192548 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -288,6 +288,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
>      INIT_PAGE_LIST_HEAD(&d->page_list);
>      INIT_PAGE_LIST_HEAD(&d->xenpage_list);
>  
> +    spin_lock_init_prof(d, pmem_lock);
> +    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
> +
>      spin_lock_init(&d->node_affinity_lock);
>      d->node_affinity = NODE_MASK_ALL;
>      d->auto_node_affinity = 1;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 21797ca..09cb1c9 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -24,6 +24,7 @@
>  #include <xen/numa.h>
>  #include <xen/mem_access.h>
>  #include <xen/trace.h>
> +#include <xen/pmem.h>
>  #include <asm/current.h>
>  #include <asm/hardirq.h>
>  #include <asm/p2m.h>
> @@ -1329,6 +1330,36 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      }
>  #endif
>  
> +    case XENMEM_populate_pmemmap:
> +    {
> +        struct xen_pmemmap pmemmap;
> +        struct xen_pmemmap_args args;
> +
> +        if ( copy_from_guest(&pmemmap, arg, 1) )
> +            return -EFAULT;
> +
> +        d = rcu_lock_domain_by_any_id(pmemmap.domid);
> +        if ( !d )
> +            return -EINVAL;
> +
> +        args.domain = d;
> +        args.mfn = pmemmap.mfn;
> +        args.gpfn = pmemmap.gpfn;
> +        args.nr_mfns = pmemmap.nr_mfns;
> +        args.nr_done = start_extent;
> +        args.preempted = 0;
> +
> +        rc = pmem_populate(&args);
> +        rcu_unlock_domain(d);
> +
> +        if ( !rc && args.preempted )

Nice! Glad to see that preemption is there!

> +            return hypercall_create_continuation(
> +                __HYPERVISOR_memory_op, "lh",
> +                op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
> +
> +        break;
> +    }
> +
>      default:
>          rc = arch_memory_op(cmd, arg);
>          break;
