[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC v2 8/9] xen/arm: Implement hypercall for dirty page tracing (shadow op)



On Wed, 3 Jul 2013, Jaeyong Yoo wrote:
> From: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> 
> Add hypercall (shadow op: enable/disable and clean/peek dirtied page bitmap)
> 
> Signed-off-by: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> Signed-off-by: Alexey Sokolov <sokolov.a@xxxxxxxxxxx>
> ---
>  xen/arch/arm/domain.c     |   4 +
>  xen/arch/arm/domctl.c     |  19 ++++
>  xen/arch/arm/p2m.c        | 219 
> +++++++++++++++++++++++++++++++++++++++++++++-
>  xen/include/asm-arm/p2m.h |   2 +
>  4 files changed, 242 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 73dd0de..e3fc533 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -484,6 +484,10 @@ int arch_domain_create(struct domain *d, unsigned int 
> domcr_flags)
>      d->arch.vpidr = boot_cpu_data.midr.bits;
>      d->arch.vmpidr = boot_cpu_data.mpidr.bits;
>  
> +    /* init for dirty-page tracing */
> +    d->arch.dirty.count = 0;
> +    d->arch.dirty.top = NULL;
> +
>      clear_page(d->shared_info);
>      share_xen_page_with_guest(
>          virt_to_page(d->shared_info), d, XENSHARE_writable);
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index a0719b0..bc06534 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -67,6 +67,14 @@ long arch_do_domctl(struct xen_domctl *domctl, struct 
> domain *d,
>              goto gethvmcontext_out;
>          }
>  
> +        /* Allocate our own marshalling buffer */
> +        ret = -ENOMEM;
> +        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> +        {
> +            printk("(gethvmcontext) xmalloc_bytes failed: %d\n", c.size );
> +            goto gethvmcontext_out;
> +        }
> +
>          domain_pause(d);
>          ret = hvm_save(d, &c);
>          domain_unpause(d);

Shouldn't this hunk belong to patch #2?


> @@ -85,6 +93,17 @@ long arch_do_domctl(struct xen_domctl *domctl, struct 
> domain *d,
>              xfree(c.data);
>      }
>      break;
> +    case XEN_DOMCTL_shadow_op:
> +    {
> +        ret = dirty_mode_op(d, &domctl->u.shadow_op);
> +        if ( (&domctl->u.shadow_op)->op == XEN_DOMCTL_SHADOW_OP_CLEAN
> +             || (&domctl->u.shadow_op)->op == XEN_DOMCTL_SHADOW_OP_PEEK)
> +        {
> +            copyback = 1;
> +        }
> +    }
> +    break;
> +
>      default:
>          return -EINVAL;
>      }
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index bae7af7..fb8ce10 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -69,8 +69,8 @@ out:
>  
>      spin_unlock(&p2m->lock);
>  
> -    return ( ((xen_pfn_t)first_index << FIRST_SHIFT) |
> -             (second_index << SECOND_SHIFT)          |
> +    return ( ((xen_pfn_t)first_index << FIRST_SHIFT) | 
> +             (second_index << SECOND_SHIFT)          | 
>               (third_index << THIRD_SHIFT)
>             )  >> PAGE_SHIFT;

Does this belong here? If so, why?


> @@ -496,6 +496,221 @@ void mark_dirty(struct domain *d, paddr_t addr)
>      spin_unlock(&d->arch.dirty.lock);
>  }
>  
> +static void free_dirty_bitmap(struct domain *d)
> +{
> +    struct page_info *pg;
> +
> +    spin_lock(&d->arch.dirty.lock);
> +
> +    if(d->arch.dirty.top)
> +    {
> +        while ( (pg = page_list_remove_head(&d->arch.dirty.pages)) )
> +            free_domheap_page(pg);
> +    }
> +    d->arch.dirty.top = NULL;
> +    d->arch.dirty.count = 0;
> +
> +    spin_unlock(&d->arch.dirty.lock);
> +}
> +
> +/* Change types across all p2m entries in a domain */
> +static void p2m_change_entry_type_global(struct domain *d, enum mg ot, enum 
> mg nt)
> +{
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +    int i1, i2, i3;
> +    int first_index = first_table_offset(GUEST_RAM_BASE);
> +    int second_index = second_table_offset(GUEST_RAM_BASE);
> +    int third_index = third_table_offset(GUEST_RAM_BASE);

you shouldn't assume GUEST_RAM_BASE is static anywhere


> +    lpae_t *first = __map_domain_page(p2m->first_level);
> +    lpae_t pte, *second = NULL,  *third = NULL;
> +
> +    BUG_ON(!first && "Can't map first level p2m.");
> +
> +    spin_lock(&p2m->lock);
> +
> +    for (i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1)
> +    {
> +        lpae_walk_t first_pte = first[i1].walk;
> +        if (!first_pte.valid || !first_pte.table)
> +            goto out;
> +
> +        second = map_domain_page(first_pte.base);
> +        BUG_ON(!second && "Can't map second level p2m.");
> +        for(i2 = second_index; i2 < LPAE_ENTRIES; ++i2)
> +        {
> +            lpae_walk_t second_pte = second[i2].walk;
> +            if (!second_pte.valid || !second_pte.table)
> +                goto out;
> +
> +            third = map_domain_page(second_pte.base);
> +            BUG_ON(!third && "Can't map third level p2m.");
> +
> +            for (i3 = third_index; i3 < LPAE_ENTRIES; ++i3)
> +            {
> +                lpae_walk_t third_pte = third[i3].walk;
> +                int write = 0;
> +                if (!third_pte.valid)
> +                    goto out;
> +
> +                pte = third[i3];
> +                if (pte.p2m.write == 1 && nt == mg_ro)
> +                {
> +                    pte.p2m.write = 0;
> +                    write = 1;
> +                }
> +                else if (pte.p2m.write == 0 && nt == mg_rw)
> +                {
> +                    pte.p2m.write = 1;
> +                    write = 1;
> +                }
> +                if (write)
> +                    write_pte(&third[i3], pte);
> +            }
> +            unmap_domain_page(third);
> +
> +            third = NULL;
> +            third_index = 0;
> +        }
> +        unmap_domain_page(second);
> +
> +        second = NULL;
> +        second_index = 0;
> +        third_index = 0;
> +    }
> +
> +out:
> +    flush_tlb_all_local();
> +    if (third) unmap_domain_page(third);
> +    if (second) unmap_domain_page(second);
> +    if (first) unmap_domain_page(first);
> +
> +    spin_unlock(&p2m->lock);
> +}
> +
> +/* Read a domain's log-dirty bitmap and stats.
> + * If the operation is a CLEAN, clear the bitmap and stats. */
> +static int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
> +{
> +    int i1, i2, rv = 0, clean = 0, peek = 1;
> +    xen_pfn_t *first = NULL, *second = NULL;
> +    unsigned long pages = 0, *third = NULL;
> +
> +    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
> +
> +    if (guest_handle_is_null(sc->dirty_bitmap))
> +        peek = 0;
> +
> +    domain_pause(d);
> +
> +    printk("DEBUG: log-dirty %s: dom %u dirty=%u\n",
> +            (clean) ? "clean" : "peek",
> +            d->domain_id, d->arch.dirty.count);
> +
> +    sc->stats.dirty_count = d->arch.dirty.count;
> +
> +    spin_lock(&d->arch.dirty.lock);
> +    first = d->arch.dirty.top ? __map_domain_page(d->arch.dirty.top) : NULL;
> +
> +    /* 32-bit address space starts from 0 and ends at ffffffff.

This should be a 40 bit address space due to LPAE support


> +     * First level of p2m tables is indexed by bits 39-30.
> +     * So significant bits for 32-bit addresses in first table are 31-30.
> +     * So we need to iterate from 0 to 4.
> +     * But RAM starts from 2 gigabytes. So start from 2 instead.
> +     * TODO remove this hardcode

I wouldn't assume that the guest RAM starts at 2GB


> +     */
> +    for (i1 = 2; (pages < sc->pages) && (i1 < 4); ++i1)
> +    {
> +        second = (first && first[i1]) ? map_domain_page(first[i1]) : NULL;
> +
> +        for (i2 = 0; (i2 < LPAE_ENTRIES); ++i2)
> +        {
> +            unsigned int bytes = LPAE_ENTRIES/8;
> +            third = (second && second[i2]) ? map_domain_page(second[i2]) : 
> NULL;
> +            if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
> +                bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
> +            if (peek)
                  ^ code style
> +            {
> +                if ( (third ? copy_to_guest_offset(sc->dirty_bitmap, pages 
> >> 3,
> +                                                   (uint8_t *)third, bytes) 
> +                            : clear_guest_offset(sc->dirty_bitmap, pages >> 
> 3,
> +                                                 (uint8_t*)NULL /*only type 
> matters*/,
> +                                                 bytes)) != 0 )
> +                {
> +                    rv = 1;
> +                    goto out;   
> +                }
> +            }
> +            pages += bytes << 3;
> +            if (third)
                  ^ code style
> +            {
> +                if (clean) clear_page(third);
> +                unmap_domain_page(third);
> +            }
> +        }
> +        if (second) unmap_domain_page(second);
> +    }
> +    if (first) unmap_domain_page(first);
> +
> +    spin_unlock(&d->arch.dirty.lock);
> +
> +    if ( pages < sc->pages )
> +        sc->pages = pages;
> +
> +    if (clean)
           ^ code style
> +    {
> +        p2m_change_entry_type_global(d, mg_rw, mg_ro);
> +        free_dirty_bitmap(d);
> +    }
> +
> +out:
> +    if (rv)
> +    {
> +       spin_unlock(&d->arch.dirty.lock);
> +    }
> +    domain_unpause(d);
> +    return rv;
> +}
> +
> +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
> +{
> +    long ret = 0;
> +
> +    switch (sc->op)
> +    {
> +        case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
> +        case XEN_DOMCTL_SHADOW_OP_OFF:
> +        {
> +            enum mg ot = mg_clear, nt = mg_clear;
> +
> +            ot = sc->op == XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY ? mg_rw : 
> mg_ro;
> +            nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
> +
> +            domain_pause(d);
> +
> +            d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
> +            p2m_change_entry_type_global(d, ot, nt);
> +
> +            if (sc->op == XEN_DOMCTL_SHADOW_OP_OFF)
> +                free_dirty_bitmap(d);
> +
> +            domain_unpause(d);

Do we need a lock to make sure that all the operations happen
atomically?


> +        }
> +        break;
> +
> +        case XEN_DOMCTL_SHADOW_OP_CLEAN:
> +        case XEN_DOMCTL_SHADOW_OP_PEEK:
> +        {
> +            ret = log_dirty_op(d, sc);
> +        }
> +        break;

For symmetry maybe it would be better to pause the domain outside
log_dirty_op


> +        default:
> +            return -ENOSYS;
> +    }
> +    return ret;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index fd5890f..498cf0b 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -2,6 +2,7 @@
>  #define _XEN_P2M_H
>  
>  #include <xen/mm.h>
> +#include <public/domctl.h>
>  
>  struct domain;
>  
> @@ -112,6 +113,7 @@ static inline int get_page_and_type(struct page_info 
> *page,
>  
>  enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
>  void mark_dirty(struct domain *d, paddr_t addr);
> +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
>  
>  #endif /* _XEN_P2M_H */
>  
> -- 
> 1.8.1.2
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.