
[xen staging] x86/shadow: dirty VRAM tracking is needed for HVM only



commit ded576ce07e9328f66842bef67d8cfc14c3088b7
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Jul 21 13:57:06 2020 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Jul 21 13:57:06 2020 +0200

    x86/shadow: dirty VRAM tracking is needed for HVM only
    
    Move shadow_track_dirty_vram() into hvm.c (requiring two static
    functions to become non-static). More importantly though, make sure we
    don't de-reference d->arch.hvm.dirty_vram for a non-HVM guest. This was
    only a latent issue because the field happens to live far enough into
    struct hvm_domain to be outside the part overlapping with struct
    pv_domain.
    
    While moving shadow_track_dirty_vram(), some purely typographic
    adjustments are made, like inserting missing blanks or putting braces
    on their own lines.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/mm/shadow/common.c  | 201 +------------------------------------
 xen/arch/x86/mm/shadow/hvm.c     | 212 +++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/shadow/multi.c   |  25 +++--
 xen/arch/x86/mm/shadow/private.h |   8 ++
 4 files changed, 236 insertions(+), 210 deletions(-)

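For context: struct pv_domain and struct hvm_domain share storage inside
struct arch_domain, so dereferencing d->arch.hvm.dirty_vram for a PV guest
reads whatever PV state happens to overlap that offset. Below is a minimal,
self-contained sketch of that overlap and of the is_hvm_domain() guard
pattern the patch applies in multi.c; the struct contents and the
dirty_vram_of() helper are illustrative placeholders only, not the actual
Xen layout or API.

#include <stdbool.h>
#include <stdio.h>

struct sh_dirty_vram;                        /* opaque, as in the shadow code */

/* Placeholder layouts: in real Xen these are large, unrelated structures. */
struct pv_domain  { unsigned long pv_state[4]; };
struct hvm_domain { struct sh_dirty_vram *dirty_vram; };

struct domain {
    bool is_hvm;
    union {                                  /* PV and HVM per-domain state overlap */
        struct pv_domain  pv;
        struct hvm_domain hvm;
    } arch;
};

static bool is_hvm_domain(const struct domain *d) { return d->is_hvm; }

/* Guarded access, mirroring what _sh_propagate() and shadow_vram_*_l1e() now do. */
static struct sh_dirty_vram *dirty_vram_of(const struct domain *d)
{
    return is_hvm_domain(d) ? d->arch.hvm.dirty_vram : NULL;
}

int main(void)
{
    struct domain pv = { .is_hvm = false };

    pv.arch.pv.pv_state[0] = 0xdeadbeefUL;   /* unrelated PV data at this offset */

    /*
     * An unguarded d->arch.hvm.dirty_vram read here would reinterpret that
     * PV data as a pointer; the guard turns the lookup into a clean NULL.
     */
    printf("dirty_vram for PV guest: %p\n", (void *)dirty_vram_of(&pv));
    return 0;
}
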
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 773777321f..05a155d1bc 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -999,7 +999,7 @@ void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-static void shadow_blow_tables(struct domain *d)
+void shadow_blow_tables(struct domain *d)
 {
     struct page_info *sp, *t;
     struct vcpu *v;
@@ -2029,7 +2029,7 @@ int sh_remove_write_access(struct domain *d, mfn_t gmfn,
 /* Remove all mappings of a guest frame from the shadow tables.
  * Returns non-zero if we need to flush TLBs. */
 
-static int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn)
+int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn)
 {
     struct page_info *page = mfn_to_page(gmfn);
 
@@ -3162,203 +3162,6 @@ static void sh_clean_dirty_bitmap(struct domain *d)
 }
 
 
-/**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
-{
-    int rc = 0;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    uint8_t *dirty_bitmap = NULL;
-
-    if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
-
-    dirty_vram = d->arch.hvm.dirty_vram;
-
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm.dirty_vram = NULL;
-    }
-
-    if ( !nr )
-        goto out;
-
-    dirty_bitmap = vzalloc(dirty_size);
-    if ( dirty_bitmap == NULL )
-    {
-        rc = -ENOMEM;
-        goto out;
-    }
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
-    if ( !dirty_vram )
-    {
-        /* Throw away all the shadows rather than walking through them
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-        /* still completely clean, just copy our empty bitmap */
-        memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
-    else
-    {
-        mfn_t map_mfn = INVALID_MFN;
-        void *map_sl1p = NULL;
-
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if ( mfn_eq(mfn, INVALID_MFN) )
-                dirty = 1;
-            else
-            {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d, mfn,
-                                                            _gfn(begin_pfn + i));
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e;
-                        mfn_t sl1mfn = maddr_to_mfn(sl1ma);
-
-                        if ( !mfn_eq(sl1mfn, map_mfn) )
-                        {
-                            if ( map_sl1p )
-                                unmap_domain_page(map_sl1p);
-                            map_sl1p = map_domain_page(sl1mfn);
-                            map_mfn = sl1mfn;
-                        }
-                        sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
-                    break;
-                }
-            }
-
-            if ( dirty )
-            {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
-            }
-        }
-
-        if ( map_sl1p )
-            unmap_domain_page(map_sl1p);
-
-        memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
-        memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-        if ( dirty_vram->last_dirty + SECONDS(2) < NOW() )
-        {
-            /* was clean for more than two seconds, try to disable guest
-             * write access */
-            for ( i = begin_pfn; i < end_pfn; i++ )
-            {
-                mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                if ( !mfn_eq(mfn, INVALID_MFN) )
-                    flush_tlb |= sh_remove_write_access(d, mfn, 1, 0);
-            }
-            dirty_vram->last_dirty = -1;
-        }
-    }
-    if ( flush_tlb )
-        guest_flush_tlb_mask(d, d->dirty_cpumask);
-    goto out;
-
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm.dirty_vram = NULL;
-
-out:
-    paging_unlock(d);
-    if ( rc == 0 && dirty_bitmap != NULL &&
-         copy_to_guest(guest_dirty_bitmap, dirty_bitmap, dirty_size) )
-    {
-        paging_lock(d);
-        for ( i = 0; i < dirty_size; i++ )
-            dirty_vram->dirty_bitmap[i] |= dirty_bitmap[i];
-        paging_unlock(d);
-        rc = -EFAULT;
-    }
-    vfree(dirty_bitmap);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
-}
-
 /* Flush TLB of selected vCPUs. */
 bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                       void *ctxt)
diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
index 608360daec..c5da7a071c 100644
--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -691,6 +691,218 @@ static void sh_emulate_unmap_dest(struct vcpu *v, void *addr,
     atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
 }
 
+/**************************************************************************/
+/* VRAM dirty tracking support */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    unsigned long dirty_size = (nr + 7) / 8;
+    int flush_tlb = 0;
+    unsigned long i;
+    p2m_type_t t;
+    struct sh_dirty_vram *dirty_vram;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    uint8_t *dirty_bitmap = NULL;
+
+    if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm.dirty_vram;
+
+    if ( dirty_vram && (!nr ||
+             ( begin_pfn != dirty_vram->begin_pfn
+            || end_pfn   != dirty_vram->end_pfn )) )
+    {
+        /* Different tracking, tear the previous down. */
+        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
+        xfree(dirty_vram->sl1ma);
+        xfree(dirty_vram->dirty_bitmap);
+        xfree(dirty_vram);
+        dirty_vram = d->arch.hvm.dirty_vram = NULL;
+    }
+
+    if ( !nr )
+        goto out;
+
+    dirty_bitmap = vzalloc(dirty_size);
+    if ( dirty_bitmap == NULL )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+    /*
+     * This should happen seldomly (Video mode change),
+     * no need to be careful.
+     */
+    if ( !dirty_vram )
+    {
+        /*
+         * Throw away all the shadows rather than walking through them
+         * up to nr times getting rid of mappings of each pfn.
+         */
+        shadow_blow_tables(d);
+
+        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
+
+        rc = -ENOMEM;
+        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+            goto out;
+        dirty_vram->begin_pfn = begin_pfn;
+        dirty_vram->end_pfn = end_pfn;
+        d->arch.hvm.dirty_vram = dirty_vram;
+
+        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
+            goto out_dirty_vram;
+        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
+
+        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
+            goto out_sl1ma;
+
+        dirty_vram->last_dirty = NOW();
+
+        /* Tell the caller that this time we could not track dirty bits. */
+        rc = -ENODATA;
+    }
+    else if ( dirty_vram->last_dirty == -1 )
+        /* still completely clean, just copy our empty bitmap */
+        memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
+    else
+    {
+        mfn_t map_mfn = INVALID_MFN;
+        void *map_sl1p = NULL;
+
+        /* Iterate over VRAM to track dirty bits. */
+        for ( i = 0; i < nr; i++ )
+        {
+            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
+            struct page_info *page;
+            int dirty = 0;
+            paddr_t sl1ma = dirty_vram->sl1ma[i];
+
+            if ( mfn_eq(mfn, INVALID_MFN) )
+                dirty = 1;
+            else
+            {
+                page = mfn_to_page(mfn);
+                switch ( page->u.inuse.type_info & PGT_count_mask )
+                {
+                case 0:
+                    /* No guest reference, nothing to track. */
+                    break;
+
+                case 1:
+                    /* One guest reference. */
+                    if ( sl1ma == INVALID_PADDR )
+                    {
+                        /* We don't know which sl1e points to this, too bad. */
+                        dirty = 1;
+                        /*
+                         * TODO: Heuristics for finding the single mapping of
+                         * this gmfn
+                         */
+                        flush_tlb |= sh_remove_all_mappings(d, mfn,
+                                                            _gfn(begin_pfn + i));
+                    }
+                    else
+                    {
+                        /*
+                         * Hopefully the most common case: only one mapping,
+                         * whose dirty bit we can use.
+                         */
+                        l1_pgentry_t *sl1e;
+                        mfn_t sl1mfn = maddr_to_mfn(sl1ma);
+
+                        if ( !mfn_eq(sl1mfn, map_mfn) )
+                        {
+                            if ( map_sl1p )
+                                unmap_domain_page(map_sl1p);
+                            map_sl1p = map_domain_page(sl1mfn);
+                            map_mfn = sl1mfn;
+                        }
+                        sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
+
+                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                        {
+                            dirty = 1;
+                            /*
+                             * Note: this is atomic, so we may clear a
+                             * _PAGE_ACCESSED set by another processor.
+                             */
+                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                            flush_tlb = 1;
+                        }
+                    }
+                    break;
+
+                default:
+                    /* More than one guest reference,
+                     * we don't afford tracking that. */
+                    dirty = 1;
+                    break;
+                }
+            }
+
+            if ( dirty )
+            {
+                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+                dirty_vram->last_dirty = NOW();
+            }
+        }
+
+        if ( map_sl1p )
+            unmap_domain_page(map_sl1p);
+
+        memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
+        memset(dirty_vram->dirty_bitmap, 0, dirty_size);
+        if ( dirty_vram->last_dirty + SECONDS(2) < NOW() )
+        {
+            /*
+             * Was clean for more than two seconds, try to disable guest
+             * write access.
+             */
+            for ( i = begin_pfn; i < end_pfn; i++ )
+            {
+                mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
+                if ( !mfn_eq(mfn, INVALID_MFN) )
+                    flush_tlb |= sh_remove_write_access(d, mfn, 1, 0);
+            }
+            dirty_vram->last_dirty = -1;
+        }
+    }
+    if ( flush_tlb )
+        guest_flush_tlb_mask(d, d->dirty_cpumask);
+    goto out;
+
+ out_sl1ma:
+    xfree(dirty_vram->sl1ma);
+ out_dirty_vram:
+    xfree(dirty_vram);
+    dirty_vram = d->arch.hvm.dirty_vram = NULL;
+
+ out:
+    paging_unlock(d);
+    if ( rc == 0 && dirty_bitmap != NULL &&
+         copy_to_guest(guest_dirty_bitmap, dirty_bitmap, dirty_size) )
+    {
+        paging_lock(d);
+        for ( i = 0; i < dirty_size; i++ )
+            dirty_vram->dirty_bitmap[i] |= dirty_bitmap[i];
+        paging_unlock(d);
+        rc = -EFAULT;
+    }
+    vfree(dirty_bitmap);
+    p2m_unlock(p2m_get_hostp2m(d));
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 7d16d1c1a9..e34937f43b 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -494,7 +494,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -649,15 +648,19 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
+    if ( unlikely(level == 1) && is_hvm_domain(d) )
     {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
+        struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
+
+        if ( dirty_vram && dirty_vram->last_dirty == -1 &&
+             gfn_x(target_gfn) >= dirty_vram->begin_pfn &&
+             gfn_x(target_gfn) < dirty_vram->end_pfn )
+        {
+            if ( ft & FETCH_TYPE_WRITE )
+                dirty_vram->last_dirty = NOW();
+            else
+                sflags &= ~_PAGE_RW;
+        }
     }
 
     /* Read-only memory */
@@ -1082,7 +1085,7 @@ static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
     unsigned long gfn;
     struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
+    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
          || !(flags & _PAGE_RW) /* read-only mapping? */
          || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
         return;
@@ -1113,7 +1116,7 @@ static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
     unsigned long gfn;
     struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
+    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
          || !(flags & _PAGE_RW) /* read-only mapping? */
          || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
         return;
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 3fd3f0617a..eb5d1e3fab 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -438,6 +438,14 @@ mfn_t oos_snapshot_lookup(struct domain *d, mfn_t gmfn);
 
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
+/* Deliberately free all the memory we can: tear down all of d's shadows. */
+void shadow_blow_tables(struct domain *d);
+
+/*
+ * Remove all mappings of a guest frame from the shadow tables.
+ * Returns non-zero if we need to flush TLBs.
+ */
+int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn);
 
 /* Reset the up-pointers of every L3 shadow to 0.
  * This is called when l3 shadows stop being pinnable, to clear out all
--
generated by git-patchbot for /home/xen/git/xen.git#staging