[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86: make page table handling error paths preemptible



commit b8efae696c9a2d46e91fa0eda739427efc16c250
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu May 2 16:39:37 2013 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu May 2 16:39:37 2013 +0200

    x86: make page table handling error paths preemptible
    
    ... as they may take significant amounts of time.
    
    This requires cloning the tweaked continuation logic from
    do_mmuext_op() to do_mmu_update().
    
    Note that in mod_l[34]_entry() a negative "preemptible" value gets
    passed to put_page_from_l[34]e() now, telling the callee to store the
    respective page in current->arch.old_guest_table (for a hypercall
    continuation to pick up), rather than carrying out the put right away.
    This is going to be made a little more explicit by a subsequent cleanup
    patch.
    
    This is part of CVE-2013-1918 / XSA-45.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/mm.c |  125 +++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 0dc9070..051e06e 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1139,7 +1139,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned 
long pfn,
     }
 
     if ( unlikely(partial > 0) )
+    {
+        ASSERT(preemptible >= 0);
         return __put_page_type(l3e_get_page(l3e), preemptible);
+    }
+
+    if ( preemptible < 0 )
+    {
+        current->arch.old_guest_table = l3e_get_page(l3e);
+        return 0;
+    }
 
     return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
 }
@@ -1151,7 +1160,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned 
long pfn,
          (l4e_get_pfn(l4e) != pfn) )
     {
         if ( unlikely(partial > 0) )
+        {
+            ASSERT(preemptible >= 0);
             return __put_page_type(l4e_get_page(l4e), preemptible);
+        }
+
+        if ( preemptible < 0 )
+        {
+            current->arch.old_guest_table = l4e_get_page(l4e);
+            return 0;
+        }
+
         return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
     }
     return 1;
@@ -1347,12 +1366,17 @@ static int alloc_l3_table(struct page_info *page, int 
preemptible)
     if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
     {
         MEM_LOG("Failure in alloc_l3_table: entry %d", i);
+        if ( i )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 0;
+            current->arch.old_guest_table = page;
+        }
         while ( i-- > 0 )
         {
             if ( !is_guest_l3_slot(i) )
                 continue;
             unadjust_guest_l3e(pl3e[i], d);
-            put_page_from_l3e(pl3e[i], pfn, 0, 0);
         }
     }
 
@@ -1398,22 +1422,24 @@ static int alloc_l4_table(struct page_info *page, int 
preemptible)
             page->nr_validated_ptes = i;
             page->partial_pte = partial ?: 1;
         }
-        else if ( rc == -EINTR )
+        else if ( rc < 0 )
         {
+            if ( rc != -EINTR )
+                MEM_LOG("Failure in alloc_l4_table: entry %d", i);
             if ( i )
             {
                 page->nr_validated_ptes = i;
                 page->partial_pte = 0;
-                rc = -EAGAIN;
+                if ( rc == -EINTR )
+                    rc = -EAGAIN;
+                else
+                {
+                    if ( current->arch.old_guest_table )
+                        page->nr_validated_ptes++;
+                    current->arch.old_guest_table = page;
+                }
             }
         }
-        else if ( rc < 0 )
-        {
-            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
-            while ( i-- > 0 )
-                if ( is_guest_l4_slot(d, i) )
-                    put_page_from_l4e(pl4e[i], pfn, 0, 0);
-        }
         if ( rc < 0 )
         {
             unmap_domain_page(pl4e);
@@ -1846,7 +1872,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
         if ( !create_pae_xen_mappings(d, pl3e) )
             BUG();
 
-    put_page_from_l3e(ol3e, pfn, 0, 0);
+    put_page_from_l3e(ol3e, pfn, 0, -preemptible);
     return rc;
 }
 
@@ -1907,7 +1933,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
         return -EFAULT;
     }
 
-    put_page_from_l4e(ol4e, pfn, 0, 0);
+    put_page_from_l4e(ol4e, pfn, 0, -preemptible);
     return rc;
 }
 
@@ -2063,7 +2089,15 @@ static int alloc_page_type(struct page_info *page, 
unsigned long type,
                 PRtype_info ": caf=%08lx taf=%" PRtype_info,
                 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
                 type, page->count_info, page->u.inuse.type_info);
-        page->u.inuse.type_info = 0;
+        if ( page != current->arch.old_guest_table )
+            page->u.inuse.type_info = 0;
+        else
+        {
+            ASSERT((page->u.inuse.type_info &
+                    (PGT_count_mask | PGT_validated)) == 1);
+            get_page_light(page);
+            page->u.inuse.type_info |= PGT_partial;
+        }
     }
     else
     {
@@ -2961,21 +2995,17 @@ long do_mmuext_op(
             }
 
             if ( (rc = xsm_memory_pin_page(XSM_HOOK, d, pg_owner, page)) != 0 )
-            {
-                put_page_and_type(page);
                 okay = 0;
-                break;
-            }
-
-            if ( unlikely(test_and_set_bit(_PGT_pinned,
-                                           &page->u.inuse.type_info)) )
+            else if ( unlikely(test_and_set_bit(_PGT_pinned,
+                                                &page->u.inuse.type_info)) )
             {
                 MEM_LOG("Mfn %lx already pinned", page_to_mfn(page));
-                put_page_and_type(page);
                 okay = 0;
-                break;
             }
 
+            if ( unlikely(!okay) )
+                goto pin_drop;
+
             /* A page is dirtied when its pin status is set. */
             paging_mark_dirty(pg_owner, page_to_mfn(page));
 
@@ -2989,7 +3019,13 @@ long do_mmuext_op(
                                                &page->u.inuse.type_info));
                 spin_unlock(&pg_owner->page_alloc_lock);
                 if ( drop_ref )
-                    put_page_and_type(page);
+                {
+        pin_drop:
+                    if ( type == PGT_l1_page_table )
+                        put_page_and_type(page);
+                    else
+                        curr->arch.old_guest_table = page;
+                }
             }
 
             break;
@@ -3383,13 +3419,30 @@ long do_mmu_update(
     void *va;
     unsigned long gpfn, gmfn, mfn;
     struct page_info *page;
-    int rc = 0, i = 0;
-    unsigned int cmd, done = 0, pt_dom;
-    struct vcpu *v = current;
+    unsigned int cmd, i = 0, done = 0, pt_dom;
+    struct vcpu *curr = current, *v = curr;
     struct domain *d = v->domain, *pt_owner = d, *pg_owner;
     struct domain_mmap_cache mapcache;
     uint32_t xsm_needed = 0;
     uint32_t xsm_checked = 0;
+    int rc = put_old_guest_table(curr);
+
+    if ( unlikely(rc) )
+    {
+        if ( likely(rc == -EAGAIN) )
+            rc = hypercall_create_continuation(
+                     __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
+                     foreigndom);
+        return rc;
+    }
+
+    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
+         likely(guest_handle_is_null(ureqs)) )
+    {
+        /* See the curr->arch.old_guest_table related
+         * hypercall_create_continuation() below. */
+        return (int)foreigndom;
+    }
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -3433,7 +3486,7 @@ long do_mmu_update(
 
     for ( i = 0; i < count; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
         {
             rc = -EAGAIN;
             break;
@@ -3628,9 +3681,27 @@ long do_mmu_update(
     }
 
     if ( rc == -EAGAIN )
+    {
+        ASSERT(i < count);
         rc = hypercall_create_continuation(
             __HYPERVISOR_mmu_update, "hihi",
             ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+    }
+    else if ( curr->arch.old_guest_table )
+    {
+        XEN_GUEST_HANDLE_PARAM(void) null;
+
+        ASSERT(rc || i == count);
+        set_xen_guest_handle(null, NULL);
+        /*
+         * In order to have a way to communicate the final return value to
+         * our continuation, we pass this in place of "foreigndom", building
+         * on the fact that this argument isn't needed anymore.
+         */
+        rc = hypercall_create_continuation(
+                __HYPERVISOR_mmu_update, "hihi", null,
+                MMU_UPDATE_PREEMPTED, null, rc);
+    }
 
     put_pg_owner(pg_owner);
 
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.