[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH][PVFB][LINUX] Fix possible sleep while holding spinlock



xenfb_update_screen() calls zap_page_range() while holding spinlock
mm_lock.  Big no-no.

Changeset 13018:c98ca86138a7422cdf9b15d87c95619b7277bb6a merely sweeps
the bug under the carpet: it silences zap_page_range()'s cries for
help by keeping interrupts enabled.  That doesn't fix the bug, and
it's also wrong: if a critical region gets interrupted, and the
interrupt printk()s, xenfb_refresh() gets executed and promptly
deadlocks.

This patch fixes the locking, but leaves open a race between
xenfb_update_screen() and do_no_page().  See the source code for a
detailed explanation of how it works, and where it fails.

Signed-off-by: Markus Armbruster <armbru@xxxxxxxxxx>


diff -r c2fe2635e68b linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c
--- a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c  Fri Dec 15 09:52:19 
2006 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c  Fri Dec 15 15:52:39 
2006 +0100
@@ -49,8 +51,9 @@ struct xenfb_info
        struct timer_list       refresh;
        int                     dirty;
        int                     x1, y1, x2, y2; /* dirty rectangle,
-                                                  protected by mm_lock */
-       spinlock_t              mm_lock;
+                                                  protected by dirty_lock */
+       spinlock_t              dirty_lock;
+       struct mutex            mm_lock;
        int                     nr_pages;
        struct page             **pages;
        struct list_head        mappings; /* protected by mm_lock */
@@ -63,6 +66,70 @@ struct xenfb_info
 
        struct xenbus_device    *xbdev;
 };
+
+/*
+ * How the locks work together
+ *
+ * There are two locks: spinlock dirty_lock protecting the dirty
+ * rectangle, and mutex mm_lock protecting mappings.
+ *
+ * The problem is that dirty rectangle and mappings aren't
+ * independent: the dirty rectangle must cover all faulted pages in
+ * mappings.  We need to prove that our locking maintains this
+ * invariant.
+ *
+ * There are several kinds of critical regions:
+ *
+ * 1. Holding only dirty_lock: xenfb_refresh().  May run in
+ *    interrupts.  Extends the dirty rectangle.  Trivially preserves
+ *    invariant.
+ *
+ * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close().  Touch
+ *    only mappings.  The former creates unfaulted pages.  Preserves
+ *    invariant.  The latter removes pages.  Preserves invariant.
+ *
+ * 3. Holding both locks: xenfb_vm_nopage().  Extends the dirty
+ *    rectangle and updates mappings consistently.  Preserves
+ *    invariant.
+ *
+ * 4. The ugliest one: xenfb_update_screen().  Clear the dirty
+ *    rectangle and update mappings consistently.
+ *
+ *    We can't simply hold both locks, because zap_page_range() cannot
+ *    be called with a spinlock held.
+ *
+ *    Therefore, we first clear the dirty rectangle with both locks
+ *    held.  Then we unlock dirty_lock and update the mappings.
+ *    Critical regions that hold only dirty_lock may interfere with
+ *    that.  This can only be region 1: xenfb_refresh().  But that
+ *    just extends the dirty rectangle, which can't harm the
+ *    invariant.
+ *
+ * But FIXME: the invariant is too weak.  It misses that the fault
+ * record in mappings must be consistent with the mapping of pages in
+ * the associated address space!  do_no_page() updates the PTE after
+ * xenfb_vm_nopage() returns, i.e. outside the critical region.  This
+ * allows the following race:
+ *
+ * X writes to some address in the Xen frame buffer
+ * Fault - call do_no_page()
+ *     call xenfb_vm_nopage()
+ *         grab mm_lock
+ *         map->faults++;
+ *         release mm_lock
+ *     return back to do_no_page()
+ * (preempted, or SMP)
+ * Xen worker thread runs.
+ *      grab mm_lock
+ *      look at mappings
+ *          find this mapping, zaps its pages (but page not in pte yet)
+ *          clear map->faults
+ *      releases mm_lock
+ * (back to X process)
+ *     put page in X's pte
+ *
+ * Oh well, we wont be updating the writes to this page anytime soon.
+ */
 
 static int xenfb_fps = 20;
 static unsigned long xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH 
/ 8;
@@ -105,6 +172,7 @@ static int xenfb_queue_full(struct xenfb
 
 static void xenfb_update_screen(struct xenfb_info *info)
 {
+       unsigned long flags;
        int y1, y2, x1, x2;
        struct xenfb_mapping *map;
 
@@ -113,14 +181,16 @@ static void xenfb_update_screen(struct x
        if (xenfb_queue_full(info))
                return;
 
-       spin_lock(&info->mm_lock);
-
+       mutex_lock(&info->mm_lock);
+
+       spin_lock_irqsave(&info->dirty_lock, flags);
        y1 = info->y1;
        y2 = info->y2;
        x1 = info->x1;
        x2 = info->x2;
        info->x1 = info->y1 = INT_MAX;
        info->x2 = info->y2 = 0;
+       spin_unlock_irqrestore(&info->dirty_lock, flags);
 
        list_for_each_entry(map, &info->mappings, link) {
                if (!map->faults)
@@ -130,7 +200,7 @@ static void xenfb_update_screen(struct x
                map->faults = 0;
        }
 
-       spin_unlock(&info->mm_lock);
+       mutex_unlock(&info->mm_lock);
 
        xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);
 }
@@ -213,9 +283,11 @@ static void xenfb_refresh(struct xenfb_i
 static void xenfb_refresh(struct xenfb_info *info,
                          int x1, int y1, int w, int h)
 {
-       spin_lock(&info->mm_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&info->dirty_lock, flags);
        __xenfb_refresh(info, x1, y1, w, h);
-       spin_unlock(&info->mm_lock);
+       spin_unlock_irqrestore(&info->dirty_lock, flags);
 }
 
 static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
@@ -253,12 +325,12 @@ static void xenfb_vm_close(struct vm_are
        struct xenfb_mapping *map = vma->vm_private_data;
        struct xenfb_info *info = map->info;
 
-       spin_lock(&info->mm_lock);
+       mutex_lock(&info->mm_lock);
        if (atomic_dec_and_test(&map->map_refs)) {
                list_del(&map->link);
                kfree(map);
        }
-       spin_unlock(&info->mm_lock);
+       mutex_unlock(&info->mm_lock);
 }
 
 static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
@@ -267,13 +339,15 @@ static struct page *xenfb_vm_nopage(stru
        struct xenfb_mapping *map = vma->vm_private_data;
        struct xenfb_info *info = map->info;
        int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
+       unsigned long flags;
        struct page *page;
        int y1, y2;
 
        if (pgnr >= info->nr_pages)
                return NOPAGE_SIGBUS;
 
-       spin_lock(&info->mm_lock);
+       mutex_lock(&info->mm_lock);
+       spin_lock_irqsave(&info->dirty_lock, flags);
        page = info->pages[pgnr];
        get_page(page);
        map->faults++;
@@ -283,7 +357,8 @@ static struct page *xenfb_vm_nopage(stru
        if (y2 > info->fb_info->var.yres)
                y2 = info->fb_info->var.yres;
        __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
-       spin_unlock(&info->mm_lock);
+       spin_unlock_irqrestore(&info->dirty_lock, flags);
+       mutex_unlock(&info->mm_lock);
 
        if (type)
                *type = VM_FAULT_MINOR;
@@ -323,9 +398,9 @@ static int xenfb_mmap(struct fb_info *fb
        map->info = info;
        atomic_set(&map->map_refs, 1);
 
-       spin_lock(&info->mm_lock);
+       mutex_lock(&info->mm_lock);
        list_add(&map->link, &info->mappings);
-       spin_unlock(&info->mm_lock);
+       mutex_unlock(&info->mm_lock);
 
        vma->vm_ops = &xenfb_vm_ops;
        vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
@@ -382,7 +459,8 @@ static int __devinit xenfb_probe(struct 
        info->xbdev = dev;
        info->irq = -1;
        info->x1 = info->y1 = INT_MAX;
-       spin_lock_init(&info->mm_lock);
+       spin_lock_init(&info->dirty_lock);
+       mutex_init(&info->mm_lock);
        init_waitqueue_head(&info->wq);
        init_timer(&info->refresh);
        info->refresh.function = xenfb_timer;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.