[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [linux-2.6.18-xen] blktap: refine mm tracking


  • To: xen-changelog@xxxxxxxxxxxxxxxxxxx
  • From: Xen patchbot-linux-2.6.18-xen <patchbot@xxxxxxx>
  • Date: Mon, 16 Nov 2015 12:44:04 +0000
  • Delivery-date: Mon, 16 Nov 2015 12:44:10 +0000
  • List-id: "Change log for Mercurial \(receive only\)" <xen-changelog.lists.xen.org>

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1447677635 -3600
# Node ID a0a79976ffebcfa5ff55feb09f833fc1adbeb2e8
# Parent  3ae3ae4c878fe95de3f895110806a428e88e90b1
blktap: refine mm tracking

As already noted in c/s 1013:eb21d96a6aae ("xen/blktap: fix cleanup
after unclean application exit") and 1015:2a4b455b1fba ("blktap: fix
cleanup after unclean application exit #2"), the extra reference
obtained on tapdisk's mm (from c/s 867:978499ee4f39 ["linux/blktap: fix
vma_close() for partial munmap"]) is problematic. However, while the
two c/s fixed what they claim to, the reconnect case got broken
(tap_blkif_schedule() clearing init->mm left no way for it to get set
again).

Do away with the extra reference: The mm of interest can't go away as
long as we have a VMA in it. To take care of VMA splitting, track the
amount of outstanding mapped space, and zap info->mm when that value
drops to zero.

At the same time also fix the oversight in the first of the mentioned
c/s of not keeping ring_ok up to date: The zapping of info->mm without
clearing info->ring_ok led to NULL pointer accesses in down_read()
called from dispatch_rw_block_io(). We don't really need the extra
flag, we can use info->mm for that purpose.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---


diff -r 3ae3ae4c878f -r a0a79976ffeb drivers/xen/blktap/blktap.c
--- a/drivers/xen/blktap/blktap.c       Mon Nov 16 13:38:00 2015 +0100
+++ b/drivers/xen/blktap/blktap.c       Mon Nov 16 13:40:35 2015 +0100
@@ -102,10 +102,10 @@ typedef struct domid_translate_ext {
 typedef struct tap_blkif {
        struct mm_struct *mm;         /*User address space                   */
        unsigned long rings_vstart;   /*Kernel memory mapping                */
+       unsigned long rings_total;    /*Kernel memory mapping size           */
        unsigned long user_vstart;    /*User memory mapping                  */
        unsigned long dev_inuse;      /*One process opens device at a time.  */
        unsigned long dev_pending;    /*In process of being opened           */
-       unsigned long ring_ok;        /*make this ring->state                */
        blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
        wait_queue_head_t wait;       /*for poll                             */
        unsigned long mode;           /*current switching mode               */
@@ -397,12 +397,22 @@ static void blktap_vma_open(struct vm_ar
  */
 static void blktap_vma_close(struct vm_area_struct *vma)
 {
+       tap_blkif_t *info;
        struct vm_area_struct *next = vma->vm_next;
 
+       if (vma->vm_file == NULL)
+               return;
+
+       info = vma->vm_file->private_data;
+       info->rings_total -= vma->vm_end - vma->vm_start;
+       if (info->rings_total == 0) {
+               info->mm = NULL;
+               return;
+       }
+
        if (next == NULL ||
            vma->vm_ops != next->vm_ops ||
            vma->vm_end != next->vm_start ||
-           vma->vm_file == NULL ||
            vma->vm_file != next->vm_file)
                return;
 
@@ -537,7 +547,6 @@ void signal_tapdisk(int idx)
 {
        tap_blkif_t *info;
        struct task_struct *ptask;
-       struct mm_struct *mm;
 
        /*
         * if the userland tools set things up wrong, this could be negative;
@@ -556,10 +565,7 @@ void signal_tapdisk(int idx)
                        info->status = CLEANSHUTDOWN;
        }
        info->blkif = NULL;
-
-       mm = xchg(&info->mm, NULL);
-       if (mm)
-               mmput(mm);
+       info->mm = NULL;
 }
 
 static int blktap_open(struct inode *inode, struct file *filp)
@@ -629,19 +635,16 @@ static int blktap_open(struct inode *ino
 static int blktap_release(struct inode *inode, struct file *filp)
 {
        tap_blkif_t *info = filp->private_data;
-       struct mm_struct *mm;
        
        /* check for control device */
        if (!info)
                return 0;
 
-       info->ring_ok = 0;
+       info->mm = NULL;
        smp_wmb();
        info->rings_vstart = 0;
+       info->rings_total = 0;
 
-       mm = xchg(&info->mm, NULL);
-       if (mm)
-               mmput(mm);
        kfree(info->foreign_maps->map);
        kfree(info->foreign_maps);
        info->foreign_maps = NULL;
@@ -700,7 +703,7 @@ static int blktap_mmap(struct file *filp
                return -ENOMEM;
        }
 
-       if (info->rings_vstart) {
+       if (info->rings_total) {
                WPRINTK("mmap already called on filp %p (minor %d)\n",
                        filp, info->minor);
                return -EPERM;
@@ -718,6 +721,7 @@ static int blktap_mmap(struct file *filp
 
        size >>= PAGE_SHIFT;
        info->rings_vstart = vma->vm_start;
+       info->rings_total  = vma->vm_end - vma->vm_start;
        info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
     
        /* Map the ring pages to the start of the region and reserve it. */
@@ -754,15 +758,15 @@ static int blktap_mmap(struct file *filp
        vma->vm_mm->context.has_foreign_mappings = 1;
 #endif
 
-       info->mm = get_task_mm(current);
        smp_wmb();
-       info->ring_ok = 1;
+       info->mm = vma->vm_mm;
        return 0;
  fail:
        /* Clear any active mappings. */
        zap_page_range(vma, vma->vm_start, 
                       vma->vm_end - vma->vm_start, NULL);
        info->rings_vstart = 0;
+       info->rings_total = 0;
 
        return -ENOMEM;
 }
@@ -1057,6 +1061,7 @@ static void fast_flush_area(pending_req_
        unsigned long uvaddr;
        struct mm_struct *mm = info->mm;
 
+       smp_rmb();
        if (mm != NULL)
                down_read(&mm->mmap_sem);
 
@@ -1187,13 +1192,6 @@ int tap_blkif_schedule(void *arg)
        info = tapfds[blkif->dev_num];
        blkif_put(blkif);
 
-       if (info) {
-               struct mm_struct *mm = xchg(&info->mm, NULL);
-
-               if (mm)
-                       mmput(mm);
-       }
-
        return 0;
 }
 
@@ -1474,11 +1472,12 @@ static void dispatch_rw_block_io(blkif_t
        }
        
        /* Make sure userspace is ready. */
-       if (!info->ring_ok) {
+       mm = info->mm;
+       smp_rmb();
+       if (!mm) {
                WPRINTK("ring not ready for requests!\n");
                goto fail_response;
        }
-       smp_rmb();
 
        if (RING_FULL(&info->ufe_ring)) {
                WPRINTK("fe_ring is full, "
@@ -1496,7 +1495,6 @@ static void dispatch_rw_block_io(blkif_t
        if (req->operation == BLKIF_OP_WRITE)
                flags |= GNTMAP_readonly;
        op = 0;
-       mm = info->mm;
        if (!xen_feature(XENFEAT_auto_translated_physmap))
                down_read(&mm->mmap_sem);
        for (i = 0; i < nseg; i++) {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.