[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [linux-2.6.18-xen] blktap: fix locking (again)



# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1325493307 -3600
# Node ID 80fc8882d7f9730a231ecba84f5fcf738fc94d27
# Parent  a7851a465b41332cf7871cc41fe7a95580816ed9
blktap: fix locking (again)

c/s 1090 wasn't really helping much - I made that change under the
mistaken impression that the zap_pte() hook would be called with the
mmap_sem held. Since that assumption was wrong, and since none of the
uses of mmap_sem here are actually write ones (they only look up vma
information), convert them all to read acquires/releases.

A new spinlock needs to be introduced, protecting idx_map. This has to
nest inside mmap_sem, which requires some code movement.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---


diff -r a7851a465b41 -r 80fc8882d7f9 drivers/xen/blktap/blktap.c
--- a/drivers/xen/blktap/blktap.c       Mon Dec 19 10:08:20 2011 +0100
+++ b/drivers/xen/blktap/blktap.c       Mon Jan 02 09:35:07 2012 +0100
@@ -113,6 +113,7 @@
        pid_t pid;                    /*tapdisk process id                   */
        enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
                                                  shutdown                   */
+       spinlock_t map_lock;          /*protects idx_map                     */
        struct idx_map {
                u16 mem, req;
        } *idx_map;                   /*Record the user ring id to kern
@@ -295,7 +296,7 @@
        pte_t copy;
        tap_blkif_t *info = NULL;
        unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0;
-       unsigned long offset, uvstart = 0;
+       unsigned long offset;
        struct page *pg;
        struct grant_handle_pair *khandle;
        struct gnttab_unmap_grant_ref unmap[2];
@@ -304,25 +305,28 @@
         * If the address is before the start of the grant mapped region or
         * if vm_file is NULL (meaning mmap failed and we have nothing to do)
         */
-       if (vma->vm_file != NULL) {
+       if (vma->vm_file != NULL)
                info = vma->vm_file->private_data;
-               uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
-       }
-       if (vma->vm_file == NULL || uvaddr < uvstart)
+       if (info == NULL || uvaddr < info->user_vstart)
                return ptep_get_and_clear_full(vma->vm_mm, uvaddr, 
                                               ptep, is_fullmm);
 
-       /* TODO Should these be changed to if statements? */
-       BUG_ON(!info);
-       BUG_ON(!info->idx_map);
-
-       offset = (uvaddr - uvstart) >> PAGE_SHIFT;
+       offset = (uvaddr - info->user_vstart) >> PAGE_SHIFT;
        usr_idx = OFFSET_TO_USR_IDX(offset);
        seg = OFFSET_TO_SEG(offset);
 
+       spin_lock(&info->map_lock);
+
        pending_idx = info->idx_map[usr_idx].req;
        mmap_idx = info->idx_map[usr_idx].mem;
 
+       /* fast_flush_area() may already have cleared this entry */
+       if (mmap_idx == INVALID_MIDX) {
+               spin_unlock(&info->map_lock);
+               return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
+                                              is_fullmm);
+       }
+
        pg = idx_to_page(mmap_idx, pending_idx, seg);
        ClearPageReserved(pg);
        info->foreign_map.map[offset + RING_PAGES] = NULL;
@@ -365,6 +369,8 @@
                        BUG();
        }
 
+       spin_unlock(&info->map_lock);
+
        return copy;
 }
 
@@ -489,6 +495,7 @@
                }
 
                info->minor = minor;
+               spin_lock_init(&info->map_lock);
                /*
                 * Make sure that we have a minor before others can
                 * see us.
@@ -1029,25 +1036,30 @@
                             unsigned int u_idx, tap_blkif_t *info)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
-       unsigned int i, mmap_idx, invcount = 0, locked = 0;
+       unsigned int i, mmap_idx, invcount = 0;
        struct grant_handle_pair *khandle;
        uint64_t ptep;
        int ret;
        unsigned long uvaddr;
        struct mm_struct *mm = info->mm;
 
+       if (mm != NULL)
+               down_read(&mm->mmap_sem);
+
        if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
-               down_write(&mm->mmap_sem);
+ slow:
                blktap_zap_page_range(mm,
                                      MMAP_VADDR(info->user_vstart, u_idx, 0),
                                      req->nr_pages);
                info->idx_map[u_idx].mem = INVALID_MIDX;
-               up_write(&mm->mmap_sem);
+               up_read(&mm->mmap_sem);
                return;
        }
 
        mmap_idx = req->mem_idx;
 
+       spin_lock(&info->map_lock);
+
        for (i = 0; i < req->nr_pages; i++) {
                uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
 
@@ -1066,15 +1078,13 @@
 
                if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) {
                        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-                       if (!locked++)
-                               down_write(&mm->mmap_sem);
                        if (create_lookup_pte_addr(
                                mm,
                                MMAP_VADDR(info->user_vstart, u_idx, i),
                                &ptep) !=0) {
-                               up_write(&mm->mmap_sem);
+                               spin_unlock(&info->map_lock);
                                WPRINTK("Couldn't get a pte addr!\n");
-                               return;
+                               goto slow;
                        }
 
                        gnttab_set_unmap_op(&unmap[invcount], ptep,
@@ -1091,19 +1101,11 @@
                GNTTABOP_unmap_grant_ref, unmap, invcount);
        BUG_ON(ret);
        
-       if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
-               if (!locked++)
-                       down_write(&mm->mmap_sem);
-               blktap_zap_page_range(mm, 
-                                     MMAP_VADDR(info->user_vstart, u_idx, 0), 
-                                     req->nr_pages);
-       }
+       info->idx_map[u_idx].mem = INVALID_MIDX;
 
-       if (!locked && mm != NULL)
-               down_write(&mm->mmap_sem);
-       info->idx_map[u_idx].mem = INVALID_MIDX;
+       spin_unlock(&info->map_lock);
        if (mm != NULL)
-               up_write(&mm->mmap_sem);
+               up_read(&mm->mmap_sem);
 }
 
 /******************************************************************
@@ -1406,7 +1408,9 @@
                goto fail_response;
 
        /* Check we have space on user ring - should never fail. */
+       spin_lock(&info->map_lock);
        usr_idx = GET_NEXT_REQ(info->idx_map);
+       spin_unlock(&info->map_lock);
        if (usr_idx >= MAX_PENDING_REQS) {
                WARN_ON(1);
                goto fail_response;
@@ -1445,7 +1449,7 @@
        op = 0;
        mm = info->mm;
        if (!xen_feature(XENFEAT_auto_translated_physmap))
-               down_write(&mm->mmap_sem);
+               down_read(&mm->mmap_sem);
        for (i = 0; i < nseg; i++) {
                unsigned long uvaddr;
                unsigned long kvaddr;
@@ -1462,9 +1466,9 @@
                        /* Now map it to user. */
                        ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
                        if (ret) {
-                               up_write(&mm->mmap_sem);
+                               up_read(&mm->mmap_sem);
                                WPRINTK("Couldn't get a pte addr!\n");
-                               goto fail_flush;
+                               goto fail_response;
                        }
 
                        gnttab_set_map_op(&map[op], ptep,
@@ -1478,12 +1482,15 @@
                             req->seg[i].first_sect + 1);
        }
 
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               down_read(&mm->mmap_sem);
+
+       spin_lock(&info->map_lock);
+
        ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
        BUG_ON(ret);
 
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-               up_write(&mm->mmap_sem);
-
                for (i = 0; i < (nseg*2); i+=2) {
                        unsigned long uvaddr;
                        unsigned long offset;
@@ -1548,18 +1555,18 @@
                }
        }
 
+       /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
+       info->idx_map[usr_idx].mem = mmap_idx;
+       info->idx_map[usr_idx].req = pending_idx;
+
+       spin_unlock(&info->map_lock);
+
        if (ret)
                goto fail_flush;
 
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               down_write(&mm->mmap_sem);
-       /* Mark mapped pages as reserved: */
-       for (i = 0; i < req->nr_segments; i++) {
-               struct page *pg;
-
-               pg = idx_to_page(mmap_idx, pending_idx, i);
-               SetPageReserved(pg);
-               if (xen_feature(XENFEAT_auto_translated_physmap)) {
+       if (xen_feature(XENFEAT_auto_translated_physmap)) {
+               for (i = 0; i < nseg; i++) {
+                       struct page *pg = idx_to_page(mmap_idx, pending_idx, i);
                        unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
                                                          usr_idx, i);
                        if (vma && uvaddr >= vma->vm_end) {
@@ -1577,18 +1584,12 @@
                                        continue;
                        }
                        ret = vm_insert_page(vma, uvaddr, pg);
-                       if (ret) {
-                               up_write(&mm->mmap_sem);
+                       if (ret)
                                goto fail_flush;
-                       }
                }
        }
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               up_write(&mm->mmap_sem);
        
-       /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
-       info->idx_map[usr_idx].mem = mmap_idx;
-       info->idx_map[usr_idx].req = pending_idx;
+       up_read(&mm->mmap_sem);
 
        blkif_get(blkif);
        /* Finally, write the request message to the user ring. */
@@ -1611,6 +1612,7 @@
        return;
 
  fail_flush:
+       up_read(&mm->mmap_sem);
        WPRINTK("Reached Fail_flush\n");
        fast_flush_area(pending_req, pending_idx, usr_idx, info);
  fail_response:

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, who monitor our
servers 24x7x365 and back the service with RackSpace's Fanatical Support®.