|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [linux-2.6.18-xen] blktap: refine mm tracking
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1447677635 -3600
# Node ID a0a79976ffebcfa5ff55feb09f833fc1adbeb2e8
# Parent 3ae3ae4c878fe95de3f895110806a428e88e90b1
blktap: refine mm tracking
As already noted in c/s 1013:eb21d96a6aae ("xen/blktap: fix cleanup
after unclean application exit") and 1015:2a4b455b1fba ("blktap: fix
cleanup after unclean application exit #2"), the extra reference
obtained on tapdisk's mm (from c/s 867:978499ee4f39 ["linux/blktap: fix
vma_close() for partial munmap"]) is problematic. However, while the
two c/s fixed what they claim to, the reconnect case got broken
(tap_blkif_schedule() clearing init->mm left no way for it to get set
again).
Do away with the extra reference: The mm of interest can't go away as
long as we have a VMA in it. To take care of VMA splitting, track the
amount of outstanding mapped space, and zap info->mm when that value
drops to zero.
At the same time also fix the oversight in the first of the mentioned
c/s of not keeping ring_ok up to date: The zapping of info->mm without
clearing info->ring_ok led to NULL pointer accesses in down_read()
called from dispatch_rw_block_io(). We don't really need the extra
flag, we can use info->mm for that purpose.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
diff -r 3ae3ae4c878f -r a0a79976ffeb drivers/xen/blktap/blktap.c
--- a/drivers/xen/blktap/blktap.c Mon Nov 16 13:38:00 2015 +0100
+++ b/drivers/xen/blktap/blktap.c Mon Nov 16 13:40:35 2015 +0100
@@ -102,10 +102,10 @@ typedef struct domid_translate_ext {
typedef struct tap_blkif {
struct mm_struct *mm; /*User address space */
unsigned long rings_vstart; /*Kernel memory mapping */
+ unsigned long rings_total; /*Kernel memory mapping size */
unsigned long user_vstart; /*User memory mapping */
unsigned long dev_inuse; /*One process opens device at a time. */
unsigned long dev_pending; /*In process of being opened */
- unsigned long ring_ok; /*make this ring->state */
blkif_front_ring_t ufe_ring; /*Rings up to user space. */
wait_queue_head_t wait; /*for poll */
unsigned long mode; /*current switching mode */
@@ -397,12 +397,22 @@ static void blktap_vma_open(struct vm_ar
*/
static void blktap_vma_close(struct vm_area_struct *vma)
{
+ tap_blkif_t *info;
struct vm_area_struct *next = vma->vm_next;
+ if (vma->vm_file == NULL)
+ return;
+
+ info = vma->vm_file->private_data;
+ info->rings_total -= vma->vm_end - vma->vm_start;
+ if (info->rings_total == 0) {
+ info->mm = NULL;
+ return;
+ }
+
if (next == NULL ||
vma->vm_ops != next->vm_ops ||
vma->vm_end != next->vm_start ||
- vma->vm_file == NULL ||
vma->vm_file != next->vm_file)
return;
@@ -537,7 +547,6 @@ void signal_tapdisk(int idx)
{
tap_blkif_t *info;
struct task_struct *ptask;
- struct mm_struct *mm;
/*
* if the userland tools set things up wrong, this could be negative;
@@ -556,10 +565,7 @@ void signal_tapdisk(int idx)
info->status = CLEANSHUTDOWN;
}
info->blkif = NULL;
-
- mm = xchg(&info->mm, NULL);
- if (mm)
- mmput(mm);
+ info->mm = NULL;
}
static int blktap_open(struct inode *inode, struct file *filp)
@@ -629,19 +635,16 @@ static int blktap_open(struct inode *ino
static int blktap_release(struct inode *inode, struct file *filp)
{
tap_blkif_t *info = filp->private_data;
- struct mm_struct *mm;
/* check for control device */
if (!info)
return 0;
- info->ring_ok = 0;
+ info->mm = NULL;
smp_wmb();
info->rings_vstart = 0;
+ info->rings_total = 0;
- mm = xchg(&info->mm, NULL);
- if (mm)
- mmput(mm);
kfree(info->foreign_maps->map);
kfree(info->foreign_maps);
info->foreign_maps = NULL;
@@ -700,7 +703,7 @@ static int blktap_mmap(struct file *filp
return -ENOMEM;
}
- if (info->rings_vstart) {
+ if (info->rings_total) {
WPRINTK("mmap already called on filp %p (minor %d)\n",
filp, info->minor);
return -EPERM;
@@ -718,6 +721,7 @@ static int blktap_mmap(struct file *filp
size >>= PAGE_SHIFT;
info->rings_vstart = vma->vm_start;
+ info->rings_total = vma->vm_end - vma->vm_start;
info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
/* Map the ring pages to the start of the region and reserve it. */
@@ -754,15 +758,15 @@ static int blktap_mmap(struct file *filp
vma->vm_mm->context.has_foreign_mappings = 1;
#endif
- info->mm = get_task_mm(current);
smp_wmb();
- info->ring_ok = 1;
+ info->mm = vma->vm_mm;
return 0;
fail:
/* Clear any active mappings. */
zap_page_range(vma, vma->vm_start,
vma->vm_end - vma->vm_start, NULL);
info->rings_vstart = 0;
+ info->rings_total = 0;
return -ENOMEM;
}
@@ -1057,6 +1061,7 @@ static void fast_flush_area(pending_req_
unsigned long uvaddr;
struct mm_struct *mm = info->mm;
+ smp_rmb();
if (mm != NULL)
down_read(&mm->mmap_sem);
@@ -1187,13 +1192,6 @@ int tap_blkif_schedule(void *arg)
info = tapfds[blkif->dev_num];
blkif_put(blkif);
- if (info) {
- struct mm_struct *mm = xchg(&info->mm, NULL);
-
- if (mm)
- mmput(mm);
- }
-
return 0;
}
@@ -1474,11 +1472,12 @@ static void dispatch_rw_block_io(blkif_t
}
/* Make sure userspace is ready. */
- if (!info->ring_ok) {
+ mm = info->mm;
+ smp_rmb();
+ if (!mm) {
WPRINTK("ring not ready for requests!\n");
goto fail_response;
}
- smp_rmb();
if (RING_FULL(&info->ufe_ring)) {
WPRINTK("fe_ring is full, "
@@ -1496,7 +1495,6 @@ static void dispatch_rw_block_io(blkif_t
if (req->operation == BLKIF_OP_WRITE)
flags |= GNTMAP_readonly;
op = 0;
- mm = info->mm;
if (!xen_feature(XENFEAT_auto_translated_physmap))
down_read(&mm->mmap_sem);
for (i = 0; i < nseg; i++) {
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |