[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/5] blktap: Make VMAs non-foreign and bounce buffered.



Drop zero-copy I/O. Removes the entire grant-mapping mechanism from
blktap, instead bouncing I/O between a pool of dom0 memory pages and the
request SGs. Essentially renders blktap free of any residual dependency
on Xen whatsoever.

Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
 drivers/xen/blktap/blktap.h  |   43 ++--
 drivers/xen/blktap/control.c |    8 +-
 drivers/xen/blktap/device.c  |  564 ++++++------------------------------------
 drivers/xen/blktap/request.c |   20 ++-
 drivers/xen/blktap/ring.c    |  319 +++++++++++++------------
 5 files changed, 285 insertions(+), 669 deletions(-)

diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
index ad79c15..fe63fc9 100644
--- a/drivers/xen/blktap/blktap.h
+++ b/drivers/xen/blktap/blktap.h
@@ -7,7 +7,6 @@
 #include <linux/init.h>
 #include <linux/scatterlist.h>
 #include <xen/blkif.h>
-#include <xen/grant_table.h>
 
 extern int blktap_debug_level;
 extern int blktap_ring_major;
@@ -27,7 +26,6 @@ extern int blktap_device_major;
 
 #define MAX_BLKTAP_DEVICE            1024
 
-#define BLKTAP_CONTROL               1
 #define BLKTAP_DEVICE                4
 #define BLKTAP_DEVICE_CLOSED         5
 #define BLKTAP_SHUTDOWN_REQUESTED    8
@@ -94,11 +92,13 @@ struct blktap_ring {
        struct task_struct            *task;
 
        struct vm_area_struct         *vma;
-       struct blkif_front_ring             ring;
-       struct vm_foreign_map          foreign_map;
+       struct blkif_front_ring        ring;
        unsigned long                  ring_vstart;
        unsigned long                  user_vstart;
 
+       int                            n_pending;
+       struct blktap_request         *pending[MAX_PENDING_REQS];
+
        wait_queue_head_t              poll_wait;
 
        dev_t                          devno;
@@ -123,19 +123,21 @@ struct blktap_statistics {
 struct blktap_request {
        struct blktap                 *tap;
        struct request                *rq;
-       uint16_t                       usr_idx;
-
-       uint8_t                        status;
-       atomic_t                       pendcnt;
-       unsigned short                 operation;
+       int                            usr_idx;
 
+       int                            operation;
        struct timeval                 time;
-       struct grant_handle_pair       handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
+       struct scatterlist             sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page                   *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int                            nr_pages;
 };
 
+#define blktap_for_each_sg(_sg, _req, _i)      \
+       for (_sg = (_req)->sg_table, _i = 0;    \
+            _i < (_req)->nr_pages;             \
+            (_sg)++, (_i)++)
+
 struct blktap {
        int                            minor;
        unsigned long                  dev_inuse;
@@ -144,10 +146,6 @@ struct blktap {
        struct blktap_device           device;
        struct blktap_page_pool       *pool;
 
-       int                            pending_cnt;
-       struct blktap_request         *pending_requests[MAX_PENDING_REQS];
-       struct scatterlist             sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-
        wait_queue_head_t              remove_wait;
        struct work_struct             remove_work;
        char                           name[BLKTAP2_MAX_MESSAGE_LEN];
@@ -174,6 +172,13 @@ void blktap_ring_exit(void);
 size_t blktap_ring_debug(struct blktap *, char *, size_t);
 int blktap_ring_create(struct blktap *);
 int blktap_ring_destroy(struct blktap *);
+struct blktap_request *blktap_ring_make_request(struct blktap *);
+void blktap_ring_free_request(struct blktap *,struct blktap_request *);
+void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
+int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int);
+int blktap_ring_map_request(struct blktap *, struct blktap_request *);
+void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
+void blktap_ring_set_message(struct blktap *, int);
 void blktap_ring_kick_user(struct blktap *);
 
 int blktap_sysfs_init(void);
@@ -187,7 +192,7 @@ size_t blktap_device_debug(struct blktap *, char *, size_t);
 int blktap_device_create(struct blktap *, struct blktap_params *);
 int blktap_device_destroy(struct blktap *);
 void blktap_device_destroy_sync(struct blktap *);
-int blktap_device_run_queue(struct blktap *);
+void blktap_device_run_queue(struct blktap *);
 void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
 
 int blktap_page_pool_init(struct kobject *);
 @@ -200,13 +205,5 @@ int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
 void blktap_request_free(struct blktap *, struct blktap_request *);
 void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
 
-static inline unsigned long
-request_to_kaddr(struct blktap_request *req, int seg)
-{
-       return (unsigned long)page_address(req->pages[seg]);
-}
-
-#define request_to_page(_request, _seg) ((_request)->pages[_seg])
-
 
 #endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
index 8652e07..f339bba 100644
--- a/drivers/xen/blktap/control.c
+++ b/drivers/xen/blktap/control.c
@@ -18,13 +18,10 @@ blktap_control_get_minor(void)
        int minor;
        struct blktap *tap;
 
-       tap = kmalloc(sizeof(*tap), GFP_KERNEL);
+       tap = kzalloc(sizeof(*tap), GFP_KERNEL);
        if (unlikely(!tap))
                return NULL;
 
-       memset(tap, 0, sizeof(*tap));
-       sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
        mutex_lock(&blktap_lock);
 
        for (minor = 0; minor < blktap_max_minor; minor++)
@@ -290,9 +287,6 @@ blktap_init(void)
 {
        int err;
 
-       if (!xen_pv_domain())
-               return -ENODEV;
-
        err = blktap_device_init();
        if (err)
                goto fail;
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
index ed95548..02e1fc8 100644
--- a/drivers/xen/blktap/device.c
+++ b/drivers/xen/blktap/device.c
@@ -2,27 +2,11 @@
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
 #include <linux/hdreg.h>
-#include <linux/module.h>
-#include <asm/tlbflush.h>
-
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 
-#include <xen/xenbus.h>
-#include <xen/interface/io/blkif.h>
-
-#include <asm/xen/page.h>
-#include <asm/xen/hypercall.h>
-
 #include "blktap.h"
 
-#include "../blkback/blkback-pagemap.h"
-
-struct blktap_grant_table {
-       int cnt;
-       struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
-};
-
 int blktap_device_major;
 
 #define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
 @@ -119,526 +103,136 @@ static struct block_device_operations blktap_device_file_operations = {
        .getgeo    = blktap_device_getgeo
 };
 
-static int
-blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
-                   unsigned long addr, void *data)
-{
-       pte_t *pte = (pte_t *)data;
-
-       BTDBG("ptep %p -> %012llx\n", ptep, pte_val(*pte));
-       set_pte(ptep, *pte);
-       return 0;
-}
-
-static int
-blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
-{
-       return apply_to_page_range(mm, address,
-                                  PAGE_SIZE, blktap_map_uaddr_fn, &pte);
-}
-
-static int
-blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
-                    unsigned long addr, void *data)
-{
-       struct mm_struct *mm = (struct mm_struct *)data;
-
-       BTDBG("ptep %p\n", ptep);
-       pte_clear(mm, addr, ptep);
-       return 0;
-}
-
-static int
-blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
-{
-       return apply_to_page_range(mm, address,
-                                  PAGE_SIZE, blktap_umap_uaddr_fn, mm);
-}
-
-static inline void
-flush_tlb_kernel_page(unsigned long kvaddr)
-{
-       flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
-}
-
-static void
-blktap_device_end_dequeued_request(struct blktap_device *dev,
-                                  struct request *req, int error)
-{
-       unsigned long flags;
-       int ret;
-
-       //spin_lock_irq(&dev->lock);
-       spin_lock_irqsave(dev->gd->queue->queue_lock, flags);
-       ret = __blk_end_request(req, error, blk_rq_bytes(req));
-       spin_unlock_irqrestore(dev->gd->queue->queue_lock, flags);
-       //spin_unlock_irq(&dev->lock);
-
-       BUG_ON(ret);
-}
-
-static void
-blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
-{
-       uint64_t ptep;
-       int ret, usr_idx;
-       unsigned int i, cnt;
-       struct page **map, *page;
-       struct blktap_ring *ring;
-       struct grant_handle_pair *khandle;
-       unsigned long kvaddr, uvaddr, offset;
-       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
-
-       cnt     = 0;
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       map     = ring->foreign_map.map;
-
-       if (!ring->vma)
-               return;
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               zap_page_range(ring->vma, 
-                              MMAP_VADDR(ring->user_vstart, usr_idx, 0),
-                              request->nr_pages << PAGE_SHIFT, NULL);
-
-       for (i = 0; i < request->nr_pages; i++) {
-               kvaddr = request_to_kaddr(request, i);
-               uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
-
-               khandle = request->handles + i;
-
-               if (khandle->kernel != INVALID_GRANT_HANDLE) {
-                       gnttab_set_unmap_op(&unmap[cnt], kvaddr,
-                                           GNTMAP_host_map, khandle->kernel);
-                       cnt++;
-                       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
-                                           INVALID_P2M_ENTRY);
-               }
-
-               if (khandle->user != INVALID_GRANT_HANDLE) {
-                       BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-                       if (create_lookup_pte_addr(ring->vma->vm_mm,
-                                                  uvaddr, &ptep) != 0) {
-                               BTERR("Couldn't get a pte addr!\n");
-                               return;
-                       }
-
-                       gnttab_set_unmap_op(&unmap[cnt], ptep,
-                                           GNTMAP_host_map
-                                           | GNTMAP_application_map
-                                           | GNTMAP_contains_pte,
-                                           khandle->user);
-                       cnt++;
-               }
-
-               offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
-
-               BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
-                     "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
-                     "0x%08lx, handle: %u\n", offset, map[offset], request,
-                     usr_idx, i, kvaddr, khandle->kernel, uvaddr,
-                     khandle->user);
-
-               page = map[offset];
-               if (page && blkback_pagemap_contains_page(page))
-                       set_page_private(page, 0);
-
-               map[offset] = NULL;
-
-               khandle->kernel = INVALID_GRANT_HANDLE;
-               khandle->user   = INVALID_GRANT_HANDLE;
-       }
-
-       if (cnt) {
-               ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
-                                               unmap, cnt);
-               BUG_ON(ret);
-       }
-
-       if (!xen_feature(XENFEAT_auto_translated_physmap))
-               zap_page_range(ring->vma, 
-                              MMAP_VADDR(ring->user_vstart, usr_idx, 0), 
-                              request->nr_pages << PAGE_SHIFT, NULL);
-}
-
-static void
-blktap_unmap(struct blktap *tap, struct blktap_request *request)
-{
-       int i, usr_idx;
-       unsigned long kvaddr;
-
-       usr_idx = request->usr_idx;
-
-       for (i = 0; i < request->nr_pages; i++) {
-               kvaddr = request_to_kaddr(request, i);
-               BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
-                     "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
-                     kvaddr, request->handles[i].kernel,
-                     MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
-                     request->handles[i].user);
-
-               if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
-                       blktap_umap_uaddr(current->mm, kvaddr);
-                       flush_tlb_kernel_page(kvaddr);
-                       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
-                                           INVALID_P2M_ENTRY);
-               }
-       }
-
-       blktap_device_fast_flush(tap, request);
-}
-
 void
 blktap_device_end_request(struct blktap *tap,
                          struct blktap_request *request,
                          int error)
 {
        struct blktap_device *tapdev = &tap->device;
+       struct request *rq = request->rq;
+
+       blktap_ring_unmap_request(tap, request);
+
+       blktap_ring_free_request(tap, request);
 
-       blktap_unmap(tap, request);
+       dev_dbg(&tapdev->gd->dev,
+               "end_request: op=%d error=%d bytes=%d\n",
+               rq_data_dir(rq), error, blk_rq_bytes(rq));
 
        spin_lock_irq(&tapdev->lock);
        end_request(request->rq, !error);
        spin_unlock_irq(&tapdev->lock);
-
-       blktap_request_free(tap, request);
 }
 
-static int
-blktap_prep_foreign(struct blktap *tap,
-                   struct blktap_request *request,
-                   struct blkif_request *blkif_req,
-                   unsigned int seg, struct page *page,
-                   struct blktap_grant_table *table)
-{
-       uint64_t ptep;
-       uint32_t flags;
-#ifdef BLKTAP_CHAINED_BLKTAP
-       struct page *tap_page;
-#endif
-       struct blktap_ring *ring;
-       struct blkback_pagemap map;
-       unsigned long uvaddr, kvaddr;
-
-       ring = &tap->ring;
-       map  = blkback_pagemap_read(page);
-       blkif_req->seg[seg].gref = map.gref;
-
-       uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
-       kvaddr = request_to_kaddr(request, seg);
-       flags  = GNTMAP_host_map |
-               (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);
-
-       gnttab_set_map_op(&table->grants[table->cnt],
-                         kvaddr, flags, map.gref, map.domid);
-       table->cnt++;
-
-
-#ifdef BLKTAP_CHAINED_BLKTAP
-       /* enable chained tap devices */
-       tap_page = request_to_page(request, seg);
-       set_page_private(tap_page, page_private(page));
-       SetPageBlkback(tap_page);
-#endif
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               return 0;
-
-       if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
-               BTERR("couldn't get a pte addr!\n");
-               return -1;
-       }
-
-       flags |= GNTMAP_application_map | GNTMAP_contains_pte;
-       gnttab_set_map_op(&table->grants[table->cnt],
-                         ptep, flags, map.gref, map.domid);
-       table->cnt++;
-
-       return 0;
-}
-
-static int
-blktap_map_foreign(struct blktap *tap,
-                  struct blktap_request *request,
-                  struct blkif_request *blkif_req,
-                  struct blktap_grant_table *table)
+int
+blktap_device_make_request(struct blktap *tap, struct request *rq)
 {
-       struct page *page;
-       int i, grant, err, usr_idx;
-       struct blktap_ring *ring;
-       unsigned long uvaddr, foreign_mfn;
-
-       if (!table->cnt)
-               return 0;
+       struct blktap_device *tapdev = &tap->device;
+       struct blktap_request *request;
+       int write, nsegs;
+       int err;
 
-       err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-                                       table->grants, table->cnt);
-       BUG_ON(err);
+       request = blktap_ring_make_request(tap);
+       if (IS_ERR(request)) {
+               err = PTR_ERR(request);
+               request = NULL;
 
-       grant   = 0;
-       usr_idx = request->usr_idx;
-       ring    = &tap->ring;
+               if (err == -ENOSPC || err == -ENOMEM)
+                       goto stop;
 
-       for (i = 0; i < request->nr_pages; i++) {
-               if (!blkif_req->seg[i].gref)
-                       continue;
+               goto fail;
+       }
 
-               uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
+       write = rq_data_dir(rq) == WRITE;
+       nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);
 
-               if (unlikely(table->grants[grant].status)) {
-                       BTERR("invalid kernel buffer: could not remap it\n");
-                       err |= 1;
-                       table->grants[grant].handle = INVALID_GRANT_HANDLE;
-               }
+       dev_dbg(&tapdev->gd->dev,
+               "make_request: op=%c bytes=%d nsegs=%d\n",
+               write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);
 
-               request->handles[i].kernel = table->grants[grant].handle;
-               foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
-               grant++;
+       request->rq = rq;
+       request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
 
-               if (xen_feature(XENFEAT_auto_translated_physmap))
-                       goto done;
-
-               if (unlikely(table->grants[grant].status)) {
-                       BTERR("invalid user buffer: could not remap it\n");
-                       err |= 1;
-                       table->grants[grant].handle = INVALID_GRANT_HANDLE;
-               }
+       err = blktap_request_get_pages(tap, request, nsegs);
+       if (err)
+               goto stop;
 
-               request->handles[i].user = table->grants[grant].handle;
-               grant++;
+       err = blktap_ring_map_request(tap, request);
+       if (err)
+               goto fail;
 
-       done:
-               if (err)
-                       continue;
+       blktap_ring_submit_request(tap, request);
 
-               page = request_to_page(request, i);
+       return 0;
 
-               if (!xen_feature(XENFEAT_auto_translated_physmap))
-                       set_phys_to_machine(page_to_pfn(page),
-                                           FOREIGN_FRAME(foreign_mfn));
-               else if (vm_insert_page(ring->vma, uvaddr, page))
-                       err |= 1;
+stop:
+       tap->stats.st_oo_req++;
+       err = -EBUSY;
 
-               BTDBG("pending_req: %p, seg: %d, page: %p, "
-                     "kvaddr: 0x%p, khandle: %u, uvaddr: 0x%08lx, "
-                     "uhandle: %u\n", request, i, page,
-                     pfn_to_kaddr(page_to_pfn(page)),
-                     request->handles[i].kernel,
-                     uvaddr, request->handles[i].user);
-       }
+_out:
+       if (request)
+               blktap_ring_free_request(tap, request);
 
        return err;
-}
-
-static void
-blktap_map(struct blktap *tap,
-          struct blktap_request *request,
-          unsigned int seg, struct page *page)
-{
-       pte_t pte;
-       int usr_idx;
-       struct blktap_ring *ring;
-       unsigned long uvaddr, kvaddr;
-
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       uvaddr  = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
-       kvaddr  = request_to_kaddr(request, seg);
-
-       pte = mk_pte(page, ring->vma->vm_page_prot);
-       blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
-       flush_tlb_page(ring->vma, uvaddr);
-       blktap_map_uaddr(ring->vma->vm_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
-       flush_tlb_kernel_page(kvaddr);
-
-       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
-       request->handles[seg].kernel = INVALID_GRANT_HANDLE;
-       request->handles[seg].user   = INVALID_GRANT_HANDLE;
-
-       BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
-             "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
-             uvaddr);
-}
-
-static int
-blktap_device_process_request(struct blktap *tap,
-                             struct blktap_request *request,
-                             struct request *req)
-{
-       struct page *page;
-       int i, usr_idx, err;
-       struct blktap_ring *ring;
-       struct scatterlist *sg;
-       struct blktap_grant_table table;
-       unsigned int fsect, lsect, nr_sects;
-       unsigned long offset, uvaddr;
-       struct blkif_request blkif_req, *target;
-
-       err = -1;
-       memset(&table, 0, sizeof(table));
-
-       ring    = &tap->ring;
-       usr_idx = request->usr_idx;
-       blkif_req.id = usr_idx;
-       blkif_req.sector_number = (blkif_sector_t)req->sector;
-       blkif_req.handle = 0;
-       blkif_req.operation = rq_data_dir(req) ?
-               BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-       request->rq        = req;
-       request->operation = blkif_req.operation;
-       request->status    = BLKTAP_REQUEST_PENDING;
-       do_gettimeofday(&request->time);
-
-       nr_sects = 0;
-       request->nr_pages = 0;
-       blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg);
-       BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       for (i = 0; i < blkif_req.nr_segments; ++i) {
-                       sg = tap->sg + i;
-                       fsect = sg->offset >> 9;
-                       lsect = fsect + (sg->length >> 9) - 1;
-                       nr_sects += sg->length >> 9;
-
-                       blkif_req.seg[i] =
-                               (struct blkif_request_segment) {
-                               .gref       = 0,
-                               .first_sect = fsect,
-                               .last_sect  = lsect };
-
-                       if (blkback_pagemap_contains_page(sg_page(sg))) {
-                               /* foreign page -- use xen */
-                               if (blktap_prep_foreign(tap,
-                                                       request,
-                                                       &blkif_req,
-                                                       i,
-                                                       sg_page(sg),
-                                                       &table))
-                                       goto out;
-                       } else {
-                               /* do it the old fashioned way */
-                               blktap_map(tap,
-                                          request,
-                                          i,
-                                          sg_page(sg));
-                       }
-
-                       uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
-                       offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
-                       page   = request_to_page(request, i);
-                       ring->foreign_map.map[offset] = page;
-                       SetPageReserved(page);
-
-                       BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
-                             uvaddr, page, page_to_pfn(page));
-                       BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
-                             "page: %p, kvaddr: %p, uvaddr: 0x%08lx\n",
-                             offset, request, i,
-                             page, pfn_to_kaddr(page_to_pfn(page)), uvaddr);
-
-                       request->nr_pages++;
-       }
-
-       if (blktap_map_foreign(tap, request, &blkif_req, &table))
-               goto out;
-
-       /* Finally, write the request message to the user ring. */
-       target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
-       memcpy(target, &blkif_req, sizeof(blkif_req));
-       target->id = request->usr_idx;
-       wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
-       ring->ring.req_prod_pvt++;
-
-       if (rq_data_dir(req)) {
-               tap->stats.st_wr_sect += nr_sects;
-               tap->stats.st_wr_req++;
-       } else {
-               tap->stats.st_rd_sect += nr_sects;
-               tap->stats.st_rd_req++;
-       }
-
-       err = 0;
-
-out:
-       if (err)
-               blktap_device_fast_flush(tap, request);
-       return err;
+fail:
+       if (printk_ratelimit())
+               dev_warn(&tapdev->gd->dev,
+                        "make request: %d, failing\n", err);
+       goto _out;
 }
 
 /*
  * called from tapdisk context
  */
-int
+void
 blktap_device_run_queue(struct blktap *tap)
 {
-       int err, rv;
-       struct request_queue *rq;
-       struct request *req;
-       struct blktap_ring *ring;
-       struct blktap_device *dev;
-       struct blktap_request *request;
-
-       ring   = &tap->ring;
-       dev    = &tap->device;
-       rq     = dev->gd->queue;
+       struct blktap_device *tapdev = &tap->device;
+       struct request_queue *q;
+       struct request *rq;
+       int err;
 
-       BTDBG("running queue for %d\n", tap->minor);
-       spin_lock_irq(&dev->lock);
-       queue_flag_clear(QUEUE_FLAG_STOPPED, rq);
+       if (!tapdev->gd)
+               return;
 
-       while ((req = elv_next_request(rq)) != NULL) {
-               if (!blk_fs_request(req)) {
-                       end_request(req, 0);
-                       continue;
-               }
+       q = tapdev->gd->queue;
 
-               if (blk_empty_barrier_rq(req)) {
-                       end_request(req, 1);
-                       continue;
-               }
+       spin_lock_irq(&tapdev->lock);
+       queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 
-               if (RING_FULL(&ring->ring)) {
-               wait:
-                       /* Avoid pointless unplugs. */
-                       blk_stop_queue(rq);
+       do {
+               rq = elv_next_request(q);
+               if (!rq)
                        break;
+
+               if (!blk_fs_request(rq)) {
+                       end_queued_request(rq, 0);
+                       continue;
                }
 
-               request = blktap_request_alloc(tap);
-               if (!request) {
-                       tap->stats.st_oo_req++;
-                       goto wait;
+               if (blk_empty_barrier(rq)) {
+                       end_queued_request(rq, 1);
+                       continue;
                }
 
-               BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
-                     "buffer:%p [%s], pending: %p\n", req, tap->minor,
-                     req->cmd, (unsigned long long)req->sector,
-                     req->current_nr_sectors,
-                     req->nr_sectors, req->buffer,
-                     rq_data_dir(req) ? "write" : "read", request);
+               spin_unlock_irq(&tapdev->lock);
 
-               blkdev_dequeue_request(req);
+               err = blktap_device_make_request(tap, rq);
 
-               spin_unlock_irq(&dev->lock);
+               spin_lock_irq(&tapdev->lock);
 
-               err = blktap_device_process_request(tap, request, req);
-               if (err) {
-                       blktap_device_end_dequeued_request(dev, req, -EIO);
-                       blktap_request_free(tap, request);
+               if (err == -EBUSY) {
+                       blk_stop_queue(q);
+                       break;
                }
 
-               spin_lock_irq(&dev->lock);
-       }
-
-       spin_unlock_irq(&dev->lock);
-
-       rv = ring->ring.req_prod_pvt -
-               ring->ring.sring->req_prod;
+               blkdev_dequeue_request(rq);
 
-       RING_PUSH_REQUESTS(&ring->ring);
+               if (unlikely(err))
+                       end_request(rq, 0);
+       } while (1);
 
-       return rv;
+       spin_unlock_irq(&tapdev->lock);
 }
 
 static void
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
index ca12442..9bef48c 100644
--- a/drivers/xen/blktap/request.c
+++ b/drivers/xen/blktap/request.c
@@ -3,7 +3,6 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/device.h>
-#include <xen/balloon.h>
 
 #include "blktap.h"
 
@@ -129,6 +128,25 @@ blktap_request_free(struct blktap *tap,
        __page_pool_wake(tap->pool);
 }
 
+void
+blktap_request_bounce(struct blktap *tap,
+                     struct blktap_request *request,
+                     int seg, int write)
+{
+       struct scatterlist *sg = &request->sg_table[seg];
+       void *s, *p;
+
+       BUG_ON(seg >= request->nr_pages);
+
+       s = sg_virt(sg);
+       p = page_address(request->pages[seg]) + sg->offset;
+
+       if (write)
+               memcpy(p, s, sg->length);
+       else
+               memcpy(s, p, sg->length);
+}
+
 static void
 blktap_request_ctor(void *obj)
 {
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
index a72a1b3..38896e7 100644
--- a/drivers/xen/blktap/ring.c
+++ b/drivers/xen/blktap/ring.c
@@ -1,30 +1,15 @@
+
 #include <linux/device.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/poll.h>
-
-#include <asm/xen/page.h>
-#include <asm/xen/hypercall.h>
+#include <linux/blkdev.h>
 
 #include "blktap.h"
 
-#ifdef CONFIG_XEN_BLKDEV_BACKEND
-#include "../blkback/blkback-pagemap.h"
-#else
-#define blkback_pagemap_contains_page(page) 0
-#endif
-
 int blktap_ring_major;
 static struct cdev blktap_ring_cdev;
 
-static inline struct blktap *
-vma_to_blktap(struct vm_area_struct *vma)
-{
-       struct vm_foreign_map *m = vma->vm_private_data;
-       struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
-       return container_of(r, struct blktap, ring);
-}
-
  /* 
   * BLKTAP - immediately before the mmap area,
   * we have a bunch of pages reserved for shared memory rings.
@@ -47,7 +32,7 @@ blktap_ring_read_response(struct blktap *tap,
                goto invalid;
        }
 
-       request = tap->pending_requests[usr_idx];
+       request = ring->pending[usr_idx];
 
        if (!request) {
                err = -ESRCH;
 @@ -110,90 +95,15 @@ static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-static pte_t
-blktap_ring_clear_pte(struct vm_area_struct *vma,
-                     unsigned long uvaddr,
-                     pte_t *ptep, int is_fullmm)
-{
-       pte_t copy;
-       struct blktap *tap;
-       unsigned long kvaddr;
-       struct page **map, *page;
-       struct blktap_ring *ring;
-       struct blktap_request *request;
-       struct grant_handle_pair *khandle;
-       struct gnttab_unmap_grant_ref unmap[2];
-       int offset, seg, usr_idx, count = 0;
-
-       tap  = vma_to_blktap(vma);
-       ring = &tap->ring;
-       map  = ring->foreign_map.map;
-       BUG_ON(!map);   /* TODO Should this be changed to if statement? */
-
-       /*
-        * Zap entry if the address is before the start of the grant
-        * mapped region.
-        */
-       if (uvaddr < ring->user_vstart)
-               return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
-                                              ptep, is_fullmm);
-
-       offset  = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
-       usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       seg     = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-       offset  = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
-       page    = map[offset];
-       if (page && blkback_pagemap_contains_page(page))
-               set_page_private(page, 0);
-       map[offset] = NULL;
-
-       request = tap->pending_requests[usr_idx];
-       kvaddr  = request_to_kaddr(request, seg);
-       khandle = request->handles + seg;
-
-       if (khandle->kernel != INVALID_GRANT_HANDLE) {
-               gnttab_set_unmap_op(&unmap[count], kvaddr, 
-                                   GNTMAP_host_map, khandle->kernel);
-               count++;
-
-               set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, 
-                                   INVALID_P2M_ENTRY);
-       }
-
-       if (khandle->user != INVALID_GRANT_HANDLE) {
-               BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-
-               copy = *ptep;
-               gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep).maddr,
-                                   GNTMAP_host_map
-                                   | GNTMAP_application_map
-                                   | GNTMAP_contains_pte,
-                                   khandle->user);
-               count++;
-       } else
-               copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
-                                              is_fullmm);
-
-       if (count)
-               if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
-                                             unmap, count))
-                       BUG();
-
-       khandle->kernel = INVALID_GRANT_HANDLE;
-       khandle->user   = INVALID_GRANT_HANDLE;
-
-       return copy;
-}
-
 static void
 blktap_ring_fail_pending(struct blktap *tap)
 {
+       struct blktap_ring *ring = &tap->ring;
        struct blktap_request *request;
        int usr_idx;
 
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
-               request = tap->pending_requests[usr_idx];
+               request = ring->pending[usr_idx];
                if (!request)
                        continue;
 
@@ -204,15 +114,12 @@ blktap_ring_fail_pending(struct blktap *tap)
 static void
 blktap_ring_vm_close(struct vm_area_struct *vma)
 {
-       struct blktap *tap = vma_to_blktap(vma);
+       struct blktap *tap = vma->vm_private_data;
        struct blktap_ring *ring = &tap->ring;
        struct page *page = virt_to_page(ring->ring.sring);
 
        blktap_ring_fail_pending(tap);
 
-       kfree(ring->foreign_map.map);
-       ring->foreign_map.map = NULL;
-
        zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
        ClearPageReserved(page);
        __free_page(page);
@@ -226,9 +133,154 @@ blktap_ring_vm_close(struct vm_area_struct *vma)
 static struct vm_operations_struct blktap_ring_vm_operations = {
        .close    = blktap_ring_vm_close,
        .fault    = blktap_ring_fault,
-       .zap_pte  = blktap_ring_clear_pte,
 };
 
+int
+blktap_ring_map_segment(struct blktap *tap,
+                       struct blktap_request *request,
+                       int seg)
+{
+       struct blktap_ring *ring = &tap->ring;
+       unsigned long uaddr;
+
+       uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
+       return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
+}
+
+int
+blktap_ring_map_request(struct blktap *tap,
+                       struct blktap_request *request)
+{
+       int seg, err = 0;
+       int write;
+
+       write = request->operation == BLKIF_OP_WRITE;
+
+       for (seg = 0; seg < request->nr_pages; seg++) {
+               if (write)
+                       blktap_request_bounce(tap, request, seg, write);
+
+               err = blktap_ring_map_segment(tap, request, seg);
+               if (err)
+                       break;
+       }
+
+       if (err)
+               blktap_ring_unmap_request(tap, request);
+
+       return err;
+}
+
+void
+blktap_ring_unmap_request(struct blktap *tap,
+                         struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+       unsigned long uaddr;
+       unsigned size;
+       int seg, read;
+
+       uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
+       size  = request->nr_pages << PAGE_SHIFT;
+       read  = request->operation == BLKIF_OP_READ;
+
+       if (read)
+               for (seg = 0; seg < request->nr_pages; seg++)
+                       blktap_request_bounce(tap, request, seg, !read);
+
+       zap_page_range(ring->vma, uaddr, size, NULL);
+}
+
+void
+blktap_ring_free_request(struct blktap *tap,
+                        struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+
+       ring->pending[request->usr_idx] = NULL;
+       ring->n_pending--;
+
+       blktap_request_free(tap, request);
+}
+
+struct blktap_request*
+blktap_ring_make_request(struct blktap *tap)
+{
+       struct blktap_ring *ring = &tap->ring;
+       struct blktap_request *request;
+       int usr_idx;
+
+       if (RING_FULL(&ring->ring))
+               return ERR_PTR(-ENOSPC);
+
+       request = blktap_request_alloc(tap);
+       if (!request)
+               return ERR_PTR(-ENOMEM);
+
+       for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++)
+               if (!ring->pending[usr_idx])
+                       break;
+
+       BUG_ON(usr_idx >= BLK_RING_SIZE);
+
+       request->tap     = tap;
+       request->usr_idx = usr_idx;
+
+       ring->pending[usr_idx] = request;
+       ring->n_pending++;
+
+       return request;
+}
+
+void
+blktap_ring_submit_request(struct blktap *tap,
+                          struct blktap_request *request)
+{
+       struct blktap_ring *ring = &tap->ring;
+       struct blkif_request *breq;
+       struct scatterlist *sg;
+       int i, nsecs = 0;
+
+       dev_dbg(ring->dev,
+               "request %d [%p] submit\n", request->usr_idx, request);
+
+       breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
+
+       breq->id            = request->usr_idx;
+       breq->sector_number = request->rq->sector;
+       breq->handle        = 0;
+       breq->operation     = request->operation;
+       breq->nr_segments   = request->nr_pages;
+
+       blktap_for_each_sg(sg, request, i) {
+               struct blkif_request_segment *seg = &breq->seg[i];
+               int first, count;
+
+               count = sg->length >> 9;
+               first = sg->offset >> 9;
+
+               seg->first_sect = first;
+               seg->last_sect  = first + count - 1;
+
+               nsecs += count;
+       }
+
+       ring->ring.req_prod_pvt++;
+
+       do_gettimeofday(&request->time);
+
+
+       if (request->operation == BLKIF_OP_WRITE) {
+               tap->stats.st_wr_sect += nsecs;
+               tap->stats.st_wr_req++;
+       }
+
+       if (request->operation == BLKIF_OP_READ) {
+               tap->stats.st_rd_sect += nsecs;
+               tap->stats.st_rd_req++;
+       }
+}
+
 static int
 blktap_ring_open(struct inode *inode, struct file *filp)
 {
@@ -270,51 +322,21 @@ blktap_ring_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-/* Note on mmap:
- * We need to map pages to user space in a way that will allow the block
- * subsystem set up direct IO to them.  This couldn't be done before, because
- * there isn't really a sane way to translate a user virtual address down to a 
- * physical address when the page belongs to another domain.
- *
- * My first approach was to map the page in to kernel memory, add an entry
- * for it in the physical frame list (using alloc_lomem_region as in blkback)
- * and then attempt to map that page up to user space.  This is disallowed
- * by xen though, which realizes that we don't really own the machine frame
- * underlying the physical page.
- *
- * The new approach is to provide explicit support for this in xen linux.
- * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
- * mapped from other vms.  vma->vm_private_data is set up as a mapping 
- * from pages to actual page structs.  There is a new clause in get_user_pages
- * that does the right thing for this sort of mapping.
- */
 static int
 blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
 {
        struct blktap *tap = filp->private_data;
        struct blktap_ring *ring = &tap->ring;
        struct blkif_sring *sring;
-       struct page *page;
-       int size, err;
-       struct page **map;
-
-       map   = NULL;
-       sring = NULL;
+       struct page *page = NULL;
+       int err;
 
        if (ring->vma)
                return -EBUSY;
 
-       size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-       if (size != (MMAP_PAGES + RING_PAGES)) {
-               BTERR("you _must_ map exactly %lu pages!\n",
-                     MMAP_PAGES + RING_PAGES);
-               return -EAGAIN;
-       }
-
-       /* allocate the shared ring */
        page = alloc_page(GFP_KERNEL|__GFP_ZERO);
        if (!page)
-               goto fail;
+               return -ENOMEM;
 
        SetPageReserved(page);
 
@@ -329,22 +351,12 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
        ring->ring_vstart = vma->vm_start;
        ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
 
-       /* allocate the foreign map */
-       map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
-       if (!map)
-               goto fail;
+       vma->vm_private_data = tap;
 
-       /* Mark this VM as containing foreign pages, and set up mappings. */
-       ring->foreign_map.map = map;
-       vma->vm_private_data = &ring->foreign_map;
-       vma->vm_flags |= VM_FOREIGN;
        vma->vm_flags |= VM_DONTCOPY;
        vma->vm_flags |= VM_RESERVED;
-       vma->vm_ops = &blktap_ring_vm_operations;
 
-#ifdef CONFIG_X86
-       vma->vm_mm->context.has_foreign_mappings = 1;
-#endif
+       vma->vm_ops = &blktap_ring_vm_operations;
 
        ring->vma = vma;
        return 0;
@@ -356,10 +368,7 @@ fail:
                __free_page(page);
        }
 
-       if (map)
-               kfree(map);
-
-       return -ENOMEM;
+       return err;
 }
 
 static int
@@ -405,16 +414,19 @@ static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
 {
        struct blktap *tap = filp->private_data;
        struct blktap_ring *ring = &tap->ring;
-       int work = 0;
+       int work;
 
        poll_wait(filp, &tap->pool->wait, wait);
        poll_wait(filp, &ring->poll_wait, wait);
 
        down_read(&current->mm->mmap_sem);
        if (ring->vma && tap->device.gd)
-               work = blktap_device_run_queue(tap);
+               blktap_device_run_queue(tap);
        up_read(&current->mm->mmap_sem);
 
+       work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
+       RING_PUSH_REQUESTS(&ring->ring);
+
        if (work ||
            ring->ring.sring->private.tapif_user.msg ||
            test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
@@ -463,18 +475,19 @@ blktap_ring_create(struct blktap *tap)
 size_t
 blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
 {
+       struct blktap_ring *ring = &tap->ring;
        char *s = buf, *end = buf + size;
        int usr_idx;
 
        s += snprintf(s, end - s,
-                     "begin pending:%d\n", tap->pending_cnt);
+                     "begin pending:%d\n", ring->n_pending);
 
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
                struct blktap_request *request;
                struct timeval *time;
                int write;
 
-               request = tap->pending_requests[usr_idx];
+               request = ring->pending[usr_idx];
                if (!request)
                        continue;
 
-- 
1.7.0.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.