
[Xen-devel] [PATCH v2] xen: fix qdisk BLKIF_OP_DISCARD for 32/64 word size mix



In case the word sizes of the domU and the qemu process running the
qdisk backend differ, BLKIF_OP_DISCARD will not work reliably, as the
request structures in the ring have different layouts for different
word sizes.
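
(Illustration only, not part of the patch: a minimal standalone C
sketch of why the layouts diverge. The i386 ABI aligns uint64_t on
4 bytes while x86_64 aligns it on 8, so e.g. the request id lands at
different offsets. The struct and field names below are made up for
the example.)

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Mimic the i386 ABI: uint64_t gets only 4-byte alignment. */
    #pragma pack(push, 4)
    struct discard_req_32 {
        uint8_t  operation;     /* offset 0 */
        uint8_t  flag;          /* offset 1 */
        uint16_t handle;        /* offset 2 */
        uint64_t id;            /* offset 4 */
        uint64_t sector_number;
        uint64_t nr_sectors;
    };
    #pragma pack(pop)

    /* Natural x86_64 layout: uint64_t gets 8-byte alignment. */
    struct discard_req_64 {
        uint8_t  operation;     /* offset 0 */
        uint8_t  flag;          /* offset 1 */
        uint16_t handle;        /* offset 2, then 4 bytes of padding */
        uint64_t id;            /* offset 8 */
        uint64_t sector_number;
        uint64_t nr_sectors;
    };

    int main(void)
    {
        printf("id offset, 32-bit layout: %zu\n",
               offsetof(struct discard_req_32, id));   /* prints 4 */
        printf("id offset, 64-bit layout: %zu\n",
               offsetof(struct discard_req_64, id));   /* prints 8 */
        return 0;
    }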

Correct this by copying the request structure element by element in
the BLKIF_OP_DISCARD case, too, whenever the word sizes differ.
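
(Condensed from the blkif_get_x86_32_req() hunk below: instead of a
flat struct copy, the discard case now copies field by field, so each
field is read from its word-size-specific offset.)

    case BLKIF_OP_DISCARD:
        dst->u.discard.flag          = src->u.discard.flag;
        dst->u.discard.id            = src->u.discard.id;
        dst->u.discard.sector_number = src->u.discard.sector_number;
        dst->u.discard.nr_sectors    = src->u.discard.nr_sectors;
        break;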

The easiest way to achieve this is to resync hw/block/xen_blkif.h with
its original source from the Linux kernel.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
V2: resync with Linux kernel version of hw/block/xen_blkif.h as
    suggested by Paul Durrant
---
 hw/block/xen_blkif.h | 379 ++++++++++++++++++++++++++++++++++++++++-----------
 hw/block/xen_disk.c  |   1 +
 2 files changed, 304 insertions(+), 76 deletions(-)

diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h
index e3b133b..7b31d87 100644
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -1,3 +1,29 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #ifndef __XEN_BLKIF_H__
 #define __XEN_BLKIF_H__
 
@@ -5,115 +31,316 @@
 #include <xen/io/blkif.h>
 #include <xen/io/protocols.h>
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+/*
+ * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
+ * Number of Xen pages per segment.
+ */
+#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XC_PAGE_SIZE)
+
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+    (XC_PAGE_SIZE / sizeof(struct blkif_request_segment))
+#define SEGS_PER_INDIRECT_FRAME      \
+    (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
+
+#define MAX_INDIRECT_PAGES           \
+    ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1) /    \
+     SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
  * avoid using switch(protocol) in a number of places.  */
 struct blkif_common_request {
-       char dummy;
+    char dummy;
 };
 struct blkif_common_response {
-       char dummy;
+    char dummy;
 };
 
+struct blkif_x86_32_request_rw {
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_discard {
+    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+    blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    uint64_t       nr_sectors;
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_other {
+    uint8_t        _pad1;
+    blkif_vdev_t   _pad2;
+    uint64_t       id;           /* private guest value, echoed in resp  */
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_indirect {
+    uint8_t        indirect_op;
+    uint16_t       nr_segments;
+    uint64_t       id;
+    blkif_sector_t sector_number;
+    blkif_vdev_t   handle;
+    uint16_t       _pad1;
+    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+    /*
+     * The maximum number of indirect segments (and pages) that will
+     * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+     * is also exported to the guest (via xenstore
+     * feature-max-indirect-segments entry), so the frontend knows how
+     * many indirect segments the backend supports.
+     */
+    uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    union {
+        struct blkif_x86_32_request_rw rw;
+        struct blkif_x86_32_request_discard discard;
+        struct blkif_x86_32_request_other other;
+        struct blkif_x86_32_request_indirect indirect;
+    } u;
+} __attribute__((__packed__));
+
 /* i386 protocol version */
 #pragma pack(push, 4)
-struct blkif_x86_32_request {
-       uint8_t        operation;    /* BLKIF_OP_???                         */
-       uint8_t        nr_segments;  /* number of segments                   */
-       blkif_vdev_t   handle;       /* only for read/write requests         */
-       uint64_t       id;           /* private guest value, echoed in resp  */
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
 struct blkif_x86_32_response {
-       uint64_t        id;              /* copied from request */
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
 };
-typedef struct blkif_x86_32_request blkif_x86_32_request_t;
-typedef struct blkif_x86_32_response blkif_x86_32_response_t;
 #pragma pack(pop)
-
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
+    uint64_t       id;
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_discard {
+    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+    blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
+    uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
+    uint64_t       id;
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    uint64_t       nr_sectors;
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_other {
+    uint8_t        _pad1;
+    blkif_vdev_t   _pad2;
+    uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_indirect {
+    uint8_t        indirect_op;
+    uint16_t       nr_segments;
+    uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+    uint64_t       id;
+    blkif_sector_t sector_number;
+    blkif_vdev_t   handle;
+    uint16_t       _pad2;
+    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+    /*
+     * The maximum number of indirect segments (and pages) that will
+     * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+     * is also exported to the guest (via xenstore
+     * feature-max-indirect-segments entry), so the frontend knows how
+     * many indirect segments the backend supports.
+     */
+    uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
-       uint8_t        operation;    /* BLKIF_OP_???                         */
-       uint8_t        nr_segments;  /* number of segments                   */
-       blkif_vdev_t   handle;       /* only for read/write requests         */
-       uint64_t       __attribute__((__aligned__(8))) id;
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    union {
+        struct blkif_x86_64_request_rw rw;
+        struct blkif_x86_64_request_discard discard;
+        struct blkif_x86_64_request_other other;
+        struct blkif_x86_64_request_indirect indirect;
+    } u;
+} __attribute__((__packed__));
+
 struct blkif_x86_64_response {
-       uint64_t       __attribute__((__aligned__(8))) id;
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
+    uint64_t       __attribute__((__aligned__(8))) id;
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
 };
-typedef struct blkif_x86_64_request blkif_x86_64_request_t;
-typedef struct blkif_x86_64_response blkif_x86_64_response_t;
 
-DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
-DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
-DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
+          struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
+          struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
+          struct blkif_x86_64_response);
 
 union blkif_back_rings {
-       blkif_back_ring_t        native;
-       blkif_common_back_ring_t common;
-        blkif_x86_32_back_ring_t x86_32_part;
-        blkif_x86_64_back_ring_t x86_64_part;
+    struct blkif_back_ring        native;
+    struct blkif_common_back_ring common;
+    struct blkif_x86_32_back_ring x86_32_part;
+    struct blkif_x86_64_back_ring x86_64_part;
 };
 typedef union blkif_back_rings blkif_back_rings_t;
 
 enum blkif_protocol {
-       BLKIF_PROTOCOL_NATIVE = 1,
-       BLKIF_PROTOCOL_X86_32 = 2,
-       BLKIF_PROTOCOL_X86_64 = 3,
+    BLKIF_PROTOCOL_NATIVE = 1,
+    BLKIF_PROTOCOL_X86_32 = 2,
+    BLKIF_PROTOCOL_X86_64 = 3,
 };
 
-static inline void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
+struct blkif_request_rw {
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+#ifndef __i386__
+    uint32_t       _pad1;        /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} __attribute__((__packed__));
+
+struct blkif_request_other {
+    uint8_t      _pad1;
+    blkif_vdev_t _pad2;        /* only for read/write requests         */
+#ifndef __i386__
+    uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8*/
+#endif
+    uint64_t     id;           /* private guest value, echoed in resp  */
+} __attribute__((__packed__));
+
+struct blkif_request_u {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    union {
+        struct blkif_request_rw rw;
+        struct blkif_request_discard discard;
+        struct blkif_request_other other;
+        struct blkif_request_indirect indirect;
+    } u;
+} __attribute__((__packed__));
+
+static inline void blkif_get_x86_32_req(struct blkif_request *d,
+                                        struct blkif_x86_32_request *src)
 {
-       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+    int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
+    struct blkif_request_u *dst = (void *)d;
 
-       dst->operation = src->operation;
-       dst->nr_segments = src->nr_segments;
-       dst->handle = src->handle;
-       dst->id = src->id;
-       dst->sector_number = src->sector_number;
-       /* Prevent the compiler from using src->... instead. */
-       barrier();
-       if (dst->operation == BLKIF_OP_DISCARD) {
-               struct blkif_request_discard *s = (void *)src;
-               struct blkif_request_discard *d = (void *)dst;
-               d->nr_sectors = s->nr_sectors;
-               return;
-       }
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->seg[i] = src->seg[i];
+    dst->operation = atomic_read(&src->operation);
+    switch (dst->operation) {
+    case BLKIF_OP_READ:
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_WRITE_BARRIER:
+    case BLKIF_OP_FLUSH_DISKCACHE:
+        dst->u.rw.nr_segments = src->u.rw.nr_segments;
+        dst->u.rw.handle = src->u.rw.handle;
+        dst->u.rw.id = src->u.rw.id;
+        dst->u.rw.sector_number = src->u.rw.sector_number;
+        barrier();
+        if (n > dst->u.rw.nr_segments) {
+            n = dst->u.rw.nr_segments;
+        }
+        for (i = 0; i < n; i++) {
+            dst->u.rw.seg[i] = src->u.rw.seg[i];
+        }
+        break;
+    case BLKIF_OP_DISCARD:
+        dst->u.discard.flag = src->u.discard.flag;
+        dst->u.discard.id = src->u.discard.id;
+        dst->u.discard.sector_number = src->u.discard.sector_number;
+        dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+        break;
+    case BLKIF_OP_INDIRECT:
+        dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+        dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+        dst->u.indirect.handle = src->u.indirect.handle;
+        dst->u.indirect.id = src->u.indirect.id;
+        dst->u.indirect.sector_number = src->u.indirect.sector_number;
+        barrier();
+        j = MIN(MAX_INDIRECT_PAGES,
+                INDIRECT_PAGES(dst->u.indirect.nr_segments));
+        for (i = 0; i < j; i++) {
+            dst->u.indirect.indirect_grefs[i] =
+                src->u.indirect.indirect_grefs[i];
+        }
+        break;
+    default:
+        /*
+         * Don't know how to translate this op. Only get the
+         * ID so failure can be reported to the frontend.
+         */
+        dst->u.other.id = src->u.other.id;
+        break;
+    }
 }
 
-static inline void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
+static inline void blkif_get_x86_64_req(struct blkif_request *d,
+                                        struct blkif_x86_64_request *src)
 {
-       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+    int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
+    struct blkif_request_u *dst = (void *)d;
 
-       dst->operation = src->operation;
-       dst->nr_segments = src->nr_segments;
-       dst->handle = src->handle;
-       dst->id = src->id;
-       dst->sector_number = src->sector_number;
-       /* Prevent the compiler from using src->... instead. */
-       barrier();
-       if (dst->operation == BLKIF_OP_DISCARD) {
-               struct blkif_request_discard *s = (void *)src;
-               struct blkif_request_discard *d = (void *)dst;
-               d->nr_sectors = s->nr_sectors;
-               return;
-       }
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->seg[i] = src->seg[i];
+    dst->operation = atomic_read(&src->operation);
+    switch (dst->operation) {
+    case BLKIF_OP_READ:
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_WRITE_BARRIER:
+    case BLKIF_OP_FLUSH_DISKCACHE:
+        dst->u.rw.nr_segments = src->u.rw.nr_segments;
+        dst->u.rw.handle = src->u.rw.handle;
+        dst->u.rw.id = src->u.rw.id;
+        dst->u.rw.sector_number = src->u.rw.sector_number;
+        barrier();
+        if (n > dst->u.rw.nr_segments) {
+            n = dst->u.rw.nr_segments;
+        }
+        for (i = 0; i < n; i++) {
+            dst->u.rw.seg[i] = src->u.rw.seg[i];
+        }
+        break;
+    case BLKIF_OP_DISCARD:
+        dst->u.discard.flag = src->u.discard.flag;
+        dst->u.discard.id = src->u.discard.id;
+        dst->u.discard.sector_number = src->u.discard.sector_number;
+        dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+        break;
+    case BLKIF_OP_INDIRECT:
+        dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+        dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+        dst->u.indirect.handle = src->u.indirect.handle;
+        dst->u.indirect.id = src->u.indirect.id;
+        dst->u.indirect.sector_number = src->u.indirect.sector_number;
+        barrier();
+        j = MIN(MAX_INDIRECT_PAGES,
+                INDIRECT_PAGES(dst->u.indirect.nr_segments));
+        for (i = 0; i < j; i++) {
+            dst->u.indirect.indirect_grefs[i] =
+                src->u.indirect.indirect_grefs[i];
+        }
+        break;
+    default:
+        /*
+         * Don't know how to translate this op. Only get the
+         * ID so failure can be reported to the frontend.
+         */
+        dst->u.other.id = src->u.other.id;
+        break;
+    }
 }
 
 #endif /* __XEN_BLKIF_H__ */
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 90aca73..2862b59 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -22,6 +22,7 @@
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
 #include <sys/uio.h>
+#include <sys/user.h>
 
 #include "hw/hw.h"
 #include "hw/xen/xen_backend.h"
-- 
2.6.6

