[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH 3/7] Implement diskseq checks in blkback



From: Demi Marie Obenour <demiobenour@xxxxxxxxx>

This allows specifying a disk sequence number in XenStore by writing the
physical-device node as "diskseq@major:minor" (all fields hexadecimal).
If it does not match the disk sequence number of the underlying device,
the device will not be exported and a warning will be logged.  Userspace
can use this to eliminate race conditions due to major/minor number reuse.
Older kernels will ignore this, so it is safe for userspace to set it
unconditionally.

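This also makes physical-device parsing stricter: the value must consist
solely of printable, non-space ASCII characters, and any characters
following the minor number are rejected.  I do not believe this will
break any extant userspace tools.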

Signed-off-by: Demi Marie Obenour <demi@xxxxxxxxxxxxxxxxxxxxxx>
---
 drivers/block/xen-blkback/xenbus.c | 137 +++++++++++++++++++++--------
 1 file changed, 100 insertions(+), 37 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4807af1d58059394d7a992335dabaf2bc3901721..2c43bfc7ab5ba6954f11d4b949a5668660dbd290 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -24,6 +24,7 @@ struct backend_info {
        struct xenbus_watch     backend_watch;
        unsigned                major;
        unsigned                minor;
+       unsigned long long      diskseq;
        char                    *mode;
 };
 
@@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)
 
 static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
                          unsigned major, unsigned minor, int readonly,
-                         int cdrom)
+                         bool cdrom, u64 diskseq)
 {
        struct xen_vbd *vbd;
        struct block_device *bdev;
@@ -507,6 +508,25 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
                xen_vbd_free(vbd);
                return -ENOENT;
        }
+
+       if (diskseq) {
+               struct gendisk *disk = bdev->bd_disk;
+               if (unlikely(disk == NULL)) {
+                       pr_err("xen_vbd_create: device %08x has no gendisk\n",
+                              vbd->pdevice);
+                       xen_vbd_free(vbd);
+                       return -EFAULT;
+               }
+
+               if (unlikely(disk->diskseq != diskseq)) {
+                       pr_warn("xen_vbd_create: device %08x has incorrect 
sequence "
+                               "number 0x%llx (expected 0x%llx)\n",
+                               vbd->pdevice, disk->diskseq, diskseq);
+                       xen_vbd_free(vbd);
+                       return -ENODEV;
+               }
+       }
+
        vbd->size = vbd_sz(vbd);
 
        if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
@@ -690,6 +710,55 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        return err;
 }
 
+/*
+ * Read and parse the "physical-device" XenStore node of @dev.
+ *
+ * Two formats are accepted, with all numbers in hexadecimal:
+ *   "<diskseq>@<major>:<minor>" - *diskseq is set to the given value, which
+ *                                 must be non-zero
+ *   "<major>:<minor>"           - *diskseq is set to 0
+ *
+ * Every byte of the node must lie in the range 0x21..0x7E (printable,
+ * non-space ASCII), and nothing may follow the minor number.
+ *
+ * Returns true if the node was read and parsed successfully.  Returns false
+ * otherwise; unless the read failed with a "does not exist" error,
+ * xenbus_dev_fatal() has been called on @dev.  The output parameters may
+ * have been written to even when false is returned.
+ */
+static bool read_physical_device(struct xenbus_device *dev,
+                                unsigned long long *diskseq,
+                                unsigned *major, unsigned *minor)
+{
+       const char *problem;
+       char *physical_device;
+       unsigned int i, physical_device_length;
+       char junk;
+
+       physical_device = xenbus_read(XBT_NIL, dev->nodename, "physical-device",
+                                     &physical_device_length);
+
+       if (IS_ERR(physical_device)) {
+               int err = PTR_ERR(physical_device);
+               /*
+                * Since this watch will fire once immediately after it is
+                * registered, we expect "does not exist" errors.  Ignore
+                * them and wait for the hotplug scripts.
+                */
+               if (unlikely(!XENBUS_EXIST_ERR(err)))
+                       xenbus_dev_fatal(dev, err, "reading physical-device");
+               return false;
+       }
+
+       for (i = 0; i < physical_device_length; ++i) {
+               /*
+                * Compare as unsigned char so the range check does not
+                * depend on whether plain char is signed.
+                */
+               unsigned char c = physical_device[i];
+
+               if (unlikely(c <= 0x20 || c >= 0x7F)) {
+                       problem = "bad byte in physical-device";
+                       goto fail;
+               }
+       }
+
+       /*
+        * The trailing %c only converts if something follows the last
+        * number, so a return value equal to the number of numeric fields
+        * means the string ended exactly after the minor number.
+        */
+       if (sscanf(physical_device, "%16llx@%8x:%8x%c",
+                  diskseq, major, minor, &junk) == 3) {
+               if (*diskseq == 0) {
+                       problem = "diskseq 0 is invalid";
+                       goto fail;
+               }
+       } else if (sscanf(physical_device, "%8x:%8x%c",
+                         major, minor, &junk) == 2) {
+               /* Legacy format without a sequence number: 0 means "unset". */
+               *diskseq = 0;
+       } else {
+               problem = "invalid physical-device";
+               goto fail;
+       }
+       kfree(physical_device);
+       return true;
+fail:
+       kfree(physical_device);
+       /* Never pass a non-literal string as the format argument. */
+       xenbus_dev_fatal(dev, -EINVAL, "%s", problem);
+       return false;
+}
+
 /*
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -707,28 +776,17 @@ static void backend_changed(struct xenbus_watch *watch,
        int cdrom = 0;
        unsigned long handle;
        char *device_type;
+       unsigned long long diskseq;
 
        pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
-
-       err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
-                          &major, &minor);
-       if (XENBUS_EXIST_ERR(err)) {
-               /*
-                * Since this watch will fire once immediately after it is
-                * registered, we expect this.  Ignore it, and wait for the
-                * hotplug scripts.
-                */
+       if (!read_physical_device(dev, &diskseq, &major, &minor))
                return;
-       }
-       if (err != 2) {
-               xenbus_dev_fatal(dev, err, "reading physical-device");
-               return;
-       }
 
-       if (be->major | be->minor) {
-               if (be->major != major || be->minor != minor)
-                       pr_warn("changing physical device (from %x:%x to %x:%x) 
not supported.\n",
-                               be->major, be->minor, major, minor);
+       if (be->major | be->minor | be->diskseq) {
+               if (be->major != major || be->minor != minor || be->diskseq != 
diskseq)
+                       pr_warn("changing physical device (from %x:%x:%llx to 
%x:%x:%llx)"
+                               " not supported.\n",
+                               be->major, be->minor, be->diskseq, major, 
minor, diskseq);
                return;
        }
 
@@ -756,29 +814,34 @@ static void backend_changed(struct xenbus_watch *watch,
 
        be->major = major;
        be->minor = minor;
+       be->diskseq = diskseq;
 
        err = xen_vbd_create(be->blkif, handle, major, minor,
-                            !strchr(be->mode, 'w'), cdrom);
-
-       if (err)
-               xenbus_dev_fatal(dev, err, "creating vbd structure");
-       else {
-               err = xenvbd_sysfs_addif(dev);
-               if (err) {
-                       xen_vbd_free(&be->blkif->vbd);
-                       xenbus_dev_fatal(dev, err, "creating sysfs entries");
-               }
-       }
+                            !strchr(be->mode, 'w'), cdrom, diskseq);
 
        if (err) {
-               kfree(be->mode);
-               be->mode = NULL;
-               be->major = 0;
-               be->minor = 0;
-       } else {
-               /* We're potentially connected now */
-               xen_update_blkif_status(be->blkif);
+               xenbus_dev_fatal(dev, err, "creating vbd structure");
+               goto fail;
        }
+
+       err = xenvbd_sysfs_addif(dev);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "creating sysfs entries");
+               goto free_vbd;
+       }
+
+       /* We're potentially connected now */
+       xen_update_blkif_status(be->blkif);
+       return;
+
+free_vbd:
+       xen_vbd_free(&be->blkif->vbd);
+fail:
+       kfree(be->mode);
+       be->mode = NULL;
+       be->major = 0;
+       be->minor = 0;
+       be->diskseq = 0;
 }
 
 /*
-- 
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.