[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Port over the latest zfs code from opensolaris-grub to libfsimage



3 files changed, 192 insertions(+), 97 deletions(-)
tools/libfsimage/common/fsimage_grub.h |    1 
tools/libfsimage/zfs/fsys_zfs.c        |  276 ++++++++++++++++++++++----------
tools/libfsimage/zfs/zfs-include/zfs.h |   12 -


# HG changeset patch
# User Jody Belka <dev@xxxxxxxx>
# Date 1232022187 0
# Branch zfs14
# Node ID 74b7416dc04c4bf734c4e8df7d64b2630da6ef0b
# Parent  10a8fae412c5e1b1cd24e58f94f780f180b84ab8
Port over the latest zfs code from opensolaris-grub to libfsimage

The existing libfsimage zfs code is rather old now, and can't read
versions of zfs now in general use. Port over the current code from
the opensolaris-patched grub, so that pygrub can be used again for
booting solaris/zfs based domains.

Signed-off-by: Jody Belka <dev@xxxxxxxx>

diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/common/fsimage_grub.h
--- a/tools/libfsimage/common/fsimage_grub.h    Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/common/fsimage_grub.h    Thu Jan 15 12:23:07 2009 +0000
@@ -78,6 +78,7 @@
 #define        ERR_DEV_VALUES 1
 #define        ERR_WONT_FIT 1
 #define        ERR_READ 1
+#define        ERR_NEWER_VERSION 1
 
 fsi_plugin_ops_t *fsig_init(fsi_plugin_t *, fsig_plugin_ops_t *);
 
diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/zfs/fsys_zfs.c
--- a/tools/libfsimage/zfs/fsys_zfs.c   Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/zfs/fsys_zfs.c   Thu Jan 15 12:23:07 2009 +0000
@@ -85,11 +85,15 @@
 
 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
 {
-       {"noop", 0},
+       {"inherit", 0},                 /* ZIO_COMPRESS_INHERIT */
        {"on", lzjb_decompress},        /* ZIO_COMPRESS_ON */
-       {"off", 0},
-       {"lzjb", lzjb_decompress}       /* ZIO_COMPRESS_LZJB */
+       {"off", 0},                     /* ZIO_COMPRESS_OFF */
+       {"lzjb", lzjb_decompress},      /* ZIO_COMPRESS_LZJB */
+       {"empty", 0}                    /* ZIO_COMPRESS_EMPTY */
 };
+
+static int zio_read_data(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack);
+
 
 /* From disk_io.c */
 /* ZFS root filesystem for booting */
@@ -170,8 +174,7 @@
 zio_checksum_verify(blkptr_t *bp, char *data, int size)
 {
        zio_cksum_t zc = bp->blk_cksum;
-       uint32_t checksum = BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER :
-           BP_GET_CHECKSUM(bp);
+       uint32_t checksum = BP_GET_CHECKSUM(bp);
        int byteswap = BP_SHOULD_BYTESWAP(bp);
        zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1;
        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
@@ -185,27 +188,13 @@
                return (-1);
 
        if (ci->ci_zbt) {
-               if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
-                       /*
-                        * 'gang blocks' is not supported.
-                        */
-                       return (-1);
-               }
-
-               if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) {
-                       /* byte swapping is not supported */
-                       return (-1);
-               } else {
-                       expected_cksum = zbt->zbt_cksum;
-                       zbt->zbt_cksum = zc;
-                       ci->ci_func[0](data, size, &actual_cksum);
-                       zbt->zbt_cksum = expected_cksum;
-               }
+               expected_cksum = zbt->zbt_cksum;
+               zbt->zbt_cksum = zc;
+               ci->ci_func[0](data, size, &actual_cksum);
+               zbt->zbt_cksum = expected_cksum;
                zc = expected_cksum;
 
        } else {
-               if (BP_IS_GANG(bp))
-                       return (-1);
                ci->ci_func[byteswap](data, size, &actual_cksum);
        }
 
@@ -298,7 +287,7 @@
                return (-1);
 
        if (uber->ub_magic == UBERBLOCK_MAGIC &&
-           uber->ub_version >= SPA_VERSION_1 &&
+           uber->ub_version > 0 &&
            uber->ub_version <= SPA_VERSION)
                return (0);
 
@@ -337,7 +326,93 @@
 }
 
 /*
- * Read in a block and put its uncompressed data in buf.
+ * Read a block of data based on the gang block address dva,
+ * and put its data in buf.
+ *
+ * Return:
+ *     0 - success
+ *     1 - failure
+ */
+static int
+zio_read_gang(fsi_file_t *ffi, blkptr_t *bp, dva_t *dva, void *buf, char *stack)
+{
+       zio_gbh_phys_t *zio_gb;
+       uint64_t offset, sector;
+       blkptr_t tmpbp;
+       int i;
+
+       zio_gb = (zio_gbh_phys_t *)stack;
+       stack += SPA_GANGBLOCKSIZE;
+       offset = DVA_GET_OFFSET(dva);
+       sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
+
+       /* read in the gang block header */
+       if (devread(ffi, sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
+               return (1);
+       }
+
+       /* self checksuming the gang block header */
+       BP_ZERO(&tmpbp);
+       BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER);
+       BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER);
+       ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva),
+           DVA_GET_OFFSET(dva), bp->blk_birth, 0);
+       if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) {
+               return (1);
+       }
+
+       for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
+               if (zio_gb->zg_blkptr[i].blk_birth == 0)
+                       continue;
+
+               if (zio_read_data(ffi, &zio_gb->zg_blkptr[i], buf, stack))
+                       return (1);
+               buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]);
+       }
+
+       return (0);
+}
+
+/*
+ * Read in a block of raw data to buf.
+ *
+ * Return:
+ *     0 - success
+ *     1 - failure
+ */
+static int
+zio_read_data(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
+{
+       int i, psize;
+
+       psize = BP_GET_PSIZE(bp);
+
+       /* pick a good dva from the block pointer */
+       for (i = 0; i < SPA_DVAS_PER_BP; i++) {
+               uint64_t offset, sector;
+
+               if (bp->blk_dva[i].dva_word[0] == 0 &&
+                   bp->blk_dva[i].dva_word[1] == 0)
+                       continue;
+
+               if (DVA_GET_GANG(&bp->blk_dva[i])) {
+                       if (zio_read_gang(ffi, bp, &bp->blk_dva[i], buf, stack) == 0)
+                               return (0);
+               } else {
+                       /* read in a data block */
+                       offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
+                       sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
+                       if (devread(ffi, sector, 0, psize, buf))
+                               return (0);
+               }
+       }
+
+       return (1);
+}
+
+/*
+ * Read in a block of data, verify its checksum, decompress if needed,
+ * and put the uncompressed data in buf.
  *
  * Return:
  *     0 - success
@@ -346,49 +421,41 @@
 static int
 zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
 {
-       uint64_t offset, sector;
-       int psize, lsize;
-       int i, comp, cksum;
+       int lsize, psize, comp;
+       char *retbuf;
 
+       comp = BP_GET_COMPRESS(bp);
+       lsize = BP_GET_LSIZE(bp);
        psize = BP_GET_PSIZE(bp);
-       lsize = BP_GET_LSIZE(bp);
-       comp = BP_GET_COMPRESS(bp);
-       cksum = BP_GET_CHECKSUM(bp);
 
        if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
            (comp != ZIO_COMPRESS_OFF &&
-           decomp_table[comp].decomp_func == NULL))
+           decomp_table[comp].decomp_func == NULL)) {
                return (ERR_FSYS_CORRUPT);
-
-       /* pick a good dva from the block pointer */
-       for (i = 0; i < SPA_DVAS_PER_BP; i++) {
-
-               if (bp->blk_dva[i].dva_word[0] == 0 &&
-                   bp->blk_dva[i].dva_word[1] == 0)
-                       continue;
-
-               /* read in a block */
-               offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
-               sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
-
-               if (comp != ZIO_COMPRESS_OFF) {
-
-                       if (devread(ffi, sector, 0, psize, stack) == 0)
-                               continue;
-                       if (zio_checksum_verify(bp, stack, psize) != 0)
-                               continue;
-                       decomp_table[comp].decomp_func(stack, buf, psize,
-                           lsize);
-               } else {
-                       if (devread(ffi, sector, 0, psize, buf) == 0)
-                               continue;
-                       if (zio_checksum_verify(bp, buf, psize) != 0)
-                               continue;
-               }
-               return (0);
        }
 
-       return (ERR_FSYS_CORRUPT);
+       if ((char *)buf < stack && ((char *)buf) + lsize > stack) {
+               return (ERR_WONT_FIT);
+       }
+
+       retbuf = buf;
+       if (comp != ZIO_COMPRESS_OFF) {
+               buf = stack;
+               stack += psize;
+       }
+
+       if (zio_read_data(ffi, bp, buf, stack)) {
+               return (ERR_FSYS_CORRUPT);
+       }
+
+       if (zio_checksum_verify(bp, buf, psize) != 0) {
+               return (ERR_FSYS_CORRUPT);
+       }
+
+       if (comp != ZIO_COMPRESS_OFF)
+               decomp_table[comp].decomp_func(buf, retbuf, psize, lsize);
+
+       return (0);
 }
 
 /*
@@ -618,6 +685,8 @@
        /* Get the leaf block */
        l = (zap_leaf_phys_t *)stack;
        stack += 1<<blksft;
+       if ((1<<blksft) < sizeof (zap_leaf_phys_t))
+               return (ERR_FSYS_CORRUPT);
        if ((errnum = dmu_read(ffi, zap_dnode, blkid, l, stack)))
                return (errnum);
 
@@ -865,6 +934,8 @@
        char *cname, ch;
        blkptr_t *bp;
        objset_phys_t *osp;
+       int issnapshot = 0;
+       char *snapname = NULL;
 
        if (fsname == NULL && obj) {
                headobj = *obj;
@@ -905,6 +976,13 @@
                ch = *fsname;
                *fsname = 0;
 
+               snapname = cname;
+               while (*snapname && !isspace((uint8_t)*snapname) && *snapname != '@')
+                       snapname++;
+               if (*snapname == '@') {
+                       issnapshot = 1;
+                       *snapname = 0;
+               }
                childobj =
                    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
                if ((errnum = dnode_get(ffi, mosmdn, childobj,
@@ -919,6 +997,8 @@
                        return (errnum);
 
                *fsname = ch;
+               if (issnapshot)
+                       *snapname = '@';
        }
        headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
        if (obj)
@@ -928,8 +1008,23 @@
        if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn,
            stack)))
                return (errnum);
+       if (issnapshot) {
+               uint64_t snapobj;
 
-       /* TODO: Add snapshot support here - for fsname=snapshot-name */
+               snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))->
+                   ds_snapnames_zapobj;
+
+               if ((errnum = dnode_get(ffi, mosmdn, snapobj,
+                   DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)))
+                       return (errnum);
+               if (zap_lookup(ffi, mdn, snapname + 1, &headobj, stack))
+                       return (ERR_FILESYSTEM_NOT_FOUND);
+               if ((errnum = dnode_get(ffi, mosmdn, headobj,
+                   DMU_OT_DSL_DATASET, mdn, stack)))
+                       return (errnum);
+               if (obj)
+                       *obj = headobj;
+       }
 
        bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
        osp = (objset_phys_t *)stack;
@@ -1073,8 +1168,6 @@
            DATA_TYPE_UINT64, NULL) == 0 ||
            nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
            DATA_TYPE_UINT64, NULL) == 0 ||
-           nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival,
-           DATA_TYPE_UINT64, NULL) == 0 ||
            nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
            DATA_TYPE_UINT64, NULL) == 0)
                return (ERR_DEV_VALUES);
@@ -1087,20 +1180,35 @@
  * The caller should already allocate MAXNAMELEN memory for bootpath.
  */
 static int
-vdev_get_bootpath(char *nv, char *bootpath)
+vdev_get_bootpath(char *nv, uint64_t inguid, char *bootpath)
 {
        char type[16];
 
-       bootpath[0] = '\0';
        if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
            NULL))
                return (ERR_FSYS_CORRUPT);
 
        if (strcmp(type, VDEV_TYPE_DISK) == 0) {
-               if (vdev_validate(nv) != 0 ||
-                   nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath,
-                   DATA_TYPE_STRING, NULL) != 0)
+               uint64_t guid;
+
+               if (vdev_validate(nv) != 0)
                        return (ERR_NO_BOOTPATH);
+
+               if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID,
+                   &guid, DATA_TYPE_UINT64, NULL) != 0)
+                       return (ERR_NO_BOOTPATH);
+
+               if (guid != inguid)
+                       return (ERR_NO_BOOTPATH);
+
+               if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
+                   bootpath, DATA_TYPE_STRING, NULL) != 0)
+                       bootpath[0] = '\0';
+
+               if (strlen(bootpath) >= MAXNAMELEN)
+                       return (ERR_WONT_FIT);
+
+               return (0);
 
        } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
                int nelm, i;
@@ -1111,28 +1219,15 @@
                        return (ERR_FSYS_CORRUPT);
 
                for (i = 0; i < nelm; i++) {
-                       char tmp_path[MAXNAMELEN];
                        char *child_i;
 
                        child_i = nvlist_array(child, i);
-                       if (vdev_validate(child_i) != 0)
-                               continue;
-
-                       if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH,
-                           tmp_path, DATA_TYPE_STRING, NULL) != 0)
-                               return (ERR_NO_BOOTPATH);
-
-                       if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN)
-                               return (ERR_WONT_FIT);
-
-                       if (strlen(bootpath) == 0)
-                               sprintf(bootpath, "%s", tmp_path);
-                       else
-                               sprintf(bootpath, "%s %s", bootpath, tmp_path);
+                       if (vdev_get_bootpath(child_i, inguid, bootpath) == 0)
+                               return (0);
                }
        }
 
-       return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH);
+       return (ERR_NO_BOOTPATH);
 }
 
 /*
@@ -1148,6 +1243,8 @@
        vdev_phys_t *vdev;
        uint64_t sector, pool_state, txg = 0;
        char *nvlist, *nv;
+       uint64_t diskguid;
+       uint64_t version;
        zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
 
        sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
@@ -1181,11 +1278,18 @@
        if (txg == 0)
                return (ERR_NO_BOOTPATH);
 
+       if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
+           DATA_TYPE_UINT64, NULL))
+               return (ERR_FSYS_CORRUPT);
+       if (version > SPA_VERSION)
+               return (ERR_NEWER_VERSION);
        if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
            DATA_TYPE_NVLIST, NULL))
                return (ERR_FSYS_CORRUPT);
-
-       if (vdev_get_bootpath(nv, current_bootpath))
+       if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
+           DATA_TYPE_UINT64, NULL))
+               return (ERR_FSYS_CORRUPT);
+       if (vdev_get_bootpath(nv, diskguid, current_bootpath))
                return (ERR_NO_BOOTPATH);
 
        return (0);
diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/zfs/zfs-include/zfs.h
--- a/tools/libfsimage/zfs/zfs-include/zfs.h    Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/zfs/zfs-include/zfs.h    Thu Jan 15 12:23:07 2009 +0000
@@ -28,17 +28,7 @@
 /*
  * On-disk version number.
  */
-#define        SPA_VERSION_1                   1ULL
-#define        SPA_VERSION_2                   2ULL
-#define        SPA_VERSION_3                   3ULL
-#define        SPA_VERSION_4                   4ULL
-#define        SPA_VERSION_5                   5ULL
-#define        SPA_VERSION_6                   6ULL
-#define        SPA_VERSION_7                   7ULL
-#define        SPA_VERSION_8                   8ULL
-#define        SPA_VERSION_9                   9ULL
-#define        SPA_VERSION_10                  10ULL
-#define        SPA_VERSION                     SPA_VERSION_10
+#define        SPA_VERSION                     14ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.