[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] Add ZFS libfsimage support patch
# HG changeset patch # User john.levon@xxxxxxx # Date 1209521787 25200 # Node ID 51023851f6401182c0c2c200716e5bfcc8f14002 # Parent 7aabc2f601717f031b29cc219bcd80d04616575a Add ZFS libfsimage support patch Add support to pygrub and libfsimage to boot ZFS root filesystems. Boot argument of zfs-bootfs is set to describe ZFS root pool and boot filesystem object number. Boot argument bootpath is set to describe the virtual device root mirror components. Signed-off-by: Susan Kamm-Worrell <susan.kamm-worrell@xxxxxxx> diff --git a/tools/libfsimage/Makefile b/tools/libfsimage/Makefile --- a/tools/libfsimage/Makefile +++ b/tools/libfsimage/Makefile @@ -1,7 +1,7 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk -SUBDIRS-y = common ufs reiserfs iso9660 fat +SUBDIRS-y = common ufs reiserfs iso9660 fat zfs SUBDIRS-y += $(shell env CC="$(CC)" ./check-libext2fs) .PHONY: all clean install diff --git a/tools/libfsimage/common/fsimage.c b/tools/libfsimage/common/fsimage.c --- a/tools/libfsimage/common/fsimage.c +++ b/tools/libfsimage/common/fsimage.c @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -51,6 +51,7 @@ fsi_t *fsi_open_fsimage(const char *path fsi->f_fd = fd; fsi->f_off = off; fsi->f_data = NULL; + fsi->f_bootstring = NULL; pthread_mutex_lock(&fsi_lock); err = find_plugin(fsi, path, options); @@ -140,3 +141,29 @@ ssize_t fsi_pread_file(fsi_file_t *ffi, return (ret); } + +char * +fsi_bootstring_alloc(fsi_t *fsi, size_t len) +{ + fsi->f_bootstring = malloc(len); + if (fsi->f_bootstring == NULL) + return (NULL); + + bzero(fsi->f_bootstring, len); + return (fsi->f_bootstring); +} + +void +fsi_bootstring_free(fsi_t *fsi) +{ + if (fsi->f_bootstring != NULL) { + free(fsi->f_bootstring); + fsi->f_bootstring = NULL; + } +} + +char * +fsi_fs_bootstring(fsi_t *fsi) +{ + return (fsi->f_bootstring); +} diff --git a/tools/libfsimage/common/fsimage.h b/tools/libfsimage/common/fsimage.h --- a/tools/libfsimage/common/fsimage.h +++ b/tools/libfsimage/common/fsimage.h @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,6 +45,10 @@ ssize_t fsi_read_file(fsi_file_t *, void ssize_t fsi_read_file(fsi_file_t *, void *, size_t); ssize_t fsi_pread_file(fsi_file_t *, void *, size_t, uint64_t); +char *fsi_bootstring_alloc(fsi_t *, size_t); +void fsi_bootstring_free(fsi_t *); +char *fsi_fs_bootstring(fsi_t *); + #ifdef __cplusplus }; #endif diff --git a/tools/libfsimage/common/fsimage_grub.c b/tools/libfsimage/common/fsimage_grub.c --- a/tools/libfsimage/common/fsimage_grub.c +++ b/tools/libfsimage/common/fsimage_grub.c @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -286,6 +286,7 @@ fsig_mount(fsi_t *fsi, const char *path, if (!ops->fpo_mount(ffi, options)) { fsip_file_free(ffi); + fsi_bootstring_free(fsi); free(fsi->f_data); fsi->f_data = NULL; return (-1); @@ -299,6 +300,7 @@ static int static int fsig_umount(fsi_t *fsi) { + fsi_bootstring_free(fsi); free(fsi->f_data); return (0); } diff --git a/tools/libfsimage/common/fsimage_grub.h b/tools/libfsimage/common/fsimage_grub.h --- a/tools/libfsimage/common/fsimage_grub.h +++ b/tools/libfsimage/common/fsimage_grub.h @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -72,6 +72,12 @@ unsigned long fsig_log2(unsigned long); #define ERR_FILELENGTH 1 #define ERR_BAD_FILETYPE 1 #define ERR_FILE_NOT_FOUND 1 +#define ERR_BAD_ARGUMENT 1 +#define ERR_FILESYSTEM_NOT_FOUND 1 +#define ERR_NO_BOOTPATH 1 +#define ERR_DEV_VALUES 1 +#define ERR_WONT_FIT 1 +#define ERR_READ 1 fsi_plugin_ops_t *fsig_init(fsi_plugin_t *, fsig_plugin_ops_t *); diff --git a/tools/libfsimage/common/fsimage_priv.h b/tools/libfsimage/common/fsimage_priv.h --- a/tools/libfsimage/common/fsimage_priv.h +++ b/tools/libfsimage/common/fsimage_priv.h @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -46,6 +46,7 @@ struct fsi { uint64_t f_off; void *f_data; fsi_plugin_t *f_plugin; + char *f_bootstring; }; struct fsi_file { diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU --- a/tools/libfsimage/common/mapfile-GNU +++ b/tools/libfsimage/common/mapfile-GNU @@ -8,6 +8,9 @@ VERSION { fsi_close_file; fsi_read_file; fsi_pread_file; + fsi_bootstring_alloc; + fsi_bootstring_free; + fsi_fs_bootstring; fsip_fs_set_data; fsip_file_alloc; diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS --- a/tools/libfsimage/common/mapfile-SunOS +++ b/tools/libfsimage/common/mapfile-SunOS @@ -7,6 +7,9 @@ libfsimage.so.1.0 { fsi_close_file; fsi_read_file; fsi_pread_file; + fsi_bootstring_alloc; + fsi_bootstring_free; + fsi_fs_bootstring; fsip_fs_set_data; fsip_file_alloc; diff --git a/tools/libfsimage/zfs/Makefile b/tools/libfsimage/zfs/Makefile new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/Makefile @@ -0,0 +1,37 @@ +# +# GRUB -- GRand Unified Bootloader +# Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +XEN_ROOT = ../../.. 
+ +LIB_SRCS-y = fsys_zfs.c zfs_lzjb.c zfs_sha256.c zfs_fletcher.c + +FS = zfs + +.PHONY: all +all: fs-all + +.PHONY: install +install: fs-install + +include $(XEN_ROOT)/tools/libfsimage/Rules.mk diff --git a/tools/libfsimage/zfs/fsys_zfs.c b/tools/libfsimage/zfs/fsys_zfs.c new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/fsys_zfs.c @@ -0,0 +1,1457 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * All files in the zfs directory are derived from the OpenSolaris + * zfs grub files. All files in the zfs-include directory were + * included without changes. + */ + +/* + * The zfs plug-in routines for GRUB are: + * + * zfs_mount() - locates a valid uberblock of the root pool and reads + * in its MOS at the memory address MOS. + * + * zfs_open() - locates a plain file object by following the MOS + * and places its dnode at the memory address DNODE. + * + * zfs_read() - read in the data blocks pointed by the DNODE. + * + * ZFS_SCRATCH is used as a working area. 
+ * + * (memory addr) MOS DNODE ZFS_SCRATCH + * | | | + * +-------V---------V----------V---------------+ + * memory | | dnode | dnode | scratch | + * | | 512B | 512B | area | + * +--------------------------------------------+ + */ + +#include <stdio.h> +#include <strings.h> + +/* From "shared.h" */ +#include "mb_info.h" + +/* Boot signature related defines for the findroot command */ +#define BOOTSIGN_DIR "/boot/grub/bootsign" +#define BOOTSIGN_BACKUP "/etc/bootsign" + +/* Maybe redirect memory requests through grub_scratch_mem. */ +#define RAW_ADDR(x) (x) +#define RAW_SEG(x) (x) + +/* ZFS will use the top 4 Meg of physical memory (below 4Gig) for scratch */ +#define ZFS_SCRATCH_SIZE 0x400000 + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +/* End from shared.h */ + +#include "fsys_zfs.h" + +/* cache for a file block of the currently zfs_open()-ed file */ +#define file_buf zfs_ba->zfs_file_buf +#define file_start zfs_ba->zfs_file_start +#define file_end zfs_ba->zfs_file_end + +/* cache for a dnode block */ +#define dnode_buf zfs_ba->zfs_dnode_buf +#define dnode_mdn zfs_ba->zfs_dnode_mdn +#define dnode_start zfs_ba->zfs_dnode_start +#define dnode_end zfs_ba->zfs_dnode_end + +#define stackbase zfs_ba->zfs_stackbase + +decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = +{ + {"noop", 0}, + {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ + {"off", 0}, + {"lzjb", lzjb_decompress} /* ZIO_COMPRESS_LZJB */ +}; + +/* From disk_io.c */ +/* ZFS root filesystem for booting */ +#define current_bootpath zfs_ba->zfs_current_bootpath +#define current_rootpool zfs_ba->zfs_current_rootpool +#define current_bootfs zfs_ba->zfs_current_bootfs +#define current_bootfs_obj zfs_ba->zfs_current_bootfs_obj +#define is_zfs_mount (*fsig_int1(ffi)) +/* End from disk_io.c */ + +#define is_zfs_open zfs_ba->zfs_open + +/* + * Our own version of bcmp().
+ */ +static int +zfs_bcmp(const void *s1, const void *s2, size_t n) +{ + const unsigned char *ps1 = s1; + const unsigned char *ps2 = s2; + + if (s1 != s2 && n != 0) { + do { + if (*ps1++ != *ps2++) + return (1); + } while (--n != 0); + } + + return (0); +} + +/* + * Our own version of log2(). Same thing as highbit()-1. + */ +static int +zfs_log2(uint64_t num) +{ + int i = 0; + + while (num > 1) { + i++; + num = num >> 1; + } + + return (i); +} + +/* Checksum Functions */ +static void +zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); +} + +/* Checksum Table and Values */ +zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { + {{NULL, NULL}, 0, 0, "inherit"}, + {{NULL, NULL}, 0, 0, "on"}, + {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, + {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"} +}; + +/* + * zio_checksum_verify: Provides support for checksum verification. + * + * Fletcher2, Fletcher4, and SHA256 are supported. + * + * Return: + * -1 = Failure + * 0 = Success + */ +static int +zio_checksum_verify(blkptr_t *bp, char *data, int size) +{ + zio_cksum_t zc = bp->blk_cksum; + uint32_t checksum = BP_IS_GANG(bp) ? 
ZIO_CHECKSUM_GANG_HEADER : + BP_GET_CHECKSUM(bp); + int byteswap = BP_SHOULD_BYTESWAP(bp); + zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1; + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + zio_cksum_t actual_cksum, expected_cksum; + + /* byteswap is not supported */ + if (byteswap) + return (-1); + + if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) + return (-1); + + if (ci->ci_zbt) { + if (checksum == ZIO_CHECKSUM_GANG_HEADER) { + /* + * 'gang blocks' are not supported. + */ + return (-1); + } + + if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) { + /* byte swapping is not supported */ + return (-1); + } else { + expected_cksum = zbt->zbt_cksum; + zbt->zbt_cksum = zc; + ci->ci_func[0](data, size, &actual_cksum); + zbt->zbt_cksum = expected_cksum; + } + zc = expected_cksum; + + } else { + if (BP_IS_GANG(bp)) + return (-1); + ci->ci_func[byteswap](data, size, &actual_cksum); + } + + if ((actual_cksum.zc_word[0] - zc.zc_word[0]) | + (actual_cksum.zc_word[1] - zc.zc_word[1]) | + (actual_cksum.zc_word[2] - zc.zc_word[2]) | + (actual_cksum.zc_word[3] - zc.zc_word[3])) + return (-1); + + return (0); +} + +/* + * vdev_label_offset takes "offset" (the offset within a vdev_label) and + * returns its physical disk offset (starting from the beginning of the vdev). + * + * Input: + * psize : Physical size of this vdev + * l : Label Number (0-3; for now only l < VDEV_LABELS/2 is accepted) + * offset : The offset within a vdev_label for which we want the physical + * address (must not exceed sizeof (vdev_label_t)) + * Return: + * Success : physical disk offset + * Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless + */ +static uint64_t +vdev_label_offset(fsi_file_t *ffi, uint64_t psize, int l, uint64_t offset) +{ + /* XXX Need to add back label support! */ + if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) { + errnum = ERR_BAD_ARGUMENT; + return (0); + } + + return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
+ 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); + +} + +/* + * vdev_uberblock_compare takes two uberblock structures and returns an integer + * indicating the more recent of the two. + * Return Value = 1 if ub1 is more recent + * Return Value = -1 if ub2 is more recent (0 if txg and timestamp both match) + * The most recent uberblock is determined using its transaction number and + * timestamp. The uberblock with the highest transaction number is + * considered "newer". If the transaction numbers of the two blocks match, the + * timestamps are compared to determine the "newer" of the two. + */ +static int +vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) +{ + if (ub1->ub_txg < ub2->ub_txg) + return (-1); + if (ub1->ub_txg > ub2->ub_txg) + return (1); + + if (ub1->ub_timestamp < ub2->ub_timestamp) + return (-1); + if (ub1->ub_timestamp > ub2->ub_timestamp) + return (1); + + return (0); +} + +/* + * Three pieces of information are needed to verify an uberblock: the magic + * number, the version number, and the checksum. + * + * Currently Implemented: checksum, magic number, version number + * (the checksum is checked first, through zio_checksum_verify()) + * + * Return: + * 0 - Success + * -1 - Failure + */ +static int +uberblock_verify(uberblock_phys_t *ub, int offset) +{ + + uberblock_t *uber = &ub->ubp_uberblock; + blkptr_t bp; + + BP_ZERO(&bp); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); + ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); + + if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) + return (-1); + + if (uber->ub_magic == UBERBLOCK_MAGIC && + uber->ub_version >= SPA_VERSION_1 && + uber->ub_version <= SPA_VERSION) + return (0); + + return (-1); +} + +/* + * Find the best uberblock. + * Return: + * Success - Pointer to the best uberblock.
+ * Failure - NULL + */ +static uberblock_phys_t * +find_bestub(fsi_file_t *ffi, uberblock_phys_t *ub_array, int label) +{ + uberblock_phys_t *ubbest = NULL; + int i, offset; + + for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { + offset = vdev_label_offset(ffi, 0, label, + VDEV_UBERBLOCK_OFFSET(i)); + if (errnum == ERR_BAD_ARGUMENT) + return (NULL); + if (uberblock_verify(&ub_array[i], offset) == 0) { + if (ubbest == NULL) { + ubbest = &ub_array[i]; + } else if (vdev_uberblock_compare( + &(ub_array[i].ubp_uberblock), + &(ubbest->ubp_uberblock)) > 0) { + ubbest = &ub_array[i]; + } + } + } + + return (ubbest); +} + +/* + * Read in a block and put its uncompressed data in buf. + * + * Return: + * 0 - success + * errnum - failure + */ +static int +zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack) +{ + uint64_t offset, sector; + int psize, lsize; + int i, comp, cksum; + + psize = BP_GET_PSIZE(bp); + lsize = BP_GET_LSIZE(bp); + comp = BP_GET_COMPRESS(bp); + cksum = BP_GET_CHECKSUM(bp); + + if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || + (comp != ZIO_COMPRESS_OFF && + decomp_table[comp].decomp_func == NULL)) + return (ERR_FSYS_CORRUPT); + + /* pick a good dva from the block pointer */ + for (i = 0; i < SPA_DVAS_PER_BP; i++) { + + if (bp->blk_dva[i].dva_word[0] == 0 && + bp->blk_dva[i].dva_word[1] == 0) + continue; + + /* read in a block */ + offset = DVA_GET_OFFSET(&bp->blk_dva[i]); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + + if (comp != ZIO_COMPRESS_OFF) { + + if (devread(ffi, sector, 0, psize, stack) == 0) + continue; + if (zio_checksum_verify(bp, stack, psize) != 0) + continue; + decomp_table[comp].decomp_func(stack, buf, psize, + lsize); + } else { + if (devread(ffi, sector, 0, psize, buf) == 0) + continue; + if (zio_checksum_verify(bp, buf, psize) != 0) + continue; + } + return (0); + } + + return (ERR_FSYS_CORRUPT); +} + +/* + * Get the block from a block id. + * push the block onto the stack. 
+ * + * Return: + * 0 - success + * errnum - failure + */ +static int +dmu_read(fsi_file_t *ffi, dnode_phys_t *dn, uint64_t blkid, void *buf, + char *stack) +{ + int idx, level; + blkptr_t *bp_array = dn->dn_blkptr; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + blkptr_t *bp, *tmpbuf; + + bp = (blkptr_t *)stack; + stack += sizeof (blkptr_t); + + tmpbuf = (blkptr_t *)stack; + stack += 1<<dn->dn_indblkshift; + + for (level = dn->dn_nlevels - 1; level >= 0; level--) { + idx = (blkid >> (epbs * level)) & ((1<<epbs)-1); + *bp = bp_array[idx]; + if (level == 0) + tmpbuf = buf; + if (BP_IS_HOLE(bp)) { + grub_memset(buf, 0, + dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); + break; + } else if ((errnum = zio_read(ffi, bp, tmpbuf, stack))) { + return (errnum); + } + bp_array = tmpbuf; + } + + return (0); +} + +/* + * mzap_lookup: Looks up property described by "name" and returns the value + * in "value". + * + * Return: + * 0 - success + * errnum - failure + */ +static int +mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, + uint64_t *value) +{ + int i, chunks; + mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; + + chunks = objsize/MZAP_ENT_LEN - 1; + for (i = 0; i < chunks; i++) { + if (strcmp(mzap_ent[i].mze_name, name) == 0) { + *value = mzap_ent[i].mze_value; + return (0); + } + } + + return (ERR_FSYS_CORRUPT); +} + +static uint64_t +zap_hash(fsi_file_t *ffi, uint64_t salt, const char *name) +{ + static uint64_t table[256]; + const uint8_t *cp; + uint8_t c; + uint64_t crc = salt; + + if (table[128] == 0) { + uint64_t *ct; + int i, j; + for (i = 0; i < 256; i++) { + for (ct = table + i, *ct = i, j = 8; j > 0; j--) + *ct = (*ct >> 1) ^ (-(*ct & 1) & + ZFS_CRC64_POLY); + } + } + + if (crc == 0 || table[128] != ZFS_CRC64_POLY) { + errnum = ERR_FSYS_CORRUPT; + return (0); + } + + for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++) + crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF]; + + /* + * Only use 28 bits, since we need 4 bits in the cookie for the + * collision
differentiator. We MUST use the high bits, since + * those are the ones that we first pay attention to when + * choosing the bucket. + */ + crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); + + return (crc); +} + +/* + * Only to be used on 8-bit arrays. + * array_len is actual len in bytes (not encoded le_value_length). + * buf is null-terminated. + */ +static int +zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, + int array_len, const char *buf) +{ + int bseen = 0; + + while (bseen < array_len) { + struct zap_leaf_array *la = + &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array; + int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); + + if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) + return (0); + + if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0) + break; + chunk = la->la_next; + bseen += toread; + } + return (bseen == array_len); +} + +/* + * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the + * value for the property "name". + * + * Return: + * 0 - success + * errnum - failure + */ +static int +zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, + const char *name, uint64_t *value) +{ + uint16_t chunk; + struct zap_leaf_entry *le; + + /* Verify if this is a valid leaf block */ + if (l->l_hdr.lh_block_type != ZBT_LEAF) + return (ERR_FSYS_CORRUPT); + if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC) + return (ERR_FSYS_CORRUPT); + + for (chunk = l->l_hash[LEAF_HASH(blksft, h)]; + chunk != CHAIN_END; chunk = le->le_next) { + + if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) + return (ERR_FSYS_CORRUPT); + + le = ZAP_LEAF_ENTRY(l, blksft, chunk); + + /* Verify the chunk entry */ + if (le->le_type != ZAP_CHUNK_ENTRY) + return (ERR_FSYS_CORRUPT); + + if (le->le_hash != h) + continue; + + if (zap_leaf_array_equal(l, blksft, le->le_name_chunk, + le->le_name_length, name)) { + + struct zap_leaf_array *la; + uint8_t *ip; + + if (le->le_int_size != 8 || le->le_value_length != 1) + return (ERR_FSYS_CORRUPT); + + /* get the uint64_t property value */ + la =
&ZAP_LEAF_CHUNK(l, blksft, + le->le_value_chunk).l_array; + ip = la->la_array; + + *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | + (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | + (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | + (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; + + return (0); + } + } + + return (ERR_FSYS_CORRUPT); +} + +/* + * Fat ZAP lookup + * + * Return: + * 0 - success + * errnum - failure + */ +static int +fzap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, zap_phys_t *zap, + char *name, uint64_t *value, char *stack) +{ + zap_leaf_phys_t *l; + uint64_t hash, idx, blkid; + int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); + + /* Verify if this is a fat zap header block */ + if (zap->zap_magic != (uint64_t)ZAP_MAGIC) + return (ERR_FSYS_CORRUPT); + + hash = zap_hash(ffi, zap->zap_salt, name); + if (errnum) + return (errnum); + + /* get block id from index */ + if (zap->zap_ptrtbl.zt_numblks != 0) { + /* external pointer tables not supported */ + return (ERR_FSYS_CORRUPT); + } + idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift); + blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))]; + + /* Get the leaf block */ + l = (zap_leaf_phys_t *)stack; + stack += 1<<blksft; + if ((errnum = dmu_read(ffi, zap_dnode, blkid, l, stack))) + return (errnum); + + return (zap_leaf_lookup(l, blksft, hash, name, value)); +} + +/* + * Read in the data of a zap object and find the value for a matching + * property name. + * + * Return: + * 0 - success + * errnum - failure + */ +static int +zap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, char *name, + uint64_t *val, char *stack) +{ + uint64_t block_type; + int size; + void *zapbuf; + + /* Read in the first block of the zap object data. 
*/ + zapbuf = stack; + size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; + stack += size; + if ((errnum = dmu_read(ffi, zap_dnode, 0, zapbuf, stack))) + return (errnum); + + block_type = *((uint64_t *)zapbuf); + + if (block_type == ZBT_MICRO) { + return (mzap_lookup(zapbuf, size, name, val)); + } else if (block_type == ZBT_HEADER) { + /* this is a fat zap */ + return (fzap_lookup(ffi, zap_dnode, zapbuf, name, + val, stack)); + } + + return (ERR_FSYS_CORRUPT); +} + +/* + * Get the dnode of an object number from the metadnode of an object set. + * + * Input + * mdn - metadnode to get the object dnode + * objnum - object number for the object dnode + * buf - data buffer that holds the returning dnode + * stack - scratch area + * + * Return: + * 0 - success + * errnum - failure + */ +static int +dnode_get(fsi_file_t *ffi, dnode_phys_t *mdn, uint64_t objnum, + uint8_t type, dnode_phys_t *buf, char *stack) +{ + uint64_t blkid, blksz; /* the block id this object dnode is in */ + int epbs; /* shift of number of dnodes in a block */ + int idx; /* index within a block */ + dnode_phys_t *dnbuf; + zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; + + blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; + epbs = zfs_log2(blksz) - DNODE_SHIFT; + blkid = objnum >> epbs; + idx = objnum & ((1<<epbs)-1); + + if (dnode_buf != NULL && dnode_mdn == mdn && + objnum >= dnode_start && objnum < dnode_end) { + grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE); + VERIFY_DN_TYPE(buf, type); + return (0); + } + + if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) { + dnbuf = dnode_buf; + dnode_mdn = mdn; + dnode_start = blkid << epbs; + dnode_end = (blkid + 1) << epbs; + } else { + dnbuf = (dnode_phys_t *)stack; + stack += blksz; + } + + if ((errnum = dmu_read(ffi, mdn, blkid, (char *)dnbuf, stack))) + return (errnum); + + grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); + VERIFY_DN_TYPE(buf, type); + + return (0); +} + +/* + * Check if this is a special file that resides at the top + * 
dataset of the pool. Currently this is the GRUB menu, + * boot signature and boot signature backup. + * str starts with '/'. + */ +static int +is_top_dataset_file(char *str) +{ + char *tptr; + + if (((tptr = strstr(str, "menu.lst"))) && + (tptr[8] == '\0' || tptr[8] == ' ') && + *(tptr-1) == '/') + return (1); + + if (strncmp(str, BOOTSIGN_DIR"/", + strlen(BOOTSIGN_DIR) + 1) == 0) + return (1); + + if (strcmp(str, BOOTSIGN_BACKUP) == 0) + return (1); + + return (0); +} + +/* + * Get the file dnode for a given file name where mdn is the meta dnode + * for this ZFS object set. When found, place the file dnode in dn. + * The 'path' argument will be mangled. + * + * Return: + * 0 - success + * errnum - failure + */ +static int +dnode_get_path(fsi_file_t *ffi, dnode_phys_t *mdn, char *path, + dnode_phys_t *dn, char *stack) +{ + uint64_t objnum, version; + char *cname, ch; + + if ((errnum = dnode_get(ffi, mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, + dn, stack))) + return (errnum); + + if ((errnum = zap_lookup(ffi, dn, ZPL_VERSION_STR, &version, stack))) + return (errnum); + if (version > ZPL_VERSION) + return (-1); + + if ((errnum = zap_lookup(ffi, dn, ZFS_ROOT_OBJ, &objnum, stack))) + return (errnum); + + if ((errnum = dnode_get(ffi, mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, + dn, stack))) + return (errnum); + + /* skip leading slashes */ + while (*path == '/') + path++; + + while (*path && !isspace(*path)) { + + /* get the next component name */ + cname = path; + while (*path && !isspace(*path) && *path != '/') + path++; + ch = *path; + *path = 0; /* ensure null termination */ + + if ((errnum = zap_lookup(ffi, dn, cname, &objnum, stack))) + return (errnum); + + objnum = ZFS_DIRENT_OBJ(objnum); + if ((errnum = dnode_get(ffi, mdn, objnum, 0, dn, stack))) + return (errnum); + + *path = ch; + while (*path == '/') + path++; + } + + /* We found the dnode for this file. Verify if it is a plain file. 
*/ + VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS); + + return (0); +} + +/* + * Get the default 'bootfs' property value from the rootpool. + * + * Return: + * 0 - success (the bootfs object number is stored in *obj) + * errnum - failure (ERR_FILESYSTEM_NOT_FOUND when 'pool_props' or its 'bootfs' entry cannot be looked up, or when bootfs is 0) + */ +static int +get_default_bootfsobj(fsi_file_t *ffi, dnode_phys_t *mosmdn, + uint64_t *obj, char *stack) +{ + uint64_t objnum = 0; + dnode_phys_t *dn = (dnode_phys_t *)stack; + stack += DNODE_SIZE; + + if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack))) + return (errnum); + + /* + * find the object number for 'pool_props', and get the dnode + * of the 'pool_props'. + */ + if (zap_lookup(ffi, dn, DMU_POOL_PROPS, &objnum, stack)) + return (ERR_FILESYSTEM_NOT_FOUND); + + if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_POOL_PROPS, dn, + stack))) + return (errnum); + + if (zap_lookup(ffi, dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) + return (ERR_FILESYSTEM_NOT_FOUND); + + if (!objnum) + return (ERR_FILESYSTEM_NOT_FOUND); + + + *obj = objnum; + return (0); +} + +/* + * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), + * e.g. pool/rootfs, or a given object number (obj), e.g. the object number + * of pool/rootfs. + * + * If no fsname and no obj are given, return the DSL_DIR metadnode. + * If fsname is given, return its metadnode and its matching object number. + * If only obj is given, return the metadnode for this object number.
+ * + * Return: + * 0 - success + * errnum - failure + */ +static int +get_objset_mdn(fsi_file_t *ffi, dnode_phys_t *mosmdn, char *fsname, + uint64_t *obj, dnode_phys_t *mdn, char *stack) +{ + uint64_t objnum, headobj; + char *cname, ch; + blkptr_t *bp; + objset_phys_t *osp; + + if (fsname == NULL && obj) { + headobj = *obj; + goto skip; + } + + if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, mdn, stack))) + return (errnum); + + if ((errnum = zap_lookup(ffi, mdn, DMU_POOL_ROOT_DATASET, &objnum, + stack))) + return (errnum); + + if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, mdn, + stack))) + return (errnum); + + if (fsname == NULL) { + headobj = + ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; + goto skip; + } + + /* take out the pool name */ + while (*fsname && !isspace(*fsname) && *fsname != '/') + fsname++; + + while (*fsname && !isspace(*fsname)) { + uint64_t childobj; + + while (*fsname == '/') + fsname++; + + cname = fsname; + while (*fsname && !isspace(*fsname) && *fsname != '/') + fsname++; + ch = *fsname; + *fsname = 0; + + childobj = + ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; + if ((errnum = dnode_get(ffi, mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))) + return (errnum); + + if (zap_lookup(ffi, mdn, cname, &objnum, stack)) + return (ERR_FILESYSTEM_NOT_FOUND); + + if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack))) + return (errnum); + + *fsname = ch; + } + headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; + if (obj) + *obj = headobj; + +skip: + if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, + stack))) + return (errnum); + + /* TODO: Add snapshot support here - for fsname=snapshot-name */ + + bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; + osp = (objset_phys_t *)stack; + stack += sizeof (objset_phys_t); + if ((errnum = zio_read(ffi, bp, osp, stack))) + return (errnum); + + grub_memmove((char 
*)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); + + return (0); +} + +/* + * For a given XDR packed nvlist, verify its 4-byte header (only the first 2 bytes are actually checked) and move on. + * + * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : + * + * encoding method/host endian (4 bytes) + * nvl_version (4 bytes) + * nvl_nvflag (4 bytes) + * encoded nvpairs: + * encoded size of the nvpair (4 bytes) + * decoded size of the nvpair (4 bytes) + * name string size (4 bytes) + * name string data (sizeof(NV_ALIGN4(string)) + * data type (4 bytes) + * # of elements in the nvpair (4 bytes) + * data + * 2 zero's for the last nvpair + * (end of the entire list) (8 bytes) + * + * Return: + * 0 - success + * 1 - failure + */ +static int +nvlist_unpack(char *nvlist, char **out) +{ + /* Verify if the 1st and 2nd byte in the nvlist are valid. */ + if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) + return (1); + + nvlist += 4; + *out = nvlist; + return (0); +} + +static char * +nvlist_array(char *nvlist, int index) +{ + int i, encode_size; + + for (i = 0; i < index; i++) { + /* skip nvl_version and nvl_nvflag (2 x 4 bytes) */ + nvlist = nvlist + 4 * 2; + + while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) + nvlist += encode_size; /* goto the next nvpair */ + + nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ + } + + return (nvlist); +} + +static int +nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, + int *nelmp) +{ + int name_len, type, slen, encode_size; + char *nvpair, *nvp_name, *strval = val; + uint64_t *intval = val; + + /* skip nvl_version and nvl_nvflag (2 x 4 bytes) */ + nvlist = nvlist + 4 * 2; + + /* + * Loop thru the nvpair list; a pair matches only when its name equals "name" exactly (same length, same bytes) and its type is valtype. + * The XDR representation of an integer is in big-endian byte order.
+ */ + while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) { + + nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ + + name_len = BSWAP_32(*(uint32_t *)nvpair); + nvpair += 4; + + nvp_name = nvpair; + nvpair = nvpair + ((name_len + 3) & ~3); /* align */ + + type = BSWAP_32(*(uint32_t *)nvpair); + nvpair += 4; + + if (strlen(name) == (size_t)name_len && + strncmp(nvp_name, name, name_len) == 0 && type == valtype) { + int nelm; + + if (((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)) + return (1); + nvpair += 4; + + switch (valtype) { + case DATA_TYPE_STRING: + slen = BSWAP_32(*(uint32_t *)nvpair); + nvpair += 4; + grub_memmove(strval, nvpair, slen); + strval[slen] = '\0'; + return (0); + + case DATA_TYPE_UINT64: + *intval = BSWAP_64(*(uint64_t *)nvpair); + return (0); + + case DATA_TYPE_NVLIST: + *(void **)val = (void *)nvpair; + return (0); + + case DATA_TYPE_NVLIST_ARRAY: + *(void **)val = (void *)nvpair; + if (nelmp) + *nelmp = nelm; + return (0); + } + } + + nvlist += encode_size; /* goto the next nvpair */ + } + + return (1); +} + +/* + * Check if this vdev is online and is in a good state: the mere presence of an offline, faulted, degraded or removed uint64 entry (whatever its value) fails the check. + */ +static int +vdev_validate(char *nv) +{ + uint64_t ival; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, + DATA_TYPE_UINT64, NULL) == 0) + return (ERR_DEV_VALUES); + + return (0); +} + +/* + * Get a list of valid vdev pathname from the boot device. + * The caller should already allocate MAXNAMELEN memory for bootpath.
+ */ +static int +vdev_get_bootpath(char *nv, char *bootpath) +{ + char type[16]; + + bootpath[0] = '\0'; + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, + NULL)) + return (ERR_FSYS_CORRUPT); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + if (vdev_validate(nv) != 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath, + DATA_TYPE_STRING, NULL) != 0) + return (ERR_NO_BOOTPATH); + + } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) { + int nelm, i; + char *child; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, + DATA_TYPE_NVLIST_ARRAY, &nelm)) + return (ERR_FSYS_CORRUPT); + + for (i = 0; i < nelm; i++) { + char tmp_path[MAXNAMELEN]; + char *child_i; + + child_i = nvlist_array(child, i); + if (vdev_validate(child_i) != 0) + continue; + + if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH, + tmp_path, DATA_TYPE_STRING, NULL) != 0) + return (ERR_NO_BOOTPATH); + + if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN) + return (ERR_WONT_FIT); + + if (strlen(bootpath) == 0) + sprintf(bootpath, "%s", tmp_path); + else + sprintf(bootpath, "%s %s", bootpath, tmp_path); + } + } + + return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH); +} + +/* + * Check the disk label information and retrieve needed vdev name-value pairs. + * + * Return: + * 0 - success + * ERR_* - failure + */ +static int +check_pool_label(fsi_file_t *ffi, int label, char *stack) +{ + vdev_phys_t *vdev; + uint64_t sector, pool_state, txg = 0; + char *nvlist, *nv; + zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; + + sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE + + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT; + + /* Read in the vdev name-value pair list (112K). 
*/ + if (devread(ffi, sector, 0, VDEV_PHYS_SIZE, stack) == 0) + return (ERR_READ); + + vdev = (vdev_phys_t *)stack; + + if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) + return (ERR_FSYS_CORRUPT); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + + if (pool_state == POOL_STATE_DESTROYED) + return (ERR_FILESYSTEM_NOT_FOUND); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, + current_rootpool, DATA_TYPE_STRING, NULL)) + return (ERR_FSYS_CORRUPT); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + + /* not an active device */ + if (txg == 0) + return (ERR_NO_BOOTPATH); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, + DATA_TYPE_NVLIST, NULL)) + return (ERR_FSYS_CORRUPT); + + if (vdev_get_bootpath(nv, current_bootpath)) + return (ERR_NO_BOOTPATH); + + return (0); +} + +/* + * zfs_mount() locates a valid uberblock of the root pool and read in its MOS + * to the memory address MOS. 
+ * + * Return: + * 1 - success + * 0 - failure + */ +int +zfs_mount(fsi_file_t *ffi, const char *options) +{ + char *stack; + int label = 0; + uberblock_phys_t *ub_array, *ubbest = NULL; + objset_phys_t *osp; + zfs_bootarea_t *zfs_ba; + + /* if zfs is already mounted, don't do it again */ + if (is_zfs_mount == 1) + return (1); + + /* get much bigger data block for zfs */ + if (((zfs_ba = malloc(sizeof (zfs_bootarea_t))) == NULL)) { + return (1); + } + bzero(zfs_ba, sizeof (zfs_bootarea_t)); + + /* replace small data area in fsi with big one */ + free(ffi->ff_fsi->f_data); + ffi->ff_fsi->f_data = (void *)zfs_ba; + + /* If an boot filesystem is passed in, set it to current_bootfs */ + if (options != NULL) { + if (strlen(options) < MAXNAMELEN) { + strcpy(current_bootfs, options); + } + } + + stackbase = ZFS_SCRATCH; + stack = stackbase; + ub_array = (uberblock_phys_t *)stack; + stack += VDEV_UBERBLOCK_RING; + + osp = (objset_phys_t *)stack; + stack += sizeof (objset_phys_t); + + /* XXX add back labels support? */ + for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) { + uint64_t sector = (label * sizeof (vdev_label_t) + + VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE + + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT; + + + /* Read in the uberblock ring (128K). */ + if (devread(ffi, sector, 0, VDEV_UBERBLOCK_RING, + (char *)ub_array) == 0) + continue; + + if ((ubbest = find_bestub(ffi, ub_array, label)) != NULL && + zio_read(ffi, &ubbest->ubp_uberblock.ub_rootbp, osp, stack) + == 0) { + + VERIFY_OS_TYPE(osp, DMU_OST_META); + + /* Got the MOS. Save it at the memory addr MOS. */ + grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); + + if (check_pool_label(ffi, label, stack)) + return (0); + + /* + * Copy fsi->f_data to ffi->ff_data since + * fsig_mount copies from ff_data to f_data + * overwriting fsi->f_data. 
+ */ + bcopy(zfs_ba, fsig_file_buf(ffi), FSYS_BUFLEN); + + is_zfs_mount = 1; + return (1); + } + } + + return (0); +} + +/* + * zfs_open() locates a file in the rootpool by following the + * MOS and places the dnode of the file in the memory address DNODE. + * + * Return: + * 1 - success + * 0 - failure + */ +int +zfs_open(fsi_file_t *ffi, char *filename) +{ + char *stack; + dnode_phys_t *mdn; + char *bootstring; + zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; + + file_buf = NULL; + stackbase = ZFS_SCRATCH; + stack = stackbase; + + mdn = (dnode_phys_t *)stack; + stack += sizeof (dnode_phys_t); + + dnode_mdn = NULL; + dnode_buf = (dnode_phys_t *)stack; + stack += 1<<DNODE_BLOCK_SHIFT; + + /* + * menu.lst is placed at the root pool filesystem level, + * do not goto 'current_bootfs'. + */ + if (is_top_dataset_file(filename)) { + if ((errnum = get_objset_mdn(ffi, MOS, NULL, NULL, mdn, stack))) + return (0); + + current_bootfs_obj = 0; + } else { + if (current_bootfs[0] == '\0') { + /* Get the default root filesystem object number */ + if ((errnum = get_default_bootfsobj(ffi, MOS, + ¤t_bootfs_obj, stack))) + return (0); + if ((errnum = get_objset_mdn(ffi, MOS, NULL, + ¤t_bootfs_obj, mdn, stack))) + return (0); + } else { + if ((errnum = get_objset_mdn(ffi, MOS, + current_bootfs, ¤t_bootfs_obj, mdn, stack))) + return (0); + } + + /* + * Put zfs rootpool and boot obj number into bootstring. 
+ */ + if (is_zfs_open == 0) { + char temp[25]; /* needs to hold long long */ + int alloc_size; + char zfs_bootstr[] = "zfs-bootfs="; + char zfs_bootpath[] = ",bootpath='"; + + sprintf(temp, "%llu", (unsigned long long) + current_bootfs_obj); + alloc_size = strlen(zfs_bootstr) + + strlen(current_rootpool) + + strlen(temp) + strlen(zfs_bootpath) + + strlen(current_bootpath) + 3; + bootstring = fsi_bootstring_alloc(ffi->ff_fsi, + alloc_size); + if (bootstring != NULL) { + strcpy(bootstring, zfs_bootstr); + strcat(bootstring, current_rootpool); + strcat(bootstring, "/"); + strcat(bootstring, temp); + strcat(bootstring, zfs_bootpath); + strcat(bootstring, current_bootpath); + strcat(bootstring, "'"); + is_zfs_open = 1; + } + } + } + + if (dnode_get_path(ffi, mdn, filename, DNODE, stack)) { + errnum = ERR_FILE_NOT_FOUND; + return (0); + } + + /* get the file size and set the file position to 0 */ + filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size; + filepos = 0; + + dnode_buf = NULL; + return (1); +} + +/* + * zfs_read reads in the data blocks pointed by the DNODE. + * + * Return: + * len - the length successfully read in to the buffer + * 0 - failure + */ +int +zfs_read(fsi_file_t *ffi, char *buf, int len) +{ + char *stack; + int blksz, length, movesize; + zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; + + if (file_buf == NULL) { + file_buf = stackbase; + stackbase += SPA_MAXBLOCKSIZE; + file_start = file_end = 0; + } + stack = stackbase; + + /* + * If offset is in memory, move it into the buffer provided and return. + */ + if (filepos >= file_start && filepos+len <= file_end) { + grub_memmove(buf, file_buf + filepos - file_start, len); + filepos += len; + return (len); + } + + blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT; + + /* + * Entire Dnode is too big to fit into the space available. We + * will need to read it in chunks. 
This could be optimized to + * read in as large a chunk as there is space available, but for + * now, this only reads in one data block at a time. + */ + length = len; + while (length) { + /* + * Find requested blkid and the offset within that block. + */ + uint64_t blkid = filepos / blksz; + + if ((errnum = dmu_read(ffi, DNODE, blkid, file_buf, stack))) + return (0); + + file_start = blkid * blksz; + file_end = file_start + blksz; + + movesize = MIN(length, file_end - filepos); + + grub_memmove(buf, file_buf + filepos - file_start, + movesize); + buf += movesize; + length -= movesize; + filepos += movesize; + } + + return (len); +} + +/* + * No-Op + */ +int +zfs_embed(int *start_sector, int needed_sectors) +{ + return (1); +} + +fsi_plugin_ops_t * +fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name) +{ + static fsig_plugin_ops_t ops = { + FSIMAGE_PLUGIN_VERSION, + .fpo_mount = zfs_mount, + .fpo_dir = zfs_open, + .fpo_read = zfs_read + }; + + *name = "zfs"; + return (fsig_init(fp, &ops)); +} diff --git a/tools/libfsimage/zfs/fsys_zfs.h b/tools/libfsimage/zfs/fsys_zfs.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/fsys_zfs.h @@ -0,0 +1,203 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#ifndef _FSYS_ZFS_H +#define _FSYS_ZFS_H + +#include <fsimage_grub.h> +#include <fsimage_priv.h> + +#include "zfs-include/zfs.h" +#include "zfs-include/dmu.h" +#include "zfs-include/spa.h" +#include "zfs-include/zio.h" +#include "zfs-include/zio_checksum.h" +#include "zfs-include/vdev_impl.h" +#include "zfs-include/zap_impl.h" +#include "zfs-include/zap_leaf.h" +#include "zfs-include/uberblock_impl.h" +#include "zfs-include/dnode.h" +#include "zfs-include/dsl_dir.h" +#include "zfs-include/zfs_acl.h" +#include "zfs-include/zfs_znode.h" +#include "zfs-include/dsl_dataset.h" +#include "zfs-include/zil.h" +#include "zfs-include/dmu_objset.h" + +/* + * Global Memory addresses to store MOS and DNODE data + */ +#define MOS ((dnode_phys_t *)(((zfs_bootarea_t *) \ + (ffi->ff_fsi->f_data))->zfs_data)) +#define DNODE (MOS+1) /* move sizeof(dnode_phys_t) bytes */ +#define ZFS_SCRATCH ((char *)(DNODE+1)) + +#define MAXNAMELEN 256 + +typedef struct zfs_bootarea { + char zfs_current_bootpath[MAXNAMELEN]; + char zfs_current_rootpool[MAXNAMELEN]; + char zfs_current_bootfs[MAXNAMELEN]; + uint64_t zfs_current_bootfs_obj; + int zfs_open; + + /* cache for a file block of the currently zfs_open()-ed file */ + void *zfs_file_buf; + uint64_t zfs_file_start; + uint64_t zfs_file_end; + + /* cache for a dnode block */ + dnode_phys_t *zfs_dnode_buf; + dnode_phys_t *zfs_dnode_mdn; + uint64_t zfs_dnode_start; + uint64_t zfs_dnode_end; + + char *zfs_stackbase; + char zfs_data[0x400000]; +} zfs_bootarea_t; + +/* + * Verify dnode type. + * Can only be used in functions returning non-0 for failure. 
+ */ +#define VERIFY_DN_TYPE(dnp, type) \ + if (type && (dnp)->dn_type != type) { \ + return (ERR_FSYS_CORRUPT); \ + } + +/* + * Verify object set type. + * Can only be used in functions returning 0 for failure. + */ +#define VERIFY_OS_TYPE(osp, type) \ + if (type && (osp)->os_type != type) { \ + errnum = ERR_FSYS_CORRUPT; \ + return (0); \ + } + +#define ZPOOL_PROP_BOOTFS "bootfs" + +/* General macros */ +#define BSWAP_8(x) ((x) & 0xff) +#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) +#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) +#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) + +/* + * XXX Match these macro up with real zfs once we have nvlist support so that we + * can support large sector disks. + */ +#define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) +#undef offsetof +#define offsetof(t, m) (size_t)(&(((t *)0)->m)) +#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT +#define VDEV_UBERBLOCK_OFFSET(n) \ +offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) + +typedef struct uberblock uberblock_t; + +/* XXX Uberblock_phys_t is no longer in the kernel zfs */ +typedef struct uberblock_phys { + uberblock_t ubp_uberblock; + char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - + sizeof (zio_block_tail_t)]; + zio_block_tail_t ubp_zbt; +} uberblock_phys_t; + +/* + * Macros to get fields in a bp or DVA. + */ +#define P2PHASE(x, align) ((x) & ((align) - 1)) +#define DVA_OFFSET_TO_PHYS_SECTOR(offset) \ + ((offset + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT) + +/* + * For nvlist manipulation. 
(from nvpair.h) + */ +#define NV_ENCODE_NATIVE 0 +#define NV_ENCODE_XDR 1 +#define HOST_ENDIAN 1 /* for x86 machine */ +#define DATA_TYPE_UINT64 8 +#define DATA_TYPE_STRING 9 +#define DATA_TYPE_NVLIST 19 +#define DATA_TYPE_NVLIST_ARRAY 20 + +/* + * Decompression Entry - lzjb + */ +#ifndef NBBY +#define NBBY 8 +#endif + +typedef int zfs_decomp_func_t(void *s_start, void *d_start, size_t s_len, + size_t d_len); +typedef struct decomp_entry { + char *name; + zfs_decomp_func_t *decomp_func; +} decomp_entry_t; + +/* + * FAT ZAP data structures + */ +#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ +#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n)))) +#define CHAIN_END 0xffff /* end of the chunk chain */ + +/* + * The amount of space within the chunk available for the array is: + * chunk size - space for type (1) - space for next pointer (2) + */ +#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) + +#define ZAP_LEAF_HASH_SHIFT(bs) (bs - 5) +#define ZAP_LEAF_HASH_NUMENTRIES(bs) (1 << ZAP_LEAF_HASH_SHIFT(bs)) +#define LEAF_HASH(bs, h) \ + ((ZAP_LEAF_HASH_NUMENTRIES(bs)-1) & \ + ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs)-l->l_hdr.lh_prefix_len))) + +/* + * The amount of space available for chunks is: + * block size shift - hash entry size (2) * number of hash + * entries - header space (2*chunksize) + */ +#define ZAP_LEAF_NUMCHUNKS(bs) \ + (((1<<bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(bs)) / \ + ZAP_LEAF_CHUNKSIZE - 2) + +/* + * The chunks start immediately after the hash table. The end of the + * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a + * chunk_t. 
+ */ +#define ZAP_LEAF_CHUNK(l, bs, idx) \ + ((zap_leaf_chunk_t *)(l->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[idx] +#define ZAP_LEAF_ENTRY(l, bs, idx) (&ZAP_LEAF_CHUNK(l, bs, idx).l_entry) + +extern void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); +extern void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); +extern void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); +extern void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); +extern void zio_checksum_SHA256(const void *, uint64_t, zio_cksum_t *); +extern int lzjb_decompress(void *, void *, size_t, size_t); + +#endif /* !_FSYS_ZFS_H */ diff --git a/tools/libfsimage/zfs/mb_info.h b/tools/libfsimage/zfs/mb_info.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/mb_info.h @@ -0,0 +1,217 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2000,2003 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * The structure type "mod_list" is used by the "multiboot_info" structure. 
+ */ + +struct mod_list +{ + /* the memory used goes from bytes 'mod_start' to 'mod_end-1' inclusive */ + unsigned long mod_start; + unsigned long mod_end; + + /* Module command line */ + unsigned long cmdline; + + /* padding to take it to 16 bytes (must be zero) */ + unsigned long pad; +}; + + +/* + * INT-15, AX=E820 style "AddressRangeDescriptor" + * ...with a "size" parameter on the front which is the structure size - 4, + * pointing to the next one, up until the full buffer length of the memory + * map has been reached. + */ + +struct AddrRangeDesc +{ + unsigned long size; + unsigned long long BaseAddr; + unsigned long long Length; + unsigned long Type; + + /* unspecified optional padding... */ +} __attribute__ ((packed)); + +/* usable memory "Type", all others are reserved. */ +#define MB_ARD_MEMORY 1 + + +/* Drive Info structure. */ +struct drive_info +{ + /* The size of this structure. */ + unsigned long size; + + /* The BIOS drive number. */ + unsigned char drive_number; + + /* The access mode (see below). */ + unsigned char drive_mode; + + /* The BIOS geometry. */ + unsigned short drive_cylinders; + unsigned char drive_heads; + unsigned char drive_sectors; + + /* The array of I/O ports used for the drive. */ + unsigned short drive_ports[0]; +}; + +/* Drive Mode. */ +#define MB_DI_CHS_MODE 0 +#define MB_DI_LBA_MODE 1 + + +/* APM BIOS info. */ +struct apm_info +{ + unsigned short version; + unsigned short cseg; + unsigned long offset; + unsigned short cseg_16; + unsigned short dseg_16; + unsigned short cseg_len; + unsigned short cseg_16_len; + unsigned short dseg_16_len; +}; + + +/* + * MultiBoot Info description + * + * This is the struct passed to the boot image. This is done by placing + * its address in the EAX register. 
+ */ + +struct multiboot_info +{ + /* MultiBoot info version number */ + unsigned long flags; + + /* Available memory from BIOS */ + unsigned long mem_lower; + unsigned long mem_upper; + + /* "root" partition */ + unsigned long boot_device; + + /* Kernel command line */ + unsigned long cmdline; + + /* Boot-Module list */ + unsigned long mods_count; + unsigned long mods_addr; + + union + { + struct + { + /* (a.out) Kernel symbol table info */ + unsigned long tabsize; + unsigned long strsize; + unsigned long addr; + unsigned long pad; + } + a; + + struct + { + /* (ELF) Kernel section header table */ + unsigned long num; + unsigned long size; + unsigned long addr; + unsigned long shndx; + } + e; + } + syms; + + /* Memory Mapping buffer */ + unsigned long mmap_length; + unsigned long mmap_addr; + + /* Drive Info buffer */ + unsigned long drives_length; + unsigned long drives_addr; + + /* ROM configuration table */ + unsigned long config_table; + + /* Boot Loader Name */ + unsigned long boot_loader_name; + + /* APM table */ + unsigned long apm_table; + + /* Video */ + unsigned long vbe_control_info; + unsigned long vbe_mode_info; + unsigned short vbe_mode; + unsigned short vbe_interface_seg; + unsigned short vbe_interface_off; + unsigned short vbe_interface_len; +}; + +/* + * Flags to be set in the 'flags' parameter above + */ + +/* is there basic lower/upper memory information? */ +#define MB_INFO_MEMORY 0x00000001 +/* is there a boot device set? */ +#define MB_INFO_BOOTDEV 0x00000002 +/* is the command-line defined? */ +#define MB_INFO_CMDLINE 0x00000004 +/* are there modules to do something with? */ +#define MB_INFO_MODS 0x00000008 + +/* These next two are mutually exclusive */ + +/* is there a symbol table loaded? */ +#define MB_INFO_AOUT_SYMS 0x00000010 +/* is there an ELF section header table? */ +#define MB_INFO_ELF_SHDR 0x00000020 + +/* is there a full memory map? */ +#define MB_INFO_MEM_MAP 0x00000040 + +/* Is there drive info? 
*/ +#define MB_INFO_DRIVE_INFO 0x00000080 + +/* Is there a config table? */ +#define MB_INFO_CONFIG_TABLE 0x00000100 + +/* Is there a boot loader name? */ +#define MB_INFO_BOOT_LOADER_NAME 0x00000200 + +/* Is there a APM table? */ +#define MB_INFO_APM_TABLE 0x00000400 + +/* Is there video information? */ +#define MB_INFO_VIDEO_INFO 0x00000800 + +/* + * The following value must be present in the EAX register. + */ + +#define MULTIBOOT_VALID 0x2BADB002 diff --git a/tools/libfsimage/zfs/zfs-include/dmu.h b/tools/libfsimage/zfs/zfs-include/dmu.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/dmu.h @@ -0,0 +1,105 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DMU_H +#define _SYS_DMU_H + +/* + * This file describes the interface that the DMU provides for its + * consumers. + * + * The DMU also interacts with the SPA. That interface is described in + * dmu_spa.h. 
+ */ +typedef enum dmu_object_type { + DMU_OT_NONE, + /* general: */ + DMU_OT_OBJECT_DIRECTORY, /* ZAP */ + DMU_OT_OBJECT_ARRAY, /* UINT64 */ + DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ + DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ + DMU_OT_BPLIST, /* UINT64 */ + DMU_OT_BPLIST_HDR, /* UINT64 */ + /* spa: */ + DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ + DMU_OT_SPACE_MAP, /* UINT64 */ + /* zil: */ + DMU_OT_INTENT_LOG, /* UINT64 */ + /* dmu: */ + DMU_OT_DNODE, /* DNODE */ + DMU_OT_OBJSET, /* OBJSET */ + /* dsl: */ + DMU_OT_DSL_DIR, /* UINT64 */ + DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ + DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ + DMU_OT_DSL_PROPS, /* ZAP */ + DMU_OT_DSL_DATASET, /* UINT64 */ + /* zpl: */ + DMU_OT_ZNODE, /* ZNODE */ + DMU_OT_ACL, /* ACL */ + DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ + DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ + DMU_OT_MASTER_NODE, /* ZAP */ + DMU_OT_UNLINKED_SET, /* ZAP */ + /* zvol: */ + DMU_OT_ZVOL, /* UINT8 */ + DMU_OT_ZVOL_PROP, /* ZAP */ + /* other; for testing only! */ + DMU_OT_PLAIN_OTHER, /* UINT8 */ + DMU_OT_UINT64_OTHER, /* UINT64 */ + DMU_OT_ZAP_OTHER, /* ZAP */ + /* new object types: */ + DMU_OT_ERROR_LOG, /* ZAP */ + DMU_OT_SPA_HISTORY, /* UINT8 */ + DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ + DMU_OT_POOL_PROPS, /* ZAP */ + + DMU_OT_NUMTYPES +} dmu_object_type_t; + +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + +/* + * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
+ */ +#define DMU_POOL_DIRECTORY_OBJECT 1 +#define DMU_POOL_CONFIG "config" +#define DMU_POOL_ROOT_DATASET "root_dataset" +#define DMU_POOL_SYNC_BPLIST "sync_bplist" +#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" +#define DMU_POOL_ERRLOG_LAST "errlog_last" +#define DMU_POOL_SPARES "spares" +#define DMU_POOL_DEFLATE "deflate" +#define DMU_POOL_HISTORY "history" +#define DMU_POOL_PROPS "pool_props" +#define DMU_POOL_L2CACHE "l2cache" + +#endif /* _SYS_DMU_H */ diff --git a/tools/libfsimage/zfs/zfs-include/dmu_objset.h b/tools/libfsimage/zfs/zfs-include/dmu_objset.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/dmu_objset.h @@ -0,0 +1,35 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_DMU_OBJSET_H +#define _SYS_DMU_OBJSET_H + +typedef struct objset_phys { + dnode_phys_t os_meta_dnode; + zil_header_t os_zil_header; + uint64_t os_type; + char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) - + sizeof (uint64_t)]; +} objset_phys_t; + +#endif /* _SYS_DMU_OBJSET_H */ diff --git a/tools/libfsimage/zfs/zfs-include/dnode.h b/tools/libfsimage/zfs/zfs-include/dnode.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/dnode.h @@ -0,0 +1,76 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DNODE_H +#define _SYS_DNODE_H + +/* + * Fixed constants. + */ +#define DNODE_SHIFT 9 /* 512 bytes */ +#define DN_MIN_INDBLKSHIFT 10 /* 1k */ +#define DN_MAX_INDBLKSHIFT 14 /* 16k */ +#define DNODE_BLOCK_SHIFT 14 /* 16k */ +#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ +#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ +#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ + +/* + * Derived constants. 
+ */ +#define DNODE_SIZE (1 << DNODE_SHIFT) +#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) +#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) +#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) + +#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) +#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) +#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) + +#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ + (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) + +typedef struct dnode_phys { + uint8_t dn_type; /* dmu_object_type_t */ + uint8_t dn_indblkshift; /* ln2(indirect block size) */ + uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ + uint8_t dn_nblkptr; /* length of dn_blkptr */ + uint8_t dn_bonustype; /* type of data in bonus buffer */ + uint8_t dn_checksum; /* ZIO_CHECKSUM type */ + uint8_t dn_compress; /* ZIO_COMPRESS type */ + uint8_t dn_flags; /* DNODE_FLAG_* */ + uint16_t dn_datablkszsec; /* data block size in 512b sectors */ + uint16_t dn_bonuslen; /* length of dn_bonus */ + uint8_t dn_pad2[4]; + + /* accounting is protected by dn_dirty_mtx */ + uint64_t dn_maxblkid; /* largest allocated block ID */ + uint64_t dn_used; /* bytes (or sectors) of disk space */ + + uint64_t dn_pad3[4]; + + blkptr_t dn_blkptr[1]; + uint8_t dn_bonus[DN_MAX_BONUSLEN]; +} dnode_phys_t; + +#endif /* _SYS_DNODE_H */ diff --git a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h @@ -0,0 +1,53 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DSL_DATASET_H +#define _SYS_DSL_DATASET_H + +typedef struct dsl_dataset_phys { + uint64_t ds_dir_obj; + uint64_t ds_prev_snap_obj; + uint64_t ds_prev_snap_txg; + uint64_t ds_next_snap_obj; + uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */ + uint64_t ds_num_children; /* clone/snap children; ==0 for head */ + uint64_t ds_creation_time; /* seconds since 1970 */ + uint64_t ds_creation_txg; + uint64_t ds_deadlist_obj; + uint64_t ds_used_bytes; + uint64_t ds_compressed_bytes; + uint64_t ds_uncompressed_bytes; + uint64_t ds_unique_bytes; /* only relevant to snapshots */ + /* + * The ds_fsid_guid is a 56-bit ID that can change to avoid + * collisions. The ds_guid is a 64-bit ID that will never + * change, so there is a small probability that it will collide. 
+ */ + uint64_t ds_fsid_guid; + uint64_t ds_guid; + uint64_t ds_flags; + blkptr_t ds_bp; + uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ +} dsl_dataset_phys_t; + +#endif /* _SYS_DSL_DATASET_H */ diff --git a/tools/libfsimage/zfs/zfs-include/dsl_dir.h b/tools/libfsimage/zfs/zfs-include/dsl_dir.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/dsl_dir.h @@ -0,0 +1,49 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_DSL_DIR_H +#define _SYS_DSL_DIR_H + +typedef struct dsl_dir_phys { + uint64_t dd_creation_time; /* not actually used */ + uint64_t dd_head_dataset_obj; + uint64_t dd_parent_obj; + uint64_t dd_clone_parent_obj; + uint64_t dd_child_dir_zapobj; + /* + * how much space our children are accounting for; for leaf + * datasets, == physical space used by fs + snaps + */ + uint64_t dd_used_bytes; + uint64_t dd_compressed_bytes; + uint64_t dd_uncompressed_bytes; + /* Administrative quota setting */ + uint64_t dd_quota; + /* Administrative reservation setting */ + uint64_t dd_reserved; + uint64_t dd_props_zapobj; + uint64_t dd_deleg_zapobj; /* dataset permissions */ + uint64_t dd_pad[20]; /* pad out to 256 bytes for good measure */ +} dsl_dir_phys_t; + +#endif /* _SYS_DSL_DIR_H */ diff --git a/tools/libfsimage/zfs/zfs-include/spa.h b/tools/libfsimage/zfs/zfs-include/spa.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/spa.h @@ -0,0 +1,283 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#ifndef _SYS_SPA_H +#define _SYS_SPA_H + +/* + * General-purpose 32-bit and 64-bit bitfield encodings (SET modifies x). + */ +#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) +#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) +#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) +#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) + +#define BF32_GET(x, low, len) BF32_DECODE(x, low, len) +#define BF64_GET(x, low, len) BF64_DECODE(x, low, len) + +#define BF32_SET(x, low, len, val) \ + ((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len)) +#define BF64_SET(x, low, len, val) \ + ((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len)) + +#define BF32_GET_SB(x, low, len, shift, bias) \ + ((BF32_GET(x, low, len) + (bias)) << (shift)) +#define BF64_GET_SB(x, low, len, shift, bias) \ + ((BF64_GET(x, low, len) + (bias)) << (shift)) + +#define BF32_SET_SB(x, low, len, shift, bias, val) \ + BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) +#define BF64_SET_SB(x, low, len, shift, bias, val) \ + BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) + +/* + * We currently support nine block sizes, from 512 bytes to 128K. + * We could go higher, but the benefits are near-zero and the cost + * of COWing a giant block to modify one byte would become excessive. + */ +#define SPA_MINBLOCKSHIFT 9 +#define SPA_MAXBLOCKSHIFT 17 +#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) +#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) + +#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) + +/* + * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. + * The ASIZE encoding should be at least 64 times larger (6 more bits) + * to support up to 4-way RAID-Z mirror mode with worst-case gang block + * overhead, three DVAs per bp, plus one more bit in case we do anything + * else that expands the ASIZE. 
+ */ +#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ +#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ +#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ + +/* + * All SPA data is represented by 128-bit data virtual addresses (DVAs). + * The members of the dva_t should be considered opaque outside the SPA. + */ +typedef struct dva { + uint64_t dva_word[2]; +} dva_t; + +/* + * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. + */ +typedef struct zio_cksum { + uint64_t zc_word[4]; +} zio_cksum_t; + +/* + * Each block is described by its DVAs, time of birth, checksum, etc. + * The word-by-word, bit-by-bit layout of the blkptr is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | vdev3 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 |G| offset3 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * 
+-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | checksum[2] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | checksum[3] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * vdev virtual device ID + * offset offset into virtual device + * LSIZE logical size + * PSIZE physical size (after compression) + * ASIZE allocated size (including RAID-Z parity and gang block headers) + * GRID RAID-Z layout information (reserved for future use) + * cksum checksum function + * comp compression function + * G gang block indicator + * E endianness + * type DMU object type + * lvl level of indirection + * birth txg transaction group in which the block was born + * fill count number of non-zero blocks under this bp + * checksum[4] 256-bit checksum of the data this bp describes + */ +typedef struct blkptr { + dva_t blk_dva[3]; /* 128-bit Data Virtual Address */ + uint64_t blk_prop; /* size, compression, type, etc */ + uint64_t blk_pad[3]; /* Extra space for the future */ + uint64_t blk_birth; /* transaction group at birth */ + uint64_t blk_fill; /* fill count */ + zio_cksum_t blk_cksum; /* 256-bit checksum */ +} blkptr_t; + +#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ +#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ + +/* + * Macros to get and set fields in a bp or DVA. 
+ */ +#define DVA_GET_ASIZE(dva) \ + BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_ASIZE(dva, x) \ + BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) +#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) + +#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) +#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) + +#define DVA_GET_OFFSET(dva) \ + BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_OFFSET(dva, x) \ + BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) +#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) + +#define BP_GET_LSIZE(bp) \ + (BP_IS_HOLE(bp) ? 0 : \ + BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)) +#define BP_SET_LSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_PSIZE(bp) \ + BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) +#define BP_SET_PSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) + +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) + +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) + +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) + +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) +#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) + +#define BP_GET_ASIZE(bp) \ + (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ + DVA_GET_ASIZE(&(bp)->blk_dva[2])) + +#define 
BP_GET_UCSIZE(bp) \ + ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \ + BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) + +#define BP_GET_NDVAS(bp) \ + (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ + !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ + !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + +#define BP_COUNT_GANG(bp) \ + (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ + DVA_GET_GANG(&(bp)->blk_dva[1]) + \ + DVA_GET_GANG(&(bp)->blk_dva[2])) + +#define DVA_EQUAL(dva1, dva2) \ + ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ + (dva1)->dva_word[0] == (dva2)->dva_word[0]) + +#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ + (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ + ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ + ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ + ((zc1).zc_word[3] - (zc2).zc_word[3]))) + + +#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) + +#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ +{ \ + (zcp)->zc_word[0] = w0; \ + (zcp)->zc_word[1] = w1; \ + (zcp)->zc_word[2] = w2; \ + (zcp)->zc_word[3] = w3; \ +} + +#define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) +#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) +#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) +#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) + +#define BP_ZERO(bp) \ +{ \ + (bp)->blk_dva[0].dva_word[0] = 0; \ + (bp)->blk_dva[0].dva_word[1] = 0; \ + (bp)->blk_dva[1].dva_word[0] = 0; \ + (bp)->blk_dva[1].dva_word[1] = 0; \ + (bp)->blk_dva[2].dva_word[0] = 0; \ + (bp)->blk_dva[2].dva_word[1] = 0; \ + (bp)->blk_prop = 0; \ + (bp)->blk_pad[0] = 0; \ + (bp)->blk_pad[1] = 0; \ + (bp)->blk_pad[2] = 0; \ + (bp)->blk_birth = 0; \ + (bp)->blk_fill = 0; \ + ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ +} + +/* + * Note: the byteorder is either 0 or -1, both of which are palindromes. + * This simplifies the endianness handling a bit. 
+ */ +#ifdef _BIG_ENDIAN +#define ZFS_HOST_BYTEORDER (0ULL) +#else +#define ZFS_HOST_BYTEORDER (-1ULL) +#endif + +#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) + +#define BP_SPRINTF_LEN 320 + +#endif /* _SYS_SPA_H */ diff --git a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h @@ -0,0 +1,49 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_UBERBLOCK_IMPL_H +#define _SYS_UBERBLOCK_IMPL_H + +/* + * The uberblock version is incremented whenever an incompatible on-disk + * format change is made to the SPA, DMU, or ZAP. + * + * Note: the first two fields should never be moved. When a storage pool + * is opened, the uberblock must be read off the disk before the version + * can be checked. If the ub_version field is moved, we may not detect + * version mismatch. If the ub_magic field is moved, applications that + * expect the magic number in the first word won't work. 
+ */ +#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ +#define UBERBLOCK_SHIFT 10 /* up to 1K */ + +struct uberblock { + uint64_t ub_magic; /* UBERBLOCK_MAGIC */ + uint64_t ub_version; /* ZFS_VERSION */ + uint64_t ub_txg; /* txg of last sync */ + uint64_t ub_guid_sum; /* sum of all vdev guids */ + uint64_t ub_timestamp; /* UTC time of last sync */ + blkptr_t ub_rootbp; /* MOS objset_phys_t */ +}; + +#endif /* _SYS_UBERBLOCK_IMPL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/vdev_impl.h b/tools/libfsimage/zfs/zfs-include/vdev_impl.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/vdev_impl.h @@ -0,0 +1,70 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_VDEV_IMPL_H +#define _SYS_VDEV_IMPL_H + +#define VDEV_SKIP_SIZE (8 << 10) +#define VDEV_BOOT_HEADER_SIZE (8 << 10) +#define VDEV_PHYS_SIZE (112 << 10) +#define VDEV_UBERBLOCK_RING (128 << 10) + +/* ZFS boot block */ +#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL +#define VDEV_BOOT_VERSION 1 /* version number */ + +typedef struct vdev_boot_header { + uint64_t vb_magic; /* VDEV_BOOT_MAGIC */ + uint64_t vb_version; /* VDEV_BOOT_VERSION */ + uint64_t vb_offset; /* start offset (bytes) */ + uint64_t vb_size; /* size (bytes) */ + char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)]; +} vdev_boot_header_t; + +typedef struct vdev_phys { + char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)]; + zio_block_tail_t vp_zbt; +} vdev_phys_t; + +typedef struct vdev_label { + char vl_pad[VDEV_SKIP_SIZE]; /* 8K */ + vdev_boot_header_t vl_boot_header; /* 8K */ + vdev_phys_t vl_vdev_phys; /* 112K */ + char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ +} vdev_label_t; /* 256K total */ + +/* + * Size and offset of embedded boot loader region on each label. + * The total size of the first two labels plus the boot area is 4MB. + */ +#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) +#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ + +/* + * Size of label regions at the start and end of each leaf device. + */ +#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) +#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) +#define VDEV_LABELS 4 + +#endif /* _SYS_VDEV_IMPL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zap_impl.h b/tools/libfsimage/zfs/zfs-include/zap_impl.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zap_impl.h @@ -0,0 +1,110 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZAP_IMPL_H +#define _SYS_ZAP_IMPL_H + +#define ZAP_MAGIC 0x2F52AB2ABULL + +#define ZAP_HASHBITS 28 +#define MZAP_ENT_LEN 64 +#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) +#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT +#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) + +typedef struct mzap_ent_phys { + uint64_t mze_value; + uint32_t mze_cd; + uint16_t mze_pad; /* in case we want to chain them someday */ + char mze_name[MZAP_NAME_LEN]; +} mzap_ent_phys_t; + +typedef struct mzap_phys { + uint64_t mz_block_type; /* ZBT_MICRO */ + uint64_t mz_salt; + uint64_t mz_pad[6]; + mzap_ent_phys_t mz_chunk[1]; + /* actually variable size depending on block size */ +} mzap_phys_t; + +/* + * The (fat) zap is stored in one object. It is an array of + * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of: + * + * ptrtbl fits in first block: + * [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ... + * + * ptrtbl too big for first block: + * [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ... 
+ * + */ + +#define ZBT_LEAF ((1ULL << 63) + 0) +#define ZBT_HEADER ((1ULL << 63) + 1) +#define ZBT_MICRO ((1ULL << 63) + 3) +/* any other values are ptrtbl blocks */ + +/* + * the embedded pointer table takes up half a block: + * block size / entry size (2^3) / 2 + */ +#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) + +/* + * The embedded pointer table starts half-way through the block. Since + * the pointer table itself is half the block, it starts at (64-bit) + * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)). + */ +#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \ + ((uint64_t *)(zap)->zap_f.zap_phys) \ + [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))] + +/* + * TAKE NOTE: + * If zap_phys_t is modified, zap_byteswap() must be modified. + */ +typedef struct zap_phys { + uint64_t zap_block_type; /* ZBT_HEADER */ + uint64_t zap_magic; /* ZAP_MAGIC */ + + struct zap_table_phys { + uint64_t zt_blk; /* starting block number */ + uint64_t zt_numblks; /* number of blocks */ + uint64_t zt_shift; /* bits to index it */ + uint64_t zt_nextblk; /* next (larger) copy start block */ + uint64_t zt_blks_copied; /* number source blocks copied */ + } zap_ptrtbl; + + uint64_t zap_freeblk; /* the next free block */ + uint64_t zap_num_leafs; /* number of leafs */ + uint64_t zap_num_entries; /* number of entries */ + uint64_t zap_salt; /* salt to stir into hash function */ + /* + * This structure is followed by padding, and then the embedded + * pointer table. The embedded pointer table takes up second + * half of the block. It is accessed using the + * ZAP_EMBEDDED_PTRTBL_ENT() macro. + */ +} zap_phys_t; + +#endif /* _SYS_ZAP_IMPL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zap_leaf.h b/tools/libfsimage/zfs/zfs-include/zap_leaf.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zap_leaf.h @@ -0,0 +1,100 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZAP_LEAF_H +#define _SYS_ZAP_LEAF_H + +#define ZAP_LEAF_MAGIC 0x2AB1EAF + +/* chunk size = 24 bytes */ +#define ZAP_LEAF_CHUNKSIZE 24 + +/* + * The amount of space within the chunk available for the array is: + * chunk size - space for type (1) - space for next pointer (2) + */ +#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) + +typedef enum zap_chunk_type { + ZAP_CHUNK_FREE = 253, + ZAP_CHUNK_ENTRY = 252, + ZAP_CHUNK_ARRAY = 251, + ZAP_CHUNK_TYPE_MAX = 250 +} zap_chunk_type_t; + +/* + * TAKE NOTE: + * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified. 
+ */ +typedef struct zap_leaf_phys { + struct zap_leaf_header { + uint64_t lh_block_type; /* ZBT_LEAF */ + uint64_t lh_pad1; + uint64_t lh_prefix; /* hash prefix of this leaf */ + uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ + uint16_t lh_nfree; /* number free chunks */ + uint16_t lh_nentries; /* number of entries */ + uint16_t lh_prefix_len; /* num bits used to id this */ + +/* above is accessible to zap, below is zap_leaf private */ + + uint16_t lh_freelist; /* chunk head of free list */ + uint8_t lh_pad2[12]; + } l_hdr; /* 2 24-byte chunks */ + + /* + * The header is followed by a hash table with + * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is + * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) + * zap_leaf_chunk structures. These structures are accessed + * with the ZAP_LEAF_CHUNK() macro. + */ + + uint16_t l_hash[1]; +} zap_leaf_phys_t; + +typedef union zap_leaf_chunk { + struct zap_leaf_entry { + uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ + uint8_t le_int_size; /* size of ints */ + uint16_t le_next; /* next entry in hash chain */ + uint16_t le_name_chunk; /* first chunk of the name */ + uint16_t le_name_length; /* bytes in name, incl null */ + uint16_t le_value_chunk; /* first chunk of the value */ + uint16_t le_value_length; /* value length in ints */ + uint32_t le_cd; /* collision differentiator */ + uint64_t le_hash; /* hash value of the name */ + } l_entry; + struct zap_leaf_array { + uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ + uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; + uint16_t la_next; /* next blk or CHAIN_END */ + } l_array; + struct zap_leaf_free { + uint8_t lf_type; /* always ZAP_CHUNK_FREE */ + uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; + uint16_t lf_next; /* next in free list, or CHAIN_END */ + } l_free; +} zap_leaf_chunk_t; + +#endif /* _SYS_ZAP_LEAF_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zfs.h b/tools/libfsimage/zfs/zfs-include/zfs.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zfs.h @@ -0,0 
+1,112 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_H +#define _SYS_FS_ZFS_H + + +/* + * On-disk version number. + */ +#define SPA_VERSION_1 1ULL +#define SPA_VERSION_2 2ULL +#define SPA_VERSION_3 3ULL +#define SPA_VERSION_4 4ULL +#define SPA_VERSION_5 5ULL +#define SPA_VERSION_6 6ULL +#define SPA_VERSION_7 7ULL +#define SPA_VERSION_8 8ULL +#define SPA_VERSION_9 9ULL +#define SPA_VERSION_10 10ULL +#define SPA_VERSION SPA_VERSION_10 + +/* + * The following are configuration names used in the nvlist describing a pool's + * configuration. 
+ */ +#define ZPOOL_CONFIG_VERSION "version" +#define ZPOOL_CONFIG_POOL_NAME "name" +#define ZPOOL_CONFIG_POOL_STATE "state" +#define ZPOOL_CONFIG_POOL_TXG "txg" +#define ZPOOL_CONFIG_POOL_GUID "pool_guid" +#define ZPOOL_CONFIG_CREATE_TXG "create_txg" +#define ZPOOL_CONFIG_TOP_GUID "top_guid" +#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" +#define ZPOOL_CONFIG_TYPE "type" +#define ZPOOL_CONFIG_CHILDREN "children" +#define ZPOOL_CONFIG_ID "id" +#define ZPOOL_CONFIG_GUID "guid" +#define ZPOOL_CONFIG_PATH "path" +#define ZPOOL_CONFIG_DEVID "devid" +#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" +#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" +#define ZPOOL_CONFIG_ASHIFT "ashift" +#define ZPOOL_CONFIG_ASIZE "asize" +#define ZPOOL_CONFIG_DTL "DTL" +#define ZPOOL_CONFIG_STATS "stats" +#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" +#define ZPOOL_CONFIG_ERRCOUNT "error_count" +#define ZPOOL_CONFIG_NOT_PRESENT "not_present" +#define ZPOOL_CONFIG_SPARES "spares" +#define ZPOOL_CONFIG_IS_SPARE "is_spare" +#define ZPOOL_CONFIG_NPARITY "nparity" +#define ZPOOL_CONFIG_PHYS_PATH "phys_path" +#define ZPOOL_CONFIG_L2CACHE "l2cache" +/* + * The persistent vdev state is stored as separate values rather than a single + * 'vdev_state' entry. This is because a device can be in multiple states, such + * as offline and degraded. + */ +#define ZPOOL_CONFIG_OFFLINE "offline" +#define ZPOOL_CONFIG_FAULTED "faulted" +#define ZPOOL_CONFIG_DEGRADED "degraded" +#define ZPOOL_CONFIG_REMOVED "removed" + +#define VDEV_TYPE_ROOT "root" +#define VDEV_TYPE_MIRROR "mirror" +#define VDEV_TYPE_REPLACING "replacing" +#define VDEV_TYPE_RAIDZ "raidz" +#define VDEV_TYPE_DISK "disk" +#define VDEV_TYPE_FILE "file" +#define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_SPARE "spare" +#define VDEV_TYPE_L2CACHE "l2cache" + +/* + * pool state. The following states are written to disk as part of the normal + * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. 
The remaining + * states are software abstractions used at various levels to communicate pool + * state. + */ +typedef enum pool_state { + POOL_STATE_ACTIVE = 0, /* In active use */ + POOL_STATE_EXPORTED, /* Explicitly exported */ + POOL_STATE_DESTROYED, /* Explicitly destroyed */ + POOL_STATE_SPARE, /* Reserved for hot spare use */ + POOL_STATE_L2CACHE, /* Level 2 ARC device */ + POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ + POOL_STATE_UNAVAIL, /* Internal libzfs state */ + POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ +} pool_state_t; + +#endif /* _SYS_FS_ZFS_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zfs_acl.h b/tools/libfsimage/zfs/zfs-include/zfs_acl.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zfs_acl.h @@ -0,0 +1,60 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FS_ZFS_ACL_H +#define _SYS_FS_ZFS_ACL_H + +#ifndef _UID_T +#define _UID_T +typedef unsigned int uid_t; /* UID type */ +#endif /* _UID_T */ + +typedef struct zfs_oldace { + uint32_t z_fuid; /* "who" */ + uint32_t z_access_mask; /* access mask */ + uint16_t z_flags; /* flags, i.e. inheritance */ + uint16_t z_type; /* type of entry allow/deny */ +} zfs_oldace_t; + +#define ACE_SLOT_CNT 6 + +typedef struct zfs_znode_acl_v0 { + uint64_t z_acl_extern_obj; /* ext acl pieces */ + uint32_t z_acl_count; /* Number of ACEs */ + uint16_t z_acl_version; /* acl version */ + uint16_t z_acl_pad; /* pad */ + zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ +} zfs_znode_acl_v0_t; + +#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT) + +typedef struct zfs_znode_acl { + uint64_t z_acl_extern_obj; /* ext acl pieces */ + uint32_t z_acl_size; /* Number of bytes in ACL */ + uint16_t z_acl_version; /* acl version */ + uint16_t z_acl_count; /* ace count */ + uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */ +} zfs_znode_acl_t; + + +#endif /* _SYS_FS_ZFS_ACL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zfs_znode.h b/tools/libfsimage/zfs/zfs-include/zfs_znode.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zfs_znode.h @@ -0,0 +1,68 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_ZNODE_H +#define _SYS_FS_ZFS_ZNODE_H + +#define MASTER_NODE_OBJ 1 +#define ZFS_ROOT_OBJ "ROOT" +#define ZPL_VERSION_STR "VERSION" + +#define ZPL_VERSION 3ULL + +#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) + +/* + * This is the persistent portion of the znode. It is stored + * in the "bonus buffer" of the file. Short symbolic links + * are also stored in the bonus buffer. + */ +typedef struct znode_phys { + uint64_t zp_atime[2]; /* 0 - last file access time */ + uint64_t zp_mtime[2]; /* 16 - last file modification time */ + uint64_t zp_ctime[2]; /* 32 - last file change time */ + uint64_t zp_crtime[2]; /* 48 - creation time */ + uint64_t zp_gen; /* 64 - generation (txg of creation) */ + uint64_t zp_mode; /* 72 - file mode bits */ + uint64_t zp_size; /* 80 - size of file */ + uint64_t zp_parent; /* 88 - directory parent (`..') */ + uint64_t zp_links; /* 96 - number of links to file */ + uint64_t zp_xattr; /* 104 - DMU object for xattrs */ + uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ + uint64_t zp_flags; /* 120 - persistent flags */ + uint64_t zp_uid; /* 128 - file owner */ + uint64_t zp_gid; /* 136 - owning group */ + uint64_t zp_pad[4]; /* 144 - future */ + zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */ + /* + * Data may pad out any remaining bytes in the znode buffer, eg: + * + * |<---------------------- dnode_phys (512) ------------------------>| + * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| + * |<---- znode (264) ---->|<---- data (56) ---->| + * + * At present, we only use this space to store symbolic links. 
+ */ +} znode_phys_t; + +#endif /* _SYS_FS_ZFS_ZNODE_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zil.h b/tools/libfsimage/zfs/zfs-include/zil.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zil.h @@ -0,0 +1,51 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIL_H +#define _SYS_ZIL_H + +/* + * Intent log format: + * + * Each objset has its own intent log. The log header (zil_header_t) + * for objset N's intent log is kept in the Nth object of the SPA's + * intent_log objset. The log header points to a chain of log blocks, + * each of which contains log records (i.e., transactions) followed by + * a log block trailer (zil_trailer_t). The format of a log record + * depends on the record (or transaction) type, but all records begin + * with a common structure that defines the type, length, and txg. + */ + +/* + * Intent log header - this on disk structure holds fields to manage + * the log. All fields are 64 bit to easily handle cross architectures. 
+ */ +typedef struct zil_header { + uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ + uint64_t zh_replay_seq; /* highest replayed sequence number */ + blkptr_t zh_log; /* log chain */ + uint64_t zh_claim_seq; /* highest claimed sequence number */ + uint64_t zh_pad[5]; +} zil_header_t; + +#endif /* _SYS_ZIL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zio.h b/tools/libfsimage/zfs/zfs-include/zio.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zio.h @@ -0,0 +1,81 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZIO_H +#define _ZIO_H + +#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */ + +typedef struct zio_block_tail { + uint64_t zbt_magic; /* for validation, endianness */ + zio_cksum_t zbt_cksum; /* 256-bit checksum */ +} zio_block_tail_t; + +/* + * Gang block headers are self-checksumming and contain an array + * of block pointers. 
+ */ +#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE +#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_block_tail_t)) / sizeof (blkptr_t)) +#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_block_tail_t) - \ + (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ + sizeof (uint64_t)) + +#define ZIO_GET_IOSIZE(zio) \ + (BP_IS_GANG((zio)->io_bp) ? \ + SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp)) + +typedef struct zio_gbh { + blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; + uint64_t zg_filler[SPA_GBH_FILLER]; + zio_block_tail_t zg_tail; +} zio_gbh_phys_t; + +enum zio_checksum { + ZIO_CHECKSUM_INHERIT = 0, + ZIO_CHECKSUM_ON, + ZIO_CHECKSUM_OFF, + ZIO_CHECKSUM_LABEL, + ZIO_CHECKSUM_GANG_HEADER, + ZIO_CHECKSUM_ZILOG, + ZIO_CHECKSUM_FLETCHER_2, + ZIO_CHECKSUM_FLETCHER_4, + ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_FUNCTIONS +}; + +#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2 +#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON + +enum zio_compress { + ZIO_COMPRESS_INHERIT = 0, + ZIO_COMPRESS_ON, + ZIO_COMPRESS_OFF, + ZIO_COMPRESS_LZJB, + ZIO_COMPRESS_EMPTY, + ZIO_COMPRESS_FUNCTIONS +}; + +#endif /* _ZIO_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zio_checksum.h b/tools/libfsimage/zfs/zfs-include/zio_checksum.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/zio_checksum.h @@ -0,0 +1,42 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIO_CHECKSUM_H +#define _SYS_ZIO_CHECKSUM_H + +/* + * Signature for checksum functions. + */ +typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp); + +/* + * Information about each checksum function. + */ +typedef struct zio_checksum_info { + zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */ + int ci_correctable; /* number of correctable bits */ + int ci_zbt; /* uses zio block tail? */ + char *ci_name; /* descriptive name */ +} zio_checksum_info_t; + +#endif /* _SYS_ZIO_CHECKSUM_H */ diff --git a/tools/libfsimage/zfs/zfs_fletcher.c b/tools/libfsimage/zfs/zfs_fletcher.c new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs_fletcher.c @@ -0,0 +1,93 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "fsys_zfs.h" + + +void +fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += ip[0]; + a1 += ip[1]; + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += BSWAP_64(ip[0]); + a1 += BSWAP_64(ip[1]); + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} diff --git a/tools/libfsimage/zfs/zfs_lzjb.c b/tools/libfsimage/zfs/zfs_lzjb.c new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs_lzjb.c @@ -0,0 +1,60 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
#define	MATCH_BITS	6
#define	MATCH_MIN	3
#define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)

/*
 * Decompress an LZJB stream.
 *
 *   s_start, s_len	compressed input and its length in bytes
 *   d_start, d_len	output buffer and the exact number of bytes to
 *			produce
 *
 * The stream is a sequence of groups: one "copymap" byte followed by up
 * to NBBY items, one per copymap bit starting from the least
 * significant.  A clear bit means the item is a literal byte.  A set bit
 * means the item is a two-byte back-reference whose top MATCH_BITS bits
 * hold (length - MATCH_MIN) and whose remaining bits hold the distance
 * back into the output.
 *
 * Returns 0 once d_len bytes have been written.  Returns -1 if a
 * back-reference points before d_start, or if the input runs out before
 * the output is complete.  s_len is therefore honoured and must describe
 * the source buffer accurately.
 */
int
lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len)
{
	const unsigned char *src = s_start;
	const unsigned char *s_end = src + s_len;
	unsigned char *dst = d_start;
	unsigned char *d_end = dst + d_len;
	unsigned int copymap = 0;
	unsigned int copymask = 1U << (NBBY - 1);

	while (dst < d_end) {
		/* Start a new group once all NBBY bits have been used. */
		if ((copymask <<= 1) == (1U << NBBY)) {
			if (src >= s_end)
				return (-1);
			copymask = 1;
			copymap = *src++;
		}

		if (copymap & copymask) {
			const unsigned char *cpy;
			size_t offset;
			int mlen;

			if (s_end - src < 2)
				return (-1);

			mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN;
			offset = (((size_t)src[0] << NBBY) | src[1]) &
			    OFFSET_MASK;
			src += 2;

			/*
			 * Compare distances, not pointers: forming a pointer
			 * that lies before d_start is undefined behaviour.
			 */
			if (offset > (size_t)(dst - (unsigned char *)d_start))
				return (-1);

			/* Byte-wise on purpose: source and target may overlap. */
			cpy = dst - offset;
			while (--mlen >= 0 && dst < d_end)
				*dst++ = *cpy++;
		} else {
			if (src >= s_end)
				return (-1);
			*dst++ = *src++;
		}
	}
	return (0);
}
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "fsys_zfs.h" + +/* + * SHA-256 checksum, as specified in FIPS 180-2, available at: + * http://csrc.nist.gov/cryptval + * + * This is a very compact implementation of SHA-256. + * It is designed to be simple and portable, not to be fast. + */ + +/* + * The literal definitions according to FIPS180-2 would be: + * + * Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) + * Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) + * + * We use logical equivalents which require one less op. 
+ */ +#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y)))) +#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s))) +#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22)) +#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25)) +#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3)) +#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10)) + +static const uint32_t SHA256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +static void +SHA256Transform(uint32_t *H, const uint8_t *cp) +{ + uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64]; + + for (t = 0; t < 16; t++, cp += 4) + W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3]; + + for (t = 16; t < 64; t++) + W[t] = sigma1(W[t - 2]) + W[t - 7] + + sigma0(W[t - 15]) + W[t - 16]; + + a = H[0]; b = H[1]; c = H[2]; d = H[3]; + e = H[4]; f = H[5]; g = H[6]; h = H[7]; + + for (t = 0; t < 64; t++) { + T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t]; + T2 = SIGMA0(a) + Maj(a, b, c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + H[0] += a; H[1] += b; H[2] += c; H[3] += d; + H[4] += e; H[5] += f; H[6] += g; H[7] += h; +} + +void +zio_checksum_SHA256(const 
void *buf, uint64_t size, zio_cksum_t *zcp) +{ + uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; + uint8_t pad[128]; + int padsize = size & 63; + int i; + + for (i = 0; i < size - padsize; i += 64) + SHA256Transform(H, (uint8_t *)buf + i); + + for (i = 0; i < padsize; i++) + pad[i] = ((uint8_t *)buf)[i]; + + for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++) + pad[padsize] = 0; + + for (i = 0; i < 8; i++) + pad[padsize++] = (size << 3) >> (56 - 8 * i); + + for (i = 0; i < padsize; i += 64) + SHA256Transform(H, pad + i); + + ZIO_SET_CHECKSUM(zcp, + (uint64_t)H[0] << 32 | H[1], + (uint64_t)H[2] << 32 | H[3], + (uint64_t)H[4] << 32 | H[5], + (uint64_t)H[6] << 32 | H[7]); +} diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c --- a/tools/pygrub/src/fsimage/fsimage.c +++ b/tools/pygrub/src/fsimage/fsimage.c @@ -17,7 +17,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -281,6 +281,22 @@ fsimage_open(PyObject *o, PyObject *args return (PyObject *)fs; } +static PyObject * +fsimage_getbootstring(PyObject *o, PyObject *args) +{ + PyObject *fs; + char *bootstring; + fsi_t *fsi; + + if (!PyArg_ParseTuple(args, "O", &fs)) + return (NULL); + + fsi = ((fsimage_fs_t *)fs)->fs; + bootstring = fsi_fs_bootstring(fsi); + + return Py_BuildValue("s", bootstring); +} + PyDoc_STRVAR(fsimage_open__doc__, "open(name, [offset=off]) - Open the given file as a filesystem image.\n" "\n" @@ -288,9 +304,15 @@ PyDoc_STRVAR(fsimage_open__doc__, "offset - offset of file system within file image.\n" "options - mount options string.\n"); +PyDoc_STRVAR(fsimage_getbootstring__doc__, + "getbootstring(fs) - Return the boot string needed for this file system " + "or NULL if none is needed.\n"); + static struct PyMethodDef fsimage_module_methods[] = { { "open", (PyCFunction)fsimage_open, METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ }, + { "getbootstring", (PyCFunction)fsimage_getbootstring, + METH_VARARGS, fsimage_getbootstring__doc__ }, { NULL, NULL, 0, NULL } }; diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub +++ b/tools/pygrub/src/pygrub @@ -646,7 +646,13 @@ if __name__ == "__main__": print " args: %s" % chosencfg["args"] sys.exit(0) - fs = fsimage.open(file, get_fs_offset(file)) + # if boot filesystem is set then pass to fsimage.open + bootfsargs = '"%s"' % incfg["args"] + bootfsgroup = re.findall('zfs-bootfs=(.*?)[\s\,\"]', bootfsargs) + if bootfsgroup: + fs = fsimage.open(file, get_fs_offset(file), bootfsgroup[0]) + else: + fs = fsimage.open(file, get_fs_offset(file)) chosencfg = sniff_solaris(fs, incfg) @@ -672,7 +678,15 @@ if __name__ == "__main__": if bootcfg["ramdisk"]: sxp += "(ramdisk %s)" % bootcfg["ramdisk"] if chosencfg["args"]: - sxp += "(args \"%s\")" % chosencfg["args"] + zfsinfo = fsimage.getbootstring(fs) + if zfsinfo is None: + sxp += "(args \"%s\")" % chosencfg["args"] + else: + e = 
re.compile("zfs-bootfs=[\w\-\.\:@/]+" ) + (chosencfg["args"],count) = e.subn(zfsinfo, chosencfg["args"]) + if count == 0: + chosencfg["args"] += " -B %s" % zfsinfo + sxp += "(args \"%s\")" % (chosencfg["args"]) sys.stdout.flush() os.write(fd, sxp) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |