[Xen-devel] [PATCH 2/2] libfsimage: Add support for btrfs
Add support for booting from btrfs filesystems.

All the code except fsys_btrfs.c is imported from btrfs-progs 4.5.1,
which is licensed GPLv2. The files have been imported with minimal
modification; further work could split and prune the support code to
only what is required for reading.

The driver supports following subvolumes and symlinks, and reading
files with compression (zlib and lzo). This makes it useful for booting
SLES 12 in its default configuration, since SLES 12 uses btrfs by
default.

Signed-off-by: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
---
 tools/libfsimage/Makefile                 |    2 +-
 tools/libfsimage/btrfs/Makefile           |   36 +
 tools/libfsimage/btrfs/bitops.h           |  224 ++
 tools/libfsimage/btrfs/btrfs-list.h       |  178 ++
 tools/libfsimage/btrfs/commands.h         |  134 +
 tools/libfsimage/btrfs/crc32c.c           |  222 ++
 tools/libfsimage/btrfs/crc32c.h           |   33 +
 tools/libfsimage/btrfs/ctree.c            | 2941 ++++++++++++++++++++
 tools/libfsimage/btrfs/ctree.h            | 2570 ++++++++++++++++++
 tools/libfsimage/btrfs/dir-item.c         |  351 +++
 tools/libfsimage/btrfs/disk-io.c          | 1779 +++++++++++++
 tools/libfsimage/btrfs/disk-io.h          |  168 ++
 tools/libfsimage/btrfs/extent-cache.c     |  341 +++
 tools/libfsimage/btrfs/extent-cache.h     |  121 +
 tools/libfsimage/btrfs/extent-tree.c      | 4122 +++++++++++++++++++++++++++++
 tools/libfsimage/btrfs/extent_io.c        |  895 +++++++
 tools/libfsimage/btrfs/extent_io.h        |  159 ++
 tools/libfsimage/btrfs/file-item.c        |  487 ++++
 tools/libfsimage/btrfs/free-space-cache.c |  879 ++++++
 tools/libfsimage/btrfs/free-space-cache.h |   62 +
 tools/libfsimage/btrfs/fsys_btrfs.c       |  862 ++++++
 tools/libfsimage/btrfs/hash.h             |   28 +
 tools/libfsimage/btrfs/inode-item.c       |  460 ++++
 tools/libfsimage/btrfs/inode.c            |  536 ++++
 tools/libfsimage/btrfs/internal.h         |   42 +
 tools/libfsimage/btrfs/ioctl.h            |  729 +++++
 tools/libfsimage/btrfs/kerncompat.h       |  354 +++
 tools/libfsimage/btrfs/list.h             |  486 ++++
 tools/libfsimage/btrfs/print-tree.h       |   27 +
 tools/libfsimage/btrfs/radix-tree.h       |   97 +
 tools/libfsimage/btrfs/raid6.c            |  101 +
 tools/libfsimage/btrfs/rbtree-utils.c     |   82 +
 tools/libfsimage/btrfs/rbtree-utils.h     |   53 +
 tools/libfsimage/btrfs/rbtree.c           |  548 ++++
 tools/libfsimage/btrfs/rbtree.h           |  118 +
 tools/libfsimage/btrfs/rbtree_augmented.h |  249 ++
 tools/libfsimage/btrfs/repair.c           |   50 +
 tools/libfsimage/btrfs/repair.h           |   34 +
 tools/libfsimage/btrfs/root-tree.c        |  192 ++
 tools/libfsimage/btrfs/transaction.h      |   64 +
 tools/libfsimage/btrfs/utils.c            | 3242 +++++++++++++++++++++++
 tools/libfsimage/btrfs/utils.h            |  342 +++
 tools/libfsimage/btrfs/volumes.c          | 2131 +++++++++++++++
 tools/libfsimage/btrfs/volumes.h          |  229 ++
 44 files changed, 26759 insertions(+), 1 deletion(-)
 create mode 100644 tools/libfsimage/btrfs/Makefile
 create mode 100644 tools/libfsimage/btrfs/bitops.h
 create mode 100644 tools/libfsimage/btrfs/btrfs-list.h
 create mode 100644 tools/libfsimage/btrfs/commands.h
 create mode 100644 tools/libfsimage/btrfs/crc32c.c
 create mode 100644 tools/libfsimage/btrfs/crc32c.h
 create mode 100644 tools/libfsimage/btrfs/ctree.c
 create mode 100644 tools/libfsimage/btrfs/ctree.h
 create mode 100644 tools/libfsimage/btrfs/dir-item.c
 create mode 100644 tools/libfsimage/btrfs/disk-io.c
 create mode 100644 tools/libfsimage/btrfs/disk-io.h
 create mode 100644 tools/libfsimage/btrfs/extent-cache.c
 create mode 100644 tools/libfsimage/btrfs/extent-cache.h
 create mode 100644 tools/libfsimage/btrfs/extent-tree.c
 create mode 100644 tools/libfsimage/btrfs/extent_io.c
 create mode 100644 tools/libfsimage/btrfs/extent_io.h
 create mode 100644 tools/libfsimage/btrfs/file-item.c
 create mode 100644 tools/libfsimage/btrfs/free-space-cache.c
 create mode 100644 tools/libfsimage/btrfs/free-space-cache.h
 create mode 100644 tools/libfsimage/btrfs/fsys_btrfs.c
 create mode 100644 tools/libfsimage/btrfs/hash.h
 create mode 100644 tools/libfsimage/btrfs/inode-item.c
 create mode 100644 tools/libfsimage/btrfs/inode.c
 create mode 100644 tools/libfsimage/btrfs/internal.h
 create mode 100644 tools/libfsimage/btrfs/ioctl.h
 create mode 100644 tools/libfsimage/btrfs/kerncompat.h
 create mode 100644 tools/libfsimage/btrfs/list.h
 create mode 100644 tools/libfsimage/btrfs/print-tree.h
 create mode 100644 tools/libfsimage/btrfs/radix-tree.h
 create mode 100644 tools/libfsimage/btrfs/raid6.c
 create mode 100644 tools/libfsimage/btrfs/rbtree-utils.c
 create mode 100644 tools/libfsimage/btrfs/rbtree-utils.h
 create mode 100644 tools/libfsimage/btrfs/rbtree.c
 create mode 100644 tools/libfsimage/btrfs/rbtree.h
 create mode 100644 tools/libfsimage/btrfs/rbtree_augmented.h
 create mode 100644 tools/libfsimage/btrfs/repair.c
 create mode 100644 tools/libfsimage/btrfs/repair.h
 create mode 100644 tools/libfsimage/btrfs/root-tree.c
 create mode 100644 tools/libfsimage/btrfs/transaction.h
 create mode 100644 tools/libfsimage/btrfs/utils.c
 create mode 100644 tools/libfsimage/btrfs/utils.h
 create mode 100644 tools/libfsimage/btrfs/volumes.c
 create mode 100644 tools/libfsimage/btrfs/volumes.h

diff --git a/tools/libfsimage/Makefile b/tools/libfsimage/Makefile
index 69fd18a..f1459d8 100644
--- a/tools/libfsimage/Makefile
+++ b/tools/libfsimage/Makefile
@@ -1,7 +1,7 @@
 XEN_ROOT = $(CURDIR)/../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-SUBDIRS-y = common ufs reiserfs iso9660 fat zfs
+SUBDIRS-y = common ufs reiserfs iso9660 fat zfs btrfs
 SUBDIRS-$(CONFIG_X86) += xfs
 ifneq ($(EXTFS_LIBS), )
 SUBDIRS-y += ext2fs-lib
diff --git a/tools/libfsimage/btrfs/Makefile b/tools/libfsimage/btrfs/Makefile
new file mode 100644
index 0000000..394185e
--- /dev/null
+++ b/tools/libfsimage/btrfs/Makefile
@@ -0,0 +1,36 @@
+XEN_ROOT = $(CURDIR)/../../..
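
fsys_btrfs.c provides the glue between the imported btrfs-progs code and
libfsimage's plugin interface. A minimal sketch of that registration is
below, assuming the fsi_plugin_ops_t interface declared in
tools/libfsimage/common/fsimage_plugin.h (which the existing backends
such as zfs implement); the btrfs_* callbacks are hypothetical stubs
standing in for the real readers.

/*
 * Sketch only -- not part of the patch.  The shape follows
 * fsi_plugin_ops_t from tools/libfsimage/common/fsimage_plugin.h;
 * the btrfs_* callbacks are hypothetical stand-ins for the real
 * implementations in fsys_btrfs.c.
 */
#include <fsimage_plugin.h>

static int btrfs_mount(fsi_t *fsi, const char *options)
{
	/* Real code: read the superblock and cache the fs state. */
	return -1;
}

static int btrfs_umount(fsi_t *fsi)
{
	return 0;
}

static fsi_file_t *btrfs_open(fsi_t *fsi, const char *path)
{
	/* Real code: walk directories, subvolumes and symlinks to the inode. */
	return NULL;
}

static ssize_t btrfs_read(fsi_file_t *file, void *buf, size_t size)
{
	return -1;
}

static ssize_t btrfs_pread(fsi_file_t *file, void *buf, size_t size,
                           uint64_t offset)
{
	/* Real code: map [offset, offset + size) through the extent tree,
	 * decompressing zlib/lzo extents as needed. */
	return -1;
}

static int btrfs_close(fsi_file_t *file)
{
	return 0;
}

static fsi_plugin_ops_t fsys_btrfs_ops = {
	.fpo_version = FSIMAGE_PLUGIN_VERSION,
	.fpo_mount   = btrfs_mount,
	.fpo_umount  = btrfs_umount,
	.fpo_open    = btrfs_open,
	.fpo_read    = btrfs_read,
	.fpo_pread   = btrfs_pread,
	.fpo_close   = btrfs_close,
};

/* Entry point looked up by the common code when the plugin is loaded. */
fsi_plugin_ops_t *fsi_init_plugin(int version, fsi_plugin_t *fp,
                                  const char **name)
{
	if (version < FSIMAGE_PLUGIN_VERSION)
		return NULL;
	*name = "btrfs";
	return &fsys_btrfs_ops;
}

The Makefile (continued below) links the plugin against -lz and -llzo2
for the two supported extent compression formats.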
+ +CFLAGS += -DBTRFS_FLAT_INCLUDES=1 +LIB_SRCS-y = fsys_btrfs.c \ + crc32c.c \ + ctree.c \ + dir-item.c \ + disk-io.c \ + extent-cache.c \ + extent_io.c \ + extent-tree.c \ + file-item.c \ + free-space-cache.c \ + inode.c \ + inode-item.c \ + raid6.c \ + rbtree.c \ + rbtree-utils.c \ + repair.c \ + root-tree.c \ + utils.c \ + volumes.c + +FS = btrfs +FS_LIBDEPS = $(filter -llzo2,$(ZLIB)) -lz + +# Include configure output (config.h) +CFLAGS += -include $(XEN_ROOT)/tools/config.h + +.PHONY: all +all: fs-all + +.PHONY: install +install: fs-install + +include $(XEN_ROOT)/tools/libfsimage/Rules.mk diff --git a/tools/libfsimage/btrfs/bitops.h b/tools/libfsimage/btrfs/bitops.h new file mode 100644 index 0000000..5b35f9f --- /dev/null +++ b/tools/libfsimage/btrfs/bitops.h @@ -0,0 +1,224 @@ +#ifndef _PERF_LINUX_BITOPS_H_ +#define _PERF_LINUX_BITOPS_H_ + +#include <linux/kernel.h> + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); +} + +static inline void clear_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); +} + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static inline unsigned long hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +} + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. 
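+ *
+ * Illustrative example: __ffs(0x8) == 3, since bit 3 is the lowest
+ * (and only) set bit of 0x8.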
+ */ +static __always_inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#define ffz(x) __ffs(~(x)) + +/* + * Find the first set bit in a memory region. + */ +static inline unsigned long +find_first_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} + +/* + * Find the next set bit in a memory region. + */ +static inline unsigned long +find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. + */ +static inline unsigned long +find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ffz(tmp); +} +#endif diff --git a/tools/libfsimage/btrfs/btrfs-list.h b/tools/libfsimage/btrfs/btrfs-list.h new file mode 100644 index 0000000..13f44c3 --- /dev/null +++ b/tools/libfsimage/btrfs/btrfs-list.h @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2012 FUJITSU LIMITED. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_LIST_H__ +#define __BTRFS_LIST_H__ + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#include "rbtree.h" +#include "ioctl.h" +#else +#include <btrfs/kerncompat.h> +#include <btrfs/rbtree.h> +#include <btrfs/ioctl.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#include <time.h> + +#define BTRFS_LIST_LAYOUT_DEFAULT 0 +#define BTRFS_LIST_LAYOUT_TABLE 1 +#define BTRFS_LIST_LAYOUT_RAW 2 + +/* + * one of these for each root we find. + */ +struct root_info { + struct rb_node rb_node; + struct rb_node sort_node; + + /* this root's id */ + u64 root_id; + + /* equal the offset of the root's key */ + u64 root_offset; + + /* flags of the root */ + u64 flags; + + /* the id of the root that references this one */ + u64 ref_tree; + + /* the dir id we're in from ref_tree */ + u64 dir_id; + + u64 top_id; + + /* generation when the root is created or last updated */ + u64 gen; + + /* creation generation of this root in sec*/ + u64 ogen; + + /* creation time of this root in sec*/ + time_t otime; + + u8 uuid[BTRFS_UUID_SIZE]; + u8 puuid[BTRFS_UUID_SIZE]; + u8 ruuid[BTRFS_UUID_SIZE]; + + /* path from the subvol we live in to this root, including the + * root's name. This is null until we do the extra lookup ioctl. + */ + char *path; + + /* the name of this root in the directory it lives in */ + char *name; + + char *full_path; + + int deleted; +}; + +typedef int (*btrfs_list_filter_func)(struct root_info *, u64); +typedef int (*btrfs_list_comp_func)(struct root_info *, struct root_info *, + int); + +struct btrfs_list_filter { + btrfs_list_filter_func filter_func; + u64 data; +}; + +struct btrfs_list_comparer { + btrfs_list_comp_func comp_func; + int is_descending; +}; + +struct btrfs_list_filter_set { + int total; + int nfilters; + int only_deleted; + struct btrfs_list_filter filters[0]; +}; + +struct btrfs_list_comparer_set { + int total; + int ncomps; + struct btrfs_list_comparer comps[0]; +}; + +enum btrfs_list_column_enum { + BTRFS_LIST_OBJECTID, + BTRFS_LIST_GENERATION, + BTRFS_LIST_OGENERATION, + BTRFS_LIST_PARENT, + BTRFS_LIST_TOP_LEVEL, + BTRFS_LIST_OTIME, + BTRFS_LIST_PUUID, + BTRFS_LIST_RUUID, + BTRFS_LIST_UUID, + BTRFS_LIST_PATH, + BTRFS_LIST_ALL, +}; + +enum btrfs_list_filter_enum { + BTRFS_LIST_FILTER_ROOTID, + BTRFS_LIST_FILTER_SNAPSHOT_ONLY, + BTRFS_LIST_FILTER_FLAGS, + BTRFS_LIST_FILTER_GEN, + BTRFS_LIST_FILTER_GEN_EQUAL = BTRFS_LIST_FILTER_GEN, + BTRFS_LIST_FILTER_GEN_LESS, + BTRFS_LIST_FILTER_GEN_MORE, + BTRFS_LIST_FILTER_CGEN, + BTRFS_LIST_FILTER_CGEN_EQUAL = BTRFS_LIST_FILTER_CGEN, + BTRFS_LIST_FILTER_CGEN_LESS, + BTRFS_LIST_FILTER_CGEN_MORE, + BTRFS_LIST_FILTER_TOPID_EQUAL, + BTRFS_LIST_FILTER_FULL_PATH, + BTRFS_LIST_FILTER_BY_PARENT, + BTRFS_LIST_FILTER_DELETED, + BTRFS_LIST_FILTER_MAX, +}; + +enum btrfs_list_comp_enum { + BTRFS_LIST_COMP_ROOTID, + BTRFS_LIST_COMP_OGEN, + BTRFS_LIST_COMP_GEN, + BTRFS_LIST_COMP_PATH, + BTRFS_LIST_COMP_MAX, +}; + +int btrfs_list_parse_sort_string(char *optarg, + struct btrfs_list_comparer_set **comps); +int btrfs_list_parse_filter_string(char *optarg, + struct 
btrfs_list_filter_set **filters, + enum btrfs_list_filter_enum type); +void btrfs_list_setup_print_column(enum btrfs_list_column_enum column); +struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void); +void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set); +int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set, + enum btrfs_list_filter_enum filter, u64 data); +struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void); +void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set); + +int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + int is_tab_result, int full_path, char *raw_prefix); +int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen); +int btrfs_list_get_default_subvolume(int fd, u64 *default_id); +char *btrfs_list_path_for_root(int fd, u64 root); +int btrfs_list_get_path_rootid(int fd, u64 *treeid); +int btrfs_get_subvol(int fd, struct root_info *the_ri); + +#endif diff --git a/tools/libfsimage/btrfs/commands.h b/tools/libfsimage/btrfs/commands.h new file mode 100644 index 0000000..2da093b --- /dev/null +++ b/tools/libfsimage/btrfs/commands.h @@ -0,0 +1,134 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_COMMANDS_H__ +#define __BTRFS_COMMANDS_H__ + +enum { + CMD_HIDDEN = (1 << 0), /* should not be in help listings */ + CMD_ALIAS = (1 << 1), /* alias of next command in cmd_group */ +}; + +struct cmd_struct { + const char *token; + int (*fn)(int, char **); + + /* + * Usage strings + * + * A NULL-terminated array of the following format: + * + * usagestr[0] - one-line synopsis (required) + * usagestr[1] - one-line short description (required) + * usagestr[2..m] - a long (possibly multi-line) description + * (optional) + * usagestr[m + 1] - an empty line separator (required if at least one + * option string is given, not needed otherwise) + * usagestr[m + 2..n] - option strings, one option per line + * (optional) + * usagestr[n + 1] - NULL terminator + * + * Options (if present) should always (even if there is no long + * description) be prepended with an empty line. Supplied strings are + * indented but otherwise printed as-is, no automatic wrapping is done. + * + * Grep for cmd_*_usage[] for examples. 
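+ *
+ * An illustrative (hypothetical, not a real btrfs-progs command)
+ * example following the layout above:
+ *
+ *	static const char * const cmd_foo_usage[] = {
+ *		"btrfs foo [-v] <path>",
+ *		"Run foo against a mounted filesystem",
+ *		"",
+ *		"-v	be verbose",
+ *		NULL
+ *	};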
+ */ + const char * const *usagestr; + + /* should be NULL if token is not a subgroup */ + const struct cmd_group *next; + + /* CMD_* flags above */ + int flags; +}; + +#define NULL_CMD_STRUCT {NULL, NULL, NULL, NULL, 0} + +struct cmd_group { + const char * const *usagestr; + const char *infostr; + + const struct cmd_struct commands[]; +}; + +/* btrfs.c */ +int prefixcmp(const char *str, const char *prefix); + +int handle_command_group(const struct cmd_group *grp, int argc, + char **argv); + +/* help.c */ +extern const char * const generic_cmd_help_usage[]; + +void usage(const char * const *usagestr) __attribute__((noreturn)); +void usage_command(const struct cmd_struct *cmd, int full, int err); +void usage_command_group(const struct cmd_group *grp, int all, int err); +void usage_command_group_short(const struct cmd_group *grp); + +void help_unknown_token(const char *arg, const struct cmd_group *grp) __attribute__((noreturn)); +void help_ambiguous_token(const char *arg, const struct cmd_group *grp) __attribute__((noreturn)); + +void help_command_group(const struct cmd_group *grp, int argc, char **argv); + +extern const struct cmd_group subvolume_cmd_group; +extern const struct cmd_group filesystem_cmd_group; +extern const struct cmd_group balance_cmd_group; +extern const struct cmd_group device_cmd_group; +extern const struct cmd_group scrub_cmd_group; +extern const struct cmd_group inspect_cmd_group; +extern const struct cmd_group property_cmd_group; +extern const struct cmd_group quota_cmd_group; +extern const struct cmd_group qgroup_cmd_group; +extern const struct cmd_group replace_cmd_group; +extern const struct cmd_group rescue_cmd_group; + +extern const char * const cmd_send_usage[]; +extern const char * const cmd_receive_usage[]; +extern const char * const cmd_check_usage[]; +extern const char * const cmd_chunk_recover_usage[]; +extern const char * const cmd_super_recover_usage[]; +extern const char * const cmd_restore_usage[]; +extern const char * const cmd_rescue_usage[]; + +int cmd_subvolume(int argc, char **argv); +int cmd_filesystem(int argc, char **argv); +int cmd_balance(int argc, char **argv); +int cmd_device(int argc, char **argv); +int cmd_scrub(int argc, char **argv); +int cmd_check(int argc, char **argv); +int cmd_chunk_recover(int argc, char **argv); +int cmd_super_recover(int argc, char **argv); +int cmd_inspect(int argc, char **argv); +int cmd_property(int argc, char **argv); +int cmd_send(int argc, char **argv); +int cmd_receive(int argc, char **argv); +int cmd_quota(int argc, char **argv); +int cmd_qgroup(int argc, char **argv); +int cmd_replace(int argc, char **argv); +int cmd_restore(int argc, char **argv); +int cmd_select_super(int argc, char **argv); +int cmd_dump_super(int argc, char **argv); +int cmd_debug_tree(int argc, char **argv); +int cmd_rescue(int argc, char **argv); + +/* subvolume exported functions */ +int test_issubvolume(const char *path); + +/* send.c */ +char *get_subvol_name(char *mnt, char *full_path); + +#endif diff --git a/tools/libfsimage/btrfs/crc32c.c b/tools/libfsimage/btrfs/crc32c.c new file mode 100644 index 0000000..dfa4e6c --- /dev/null +++ b/tools/libfsimage/btrfs/crc32c.c @@ -0,0 +1,222 @@ +/* + * Copied from the kernel source code, lib/libcrc32c.c. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#include "kerncompat.h" +#include "crc32c.h" +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length); +static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le; + +#ifdef __x86_64__ + +/* + * Based on a posting to lkml by Austin Zhang <austin.zhang@xxxxxxxxx> + * + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. + * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + */ +#if __SIZEOF_LONG__ == 8 +#define REX_PRE "0x48, " +#define SCALE_F 8 +#else +#define REX_PRE +#define SCALE_F 4 +#endif + +static int crc32c_probed = 0; +static int crc32c_intel_available = 0; + +static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data, + unsigned long length) +{ + while (length--) { + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + :"=S"(crc) + :"0"(crc), "c"(*data) + ); + data++; + } + + return crc; +} + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ +static uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length) +{ + unsigned int iquotient = length / SCALE_F; + unsigned int iremainder = length % SCALE_F; + unsigned long *ptmp = (unsigned long *)data; + + while (iquotient--) { + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (iremainder) + crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} + +static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, + unsigned int *edx) +{ + int id = *eax; + + asm("movl %4, %%eax;" + "cpuid;" + "movl %%eax, %0;" + "movl %%ebx, %1;" + "movl %%ecx, %2;" + "movl %%edx, %3;" + : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) + : "r" (id) + : "eax", "ebx", "ecx", "edx"); +} + +static void crc32c_intel_probe(void) +{ + if (!crc32c_probed) { + unsigned int eax, ebx, ecx, edx; + + eax = 1; + + do_cpuid(&eax, &ebx, &ecx, &edx); + crc32c_intel_available = (ecx & (1 << 20)) != 0; + crc32c_probed = 1; + } +} + +void crc32c_optimization_init(void) +{ + crc32c_intel_probe(); + if (crc32c_intel_available) + crc_function = crc32c_intel; +} +#else + +void crc32c_optimization_init(void) +{ +} + +#endif /* __x86_64__ */ + +/* + * This is the CRC-32C table + * Generated with: + * width = 32 bits + * poly = 0x1EDC6F41 + * reflect input bytes = true + * reflect output bytes = true + */ + +static const u32 crc32c_table[256] = { + 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, + 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, + 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, + 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, + 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, + 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, + 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, + 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, + 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, + 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, + 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, + 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 
0x7EAEB2FAL, + 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, + 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, + 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, + 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, + 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, + 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, + 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, + 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, + 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, + 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, + 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, + 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, + 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, + 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, + 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, + 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, + 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, + 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, + 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, + 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, + 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, + 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, + 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, + 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, + 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, + 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, + 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, + 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, + 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, + 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, + 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, + 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, + 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, + 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, + 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, + 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, + 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, + 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, + 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, + 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, + 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, + 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, + 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, + 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, + 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, + 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, + 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, + 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, + 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, + 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, + 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, + 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L +}; + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length) +{ + while (length--) + crc = + crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); + return crc; +} + +u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) +{ + return crc_function(crc, data, length); +} diff --git a/tools/libfsimage/btrfs/crc32c.h b/tools/libfsimage/btrfs/crc32c.h new file mode 100644 index 0000000..c552ef6 --- /dev/null +++ b/tools/libfsimage/btrfs/crc32c.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __CRC32C__ +#define __CRC32C__ + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +u32 crc32c_le(u32 seed, unsigned char const *data, size_t length); +void crc32c_optimization_init(void); + +#define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length) +#define btrfs_crc32c crc32c +#endif diff --git a/tools/libfsimage/btrfs/ctree.c b/tools/libfsimage/btrfs/ctree.c new file mode 100644 index 0000000..1f938d7 --- /dev/null +++ b/tools/libfsimage/btrfs/ctree.c @@ -0,0 +1,2941 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "repair.h" +#include "internal.h" + +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level); +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, int extend); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src, int empty); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); + +inline void btrfs_init_path(struct btrfs_path *p) +{ + memset(p, 0, sizeof(*p)); +} + +struct btrfs_path *btrfs_alloc_path(void) +{ + struct btrfs_path *path; + path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS); + return path; +} + +void btrfs_free_path(struct btrfs_path *p) +{ + if (!p) + return; + btrfs_release_path(p); + kfree(p); +} + +void btrfs_release_path(struct btrfs_path *p) +{ + int i; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (!p->nodes[i]) + continue; + free_extent_buffer(p->nodes[i]); + } + memset(p, 0, sizeof(*p)); +} + +void add_root_to_dirty_list(struct btrfs_root *root) +{ + if (root->track_dirty && list_empty(&root->dirty_list)) { + list_add(&root->dirty_list, + &root->fs_info->dirty_cowonly_roots); + } +} + +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid) +{ + struct extent_buffer *cow; + int ret = 0; + int level; + struct btrfs_root *new_root; + struct btrfs_disk_key disk_key; + + new_root = kmalloc(sizeof(*new_root), GFP_NOFS); + if (!new_root) + return -ENOMEM; + + memcpy(new_root, root, sizeof(*new_root)); + new_root->root_key.objectid = new_root_objectid; + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + + level = btrfs_header_level(buf); + if (level == 0) + btrfs_item_key(buf, &disk_key, 0); + else + btrfs_node_key(buf, &disk_key, 0); + cow = btrfs_alloc_free_block(trans, new_root, buf->len, + new_root_objectid, &disk_key, + level, buf->start, 0); + if (IS_ERR(cow)) { + kfree(new_root); + return PTR_ERR(cow); + } + + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_bytenr(cow, cow->start); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, new_root_objectid); + + write_extent_buffer(cow, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + WARN_ON(btrfs_header_generation(buf) > trans->transid); + ret = btrfs_inc_ref(trans, new_root, cow, 0); + kfree(new_root); + + if (ret) + return ret; + + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +/* + * check if the tree block can be shared by multiple trees + */ +static int btrfs_block_can_be_shared(struct btrfs_root *root, + struct extent_buffer *buf) +{ + /* + * Tree blocks not in refernece counted trees and tree roots + * are never shared. 
If a block was allocated after the last + * snapshot and the block was not allocated by tree relocation, + * we know the block is not shared. + */ + if (root->ref_cows && + buf != root->node && buf != root->commit_root && + (btrfs_header_generation(buf) <= + btrfs_root_last_snapshot(&root->root_item) || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 1; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (root->ref_cows && + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + return 1; +#endif + return 0; +} + +static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *cow) +{ + u64 refs; + u64 owner; + u64 flags; + u64 new_flags = 0; + int ret; + + /* + * Backrefs update rules: + * + * Always use full backrefs for extent pointers in tree block + * allocated by tree relocation. + * + * If a shared tree block is no longer referenced by its owner + * tree (btrfs_header_owner(buf) == root->root_key.objectid), + * use full backrefs for extent pointers in tree block. + * + * If a tree block is been relocating + * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), + * use full backrefs for extent pointers in tree block. + * The reason for this is some operations (such as drop tree) + * are only allowed for blocks use full backrefs. + */ + + if (btrfs_block_can_be_shared(root, buf)) { + ret = btrfs_lookup_extent_info(trans, root, buf->start, + btrfs_header_level(buf), 1, + &refs, &flags); + BUG_ON(ret); + BUG_ON(refs == 0); + } else { + refs = 1; + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; + else + flags = 0; + } + + owner = btrfs_header_owner(buf); + BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) && + owner == BTRFS_TREE_RELOC_OBJECTID); + + if (refs > 1) { + if ((owner == root->root_key.objectid || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && + !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { + ret = btrfs_inc_ref(trans, root, buf, 1); + BUG_ON(ret); + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_dec_ref(trans, root, buf, 0); + BUG_ON(ret); + ret = btrfs_inc_ref(trans, root, cow, 1); + BUG_ON(ret); + } + new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + } + if (new_flags != 0) { + ret = btrfs_set_block_flags(trans, root, buf->start, + btrfs_header_level(buf), + new_flags); + BUG_ON(ret); + } + } else { + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + ret = btrfs_dec_ref(trans, root, buf, 1); + BUG_ON(ret); + } + clean_tree_block(trans, root, buf); + } + return 0; +} + +int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) +{ + struct extent_buffer *cow; + struct btrfs_disk_key disk_key; + int level; + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + + level = btrfs_header_level(buf); + + if (level == 
0) + btrfs_item_key(buf, &disk_key, 0); + else + btrfs_node_key(buf, &disk_key, 0); + + cow = btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, &disk_key, + level, search_start, empty_size); + if (IS_ERR(cow)) + return PTR_ERR(cow); + + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_bytenr(cow, cow->start); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, root->root_key.objectid); + + write_extent_buffer(cow, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) && + btrfs_header_generation(buf) > trans->transid); + + update_ref_for_cow(trans, root, buf, cow); + + if (buf == root->node) { + root->node = cow; + extent_buffer_get(cow); + + btrfs_free_extent(trans, root, buf->start, buf->len, + 0, root->root_key.objectid, level, 0); + free_extent_buffer(buf); + add_root_to_dirty_list(root); + } else { + btrfs_set_node_blockptr(parent, parent_slot, + cow->start); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(parent, parent_slot, + trans->transid); + btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + + btrfs_free_extent(trans, root, buf->start, buf->len, + 0, root->root_key.objectid, level, 1); + } + if (!list_empty(&buf->recow)) { + list_del_init(&buf->recow); + free_extent_buffer(buf); + } + free_extent_buffer(buf); + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +static inline int should_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf) +{ + if (btrfs_header_generation(buf) == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && + !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 0; + return 1; +} + +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) +{ + u64 search_start; + int ret; + /* + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + */ + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %llu running %llu\n", + (unsigned long long)trans->transid, + (unsigned long long)root->fs_info->generation); + WARN_ON(1); + } + if (!should_cow_block(trans, root, buf)) { + *cow_ret = buf; + return 0; + } + + search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); + ret = __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); + return ret; +} + +int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} + +/* + * compare two keys in a memcmp fashion + */ +static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +{ + struct btrfs_key k1; + + 
btrfs_disk_key_to_cpu(&k1, disk); + return btrfs_comp_cpu_keys(&k1, k2); +} + +/* + * The leaf data grows from end-to-front in the node. + * this returns the address of the start of the last item, + * which is the stop of the leaf data stack + */ +static inline unsigned int leaf_data_end(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + u32 nr = btrfs_header_nritems(leaf); + if (nr == 0) + return BTRFS_LEAF_DATA_SIZE(root); + return btrfs_item_offset_nr(leaf, nr - 1); +} + +enum btrfs_tree_block_status +btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) +{ + int i; + struct btrfs_key cpukey; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; + + if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root)) + goto fail; + + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + if (parent_key && parent_key->type) { + btrfs_node_key(buf, &key, 0); + if (memcmp(parent_key, &key, sizeof(key))) + goto fail; + } + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_node_key(buf, &key, i); + btrfs_node_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) + goto fail; + } + return BTRFS_TREE_BLOCK_CLEAN; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_node_key_to_cpu(buf, &cpukey, 0); + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, + btrfs_header_level(buf)); + } + return ret; +} + +enum btrfs_tree_block_status +btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) +{ + int i; + struct btrfs_key cpukey; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); + enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS; + + if (nritems * sizeof(struct btrfs_item) > buf->len) { + fprintf(stderr, "invalid number of items %llu\n", + (unsigned long long)buf->start); + goto fail; + } + + if (btrfs_header_level(buf) != 0) { + ret = BTRFS_TREE_BLOCK_INVALID_LEVEL; + fprintf(stderr, "leaf is not a leaf %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + if (btrfs_leaf_free_space(root, buf) < 0) { + ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE; + fprintf(stderr, "leaf free space incorrect %llu %d\n", + (unsigned long long)btrfs_header_bytenr(buf), + btrfs_leaf_free_space(root, buf)); + goto fail; + } + + if (nritems == 0) + return BTRFS_TREE_BLOCK_CLEAN; + + btrfs_item_key(buf, &key, 0); + if (parent_key && parent_key->type && + memcmp(parent_key, &key, sizeof(key))) { + ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY; + fprintf(stderr, "leaf parent key incorrect %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + for (i = 0; nritems > 1 && i < nritems - 1; i++) { + btrfs_item_key(buf, &key, i); + btrfs_item_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) { + ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER; + fprintf(stderr, "bad key ordering %d %d\n", i, i+1); + goto fail; + } + if (btrfs_item_offset_nr(buf, i) != + btrfs_item_end_nr(buf, i + 1)) { + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "incorrect offsets %u %u\n", + btrfs_item_offset_nr(buf, i), + btrfs_item_end_nr(buf, i + 1)); + goto fail; + } + if (i == 0 && btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + ret = 
BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "bad item end %u wanted %u\n", + btrfs_item_end_nr(buf, i), + (unsigned)BTRFS_LEAF_DATA_SIZE(root)); + goto fail; + } + } + + for (i = 0; i < nritems; i++) { + if (btrfs_item_end_nr(buf, i) > BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_item_key(buf, &key, 0); + btrfs_print_key(&key); + fflush(stdout); + ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS; + fprintf(stderr, "slot end outside of leaf %llu > %llu\n", + (unsigned long long)btrfs_item_end_nr(buf, i), + (unsigned long long)BTRFS_LEAF_DATA_SIZE(root)); + goto fail; + } + } + + return BTRFS_TREE_BLOCK_CLEAN; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_item_key_to_cpu(buf, &cpukey, 0); + + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, 0); + } + return ret; +} + +static int noinline check_block(struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + struct btrfs_disk_key key; + struct btrfs_disk_key *key_ptr = NULL; + struct extent_buffer *parent; + enum btrfs_tree_block_status ret; + + if (path->skip_check_block) + return 0; + if (path->nodes[level + 1]) { + parent = path->nodes[level + 1]; + btrfs_node_key(parent, &key, path->slots[level + 1]); + key_ptr = &key; + } + if (level == 0) + ret = btrfs_check_leaf(root, key_ptr, path->nodes[0]); + else + ret = btrfs_check_node(root, key_ptr, path->nodes[level]); + if (ret == BTRFS_TREE_BLOCK_CLEAN) + return 0; + return -EIO; +} + +/* + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * + * the slot in the array is returned via slot, and it points to + * the place where you would insert key if it is not found in + * the array. 
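+ *
+ * Illustrative example: with item keys [10, 20, 30] in slots 0-2,
+ * searching for 20 returns 0 with *slot == 1, while searching for 15
+ * returns 1 with *slot == 1 (the insertion point).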
+ * + * slot may point to max if the key is bigger than all of the keys + */ +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) +{ + int low = 0; + int high = max; + int mid; + int ret; + unsigned long offset; + struct btrfs_disk_key *tmp; + + while(low < high) { + mid = (low + high) / 2; + offset = p + mid * item_size; + + tmp = (struct btrfs_disk_key *)(eb->data + offset); + ret = btrfs_comp_keys(tmp, key); + + if (ret < 0) + low = mid + 1; + else if (ret > 0) + high = mid; + else { + *slot = mid; + return 0; + } + } + *slot = low; + return 1; +} + +/* + * simple bin_search frontend that does the right thing for + * leaves vs nodes + */ +static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) +{ + if (level == 0) + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), + sizeof(struct btrfs_item), + key, btrfs_header_nritems(eb), + slot); + else + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), + sizeof(struct btrfs_key_ptr), + key, btrfs_header_nritems(eb), + slot); +} + +struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) +{ + int level = btrfs_header_level(parent); + if (slot < 0) + return NULL; + if (slot >= btrfs_header_nritems(parent)) + return NULL; + + if (level == 0) + return NULL; + + return read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(parent, slot)); +} + +static int balance_level(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; + int ret = 0; + int wret; + int pslot; + int orig_slot = path->slots[level]; + u64 orig_ptr; + + if (level == 0) + return 0; + + mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); + + orig_ptr = btrfs_node_blockptr(mid, orig_slot); + + if (level < BTRFS_MAX_LEVEL - 1) { + parent = path->nodes[level + 1]; + pslot = path->slots[level + 1]; + } + + /* + * deal with the case where there is only one pointer in the root + * by promoting the node below to a root + */ + if (!parent) { + struct extent_buffer *child; + + if (btrfs_header_nritems(mid) != 1) + return 0; + + /* promote the child to a root */ + child = read_node_slot(root, mid, 0); + BUG_ON(!extent_buffer_uptodate(child)); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child); + BUG_ON(ret); + + root->node = child; + add_root_to_dirty_list(root); + path->nodes[level] = NULL; + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + /* once for the path */ + free_extent_buffer(mid); + + ret = btrfs_free_extent(trans, root, mid->start, mid->len, + 0, root->root_key.objectid, + level, 1); + /* once for the root ptr */ + free_extent_buffer(mid); + return ret; + } + if (btrfs_header_nritems(mid) > + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) + return 0; + + left = read_node_slot(root, parent, pslot - 1); + if (extent_buffer_uptodate(left)) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); + if (wret) { + ret = wret; + goto enospc; + } + } + right = read_node_slot(root, parent, pslot + 1); + if (extent_buffer_uptodate(right)) { + wret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); + if (wret) { + ret = wret; + goto enospc; + } + } 
+ + /* first, try to make some room in the middle buffer */ + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid, 1); + if (wret < 0) + ret = wret; + } + + /* + * then try to empty the right most buffer into the middle + */ + if (right) { + wret = push_node_left(trans, root, mid, right, 1); + if (wret < 0 && wret != -ENOSPC) + ret = wret; + if (btrfs_header_nritems(right) == 0) { + u64 bytenr = right->start; + u32 blocksize = right->len; + + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); + right = NULL; + wret = btrfs_del_ptr(trans, root, path, + level + 1, pslot + 1); + if (wret) + ret = wret; + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 0, + root->root_key.objectid, + level, 0); + if (wret) + ret = wret; + } else { + struct btrfs_disk_key right_key; + btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + } + } + if (btrfs_header_nritems(mid) == 1) { + /* + * we're not allowed to leave a node with one item in the + * tree during a delete. A deletion from lower in the tree + * could try to delete the only pointer in this node. + * So, pull some keys from the left. + * There has to be a left pointer at this point because + * otherwise we would have pulled some pointers from the + * right + */ + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); + if (wret < 0) { + ret = wret; + goto enospc; + } + if (wret == 1) { + wret = push_node_left(trans, root, left, mid, 1); + if (wret < 0) + ret = wret; + } + BUG_ON(wret == 1); + } + if (btrfs_header_nritems(mid) == 0) { + /* we've managed to empty the middle node, drop it */ + u64 bytenr = mid->start; + u32 blocksize = mid->len; + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); + mid = NULL; + wret = btrfs_del_ptr(trans, root, path, level + 1, pslot); + if (wret) + ret = wret; + wret = btrfs_free_extent(trans, root, bytenr, blocksize, + 0, root->root_key.objectid, + level, 0); + if (wret) + ret = wret; + } else { + /* update the parent key to reflect our changes */ + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); + } + + /* update the path */ + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; + path->slots[level + 1] -= 1; + path->slots[level] = orig_slot; + if (mid) + free_extent_buffer(mid); + } else { + orig_slot -= btrfs_header_nritems(left); + path->slots[level] = orig_slot; + } + } + /* double check we haven't messed things up */ + check_block(root, path, level); + if (orig_ptr != + btrfs_node_blockptr(path->nodes[level], path->slots[level])) + BUG(); +enospc: + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); + return ret; +} + +/* returns zero if the push worked, non-zero otherwise */ +static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; + int ret = 0; + int wret; + int pslot; + int orig_slot = path->slots[level]; + + if (level == 0) + return 1; + + mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); + + if 
(level < BTRFS_MAX_LEVEL - 1) { + parent = path->nodes[level + 1]; + pslot = path->slots[level + 1]; + } + + if (!parent) + return 1; + + left = read_node_slot(root, parent, pslot - 1); + + /* first, try to make some room in the middle buffer */ + if (extent_buffer_uptodate(left)) { + u32 left_nr; + left_nr = btrfs_header_nritems(left); + if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { + wret = 1; + } else { + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); + if (ret) + wret = 1; + else { + wret = push_node_left(trans, root, + left, mid, 0); + } + } + if (wret < 0) + ret = wret; + if (wret == 0) { + struct btrfs_disk_key disk_key; + orig_slot += left_nr; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; + path->slots[level + 1] -= 1; + path->slots[level] = orig_slot; + free_extent_buffer(mid); + } else { + orig_slot -= + btrfs_header_nritems(left); + path->slots[level] = orig_slot; + free_extent_buffer(left); + } + return 0; + } + free_extent_buffer(left); + } + right= read_node_slot(root, parent, pslot + 1); + + /* + * then try to empty the right most buffer into the middle + */ + if (extent_buffer_uptodate(right)) { + u32 right_nr; + right_nr = btrfs_header_nritems(right); + if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { + wret = 1; + } else { + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); + if (ret) + wret = 1; + else { + wret = balance_node_right(trans, root, + right, mid); + } + } + if (wret < 0) + ret = wret; + if (wret == 0) { + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; + path->slots[level + 1] += 1; + path->slots[level] = orig_slot - + btrfs_header_nritems(mid); + free_extent_buffer(mid); + } else { + free_extent_buffer(right); + } + return 0; + } + free_extent_buffer(right); + } + return 1; +} + +/* + * readahead one full node of leaves + */ +void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, + int level, int slot, u64 objectid) +{ + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + u32 nritems; + u64 search; + u64 lowest_read; + u64 highest_read; + u64 nread = 0; + int direction = path->reada; + struct extent_buffer *eb; + u32 nr; + u32 blocksize; + u32 nscan = 0; + + if (level != 1) + return; + + if (!path->nodes[level]) + return; + + node = path->nodes[level]; + search = btrfs_node_blockptr(node, slot); + blocksize = btrfs_level_size(root, level - 1); + eb = btrfs_find_tree_block(root, search, blocksize); + if (eb) { + free_extent_buffer(eb); + return; + } + + highest_read = search; + lowest_read = search; + + nritems = btrfs_header_nritems(node); + nr = slot; + while(1) { + if (direction < 0) { + if (nr == 0) + break; + nr--; + } else if (direction > 0) { + nr++; + if (nr >= nritems) + break; + } + if (path->reada < 0 && objectid) { + btrfs_node_key(node, &disk_key, nr); + if (btrfs_disk_key_objectid(&disk_key) != objectid) + break; + } + search = btrfs_node_blockptr(node, nr); + if ((search >= lowest_read && search <= highest_read) || + (search < lowest_read && lowest_read - search <= 32768) || + (search > highest_read && search - highest_read <= 32768)) { + readahead_tree_block(root, search, blocksize, + btrfs_node_ptr_generation(node, 
nr)); + nread += blocksize; + } + nscan++; + if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) + break; + if(nread > (1024 * 1024) || nscan > 128) + break; + + if (search < lowest_read) + lowest_read = search; + if (search > highest_read) + highest_read = search; + } +} + +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, + u64 iobjectid, u64 ioff, u8 key_type, + struct btrfs_key *found_key) +{ + int ret; + struct btrfs_key key; + struct extent_buffer *eb; + struct btrfs_path *path; + + key.type = key_type; + key.objectid = iobjectid; + key.offset = ioff; + + if (found_path == NULL) { + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + } else + path = found_path; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if ((ret < 0) || (found_key == NULL)) + goto out; + + eb = path->nodes[0]; + if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(fs_root, path); + if (ret) + goto out; + eb = path->nodes[0]; + } + + btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); + if (found_key->type != key.type || + found_key->objectid != key.objectid) { + ret = 1; + goto out; + } + +out: + if (path != found_path) + btrfs_free_path(path); + return ret; +} + +/* + * look for key in the tree. path is filled in with nodes along the way + * if key is found, we return zero and you can find the item in the leaf + * level of the path (level 0) + * + * If the key isn't found, the path points to the slot where it should + * be inserted, and 1 is returned. If there are other errors during the + * search a negative error number is returned. + * + * if ins_len > 0, nodes and leaves will be split as we walk down the + * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if + * possible) + */ +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow) +{ + struct extent_buffer *b; + int slot; + int ret; + int level; + int should_reada = p->reada; + u8 lowest_level = 0; + + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len > 0); + WARN_ON(p->nodes[0] != NULL); + /* + WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); + */ +again: + b = root->node; + extent_buffer_get(b); + while (b) { + level = btrfs_header_level(b); + if (cow) { + int wret; + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], + &b); + if (wret) { + free_extent_buffer(b); + return wret; + } + } + BUG_ON(!cow && ins_len); + if (level != btrfs_header_level(b)) + WARN_ON(1); + level = btrfs_header_level(b); + p->nodes[level] = b; + ret = check_block(root, p, level); + if (ret) + return -1; + ret = bin_search(b, key, level, &slot); + if (level != 0) { + if (ret && slot > 0) + slot -= 1; + p->slots[level] = slot; + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= + BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { + int sret = split_node(trans, root, p, level); + BUG_ON(sret > 0); + if (sret) + return sret; + b = p->nodes[level]; + slot = p->slots[level]; + } else if (ins_len < 0) { + int sret = balance_level(trans, root, p, + level); + if (sret) + return sret; + b = p->nodes[level]; + if (!b) { + btrfs_release_path(p); + goto again; + } + slot = p->slots[level]; + BUG_ON(btrfs_header_nritems(b) == 1); + } + /* this is only true while dropping a snapshot */ + if (level == lowest_level) + break; + + if (should_reada) + reada_for_search(root, p, level, slot, + key->objectid); + + b = 
read_node_slot(root, b, slot);
+			if (!extent_buffer_uptodate(b))
+				return -EIO;
+		} else {
+			p->slots[level] = slot;
+			if (ins_len > 0 &&
+			    ins_len > btrfs_leaf_free_space(root, b)) {
+				int sret = split_leaf(trans, root, key,
+						      p, ins_len, ret == 0);
+				BUG_ON(sret > 0);
+				if (sret)
+					return sret;
+			}
+			return ret;
+		}
+	}
+	return 1;
+}
+
+/*
+ * adjust the pointers going up the tree, starting at level,
+ * making sure the right key of each node points to 'key'.
+ * This is used after shifting pointers to the left, so it stops
+ * fixing up pointers when a given leaf/node is not in slot 0 of the
+ * higher levels
+ */
+void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+			  struct btrfs_disk_key *key, int level)
+{
+	int i;
+	struct extent_buffer *t;
+
+	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+		int tslot = path->slots[i];
+		if (!path->nodes[i])
+			break;
+		t = path->nodes[i];
+		btrfs_set_node_key(t, key, tslot);
+		btrfs_mark_buffer_dirty(path->nodes[i]);
+		if (tslot != 0)
+			break;
+	}
+}
+
+/*
+ * update item key.
+ *
+ * This function isn't completely safe. It's the caller's responsibility
+ * that the new key won't break the order
+ */
+int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key)
+{
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *eb;
+	int slot;
+
+	eb = path->nodes[0];
+	slot = path->slots[0];
+	if (slot > 0) {
+		btrfs_item_key(eb, &disk_key, slot - 1);
+		if (btrfs_comp_keys(&disk_key, new_key) >= 0)
+			return -1;
+	}
+	if (slot < btrfs_header_nritems(eb) - 1) {
+		btrfs_item_key(eb, &disk_key, slot + 1);
+		if (btrfs_comp_keys(&disk_key, new_key) <= 0)
+			return -1;
+	}
+
+	btrfs_cpu_key_to_disk(&disk_key, new_key);
+	btrfs_set_item_key(eb, &disk_key, slot);
+	btrfs_mark_buffer_dirty(eb);
+	if (slot == 0)
+		btrfs_fixup_low_keys(root, path, &disk_key, 1);
+	return 0;
+}
+
+/*
+ * update an item key without the safety checks. This is meant to be called by
+ * fsck only.
+ */
+void btrfs_set_item_key_unsafe(struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct btrfs_key *new_key)
+{
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *eb;
+	int slot;
+
+	eb = path->nodes[0];
+	slot = path->slots[0];
+
+	btrfs_cpu_key_to_disk(&disk_key, new_key);
+	btrfs_set_item_key(eb, &disk_key, slot);
+	btrfs_mark_buffer_dirty(eb);
+	if (slot == 0)
+		btrfs_fixup_low_keys(root, path, &disk_key, 1);
+}
+
+/*
+ * try to push data from one node into the next node left in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the left hand block.
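+ *
+ * A minimal usage sketch (hypothetical caller, illustration only; the
+ * caller in balance_level() above consumes the convention the same way):
+ *
+ *	wret = push_node_left(trans, root, left, mid, 1);
+ *	if (wret < 0)
+ *		ret = wret;	(hard error, propagate it)
+ *	else if (wret > 0)
+ *		...		(no room in the left block, try elsewhere)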
+ */ +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src, int empty) +{ + int push_items = 0; + int src_nritems; + int dst_nritems; + int ret = 0; + + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); + + if (!empty && src_nritems <= 8) + return 1; + + if (push_items <= 0) { + return 1; + } + + if (empty) { + push_items = min(src_nritems, push_items); + if (push_items < src_nritems) { + /* leave at least 8 pointers in the node if + * we aren't going to empty it + */ + if (src_nritems - push_items < 8) { + if (push_items <= 8) + return 1; + push_items -= 8; + } + } + } else + push_items = min(src_nritems - 8, push_items); + + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + + if (push_items < src_nritems) { + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); + } + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); + + return ret; +} + +/* + * try to push data from one node into the next node right in the + * tree. + * + * returns 0 if some ptrs were pushed, < 0 if there was some horrible + * error, and > 0 if there was no room in the right hand block. + * + * this will only push up to 1/2 the contents of the left node over + */ +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) +{ + int push_items = 0; + int max_push; + int src_nritems; + int dst_nritems; + int ret = 0; + + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); + + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + if (push_items <= 0) { + return 1; + } + + if (src_nritems < 4) { + return 1; + } + + max_push = src_nritems / 2 + 1; + /* don't try to empty the node */ + if (max_push >= src_nritems) { + return 1; + } + + if (max_push < push_items) + push_items = max_push; + + memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); + + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); + + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); + + return ret; +} + +/* + * helper function to insert a new root level in the tree. + * A new node is allocated, and a single item is inserted to + * point to the existing root + * + * returns zero on success or < 0 on failure. 
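+ *
+ * Schematically (illustration only), growing a tree whose root was
+ * block B:
+ *
+ *	before:	root->node --> [B]
+ *	after:	root->node --> [C]	(new node, nritems == 1)
+ *				|
+ *			       [B]	(keyed by B's first key)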
+ */ +static int noinline insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + u64 lower_gen; + struct extent_buffer *lower; + struct extent_buffer *c; + struct extent_buffer *old; + struct btrfs_disk_key lower_key; + + BUG_ON(path->nodes[level]); + BUG_ON(path->nodes[level-1] != root->node); + + lower = path->nodes[level-1]; + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); + else + btrfs_node_key(lower, &lower_key, 0); + + c = btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, &lower_key, + level, root->node->start, 0); + + if (IS_ERR(c)) + return PTR_ERR(c); + + memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_bytenr(c, c->start); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(c, root->root_key.objectid); + + write_extent_buffer(c, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, lower->start); + lower_gen = btrfs_header_generation(lower); + WARN_ON(lower_gen != trans->transid); + + btrfs_set_node_ptr_generation(c, 0, lower_gen); + + btrfs_mark_buffer_dirty(c); + + old = root->node; + root->node = c; + + /* the super has an extra ref to root->node */ + free_extent_buffer(old); + + add_root_to_dirty_list(root); + extent_buffer_get(c); + path->nodes[level] = c; + path->slots[level] = 0; + return 0; +} + +/* + * worker function to insert a single pointer in a node. + * the node should have enough room for the pointer already + * + * slot and level indicate where you want the key to go, and + * blocknr is the block the key points to. + * + * returns zero on success and < 0 on any error + */ +static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_disk_key + *key, u64 bytenr, int slot, int level) +{ + struct extent_buffer *lower; + int nritems; + + BUG_ON(!path->nodes[level]); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); + if (slot > nritems) + BUG(); + if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) + BUG(); + if (slot != nritems) { + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_key_ptr)); + } + btrfs_set_node_key(lower, key, slot); + btrfs_set_node_blockptr(lower, slot, bytenr); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(lower, slot, trans->transid); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); + return 0; +} + +/* + * split the node at the specified level in path in two. + * The path is corrected to point to the appropriate node after the split + * + * Before splitting this tries to make some room in the node by pushing + * left and right, if either one works, it returns right away. 
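+ *
+ * As a worked example (illustrative numbers only): a node holding keys
+ * [A B C D] is cut at mid = (4 + 1) / 2 = 2, leaving [A B] in place,
+ * copying [C D] into a freshly allocated sibling, and inserting a
+ * pointer to that sibling one slot to the right in the parent.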
+ * + * returns 0 on success and < 0 on failure + */ +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) +{ + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; + int mid; + int ret; + int wret; + u32 c_nritems; + + c = path->nodes[level]; + WARN_ON(btrfs_header_generation(c) != trans->transid); + if (c == root->node) { + /* trying to split the root, lets make a new one */ + ret = insert_new_root(trans, root, path, level + 1); + if (ret) + return ret; + } else { + ret = push_nodes_for_insert(trans, root, path, level); + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < + BTRFS_NODEPTRS_PER_BLOCK(root) - 3) + return 0; + if (ret < 0) + return ret; + } + + c_nritems = btrfs_header_nritems(c); + mid = (c_nritems + 1) / 2; + btrfs_node_key(c, &disk_key, mid); + + split = btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, + &disk_key, level, c->start, 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_bytenr(split, split->start); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + write_extent_buffer(split, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(split), + BTRFS_UUID_SIZE); + + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); + ret = 0; + + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + wret = insert_ptr(trans, root, path, &disk_key, split->start, + path->slots[level + 1] + 1, + level + 1); + if (wret) + ret = wret; + + if (path->slots[level] >= mid) { + path->slots[level] -= mid; + free_extent_buffer(c); + path->nodes[level] = split; + path->slots[level + 1] += 1; + } else { + free_extent_buffer(split); + } + return ret; +} + +/* + * how many bytes are required to store the items in a leaf. start + * and nr indicate which items in the leaf to check. This totals up the + * space used both by the item structs and the item data + */ +static int leaf_space_used(struct extent_buffer *l, int start, int nr) +{ + int data_len; + int nritems = btrfs_header_nritems(l); + int end = min(nritems, start + nr) - 1; + + if (!nr) + return 0; + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); + data_len += sizeof(struct btrfs_item) * nr; + WARN_ON(data_len < 0); + return data_len; +} + +/* + * The space between the end of the leaf items and + * the start of the leaf data. IOW, how much room + * the leaf has left for both items and data + */ +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) +{ + u32 nodesize = (root ? 
BTRFS_LEAF_DATA_SIZE(root) : leaf->len); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = nodesize - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n", + ret, nodesize, leaf_space_used(leaf, 0, nritems), + nritems); + } + return ret; +} + +/* + * push some data in the path leaf to the right, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * returns 1 if the push failed because the other node didn't have enough + * room, 0 if everything worked out and < 0 if there were major errors. + */ +static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; + int slot; + u32 i; + int free_space; + int push_space = 0; + int push_items = 0; + struct btrfs_item *item; + u32 left_nritems; + u32 nr; + u32 right_nritems; + u32 data_end; + u32 this_item_size; + int ret; + + slot = path->slots[1]; + if (!path->nodes[1]) { + return 1; + } + upper = path->nodes[1]; + if (slot >= btrfs_header_nritems(upper) - 1) + return 1; + + right = read_node_slot(root, upper, slot + 1); + if (!extent_buffer_uptodate(right)) { + if (IS_ERR(right)) + return PTR_ERR(right); + return -EIO; + } + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) { + free_extent_buffer(right); + return 1; + } + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); + if (ret) { + free_extent_buffer(right); + return 1; + } + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) { + free_extent_buffer(right); + return 1; + } + + left_nritems = btrfs_header_nritems(left); + if (left_nritems == 0) { + free_extent_buffer(right); + return 1; + } + + if (empty) + nr = 0; + else + nr = 1; + + i = left_nritems - 1; + while (i >= nr) { + item = btrfs_item_nr(i); + + if (path->slots[0] == i) + push_space += data_size + sizeof(*item); + + this_item_size = btrfs_item_size(left, item); + if (this_item_size + sizeof(*item) + push_space > free_space) + break; + push_items++; + push_space += this_item_size + sizeof(*item); + if (i == 0) + break; + i--; + } + + if (push_items == 0) { + free_extent_buffer(right); + return 1; + } + + if (!empty && push_items == left_nritems) + WARN_ON(1); + + /* push left to right */ + right_nritems = btrfs_header_nritems(right); + + push_space = btrfs_item_end_nr(left, left_nritems - push_items); + push_space -= leaf_data_end(root, left); + + /* make room in the right data area */ + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + + /* copy from the left data area */ + copy_extent_buffer(right, left, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), + push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + + /* copy the items from left to right */ + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); + + /* update the item pointers */ + right_nritems += 
push_items; + btrfs_set_header_nritems(right, right_nritems); + push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(i); + push_space -= btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + left_nritems -= push_items; + btrfs_set_header_nritems(left, left_nritems); + + if (left_nritems) + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); + + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); + btrfs_mark_buffer_dirty(upper); + + /* then fixup the leaf pointer in the path */ + if (path->slots[0] >= left_nritems) { + path->slots[0] -= left_nritems; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[1] += 1; + } else { + free_extent_buffer(right); + } + return 0; +} +/* + * push some data in the path leaf to the left, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + */ +static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; + int slot; + int i; + int free_space; + int push_space = 0; + int push_items = 0; + struct btrfs_item *item; + u32 old_left_nritems; + u32 right_nritems; + u32 nr; + int ret = 0; + u32 this_item_size; + u32 old_left_item_size; + + slot = path->slots[1]; + if (slot == 0) + return 1; + if (!path->nodes[1]) + return 1; + + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + return 1; + } + + left = read_node_slot(root, path->nodes[1], slot - 1); + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + free_extent_buffer(left); + return 1; + } + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + free_extent_buffer(left); + return 1; + } + + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + free_extent_buffer(left); + return 1; + } + + if (empty) + nr = right_nritems; + else + nr = right_nritems - 1; + + for (i = 0; i < nr; i++) { + item = btrfs_item_nr(i); + + if (path->slots[0] == i) + push_space += data_size + sizeof(*item); + + this_item_size = btrfs_item_size(right, item); + if (this_item_size + sizeof(*item) + push_space > free_space) + break; + + push_items++; + push_space += this_item_size + sizeof(*item); + } + + if (push_items == 0) { + free_extent_buffer(left); + return 1; + } + if (!empty && push_items == btrfs_header_nritems(right)) + WARN_ON(1); + + /* push data from right to left */ + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + + push_space = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset_nr(right, push_items - 1), + push_space); + old_left_nritems = btrfs_header_nritems(left); + BUG_ON(old_left_nritems == 0); + + old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); + for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { + u32 ioff; + + item = btrfs_item_nr(i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + 
ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); + } + btrfs_set_header_nritems(left, old_left_nritems + push_items); + + /* fixup right node */ + if (push_items > right_nritems) { + printk("push items %d nr %u\n", push_items, right_nritems); + WARN_ON(1); + } + + if (push_items < right_nritems) { + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + } + right_nritems -= push_items; + btrfs_set_header_nritems(right, right_nritems); + push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(i); + push_space = push_space - btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + btrfs_mark_buffer_dirty(left); + if (right_nritems) + btrfs_mark_buffer_dirty(right); + + btrfs_item_key(right, &disk_key, 0); + btrfs_fixup_low_keys(root, path, &disk_key, 1); + + /* then fixup the leaf pointer in the path */ + if (path->slots[0] < push_items) { + path->slots[0] += old_left_nritems; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; + path->slots[1] -= 1; + } else { + free_extent_buffer(left); + path->slots[0] -= push_items; + } + BUG_ON(path->slots[0] < 0); + return ret; +} + +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. + */ +static noinline int copy_for_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *l, + struct extent_buffer *right, + int slot, int mid, int nritems) +{ + int data_copy_size; + int rt_data_off; + int i; + int ret = 0; + int wret; + struct btrfs_disk_key disk_key; + + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(l, mid); + + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); + } + + btrfs_set_header_nritems(l, mid); + ret = 0; + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); + BUG_ON(path->slots[0] != slot); + + if (mid <= slot) { + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] -= mid; + path->slots[1] += 1; + } else { + free_extent_buffer(right); + } + + BUG_ON(path->slots[0] < 0); + + return ret; +} + +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. 
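+ *
+ * A usage sketch, mirroring the caller in btrfs_search_slot() above
+ * (illustration only):
+ *
+ *	int sret = split_leaf(trans, root, key, p, ins_len, ret == 0);
+ *	BUG_ON(sret > 0);	(a split may fail, but never for lack of room)
+ *	if (sret)
+ *		return sret;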
+ */ +static noinline int split_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, + int extend) +{ + struct btrfs_disk_key disk_key; + struct extent_buffer *l; + u32 nritems; + int mid; + int slot; + struct extent_buffer *right; + int ret = 0; + int wret; + int split; + int num_doubles = 0; + + l = path->nodes[0]; + slot = path->slots[0]; + if (extend && data_size + btrfs_item_size_nr(l, slot) + + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) + return -EOVERFLOW; + + /* first try to make some room by pushing left and right */ + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { + wret = push_leaf_right(trans, root, path, data_size, 0); + if (wret < 0) + return wret; + if (wret) { + wret = push_leaf_left(trans, root, path, data_size, 0); + if (wret < 0) + return wret; + } + l = path->nodes[0]; + + /* did the pushes work? */ + if (btrfs_leaf_free_space(root, l) >= data_size) + return 0; + } + + if (!path->nodes[1]) { + ret = insert_new_root(trans, root, path, 1); + if (ret) + return ret; + } +again: + split = 1; + l = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(l); + mid = (nritems + 1) / 2; + + if (mid <= slot) { + if (nritems == 1 || + leaf_space_used(l, mid, nritems - mid) + data_size > + BTRFS_LEAF_DATA_SIZE(root)) { + if (slot >= nritems) { + split = 0; + } else { + mid = slot; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + split = 2; + } + } + } + } else { + if (leaf_space_used(l, 0, mid) + data_size > + BTRFS_LEAF_DATA_SIZE(root)) { + if (!extend && data_size && slot == 0) { + split = 0; + } else if ((extend || !data_size) && slot == 0) { + mid = 1; + } else { + mid = slot; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + split = 2 ; + } + } + } + } + + if (split == 0) + btrfs_cpu_key_to_disk(&disk_key, ins_key); + else + btrfs_item_key(l, &disk_key, mid); + + right = btrfs_alloc_free_block(trans, root, root->leafsize, + root->root_key.objectid, + &disk_key, 0, l->start, 0); + if (IS_ERR(right)) { + BUG_ON(1); + return PTR_ERR(right); + } + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(right, right->start); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + write_extent_buffer(right, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(right), + BTRFS_UUID_SIZE); + + if (split == 0) { + if (mid <= slot) { + btrfs_set_header_nritems(right, 0); + wret = insert_ptr(trans, root, path, + &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] = 0; + path->slots[1] += 1; + } else { + btrfs_set_header_nritems(right, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right->start, + path->slots[1], 1); + if (wret) + ret = wret; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] = 0; + if (path->slots[1] == 0) { + btrfs_fixup_low_keys(root, path, + &disk_key, 1); + } + } + btrfs_mark_buffer_dirty(right); + return ret; + } + + ret = copy_for_split(trans, root, path, l, right, slot, mid, 
nritems);
+	BUG_ON(ret);
+
+	if (split == 2) {
+		BUG_ON(num_doubles != 0);
+		num_doubles++;
+		goto again;
+	}
+
+	return ret;
+}
+
+/*
+ * This function splits a single item into two items,
+ * giving 'new_key' to the new item and splitting the
+ * old one at split_offset (from the start of the item).
+ *
+ * The path may be released by this operation. After
+ * the split, the path is pointing to the old item. The
+ * new item is going to be in the same node as the old one.
+ *
+ * Note: the item being split must be small enough to live alone on
+ * a tree block with room for one extra struct btrfs_item
+ *
+ * This allows us to split the item in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_path *path,
+		     struct btrfs_key *new_key,
+		     unsigned long split_offset)
+{
+	u32 item_size;
+	struct extent_buffer *leaf;
+	struct btrfs_key orig_key;
+	struct btrfs_item *item;
+	struct btrfs_item *new_item;
+	int ret = 0;
+	int slot;
+	u32 nritems;
+	u32 orig_offset;
+	struct btrfs_disk_key disk_key;
+	char *buf;
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
+	if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
+		goto split;
+
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	btrfs_release_path(path);
+
+	path->search_for_split = 1;
+
+	ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+	path->search_for_split = 0;
+
+	/* if our item isn't there or got smaller, return now */
+	if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
+							path->slots[0])) {
+		return -EAGAIN;
+	}
+
+	ret = split_leaf(trans, root, &orig_key, path, 0, 0);
+	BUG_ON(ret);
+
+	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+	leaf = path->nodes[0];
+
+split:
+	item = btrfs_item_nr(path->slots[0]);
+	orig_offset = btrfs_item_offset(leaf, item);
+	item_size = btrfs_item_size(leaf, item);
+
+	buf = kmalloc(item_size, GFP_NOFS);
+	BUG_ON(!buf);
+	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
+			   path->slots[0]), item_size);
+	slot = path->slots[0] + 1;
+	leaf = path->nodes[0];
+
+	nritems = btrfs_header_nritems(leaf);
+
+	if (slot != nritems) {
+		/* shift the items */
+		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
+				      btrfs_item_nr_offset(slot),
+				      (nritems - slot) * sizeof(struct btrfs_item));
+	}
+
+	btrfs_cpu_key_to_disk(&disk_key, new_key);
+	btrfs_set_item_key(leaf, &disk_key, slot);
+
+	new_item = btrfs_item_nr(slot);
+
+	btrfs_set_item_offset(leaf, new_item, orig_offset);
+	btrfs_set_item_size(leaf, new_item, item_size - split_offset);
+
+	btrfs_set_item_offset(leaf, item,
+			      orig_offset + item_size - split_offset);
+	btrfs_set_item_size(leaf, item, split_offset);
+
+	btrfs_set_header_nritems(leaf, nritems + 1);
+
+	/* write the data for the start of the original item */
+	write_extent_buffer(leaf, buf,
+			    btrfs_item_ptr_offset(leaf, path->slots[0]),
+			    split_offset);
+
+	/* write the data for the new item */
+	write_extent_buffer(leaf, buf + split_offset,
+			    btrfs_item_ptr_offset(leaf, slot),
+			    item_size - split_offset);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = 0;
+	if (btrfs_leaf_free_space(root, leaf) < 0) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+	kfree(buf);
+	return ret;
+}
+
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct btrfs_path *path,
+			u32 new_size, int from_end)
+{
+	int ret = 0;
+	int slot;
+	struct extent_buffer *leaf;
+ struct btrfs_item *item; + u32 nritems; + unsigned int data_end; + unsigned int old_data_start; + unsigned int old_size; + unsigned int size_diff; + int i; + + leaf = path->nodes[0]; + slot = path->slots[0]; + + old_size = btrfs_item_size_nr(leaf, slot); + if (old_size == new_size) + return 0; + + nritems = btrfs_header_nritems(leaf); + data_end = leaf_data_end(root, leaf); + + old_data_start = btrfs_item_offset_nr(leaf, slot); + + size_diff = old_size - new_size; + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff; + item = btrfs_item_nr(i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); + } + + /* shift the data */ + if (from_end) { + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + } else { + struct btrfs_disk_key disk_key; + u64 offset; + + btrfs_item_key(leaf, &disk_key, slot); + + if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { + unsigned long ptr; + struct btrfs_file_extent_item *fi; + + fi = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + fi = (struct btrfs_file_extent_item *)( + (unsigned long)fi - size_diff); + + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) { + ptr = btrfs_item_ptr_offset(leaf, slot); + memmove_extent_buffer(leaf, ptr, + (unsigned long)fi, + offsetof(struct btrfs_file_extent_item, + disk_bytenr)); + } + } + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start - data_end); + + offset = btrfs_disk_key_offset(&disk_key); + btrfs_set_disk_key_offset(&disk_key, offset + size_diff); + btrfs_set_item_key(leaf, &disk_key, slot); + if (slot == 0) + btrfs_fixup_low_keys(root, path, &disk_key, 1); + } + + item = btrfs_item_nr(slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } + return ret; +} + +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) +{ + int ret = 0; + int slot; + struct extent_buffer *leaf; + struct btrfs_item *item; + u32 nritems; + unsigned int data_end; + unsigned int old_data; + unsigned int old_size; + int i; + + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + data_end = leaf_data_end(root, leaf); + + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); + BUG(); + } + slot = path->slots[0]; + old_data = btrfs_item_end_nr(leaf, slot); + + BUG_ON(slot < 0); + if (slot >= nritems) { + btrfs_print_leaf(root, leaf); + printk("slot %d too large, nritems %d\n", slot, nritems); + BUG_ON(1); + } + + /* + * item0..itemN ... dataN.offset..dataN.size .. 
data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff; + item = btrfs_item_nr(i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); + } + + /* shift the data */ + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); + + data_end = old_data; + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } + return ret; +} + +/* + * Given a key and some data, insert an item into the tree. + * This does all the path init required, making room in the tree if needed. + */ +int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + int nr) +{ + struct extent_buffer *leaf; + struct btrfs_item *item; + int ret = 0; + int slot; + int i; + u32 nritems; + u32 total_size = 0; + u32 total_data = 0; + unsigned int data_end; + struct btrfs_disk_key disk_key; + + for (i = 0; i < nr; i++) { + total_data += data_size[i]; + } + + /* create a root if there isn't one */ + if (!root->node) + BUG(); + + total_size = total_data + nr * sizeof(struct btrfs_item); + ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); + if (ret == 0) { + return -EEXIST; + } + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + data_end = leaf_data_end(root, leaf); + + if (btrfs_leaf_free_space(root, leaf) < total_size) { + btrfs_print_leaf(root, leaf); + printk("not enough freespace need %u have %d\n", + total_size, btrfs_leaf_free_space(root, leaf)); + BUG(); + } + + slot = path->slots[0]; + BUG_ON(slot < 0); + + if (slot != nritems) { + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } + /* + * item0..itemN ... dataN.offset..dataN.size .. 
data0.size
+		 */
+		/* first correct the data pointers */
+		for (i = slot; i < nritems; i++) {
+			u32 ioff;
+
+			item = btrfs_item_nr(i);
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff - total_data);
+		}
+
+		/* shift the items */
+		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+				      btrfs_item_nr_offset(slot),
+				      (nritems - slot) * sizeof(struct btrfs_item));
+
+		/* shift the data */
+		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+				      data_end - total_data, btrfs_leaf_data(leaf) +
+				      data_end, old_data - data_end);
+		data_end = old_data;
+	}
+
+	/* setup the item for the new data */
+	for (i = 0; i < nr; i++) {
+		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+		btrfs_set_item_key(leaf, &disk_key, slot + i);
+		item = btrfs_item_nr(slot + i);
+		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+		data_end -= data_size[i];
+		btrfs_set_item_size(leaf, item, data_size[i]);
+	}
+	btrfs_set_header_nritems(leaf, nritems + nr);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = 0;
+	if (slot == 0) {
+		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+		btrfs_fixup_low_keys(root, path, &disk_key, 1);
+	}
+
+	if (btrfs_leaf_free_space(root, leaf) < 0) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *cpu_key, void *data, u32
+		      data_size)
+{
+	int ret = 0;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	unsigned long ptr;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+	if (!ret) {
+		leaf = path->nodes[0];
+		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		write_extent_buffer(leaf, data, ptr, data_size);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * delete the pointer from a given node.
+ *
+ * If the delete empties a node, the node is removed from the tree,
+ * continuing all the way to the root if required. The root is converted
+ * into a leaf if all the nodes are emptied.
+ */
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct btrfs_path *path, int level, int slot)
+{
+	struct extent_buffer *parent = path->nodes[level];
+	u32 nritems;
+	int ret = 0;
+
+	nritems = btrfs_header_nritems(parent);
+	if (slot != nritems - 1) {
+		memmove_extent_buffer(parent,
+				      btrfs_node_key_ptr_offset(slot),
+				      btrfs_node_key_ptr_offset(slot + 1),
+				      sizeof(struct btrfs_key_ptr) *
+				      (nritems - slot - 1));
+	}
+	nritems--;
+	btrfs_set_header_nritems(parent, nritems);
+	if (nritems == 0 && parent == root->node) {
+		BUG_ON(btrfs_header_level(root->node) != 1);
+		/* just turn the root into a leaf and break */
+		btrfs_set_header_level(root->node, 0);
+	} else if (slot == 0) {
+		struct btrfs_disk_key disk_key;
+
+		btrfs_node_key(parent, &disk_key, 0);
+		btrfs_fixup_low_keys(root, path, &disk_key, level + 1);
+	}
+	btrfs_mark_buffer_dirty(parent);
+	return ret;
+}
+
+/*
+ * a helper function to delete the leaf pointed to by path->slots[1] and
+ * path->nodes[1].
+ *
+ * This deletes the pointer in path->nodes[1] and frees the leaf
+ * block extent. zero is returned if it all worked out, < 0 otherwise.
+ *
+ * The path must have already been set up for deleting the leaf, including
+ * all the proper balancing. path->nodes[1] must be locked.
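+ *
+ * In essence the helper boils down to these two steps (see the body
+ * below; shown here for illustration):
+ *
+ *	btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
+ *	btrfs_free_extent(trans, root, leaf->start, leaf->len, 0,
+ *			  root->root_key.objectid, 0, 0);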
+ */ +static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *leaf) +{ + int ret; + + WARN_ON(btrfs_header_generation(leaf) != trans->transid); + ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]); + if (ret) + return ret; + + ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, + 0, root->root_key.objectid, 0, 0); + return ret; +} + +/* + * delete the item at the leaf level in path. If that empties + * the leaf, remove it from the tree + */ +int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int slot, int nr) +{ + struct extent_buffer *leaf; + struct btrfs_item *item; + int last_off; + int dsize = 0; + int ret = 0; + int wret; + int i; + u32 nritems; + + leaf = path->nodes[0]; + last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); + + for (i = 0; i < nr; i++) + dsize += btrfs_item_size_nr(leaf, slot + i); + + nritems = btrfs_header_nritems(leaf); + + if (slot + nr != nritems) { + int data_end = leaf_data_end(root, leaf); + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, + last_off - data_end); + + for (i = slot + nr; i < nritems; i++) { + u32 ioff; + + item = btrfs_item_nr(i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); + } + + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + nr), + sizeof(struct btrfs_item) * + (nritems - slot - nr)); + } + btrfs_set_header_nritems(leaf, nritems - nr); + nritems -= nr; + + /* delete the leaf if we've emptied it */ + if (nritems == 0) { + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); + } else { + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + + wret = btrfs_del_leaf(trans, root, path, leaf); + BUG_ON(ret); + if (wret) + ret = wret; + } + } else { + int used = leaf_space_used(leaf, 0, nritems); + if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); + btrfs_fixup_low_keys(root, path, &disk_key, 1); + } + + /* delete the leaf if it is mostly empty */ + if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { + /* push_leaf_left fixes the path. + * make sure the path still points to our leaf + * for possible call to del_ptr below + */ + slot = path->slots[1]; + extent_buffer_get(leaf); + + wret = push_leaf_left(trans, root, path, 1, 1); + if (wret < 0 && wret != -ENOSPC) + ret = wret; + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { + wret = push_leaf_right(trans, root, path, 1, 1); + if (wret < 0 && wret != -ENOSPC) + ret = wret; + } + + if (btrfs_header_nritems(leaf) == 0) { + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + + path->slots[1] = slot; + ret = btrfs_del_leaf(trans, root, path, leaf); + BUG_ON(ret); + free_extent_buffer(leaf); + + } else { + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); + } + } else { + btrfs_mark_buffer_dirty(leaf); + } + } + return ret; +} + +/* + * walk up the tree as far as required to find the previous leaf. + * returns 0 if it found something or 1 if there are no lesser leaves. + * returns < 0 on io errors. 
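+ *
+ * A backwards-iteration sketch (hypothetical caller, illustration only):
+ *
+ *	while (btrfs_prev_leaf(root, path) == 0) {
+ *		leaf = path->nodes[0];
+ *		(visit items from slot btrfs_header_nritems(leaf) - 1
+ *		 down to slot 0)
+ *	}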
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	int slot;
+	int level = 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+
+	while (level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			return 1;
+
+		slot = path->slots[level];
+		c = path->nodes[level];
+		if (slot == 0) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL)
+				return 1;
+			continue;
+		}
+		slot--;
+
+		next = read_node_slot(root, c, slot);
+		if (!extent_buffer_uptodate(next)) {
+			if (IS_ERR(next))
+				return PTR_ERR(next);
+			return -EIO;
+		}
+		break;
+	}
+	path->slots[level] = slot;
+	while (1) {
+		level--;
+		c = path->nodes[level];
+		free_extent_buffer(c);
+		slot = btrfs_header_nritems(next);
+		if (slot != 0)
+			slot--;
+		path->nodes[level] = next;
+		path->slots[level] = slot;
+		if (!level)
+			break;
+		next = read_node_slot(root, next, slot);
+		if (!extent_buffer_uptodate(next)) {
+			if (IS_ERR(next))
+				return PTR_ERR(next);
+			return -EIO;
+		}
+	}
+	return 0;
+}
+
+/*
+ * walk up the tree as far as required to find the next leaf.
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	int slot;
+	int level = 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+
+	while (level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			return 1;
+
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+		if (slot >= btrfs_header_nritems(c)) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL)
+				return 1;
+			continue;
+		}
+
+		if (path->reada)
+			reada_for_search(root, path, level, slot, 0);
+
+		next = read_node_slot(root, c, slot);
+		if (!extent_buffer_uptodate(next))
+			return -EIO;
+		break;
+	}
+	path->slots[level] = slot;
+	while (1) {
+		level--;
+		c = path->nodes[level];
+		free_extent_buffer(c);
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		if (!level)
+			break;
+		if (path->reada)
+			reada_for_search(root, path, level, 0, 0);
+		next = read_node_slot(root, next, 0);
+		if (!extent_buffer_uptodate(next))
+			return -EIO;
+	}
+	return 0;
+}
+
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type)
+{
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int ret;
+
+	while (1) {
+		if (path->slots[0] == 0) {
+			ret = btrfs_prev_leaf(root, path);
+			if (ret != 0)
+				return ret;
+		} else {
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		/* stop once we walk below the requested objectid */
+		if (found_key.objectid < min_objectid)
+			break;
+		if (found_key.type == type)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * search in extent tree to find a previous Metadata/Data extent item with
+ * min objectid.
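+ *
+ * Usage sketch (hypothetical caller, illustration only; 'extent_root'
+ * stands for the caller's extent tree root):
+ *
+ *	ret = btrfs_previous_extent_item(extent_root, path, 0);
+ *	if (ret == 0)
+ *		btrfs_item_key_to_cpu(path->nodes[0], &key,
+ *				      path->slots[0]);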
+ * + * returns 0 if something is found, 1 if nothing was found and < 0 on error + */ +int btrfs_previous_extent_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + u32 nritems; + int ret; + + while (1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid < min_objectid) + break; + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) + return 0; + if (found_key.objectid == min_objectid && + found_key.type < BTRFS_EXTENT_ITEM_KEY) + break; + } + return 1; +} diff --git a/tools/libfsimage/btrfs/ctree.h b/tools/libfsimage/btrfs/ctree.h new file mode 100644 index 0000000..5ab0f4a --- /dev/null +++ b/tools/libfsimage/btrfs/ctree.h @@ -0,0 +1,2570 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_CTREE_H__ +#define __BTRFS_CTREE_H__ + +#if BTRFS_FLAT_INCLUDES +#include "list.h" +#include "kerncompat.h" +#include "radix-tree.h" +#include "extent-cache.h" +#include "extent_io.h" +#include "ioctl.h" +#else +#include <btrfs/list.h> +#include <btrfs/kerncompat.h> +#include <btrfs/radix-tree.h> +#include <btrfs/extent-cache.h> +#include <btrfs/extent_io.h> +#include <btrfs/ioctl.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +struct btrfs_root; +struct btrfs_trans_handle; +struct btrfs_free_space_ctl; +#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ + +#define BTRFS_MAX_MIRRORS 3 + +#define BTRFS_MAX_LEVEL 8 + +#define BTRFS_COMPAT_EXTENT_TREE_V0 + +/* holds pointers to all of the tree roots */ +#define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 3ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 4ULL + +/* one per subvolume, storing files and directories */ +#define BTRFS_FS_TREE_OBJECTID 5ULL + +/* directory objectid inside the root tree */ +#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL +/* holds checksums of all the data extents */ +#define BTRFS_CSUM_TREE_OBJECTID 7ULL +#define BTRFS_QUOTA_TREE_OBJECTID 8ULL + +/* for storing items that use the BTRFS_UUID_KEY* */ +#define BTRFS_UUID_TREE_OBJECTID 9ULL + +/* tracks free space in block groups. 
*/
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* orphan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* space balancing */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+/*
+ * the device items go into the chunk tree. The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+/*
+ * the max metadata block size. This limit is somewhat artificial,
+ * but the memmove costs go through the roof for larger blocks.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
+ * we can actually store much bigger names, but let's not confuse the rest
+ * of linux
+ */
+#define BTRFS_NAME_LEN 255
+
+/*
+ * Theoretical limit is larger, but we keep this down to a sane
+ * value. That should greatly limit the possibility of collisions on
+ * inode ref items.
+ */
+#define BTRFS_LINK_MAX 65535U
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* csum types */
+#define BTRFS_CSUM_TYPE_CRC32 0
+
+static int btrfs_csum_sizes[] = { 4 };
+
+/* four bytes for CRC32 */
+#define BTRFS_CRC32_SIZE 4
+#define BTRFS_EMPTY_DIR_SIZE 0
+
+#define BTRFS_FT_UNKNOWN 0
+#define BTRFS_FT_REG_FILE 1
+#define BTRFS_FT_DIR 2
+#define BTRFS_FT_CHRDEV 3
+#define BTRFS_FT_BLKDEV 4
+#define BTRFS_FT_FIFO 5
+#define BTRFS_FT_SOCK 6
+#define BTRFS_FT_SYMLINK 7
+#define BTRFS_FT_XATTR 8
+#define BTRFS_FT_MAX 9
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+/*
+ * the key defines the order in the tree, and so it also defines (optimal)
+ * block layout. objectid corresponds to the inode number. The flags
+ * tell us things about the object, and are a kind of stream selector.
+ * so for a given inode, keys with flags of 1 might refer to the inode
+ * data, flags of 2 may point to file data in the btree and flags == 3
+ * may point to extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order. struct btrfs_key is always
+ * in cpu native order.
+ * Otherwise they are identical and their sizes
+ * should be the same (i.e. both packed)
+ */
+struct btrfs_disk_key {
+	__le64 objectid;
+	u8 type;
+	__le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+	u64 objectid;
+	u8 type;
+	u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_mapping_tree {
+	struct cache_tree cache_tree;
+};
+
+#define BTRFS_UUID_SIZE 16
+struct btrfs_dev_item {
+	/* the internal btrfs device id */
+	__le64 devid;
+
+	/* size of the device */
+	__le64 total_bytes;
+
+	/* bytes used */
+	__le64 bytes_used;
+
+	/* optimal io alignment for this device */
+	__le32 io_align;
+
+	/* optimal io width for this device */
+	__le32 io_width;
+
+	/* minimal io size for this device */
+	__le32 sector_size;
+
+	/* type and info about this device */
+	__le64 type;
+
+	/* expected generation for this device */
+	__le64 generation;
+
+	/*
+	 * starting byte of this partition on the device,
+	 * to allow for stripe alignment in the future
+	 */
+	__le64 start_offset;
+
+	/* grouping information for allocation decisions */
+	__le32 dev_group;
+
+	/* seek speed 0-100 where 100 is fastest */
+	u8 seek_speed;
+
+	/* bandwidth 0-100 where 100 is fastest */
+	u8 bandwidth;
+
+	/* btrfs generated uuid for this device */
+	u8 uuid[BTRFS_UUID_SIZE];
+
+	/* uuid of the FS that owns this device */
+	u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+	__le64 devid;
+	__le64 offset;
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+	/* size of this chunk in bytes */
+	__le64 length;
+
+	/* objectid of the root referencing this chunk */
+	__le64 owner;
+
+	__le64 stripe_len;
+	__le64 type;
+
+	/* optimal io alignment for this chunk */
+	__le32 io_align;
+
+	/* optimal io width for this chunk */
+	__le32 io_width;
+
+	/* minimal io size for this chunk */
+	__le32 sector_size;
+
+	/* 2^16 stripes is quite a lot; a second limit is the size of a single
+	 * item in the btree
+	 */
+	__le16 num_stripes;
+
+	/* sub stripes only matter for raid10 */
+	__le16 sub_stripes;
+	struct btrfs_stripe stripe;
+	/* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+	__le64 offset;
+	__le64 bytes;
+	u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+	struct btrfs_disk_key location;
+	__le64 generation;
+	__le64 num_entries;
+	__le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+	BUG_ON(num_stripes == 0);
+	return sizeof(struct btrfs_chunk) +
+		sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
+
+#define BTRFS_BACKREF_REV_MAX 256
+#define BTRFS_BACKREF_REV_SHIFT 56
+#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+				BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV 0
+#define BTRFS_MIXED_BACKREF_REV 1
+
+/*
+ * every tree block (leaf or node) starts with this header.
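+ * The csum field comes first so that the block checksum (crc32c for
+ * BTRFS_CSUM_TYPE_CRC32) can be computed over everything that follows
+ * it, i.e. the byte range [BTRFS_CSUM_SIZE, nodesize) of the block.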
+/* + * every tree block (leaf or node) starts with this header. + */ +struct btrfs_header { + /* these first four must match the super block */ + u8 csum[BTRFS_CSUM_SIZE]; + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + __le64 bytenr; /* which block this node is supposed to live in */ + __le64 flags; + + /* allowed to be different from the super from here on down */ + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + __le64 generation; + __le64 owner; + __le32 nritems; + u8 level; +} __attribute__ ((__packed__)); + +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ + sizeof(struct btrfs_header)) / \ + sizeof(struct btrfs_key_ptr)) +#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) +#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) - \ + sizeof(struct btrfs_file_extent_item)) +#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) -\ + sizeof(struct btrfs_dir_item)) + + +/* + * this is a very generous portion of the super block, giving us + * room to translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 +#define BTRFS_LABEL_SIZE 256 + +/* + * just in case we somehow lose the roots and are not able to mount, + * we store an array of the roots from previous transactions + * in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unsed_64[4]; + + u8 tree_root_level; + u8 chunk_root_level; + u8 extent_root_level; + u8 fs_root_level; + u8 dev_root_level; + u8 csum_root_level; + /* future and to align */ + u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* + * the super block basically lists the main trees of the FS + * it currently lacks any block count etc etc + */ +struct btrfs_super_block { + u8 csum[BTRFS_CSUM_SIZE]; + /* the first 3 fields must match struct btrfs_header */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + __le64 bytenr; /* this block number */ + __le64 flags; + + /* allowed to be different from the btrfs_header from here on down */ + __le64 magic; + __le64 generation; + __le64 root; + __le64 chunk_root; + __le64 log_root; + + /* this will help find the new super based on the log root */ + __le64 log_root_transid; + __le64 total_bytes; + __le64 bytes_used; + __le64 root_dir_objectid; + __le64 num_devices; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; + __le32 stripesize; + __le32 sys_chunk_array_size; + __le64 chunk_root_generation; + __le64 compat_flags; + __le64 compat_ro_flags; + __le64 incompat_flags; + __le16 csum_type; + u8 root_level; + u8 chunk_root_level; + u8 log_root_level; + struct btrfs_dev_item dev_item; + + char label[BTRFS_LABEL_SIZE]; + + __le64 cache_generation; + __le64 uuid_tree_generation; + + /* future expansion */ + __le64 reserved[30]; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; +} __attribute__ ((__packed__)); + +/* + * Compat flags that we support.
If any incompat flags are set other than the + * ones specified below then we will fail to mount + */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0) + +#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) +#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) + +/* + * some patches floated around with a second compression method + * lets save that incompat here for when they do get in + * Note we don't actually support it, we're just reserving the + * number + */ +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) + +/* + * older kernels tried to do bigger metadata blocks, but the + * code was pretty buggy. Lets not let them try anymore. + */ +#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) +#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) +#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) +#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) +#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) + +#define BTRFS_FEATURE_COMPAT_SUPP 0ULL + +#define BTRFS_FEATURE_COMPAT_RO_SUPP \ + (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE) + +#define BTRFS_FEATURE_INCOMPAT_SUPP \ + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ + BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ + BTRFS_FEATURE_INCOMPAT_NO_HOLES) + +/* + * A leaf is full of items. offset and size tell us where to find + * the item in the leaf (relative to the start of the data area) + */ +struct btrfs_item { + struct btrfs_disk_key key; + __le32 offset; + __le32 size; +} __attribute__ ((__packed__)); + +/* + * leaves have an item area and a data area: + * [item0, item1....itemN] [free space] [dataN...data1, data0] + * + * The data is separate from the items to get the keys closer together + * during searches. + */ +struct btrfs_leaf { + struct btrfs_header header; + struct btrfs_item items[]; +} __attribute__ ((__packed__)); + +/* + * all non-leaf blocks are nodes, they hold only keys and pointers to + * other blocks + */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; + __le64 generation; +} __attribute__ ((__packed__)); + +struct btrfs_node { + struct btrfs_header header; + struct btrfs_key_ptr ptrs[]; +} __attribute__ ((__packed__)); + +/* + * btrfs_paths remember the path taken from the root down to the leaf. + * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point + * to any other levels that are present. + * + * The slots array records the index of the item or block pointer + * used while walking the tree. 
+ */ + +struct btrfs_path { + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; + int slots[BTRFS_MAX_LEVEL]; + /* if there is real range locking, this locks field will change */ + int locks[BTRFS_MAX_LEVEL]; + int reada; + /* keep some upper locks as we walk down */ + int lowest_level; + + /* + * set by btrfs_split_item, tells search_slot to keep all locks + * and to force calls to keep space in the nodes + */ + unsigned int search_for_split:1; + unsigned int skip_check_block:1; +}; + +/* + * items in the extent btree are used to record the objectid of the + * owner of the block and the number of references + */ + +struct btrfs_extent_item { + __le64 refs; + __le64 generation; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_extent_item_v0 { + __le32 refs; +} __attribute__ ((__packed__)); + +#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \ + sizeof(struct btrfs_item)) + +#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) +#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) + +/* following flags only apply to tree blocks */ + +/* use full backrefs for extent pointers in the block*/ +#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) + +struct btrfs_tree_block_info { + struct btrfs_disk_key key; + u8 level; +} __attribute__ ((__packed__)); + +struct btrfs_extent_data_ref { + __le64 root; + __le64 objectid; + __le64 offset; + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_shared_data_ref { + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_extent_inline_ref { + u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_extent_ref_v0 { + __le64 root; + __le64 generation; + __le64 objectid; + __le32 count; +} __attribute__ ((__packed__)); + +/* dev extents record free space on individual devices. The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent. 
The chunk tree uuid field is a way to double check the owner + */ +struct btrfs_dev_extent { + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 chunk_offset; + __le64 length; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_inode_ref { + __le64 index; + __le16 name_len; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_inode_extref { + __le64 parent_objectid; + __le64 index; + __le16 name_len; + __u8 name[0]; /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_timespec { + __le64 sec; + __le32 nsec; +} __attribute__ ((__packed__)); + +typedef enum { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_LAST = 3, +} btrfs_compression_type; + +/* we don't understand any encryption methods right now */ +typedef enum { + BTRFS_ENCRYPTION_NONE = 0, + BTRFS_ENCRYPTION_LAST = 1, +} btrfs_encryption_type; + +enum btrfs_tree_block_status { + BTRFS_TREE_BLOCK_CLEAN, + BTRFS_TREE_BLOCK_INVALID_NRITEMS, + BTRFS_TREE_BLOCK_INVALID_PARENT_KEY, + BTRFS_TREE_BLOCK_BAD_KEY_ORDER, + BTRFS_TREE_BLOCK_INVALID_LEVEL, + BTRFS_TREE_BLOCK_INVALID_FREE_SPACE, + BTRFS_TREE_BLOCK_INVALID_OFFSETS, +}; + +struct btrfs_inode_item { + /* nfs style generation number */ + __le64 generation; + /* transid that last touched this inode */ + __le64 transid; + __le64 size; + __le64 nbytes; + __le64 block_group; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le64 rdev; + __le64 flags; + + /* modification sequence number for NFS */ + __le64 sequence; + + /* + * a little future expansion, for more than this we can + * just grow the inode item and version it + */ + __le64 reserved[4]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; +} __attribute__ ((__packed__)); + +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + +struct btrfs_dir_item { + struct btrfs_disk_key location; + __le64 transid; + __le16 data_len; + __le16 name_len; + u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_root_item_v0 { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + u8 drop_level; + u8 level; +} __attribute__ ((__packed__)); + +struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + u8 drop_level; + u8 level; + + /* + * The following fields appear after subvol_uuids+subvol_times + * were introduced. + */ + + /* + * This generation number is used to test if the new fields are valid + * and up to date while reading the root item. Every time the root item + * is written out, the "generation" field is copied into this field. If + * anyone ever mounted the fs with an older kernel, we will have + * mismatching generation values here and thus must invalidate the + * new fields. See btrfs_update_root and btrfs_find_last_root for + * details. + * the offset of generation_v2 is also used as the start for the memset + * when invalidating the fields. + */ + __le64 generation_v2; + u8 uuid[BTRFS_UUID_SIZE]; + u8 parent_uuid[BTRFS_UUID_SIZE]; + u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* updated when an inode changes */ + __le64 otransid; /* trans when created */ + __le64 stransid; /* trans when sent. non-zero for received subvol */ + __le64 rtransid; /* trans when received. non-zero for received subvol */ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* for future */ +} __attribute__ ((__packed__)); +
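Since the fields from generation_v2 onward were appended to the original layout, an item found in the root tree may carry either the short v0 layout or the full structure. A hedged sketch of the size test a reader can use to tell them apart (item_len would come from btrfs_item_size() on the containing leaf; the helper name is made up):

    static inline int root_item_has_v2_fields(u32 item_len)
    {
            /* short items only contain the fields of btrfs_root_item_v0 */
            return item_len >= sizeof(struct btrfs_root_item);
    }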
+/* + * this is used for both forward and backward root refs + */ +struct btrfs_root_ref { + __le64 dirid; + __le64 sequence; + __le16 name_len; +} __attribute__ ((__packed__)); + +#define BTRFS_FILE_EXTENT_INLINE 0 +#define BTRFS_FILE_EXTENT_REG 1 +#define BTRFS_FILE_EXTENT_PREALLOC 2 + +struct btrfs_file_extent_item { + /* + * transaction id that created this extent + */ + __le64 generation; + /* + * max number of bytes to hold this extent in ram + * when we split a compressed extent we can't know how big + * each of the resulting pieces will be. So, this is + * an upper limit on the size of the extent in ram instead of + * an exact limit. + */ + __le64 ram_bytes; + + /* + * 32 bits for the various ways we might encode the data, + * including compression and encryption. If any of these + * are set to something a given disk format doesn't understand + * it is treated like an incompat flag for reading and writing, + * but not for stat. + */ + u8 compression; + u8 encryption; + __le16 other_encoding; /* spare for later use */ + + /* are we inline data or a real extent? */ + u8 type; + + /* + * disk space consumed by the extent, checksum blocks are included + * in these numbers + */ + __le64 disk_bytenr; + __le64 disk_num_bytes; + /* + * the logical offset in file blocks (no csums) + * this extent record is for. This allows a file extent to point + * into the middle of an existing extent on disk, sharing it + * between two snapshots (useful if some bytes in the middle of the + * extent have changed) + */ + __le64 offset; + /* + * the logical number of file blocks (no csums included) + */ + __le64 num_bytes; + +} __attribute__ ((__packed__)); + +struct btrfs_csum_item { + u8 csum; +} __attribute__ ((__packed__)); +
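For BTRFS_FILE_EXTENT_INLINE the (possibly compressed) file data sits directly in the leaf after the generation/ram_bytes/encoding header fields, so disk_bytenr and the fields after it are not present on disk; for REG and PREALLOC extents the data lives at disk_bytenr. An illustrative sketch using direct field access (the driver itself uses the generated accessor functions that appear later in this header):

    static u64 file_extent_data_start(const struct btrfs_file_extent_item *fi)
    {
            if (fi->type == BTRFS_FILE_EXTENT_INLINE)
                    return 0;  /* data is inline in the leaf, not on disk */
            return le64_to_cpu(fi->disk_bytenr);
    }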
+/* + * We don't want to overwrite 1M at the beginning of device, even though + * there is our 1st superblock at 64k. Some possible reasons: + * - the first 64k blank is useful for some boot loader/manager + * - the first 1M could be scratched by buggy partitioner or somesuch + */ +#define BTRFS_BLOCK_RESERVED_1M_FOR_SUPER ((u64)1024 * 1024) + +/* tag for the radix tree of block groups in ram */ +#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) +#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) +#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) +#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) +#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE +#define BTRFS_NR_RAID_TYPES 7 + +#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ + BTRFS_BLOCK_GROUP_SYSTEM | \ + BTRFS_BLOCK_GROUP_METADATA) + +#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ + BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID5 | \ + BTRFS_BLOCK_GROUP_RAID6 | \ + BTRFS_BLOCK_GROUP_DUP | \ + BTRFS_BLOCK_GROUP_RAID10) + +/* used in struct btrfs_balance_args fields */ +#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) + +/* + * GLOBAL_RSV does not exist as an on-disk block group type and is used + * internally for exporting info about global block reserve from space infos + */ +#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) + +#define BTRFS_QGROUP_LEVEL_SHIFT 48 + +static inline u64 btrfs_qgroup_level(u64 qgroupid) +{ + return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; +} + +static inline u64 btrfs_qgroup_subvid(u64 qgroupid) +{ + return qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1); +} + +#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) +#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) + +struct btrfs_qgroup_status_item { + __le64 version; + __le64 generation; + __le64 flags; + __le64 scan; /* progress during scanning */ +} __attribute__ ((__packed__)); + +struct btrfs_block_group_item { + __le64 used; + __le64 chunk_objectid; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_info { + __le32 extent_count; + __le32 flags; +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) + +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 referenced; + __le64 referenced_compressed; + __le64 exclusive; + __le64 exclusive_compressed; +} __attribute__ ((__packed__)); + +/* flags definition for qgroup limits */ +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) +#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) + +struct btrfs_qgroup_limit_item { + __le64 flags; + __le64 max_referenced; + __le64 max_exclusive; + __le64 rsv_referenced; + __le64 rsv_exclusive; +} __attribute__ ((__packed__)); + +struct btrfs_space_info { + u64 flags; + u64 total_bytes; + u64 bytes_used; + u64 bytes_pinned; + int full; + struct list_head list; +}; + +struct btrfs_block_group_cache { + struct cache_extent cache; + struct btrfs_key key; + struct btrfs_block_group_item item; + struct btrfs_space_info *space_info; + struct btrfs_free_space_ctl *free_space_ctl; + u64 bytes_super; + u64 pinned; + u64 flags; + int cached; + int ro; +}; +
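The type and profile bits of a block group live in the same u64; masking with BTRFS_BLOCK_GROUP_TYPE_MASK and BTRFS_BLOCK_GROUP_PROFILE_MASK separates them, and an empty profile means the single (unreplicated) layout. A small sketch, illustrative only:

    #include <stdio.h>

    static void describe_block_group(u64 flags)
    {
            u64 type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
            u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;

            printf("type 0x%llx profile 0x%llx%s\n",
                   (unsigned long long)type, (unsigned long long)profile,
                   profile ? "" : " (single)");
    }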
+struct btrfs_extent_ops { + int (*alloc_extent)(struct btrfs_root *root, u64 num_bytes, + u64 hint_byte, struct btrfs_key *ins, int metadata); + int (*free_extent)(struct btrfs_root *root, u64 bytenr, + u64 num_bytes); +}; + +struct btrfs_device; +struct btrfs_fs_devices; +struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; + u8 *new_fsid; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + u8 *new_chunk_tree_uuid; + struct btrfs_root *fs_root; + struct btrfs_root *extent_root; + struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + struct btrfs_root *dev_root; + struct btrfs_root *csum_root; + struct btrfs_root *quota_root; + struct btrfs_root *free_space_root; + + struct rb_root fs_root_tree; + + /* the log root tree is a directory of all the other log roots */ + struct btrfs_root *log_root_tree; + + struct extent_io_tree extent_cache; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; + struct extent_io_tree *excluded_extents; + + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + + u64 generation; + u64 last_trans_committed; + + u64 avail_data_alloc_bits; + u64 avail_metadata_alloc_bits; + u64 avail_system_alloc_bits; + u64 data_alloc_profile; + u64 metadata_alloc_profile; + u64 system_alloc_profile; + u64 alloc_start; + + struct btrfs_trans_handle *running_transaction; + struct btrfs_super_block *super_copy; + struct mutex fs_mutex; + + u64 super_bytenr; + u64 total_pinned; + + struct btrfs_extent_ops *extent_ops; + struct list_head dirty_cowonly_roots; + struct list_head recow_ebs; + + struct btrfs_fs_devices *fs_devices; + struct list_head space_info; + int system_allocs; + + unsigned int readonly:1; + unsigned int on_restoring:1; + unsigned int is_chunk_recover:1; + unsigned int quota_enabled:1; + unsigned int suppress_check_block_errors:1; + unsigned int ignore_fsid_mismatch:1; + unsigned int ignore_chunk_tree_error:1; + + int (*free_extent_hook)(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset, + int refs_to_drop); + struct cache_tree *fsck_extent_cache; + struct cache_tree *corrupt_blocks; + +}; + +/* + * in ram representation of the tree. extent_root is used for all allocations + * and for the extent tree's own root. + */ +struct btrfs_root { + struct extent_buffer *node; + struct extent_buffer *commit_root; + struct btrfs_root_item root_item; + struct btrfs_key root_key; + struct btrfs_fs_info *fs_info; + u64 objectid; + u64 last_trans; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + + /* stripe allocations are done in stripesize units */ + u32 stripesize; + + int ref_cows; + int track_dirty; + + + u32 type; + u64 highest_inode; + u64 last_inode_alloc; + + /* + * Record orphan data extent ref + * + * TODO: Don't restore things in btrfs_root. + * Directly record it into inode_record, which needs a lot of + * infrastructure change to allow cooperation between extent + * and fs tree scan. + */ + struct list_head orphan_data_extents; + + /* the dirty list is only used by non-reference counted roots */ + struct list_head dirty_list; + struct rb_node rb_node; +}; +
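The sectorsize/nodesize/leafsize fields in struct btrfs_root feed the layout macros defined near struct btrfs_header above, which determine tree fan-out and leaf capacity. A sketch showing how a given root's geometry can be computed (illustrative only; the helper name is made up):

    #include <stdio.h>

    static void print_root_geometry(const struct btrfs_root *root)
    {
            /* key pointers per internal node and usable item bytes per leaf */
            printf("%lu key ptrs per node, %lu bytes of leaf data\n",
                   (unsigned long)BTRFS_NODEPTRS_PER_BLOCK(root),
                   (unsigned long)BTRFS_LEAF_DATA_SIZE(root));
    }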
+/* + * inode items have the data typically returned from stat and store other + * info about object characteristics. There is one for every file and dir in + * the FS + */ +#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_REF_KEY 12 +#define BTRFS_INODE_EXTREF_KEY 13 +#define BTRFS_XATTR_ITEM_KEY 24 +#define BTRFS_ORPHAN_ITEM_KEY 48 + +#define BTRFS_DIR_LOG_ITEM_KEY 60 +#define BTRFS_DIR_LOG_INDEX_KEY 72 +/* + * dir items are the name -> inode pointers in a directory. There is one + * for every name in a directory. + */ +#define BTRFS_DIR_ITEM_KEY 84 +#define BTRFS_DIR_INDEX_KEY 96 + +/* + * extent data is for file data + */ +#define BTRFS_EXTENT_DATA_KEY 108 + +/* + * csum items have the checksums for data in the extents + */ +#define BTRFS_CSUM_ITEM_KEY 120 +/* + * extent csums are stored in a separate tree and hold csums for + * an entire extent on disk. + */ +#define BTRFS_EXTENT_CSUM_KEY 128 + +/* + * root items point to tree roots. They are typically in the root + * tree used by the super block to find all the other trees + */ +#define BTRFS_ROOT_ITEM_KEY 132 + +/* + * root backrefs tie subvols and snapshots to the directory entries that + * reference them + */ +#define BTRFS_ROOT_BACKREF_KEY 144 + +/* + * root refs make a fast index for listing all of the snapshots and + * subvolumes referenced by a given root. They point directly to the + * directory item in the root that references the subvol + */ +#define BTRFS_ROOT_REF_KEY 156 + +/* + * extent items are in the extent map tree. These record which blocks + * are used, and how many references there are to each block + */ +#define BTRFS_EXTENT_ITEM_KEY 168 + +/* + * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata, where we + * already know the length, so we save the level in key->offset instead + * of the length. + */ +#define BTRFS_METADATA_ITEM_KEY 169 + +#define BTRFS_TREE_BLOCK_REF_KEY 176 + +#define BTRFS_EXTENT_DATA_REF_KEY 178 + +/* old style extent backrefs */ +#define BTRFS_EXTENT_REF_V0_KEY 180 + +#define BTRFS_SHARED_BLOCK_REF_KEY 182 + +#define BTRFS_SHARED_DATA_REF_KEY 184 + + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 192 + +/* + * Every block group is represented in the free space tree by a free space info + * item, which stores some accounting information. It is keyed on + * (block_group_start, FREE_SPACE_INFO, block_group_length). + */ +#define BTRFS_FREE_SPACE_INFO_KEY 198 + +/* + * A free space extent tracks an extent of space that is free in a block group. + * It is keyed on (start, FREE_SPACE_EXTENT, length). + */ +#define BTRFS_FREE_SPACE_EXTENT_KEY 199 + +/* + * When a block group becomes very fragmented, we convert it to use bitmaps + * instead of extents. A free space bitmap is keyed on + * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with + * (length / sectorsize) bits. + */ +#define BTRFS_FREE_SPACE_BITMAP_KEY 200 + +#define BTRFS_DEV_EXTENT_KEY 204 +#define BTRFS_DEV_ITEM_KEY 216 +#define BTRFS_CHUNK_ITEM_KEY 228 + +#define BTRFS_BALANCE_ITEM_KEY 248 + +/* + * quota groups + */ +#define BTRFS_QGROUP_STATUS_KEY 240 +#define BTRFS_QGROUP_INFO_KEY 242 +#define BTRFS_QGROUP_LIMIT_KEY 244 +#define BTRFS_QGROUP_RELATION_KEY 246 + +/* + * Persistently stores the io stats in the device tree. + * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). + */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * Persistently stores the device replace state in the device tree. + * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */ +#define BTRFS_DEV_REPLACE_KEY 250 + +/* + * Stores items that allow to quickly map UUIDs to something else. + * These items are part of the filesystem UUID tree. + * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). + */ +#if BTRFS_UUID_SIZE != 16 +#error "UUID items require BTRFS_UUID_SIZE == 16!" +#endif +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + +/* + * string items are for debugging. They just store a short string of + * data in the FS + */ +#define BTRFS_STRING_ITEM_KEY 253 +/* + * Inode flags + */ +#define BTRFS_INODE_NODATASUM (1 << 0) +#define BTRFS_INODE_NODATACOW (1 << 1) +#define BTRFS_INODE_READONLY (1 << 2) + +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ +{ \ + const struct btrfs_header *h = (struct btrfs_header *)eb->data; \ + return le##bits##_to_cpu(h->member); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + struct btrfs_header *h = (struct btrfs_header *)eb->data; \ + h->member = cpu_to_le##bits(val); \ +} + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ + const type *s) \ +{ \ + unsigned long offset = (unsigned long)s; \ + const type *p = (type *) (eb->data + offset); \ + return get_unaligned_le##bits(&p->member); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + unsigned long offset = (unsigned long)s; \ + type *p = (type *) (eb->data + offset); \ + put_unaligned_le##bits(val, &p->member); \ +} + +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ +} + +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item, + start_offset, 64); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); +BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); +BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); 
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, + dev_group, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, + seek_speed, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, + bandwidth, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, + generation, 64); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, fsid); +} + +BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) +{ + return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); +} + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, + sub_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct 
extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); + +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); + +/* struct btrfs_free_space_info */ +BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, + extent_count, 32); +BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); + +/* struct btrfs_inode_ref */ +BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); + +/* struct btrfs_inode_extref */ +BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, + parent_objectid, 64); +BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, + name_len, 16); +BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); + +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, + struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, + struct btrfs_inode_item, sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, + struct btrfs_inode_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, + struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, + struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, + struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, + struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, + struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, + struct btrfs_inode_item, gid, 32); 
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, + struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, + struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, + struct btrfs_inode_item, flags, 64); + +static inline struct btrfs_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_timespec *)ptr; +} + +static inline struct btrfs_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) +{ + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_timespec *)ptr; +} + +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, + sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, + nsec, 32); + +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, + chunk_objectid, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, + chunk_offset, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent, + length, 64); + +static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +{ + unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); + return (u8 *)((unsigned long)dev + ptr); +} + + +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64); + +BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); + +BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); + +static inline void btrfs_tree_block_key(struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +static inline void btrfs_set_tree_block_key(struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, + root, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, + objectid, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, + offset, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_count, struct 
btrfs_extent_data_ref, + count, 32); + +BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, + count, 32); + +BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, + type, 8); +BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type, + struct btrfs_extent_inline_ref, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset, + struct btrfs_extent_inline_ref, offset, 64); + +static inline u32 btrfs_extent_inline_ref_size(int type) +{ + if (type == BTRFS_TREE_BLOCK_REF_KEY || + type == BTRFS_SHARED_BLOCK_REF_KEY) + return sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_SHARED_DATA_REF_KEY) + return sizeof(struct btrfs_shared_data_ref) + + sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_EXTENT_DATA_REF_KEY) + return sizeof(struct btrfs_extent_data_ref) + + offsetof(struct btrfs_extent_inline_ref, offset); + BUG(); + return 0; +} + +BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64); +BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0, + generation, 64); +BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64); +BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); + +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); + +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline unsigned long btrfs_node_key_ptr_offset(int nr) +{ + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; +} + +static inline void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} + +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} + +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); + +static inline unsigned long btrfs_item_nr_offset(int nr) +{ + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; +} + +static inline struct btrfs_item 
*btrfs_item_nr(int nr) +{ + return (struct btrfs_item *)btrfs_item_nr_offset(nr); +} + +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) +{ + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); +} + +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) +{ + return btrfs_item_end(eb, btrfs_item_nr(nr)); +} + +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) +{ + return btrfs_item_offset(eb, btrfs_item_nr(nr)); +} + +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) +{ + return btrfs_item_size(eb, btrfs_item_nr(nr)); +} + +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); + +/* + * struct btrfs_root_ref + */ +BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64); +BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64); +BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16); + +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16); + +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); + +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +/* struct btrfs_free_space_header */ +BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, + num_entries, 64); +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, + num_bitmaps, 64); +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +static inline void btrfs_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); + +static inline void btrfs_disk_key_to_cpu(struct 
btrfs_key *cpu, + struct btrfs_disk_key *disk) +{ + cpu->offset = le64_to_cpu(disk->offset); + cpu->type = disk->type; + cpu->objectid = le64_to_cpu(disk->objectid); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, + struct btrfs_key *cpu) +{ + disk->offset = cpu_to_le64(cpu->offset); + disk->type = cpu->type; + disk->objectid = cpu_to_le64(cpu->objectid); +} + +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) +{ + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); +} + + +static inline u8 btrfs_key_type(struct btrfs_key *key) +{ + return key->type; +} + +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) +{ + key->type = val; +} + +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems, + 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); + +static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) +{ + return (btrfs_header_flags(eb) & flag) == flag; +} + +static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags | flag); + return (flags & flag) == flag; +} + +static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags & ~flag); + return (flags & flag) == flag; +} + +static inline int btrfs_header_backref_rev(struct extent_buffer *eb) +{ + u64 flags = btrfs_header_flags(eb); + return flags >> BTRFS_BACKREF_REV_SHIFT; +} + +static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, + int rev) +{ + u64 flags = btrfs_header_flags(eb); + flags &= ~BTRFS_BACKREF_REV_MASK; + flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; + btrfs_set_header_flags(eb, flags); +} + +static inline unsigned long btrfs_header_fsid(void) +{ + return offsetof(struct btrfs_header, fsid); +} + +static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +{ + return offsetof(struct btrfs_header, chunk_tree_uuid); +} + +static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) +{ + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; +} + +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) +{ + unsigned long ptr = 
offsetof(struct btrfs_header, csum); + return (u8 *)ptr; +} + +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) +{ + return NULL; +} + +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) +{ + return NULL; +} + +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) +{ + return NULL; +} + +static inline int btrfs_is_leaf(struct extent_buffer *eb) +{ + return (btrfs_header_level(eb) == 0); +} + +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, + generation, 64); +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); + +BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, + last_snapshot, 64); +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, + generation_v2, 64); +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, + ctransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, + otransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, + stransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, + rtransid, 64); + +/* struct btrfs_root_backup */ +BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, + tree_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, + tree_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, + tree_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, + chunk_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, + chunk_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, + extent_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, + fs_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, + fs_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, + fs_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, + dev_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, + dev_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, + dev_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, + csum_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct 
btrfs_root_backup, + csum_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, + csum_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, + num_devices, 64); + +/* struct btrfs_super_block */ + +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, + struct btrfs_super_block, chunk_root_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, + log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block, + log_root_transid, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + log_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, + stripesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, + compat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, + compat_ro_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, + incompat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, + csum_type, 16); +BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, + cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); + +static inline int btrfs_super_csum_size(struct btrfs_super_block *s) +{ + int t = btrfs_super_csum_type(s); + BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes)); + return btrfs_csum_sizes[t]; +} + +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) +{ + return offsetof(struct btrfs_leaf, items); +} + +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8); + +static inline unsigned long btrfs_file_extent_inline_start(struct + btrfs_file_extent_item *e) +{ + unsigned 
long offset = (unsigned long)e; + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); + return offset; +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; +} + +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, + encryption, 8); +BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, + other_encoding, 16); + +/* btrfs_qgroup_status_item */ +BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, + version, 64); +BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, + scan, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version, + struct btrfs_qgroup_status_item, version, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation, + struct btrfs_qgroup_status_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags, + struct btrfs_qgroup_status_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_scan, + struct btrfs_qgroup_status_item, scan, 64); + +/* btrfs_qgroup_info_item */ +BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item, + referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, referenced_compressed, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item, + exclusive, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, exclusive_compressed, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, + struct btrfs_qgroup_info_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced, + struct btrfs_qgroup_info_item, referenced, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, referenced_compressed, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive, + struct btrfs_qgroup_info_item, exclusive, 64); 
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, exclusive_compressed, 64); + +/* btrfs_qgroup_limit_item */ +BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item, + max_referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item, + max_exclusive, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item, + rsv_referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item, + rsv_exclusive, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags, + struct btrfs_qgroup_limit_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced, + struct btrfs_qgroup_limit_item, max_referenced, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive, + struct btrfs_qgroup_limit_item, max_exclusive, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced, + struct btrfs_qgroup_limit_item, rsv_referenced, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive, + struct btrfs_qgroup_limit_item, rsv_exclusive, 64); + +/* + * this returns the number of bytes used by the item on disk, minus the + * size of any extent headers. If a file is compressed on disk, this is + * the compressed size + */ +static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, + struct btrfs_item *e) +{ + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); + return btrfs_item_size(eb, e) - offset; +} + +/* this returns the number of file bytes represented by the inline item. + * If an item is compressed, this is the uncompressed size + */ +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + int slot, + struct btrfs_file_extent_item *fi) +{ + /* + * return the space used on disk if this item isn't + * compressed or encoded + */ + if (btrfs_file_extent_compression(eb, fi) == 0 && + btrfs_file_extent_encryption(eb, fi) == 0 && + btrfs_file_extent_other_encoding(eb, fi) == 0) { + return btrfs_file_extent_inline_item_len(eb, + btrfs_item_nr(slot)); + } + + /* otherwise use the ram bytes field */ + return btrfs_file_extent_ram_bytes(eb, fi); +} + +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { + if (level == 0) + return root->leafsize; + return root->nodesize; +} + +static inline int btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_incompat_flags(disk_super) & flag); +} + +static inline int btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_compat_ro_flags(disk_super) & flag); +} + +/* helper function to cast into the data area of the leaf. 
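+ * For example (illustrative note, not part of the btrfs-progs import),
+ * dir-item.c below reads the first directory entry stored in a leaf
+ * slot as:
+ *
+ *	struct btrfs_dir_item *di;
+ *	di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ *
+ * btrfs_item_ptr_offset() yields the same address as an unsigned long
+ * for use with the read/write_extent_buffer() helpers.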
*/ +#define btrfs_item_ptr(leaf, slot, type) \ + ((type *)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) + +/* extent-tree.c */ +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 empty_size, + u64 hint_byte, u64 search_end, + struct btrfs_key *ins, int data); +int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes); +void btrfs_unpin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes); +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 bytenr); +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr); +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, u64 root_objectid, + struct btrfs_disk_key *key, int level, + u64 hint, u64 empty_size); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, int data); +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 offset, int metadata, u64 *refs, u64 *flags); +int btrfs_set_block_flags(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, int level, u64 flags); +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent); +int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent); +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_io_tree *unpin); +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid); +int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_block_groups(struct btrfs_fs_info *info); +int btrfs_read_block_groups(struct btrfs_root *root); +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_objectid, u64 chunk_offset, u64 size); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_objectid, u64 chunk_offset, + u64 size); +int btrfs_make_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 num, + int alloc, int mark_free); +int btrfs_record_file_extent(struct btrfs_trans_handle *trans, + 
struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 num_bytes); +int btrfs_free_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); +void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache); +int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache); +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end); +/* ctree.c */ +int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2); +int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot); +enum btrfs_tree_block_status +btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf); +enum btrfs_tree_block_status +btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key, + struct extent_buffer *buf); +void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, + int level, int slot, u64 objectid); +struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot); +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); +int btrfs_previous_extent_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); +int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size); +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid); +int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u32 data_size); +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size, int from_end); +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset); +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow); +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, + u64 iobjectid, u64 ioff, u8 key_type, + struct btrfs_key *found_key); +void btrfs_release_path(struct btrfs_path *p); +void add_root_to_dirty_list(struct btrfs_root *root); +struct btrfs_path *btrfs_alloc_path(void); +void btrfs_free_path(struct btrfs_path *p); +void btrfs_init_path(struct btrfs_path *p); +int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int slot, int nr); + +static inline int btrfs_del_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + return btrfs_del_items(trans, root, path, path->slots[0], 1); +} + +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, void *data, u32 data_size); +int btrfs_insert_empty_items(struct 
btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, int nr); + +static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u32 data_size) +{ + return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); +} + +int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +static inline int btrfs_next_item(struct btrfs_root *root, + struct btrfs_path *p) +{ + ++p->slots[0]; + if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) + return btrfs_next_leaf(root, p); + return 0; +} + +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); +void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level); +int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *new_key); +void btrfs_set_item_key_unsafe(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key); + +/* root-item.c */ +int btrfs_add_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, + u64 root_id, u8 type, u64 ref_id, + u64 dirid, u64 sequence, + const char *name, int name_len); +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct + btrfs_root_item *item, struct btrfs_key *key); +/* dir-item.c */ +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type, u64 index); +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod); +struct btrfs_dir_item *btrfs_lookup_dir_index(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + u64 index, int mod); +int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_dir_item *di); +int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + u16 name_len, const void *data, u16 data_len, + u64 dir); +/* inode-map.c */ +int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 dirid, u64 *objectid); + +/* inode-item.c */ +int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, u64 index); +int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 objectid, struct btrfs_inode_item + *inode_item); +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod); +struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_trans_handle + *trans, struct btrfs_path *path, struct btrfs_root *root, + u64 ino, u64 parent_ino, u64 index, const char *name, + int namelen, int ins_len); +int 
btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, + u64 *index); +int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, u64 index); +struct btrfs_inode_ref *btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + const char *name, int namelen, u64 ino, u64 parent_ino, + u64 index, int ins_len); +int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, int name_len, + u64 ino, u64 parent_ino, u64 *index); + +/* file-item.c */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len); +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 pos, u64 offset, + u64 disk_num_bytes, + u64 num_bytes); +int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + u64 offset, char *buffer, size_t size); +int btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 alloc_end, + u64 bytenr, char *data, size_t len); +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 isize); + +/* uuid-tree.c */ +int btrfs_lookup_uuid_subvol_item(int fd, const u8 *uuid, u64 *subvol_id); +int btrfs_lookup_uuid_received_subvol_item(int fd, const u8 *uuid, + u64 *subvol_id); + +static inline int is_fstree(u64 rootid) +{ + if (rootid == BTRFS_FS_TREE_OBJECTID || + (signed long long)rootid >= (signed long long)BTRFS_FIRST_FREE_OBJECTID) + return 1; + return 0; +} + +/* inode.c */ +int check_dir_conflict(struct btrfs_root *root, char *name, int namelen, + u64 dir, u64 index); +int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u32 mode); +int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u64 parent_ino, char *name, int namelen, + u8 type, u64 *index, int add_backref); +int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u64 parent_ino, u64 index, const char *name, + int namelen, int add_orphan); +int btrfs_add_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 ino); +int btrfs_mkdir(struct btrfs_trans_handle *trans, struct btrfs_root *root, + char *name, int namelen, u64 parent_ino, u64 *ino, int mode); + +/* file.c */ +int btrfs_get_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 ino, u64 offset, u64 len, int ins_len); +int btrfs_punch_hole(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 ino, u64 offset, u64 len); +#endif diff --git a/tools/libfsimage/btrfs/dir-item.c b/tools/libfsimage/btrfs/dir-item.c new file mode 100644 index 0000000..bc59d17 --- /dev/null +++ b/tools/libfsimage/btrfs/dir-item.c @@ -0,0 +1,351 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/limits.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "hash.h"
+#include "transaction.h"
+
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len);
+
+static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+						   *trans,
+						   struct btrfs_root *root,
+						   struct btrfs_path *path,
+						   struct btrfs_key *cpu_key,
+						   u32 data_size,
+						   const char *name,
+						   int name_len)
+{
+	int ret;
+	char *ptr;
+	struct btrfs_item *item;
+	struct extent_buffer *leaf;
+
+	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+	if (ret == -EEXIST) {
+		struct btrfs_dir_item *di;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (di)
+			return ERR_PTR(-EEXIST);
+		ret = btrfs_extend_item(trans, root, path, data_size);
+		WARN_ON(ret > 0);
+	}
+	if (ret < 0)
+		return ERR_PTR(ret);
+	WARN_ON(ret > 0);
+	leaf = path->nodes[0];
+	item = btrfs_item_nr(path->slots[0]);
+	ptr = btrfs_item_ptr(leaf, path->slots[0], char);
+	BUG_ON(data_size > btrfs_item_size(leaf, item));
+	ptr += btrfs_item_size(leaf, item) - data_size;
+	return (struct btrfs_dir_item *)ptr;
+}
+
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, const char *name,
+			    u16 name_len, const void *data, u16 data_len,
+			    u64 dir)
+{
+	int ret = 0;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr, data_ptr;
+	struct btrfs_key key, location;
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *leaf;
+	u32 data_size;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+	key.offset = btrfs_name_hash(name, name_len);
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	data_size = sizeof(*dir_item) + name_len + data_len;
+	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+					name, name_len);
+	/*
+	 * FIXME: at some point we should handle xattrs that are larger than
+	 * what we can fit in our leaf. We set location to NULL because we
+	 * aren't pointing at anything else; that will change if we store
+	 * the xattr data in a separate inode.
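+	 *
+	 * (Note added in this import:) in practice this means an xattr,
+	 * name plus value, has to fit inside a single leaf item;
+	 * verify_dir_item() below rejects any entry larger than
+	 * BTRFS_MAX_XATTR_SIZE().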
+ */ + BUG_ON(IS_ERR(dir_item)); + memset(&location, 0, sizeof(location)); + + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, &location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_data_len(leaf, dir_item, data_len); + name_ptr = (unsigned long)(dir_item + 1); + data_ptr = (unsigned long)((char *)name_ptr + name_len); + + write_extent_buffer(leaf, name, name_ptr, name_len); + write_extent_buffer(leaf, data, data_ptr, data_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + + btrfs_free_path(path); + return ret; +} + +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type, u64 index) +{ + int ret = 0; + int ret2 = 0; + struct btrfs_path *path; + struct btrfs_dir_item *dir_item; + struct extent_buffer *leaf; + unsigned long name_ptr; + struct btrfs_key key; + struct btrfs_disk_key disk_key; + u32 data_size; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = btrfs_name_hash(name, name_len); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + data_size = sizeof(*dir_item) + name_len; + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_data_len(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); + + /* FIXME, use some real flag for selecting the extra index */ + if (root == root->fs_info->tree_root) { + ret = 0; + goto out; + } + btrfs_release_path(path); + + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = index; + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); + if (IS_ERR(dir_item)) { + ret2 = PTR_ERR(dir_item); + goto out; + } + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_data_len(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); +out: + btrfs_free_path(path); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? 
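+		/* note added in this import: mod encodes the caller's
+		 * intent: 0 = read-only lookup, 1 = COW the path for an
+		 * in-place update, -1 = COW and prepare the path for
+		 * deletion (the ins_len computed here) */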
-1 : 0;
+	int cow = mod != 0;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+
+	key.offset = btrfs_name_hash(name, name_len);
+
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0) {
+		if (path->slots[0] == 0)
+			return NULL;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != dir ||
+	    btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
+	    found_key.offset != key.offset)
+		return NULL;
+
+	return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *btrfs_lookup_dir_index(struct btrfs_trans_handle *trans,
+					      struct btrfs_root *root,
+					      struct btrfs_path *path, u64 dir,
+					      const char *name, int name_len,
+					      u64 index, int mod)
+{
+	int ret;
+	struct btrfs_key key;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+
+	key.objectid = dir;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = index;
+
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0)
+		return ERR_PTR(-ENOENT);
+
+	return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+/*
+ * Given a pointer into a directory item, delete it. This
+ * handles items that have more than one entry in them.
+ */
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      struct btrfs_dir_item *di)
+{
+
+	struct extent_buffer *leaf;
+	u32 sub_item_len;
+	u32 item_len;
+	int ret = 0;
+
+	leaf = path->nodes[0];
+	sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
+		       btrfs_dir_data_len(leaf, di);
+	item_len = btrfs_item_size_nr(leaf, path->slots[0]);
+	if (sub_item_len == item_len) {
+		ret = btrfs_del_item(trans, root, path);
+	} else {
+		unsigned long ptr = (unsigned long)di;
+		unsigned long start;
+
+		start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+				      item_len - (ptr + sub_item_len - start));
+		btrfs_truncate_item(trans, root, path,
+				    item_len - sub_item_len, 1);
+	}
+	return ret;
+}
+
+static int verify_dir_item(struct btrfs_root *root,
+			   struct extent_buffer *leaf,
+			   struct btrfs_dir_item *dir_item)
+{
+	u16 namelen = BTRFS_NAME_LEN;
+	u8 type = btrfs_dir_type(leaf, dir_item);
+
+	if (type >= BTRFS_FT_MAX) {
+		fprintf(stderr, "invalid dir item type: %d\n", (int)type);
+		return 1;
+	}
+
+	if (type == BTRFS_FT_XATTR)
+		namelen = XATTR_NAME_MAX;
+
+	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+		fprintf(stderr, "invalid dir item name len: %u\n",
+			(unsigned)btrfs_dir_name_len(leaf, dir_item));
+		return 1;
+	}
+
+	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+	if ((btrfs_dir_data_len(leaf, dir_item) +
+	     btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
+		fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+			(unsigned)btrfs_dir_name_len(leaf, dir_item),
+			(unsigned)btrfs_dir_data_len(leaf, dir_item));
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len)
+{
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr;
+	u32 total_len;
+	u32 cur = 0;
+	u32 this_len;
+	struct extent_buffer *leaf;
+
+	leaf = path->nodes[0];
+	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	total_len =
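+		/* total size of all entries packed back-to-back in this
+		 * item; different names hashing to the same key offset
+		 * share a single item, so the loop below checks each
+		 * entry in turn (note added in this import) */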
btrfs_item_size_nr(leaf, path->slots[0]); + if (verify_dir_item(root, leaf, dir_item)) + return NULL; + + while(cur < total_len) { + this_len = sizeof(*dir_item) + + btrfs_dir_name_len(leaf, dir_item) + + btrfs_dir_data_len(leaf, dir_item); + if (this_len > (total_len - cur)) { + fprintf(stderr, "invalid dir item size\n"); + return NULL; + } + + name_ptr = (unsigned long)(dir_item + 1); + + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) + return dir_item; + + cur += this_len; + dir_item = (struct btrfs_dir_item *)((char *)dir_item + + this_len); + } + return NULL; +} diff --git a/tools/libfsimage/btrfs/disk-io.c b/tools/libfsimage/btrfs/disk-io.c new file mode 100644 index 0000000..496431a --- /dev/null +++ b/tools/libfsimage/btrfs/disk-io.c @@ -0,0 +1,1779 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <uuid/uuid.h> +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "volumes.h" +#include "transaction.h" +#include "crc32c.h" +#include "utils.h" +#include "print-tree.h" +#include "rbtree-utils.h" + +extern uint64_t partition_offset; + +/* specified errno for check_tree_block */ +#define BTRFS_BAD_BYTENR (-1) +#define BTRFS_BAD_FSID (-2) +#define BTRFS_BAD_LEVEL (-3) +#define BTRFS_BAD_NRITEMS (-4) + +/* Calculate max possible nritems for a leaf/node */ +static u32 max_nritems(u8 level, u32 nodesize) +{ + + if (level == 0) + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_item)); + return ((nodesize - sizeof(struct btrfs_header)) / + sizeof(struct btrfs_key_ptr)); +} + +static int check_tree_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf) +{ + + struct btrfs_fs_devices *fs_devices; + u32 leafsize = btrfs_super_leafsize(fs_info->super_copy); + int ret = BTRFS_BAD_FSID; + + if (buf->start != btrfs_header_bytenr(buf)) + return BTRFS_BAD_BYTENR; + if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) + return BTRFS_BAD_LEVEL; + if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), + leafsize)) + return BTRFS_BAD_NRITEMS; + + fs_devices = fs_info->fs_devices; + while (fs_devices) { + if (fs_info->ignore_fsid_mismatch || + !memcmp_extent_buffer(buf, fs_devices->fsid, + btrfs_header_fsid(), + BTRFS_FSID_SIZE)) { + ret = 0; + break; + } + fs_devices = fs_devices->seed; + } + return ret; +} + +static void print_tree_block_error(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, + int err) +{ + char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; + u8 buf[BTRFS_UUID_SIZE]; + + switch (err) { + case BTRFS_BAD_FSID: + read_extent_buffer(eb, buf, btrfs_header_fsid(), + 
BTRFS_UUID_SIZE); + uuid_unparse(buf, found_uuid); + uuid_unparse(fs_info->fsid, fs_uuid); + fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", + fs_uuid, found_uuid); + break; + case BTRFS_BAD_BYTENR: + fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", + eb->start, btrfs_header_bytenr(eb)); + break; + case BTRFS_BAD_LEVEL: + fprintf(stderr, "bad level, %u > %u\n", + btrfs_header_level(eb), BTRFS_MAX_LEVEL); + break; + case BTRFS_BAD_NRITEMS: + fprintf(stderr, "invalid nr_items: %u\n", + btrfs_header_nritems(eb)); + break; + } +} + +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +{ + return crc32c(seed, data, len); +} + +void btrfs_csum_final(u32 crc, char *result) +{ + *(__le32 *)result = ~cpu_to_le32(crc); +} + +static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, + int verify, int silent) +{ + char result[BTRFS_CSUM_SIZE]; + u32 len; + u32 crc = ~(u32)0; + + len = buf->len - BTRFS_CSUM_SIZE; + crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len); + btrfs_csum_final(crc, result); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, csum_size)) { + if (!silent) + printk("checksum verify failed on %llu found %08X wanted %08X\n", + (unsigned long long)buf->start, + *((u32 *)result), + *((u32*)(char *)buf->data)); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, csum_size); + } + return 0; +} + +int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify) +{ + return __csum_tree_block_size(buf, csum_size, verify, 0); +} + +int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size) +{ + return __csum_tree_block_size(buf, csum_size, 1, 1); +} + +static int csum_tree_block_fs_info(struct btrfs_fs_info *fs_info, + struct extent_buffer *buf, int verify) +{ + u16 csum_size = + btrfs_super_csum_size(fs_info->super_copy); + if (verify && fs_info->suppress_check_block_errors) + return verify_tree_block_csum_silent(buf, csum_size); + return csum_tree_block_size(buf, csum_size, verify); +} + +int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + return csum_tree_block_fs_info(root->fs_info, buf, verify); +} + +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + return find_extent_buffer(&root->fs_info->extent_cache, + bytenr, blocksize); +} + +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize) +{ + return alloc_extent_buffer(&fs_info->extent_cache, bytenr, blocksize); +} + +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) +{ + struct extent_buffer *eb; + u64 length; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + + eb = btrfs_find_tree_block(root, bytenr, blocksize); + if (!(eb && btrfs_buffer_uptodate(eb, parent_transid)) && + !btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &length, &multi, 0, NULL)) { + device = multi->stripes[0].dev; + device->total_ios++; + blocksize = min(blocksize, (u32)(64 * 1024)); + readahead(device->fd, multi->stripes[0].physical, blocksize); + } + + free_extent_buffer(eb); + kfree(multi); +} + +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid, + int ignore) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + if (extent_buffer_uptodate(eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + 
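+		/* note added in this import: a block whose header generation
+		 * does not match the transid recorded in its parent pointer
+		 * is a stale copy (e.g. read from an out-of-date mirror);
+		 * unless 'ignore' is set it is treated as not uptodate */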
goto out; + } + printk("parent transid verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + if (ignore) { + eb->flags |= EXTENT_BAD_TRANSID; + printk("Ignoring transid failure\n"); + return 0; + } + + ret = 1; +out: + clear_extent_buffer_uptodate(io_tree, eb); + return ret; + +} + + +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) +{ + unsigned long offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int ret = 0; + u64 read_len; + unsigned long bytes_left = eb->len; + + while (bytes_left) { + read_len = bytes_left; + device = NULL; + + if (!info->on_restoring && + eb->start != BTRFS_SUPER_INFO_OFFSET) { + ret = btrfs_map_block(&info->mapping_tree, READ, + eb->start + offset, &read_len, &multi, + mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + kfree(multi); + return -EIO; + } + device = multi->stripes[0].dev; + + if (device->fd <= 0) { + kfree(multi); + return -EIO; + } + + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + kfree(multi); + multi = NULL; + } else { + /* special case for restore metadump */ + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + if (device->devid == 1) + break; + } + + eb->fd = device->fd; + eb->dev_bytenr = eb->start; + device->total_ios++; + } + + if (read_len > bytes_left) + read_len = bytes_left; + + ret = read_extent_from_disk(eb, offset, read_len); + if (ret) + return -EIO; + offset += read_len; + bytes_left -= read_len; + } + return 0; +} + +struct extent_buffer* read_tree_block_fs_info( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize, + u64 parent_transid) +{ + int ret; + struct extent_buffer *eb; + u64 best_transid = 0; + int mirror_num = 0; + int good_mirror = 0; + int num_copies; + int ignore = 0; + + eb = btrfs_find_create_tree_block(fs_info, bytenr, blocksize); + if (!eb) + return ERR_PTR(-ENOMEM); + + if (btrfs_buffer_uptodate(eb, parent_transid)) + return eb; + + while (1) { + ret = read_whole_eb(fs_info, eb, mirror_num); + if (ret == 0 && csum_tree_block_fs_info(fs_info, eb, 1) == 0 && + check_tree_block(fs_info, eb) == 0 && + verify_parent_transid(eb->tree, eb, parent_transid, ignore) + == 0) { + if (eb->flags & EXTENT_BAD_TRANSID && + list_empty(&eb->recow)) { + list_add_tail(&eb->recow, + &fs_info->recow_ebs); + eb->refs++; + } + btrfs_set_buffer_uptodate(eb); + return eb; + } + if (ignore) { + if (check_tree_block(fs_info, eb)) { + if (!fs_info->suppress_check_block_errors) + print_tree_block_error(fs_info, eb, + check_tree_block(fs_info, eb)); + } else { + if (!fs_info->suppress_check_block_errors) + fprintf(stderr, "Csum didn't match\n"); + } + ret = -EIO; + break; + } + num_copies = btrfs_num_copies(&fs_info->mapping_tree, + eb->start, eb->len); + if (num_copies == 1) { + ignore = 1; + continue; + } + if (btrfs_header_generation(eb) > best_transid && mirror_num) { + best_transid = btrfs_header_generation(eb); + good_mirror = mirror_num; + } + mirror_num++; + if (mirror_num > num_copies) { + mirror_num = good_mirror; + ignore = 1; + continue; + } + } + free_extent_buffer(eb); + return ERR_PTR(ret); +} + +int read_extent_data(struct btrfs_root *root, char *data, + u64 logical, u64 *len, int mirror) +{ + u64 offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_device *device; + int 
ret = 0; + u64 max_len = *len; + + ret = btrfs_map_block(&info->mapping_tree, READ, logical, len, + &multi, mirror, NULL); + if (ret) { + fprintf(stderr, "Couldn't map the block %llu\n", + logical + offset); + goto err; + } + device = multi->stripes[0].dev; + + if (device->fd <= 0) + goto err; + if (*len > max_len) + *len = max_len; + + ret = pread64(device->fd, data, *len, multi->stripes[0].physical + partition_offset); + if (ret != *len) + ret = -EIO; + else + ret = 0; +err: + kfree(multi); + return ret; +} + +int write_and_map_eb(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int ret; + int dev_nr; + u64 length; + u64 *raid_map = NULL; + struct btrfs_multi_bio *multi = NULL; + + dev_nr = 0; + length = eb->len; + ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, + eb->start, &length, &multi, 0, &raid_map); + + if (raid_map) { + ret = write_raid56_with_parity(root->fs_info, eb, multi, + length, raid_map); + BUG_ON(ret); + } else while (dev_nr < multi->num_stripes) { + BUG_ON(ret); + eb->fd = multi->stripes[dev_nr].dev->fd; + eb->dev_bytenr = multi->stripes[dev_nr].physical; + multi->stripes[dev_nr].dev->total_ios++; + dev_nr++; + ret = write_extent_to_disk(eb); + BUG_ON(ret); + } + kfree(raid_map); + kfree(multi); + return 0; +} + +int write_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + if (check_tree_block(root->fs_info, eb)) { + print_tree_block_error(root->fs_info, eb, + check_tree_block(root->fs_info, eb)); + BUG(); + } + + if (trans && !btrfs_buffer_uptodate(eb, trans->transid)) + BUG(); + + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + csum_tree_block(root, eb, 0); + + return write_and_map_eb(trans, root, eb); +} + +int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, + u32 stripesize, struct btrfs_root *root, + struct btrfs_fs_info *fs_info, u64 objectid) +{ + root->node = NULL; + root->commit_root = NULL; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; + root->stripesize = stripesize; + root->ref_cows = 0; + root->track_dirty = 0; + + root->fs_info = fs_info; + root->objectid = objectid; + root->last_trans = 0; + root->highest_inode = 0; + root->last_inode_alloc = 0; + + INIT_LIST_HEAD(&root->dirty_list); + INIT_LIST_HEAD(&root->orphan_data_extents); + memset(&root->root_key, 0, sizeof(root->root_key)); + memset(&root->root_item, 0, sizeof(root->root_item)); + root->root_key.objectid = objectid; + return 0; +} + +static int update_cowonly_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + u64 old_root_bytenr; + struct btrfs_root *tree_root = root->fs_info->tree_root; + + btrfs_write_dirty_block_groups(trans, root); + while(1) { + old_root_bytenr = btrfs_root_bytenr(&root->root_item); + if (old_root_bytenr == root->node->start) + break; + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_generation(&root->root_item, + trans->transid); + root->root_item.level = btrfs_header_level(root->node); + ret = btrfs_update_root(trans, tree_root, + &root->root_key, + &root->root_item); + BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, root); + } + return 0; +} + +static int commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root; + struct list_head *next; + struct extent_buffer *eb; + int ret; + + if (fs_info->readonly) + return 0; + + eb = fs_info->tree_root->node; + extent_buffer_get(eb); + ret = 
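+	      /* note added in this import: COW the tree root first so the
+	       * loop below can record each dirty root's new bytenr into a
+	       * writable copy of the tree root */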
btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); + free_extent_buffer(eb); + if (ret) + return ret; + + while(!list_empty(&fs_info->dirty_cowonly_roots)) { + next = fs_info->dirty_cowonly_roots.next; + list_del_init(next); + root = list_entry(next, struct btrfs_root, dirty_list); + update_cowonly_root(trans, root); + free_extent_buffer(root->commit_root); + root->commit_root = NULL; + } + + return 0; +} + +static int __commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + u64 start; + u64 end; + struct extent_buffer *eb; + struct extent_io_tree *tree = &root->fs_info->extent_cache; + int ret; + + while(1) { + ret = find_first_extent_bit(tree, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + while(start <= end) { + eb = find_first_extent_buffer(tree, start); + BUG_ON(!eb || eb->start != start); + ret = write_tree_block(trans, root, eb); + BUG_ON(ret); + start += eb->len; + clear_extent_buffer_dirty(eb); + free_extent_buffer(eb); + } + } + return 0; +} + +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + u64 transid = trans->transid; + int ret = 0; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (root->commit_root == root->node) + goto commit_tree; + if (root == root->fs_info->tree_root) + goto commit_tree; + if (root == root->fs_info->chunk_root) + goto commit_tree; + + free_extent_buffer(root->commit_root); + root->commit_root = NULL; + + btrfs_set_root_bytenr(&root->root_item, root->node->start); + btrfs_set_root_generation(&root->root_item, trans->transid); + root->root_item.level = btrfs_header_level(root->node); + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); +commit_tree: + ret = commit_tree_roots(trans, fs_info); + BUG_ON(ret); + ret = __commit_transaction(trans, root); + BUG_ON(ret); + write_ctree_super(trans, root); + btrfs_finish_extent_commit(trans, fs_info->extent_root, + &fs_info->pinned_extents); + btrfs_free_transaction(root, trans); + free_extent_buffer(root->commit_root); + root->commit_root = NULL; + fs_info->running_transaction = NULL; + fs_info->last_trans_committed = transid; + return 0; +} + +static int find_and_setup_root(struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + u64 objectid, struct btrfs_root *root) +{ + int ret; + u32 blocksize; + u64 generation; + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); + ret = btrfs_find_last_root(tree_root, objectid, + &root->root_item, &root->root_key); + if (ret) + return ret; + + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + generation = btrfs_root_generation(&root->root_item); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize, generation); + if (!extent_buffer_uptodate(root->node)) + return -EIO; + + return 0; +} + +static int find_and_setup_log_root(struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + struct btrfs_super_block *disk_super) +{ + u32 blocksize; + u64 blocknr = btrfs_super_log_root(disk_super); + struct btrfs_root *log_root = malloc(sizeof(struct btrfs_root)); + + if (!log_root) + return -ENOMEM; + + if (blocknr == 0) { + free(log_root); + return 0; + } + + blocksize = btrfs_level_size(tree_root, + btrfs_super_log_root_level(disk_super)); + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + log_root, fs_info, 
BTRFS_TREE_LOG_OBJECTID); + + log_root->node = read_tree_block(tree_root, blocknr, + blocksize, + btrfs_super_generation(disk_super) + 1); + + fs_info->log_root_tree = log_root; + + if (!extent_buffer_uptodate(log_root->node)) { + free_extent_buffer(log_root->node); + free(log_root); + fs_info->log_root_tree = NULL; + return -EIO; + } + + return 0; +} + +int btrfs_free_fs_root(struct btrfs_root *root) +{ + if (root->node) + free_extent_buffer(root->node); + if (root->commit_root) + free_extent_buffer(root->commit_root); + kfree(root); + return 0; +} + +static void __free_fs_root(struct rb_node *node) +{ + struct btrfs_root *root; + + root = container_of(node, struct btrfs_root, rb_node); + btrfs_free_fs_root(root); +} + +FREE_RB_BASED_TREE(fs_roots, __free_fs_root); + +struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_path *path; + struct extent_buffer *l; + u64 generation; + u32 blocksize; + int ret = 0; + + root = calloc(1, sizeof(*root)); + if (!root) + return ERR_PTR(-ENOMEM); + if (location->offset == (u64)-1) { + ret = find_and_setup_root(tree_root, fs_info, + location->objectid, root); + if (ret) { + free(root); + return ERR_PTR(ret); + } + goto insert; + } + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, location->objectid); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); + if (ret != 0) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); + ret = 0; +out: + btrfs_free_path(path); + if (ret) { + free(root); + return ERR_PTR(ret); + } + generation = btrfs_root_generation(&root->root_item); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize, generation); + if (!extent_buffer_uptodate(root->node)) { + free(root); + return ERR_PTR(-EIO); + } +insert: + root->ref_cows = 1; + return root; +} + +static int btrfs_fs_roots_compare_objectids(struct rb_node *node, + void *data) +{ + u64 objectid = *((u64 *)data); + struct btrfs_root *root; + + root = rb_entry(node, struct btrfs_root, rb_node); + if (objectid > root->objectid) + return 1; + else if (objectid < root->objectid) + return -1; + else + return 0; +} + +static int btrfs_fs_roots_compare_roots(struct rb_node *node1, + struct rb_node *node2) +{ + struct btrfs_root *root; + + root = rb_entry(node2, struct btrfs_root, rb_node); + return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid); +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct rb_node *node; + int ret; + u64 objectid = location->objectid; + + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) + return fs_info->chunk_root; + if (location->objectid == BTRFS_DEV_TREE_OBJECTID) + return fs_info->dev_root; + if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) + return fs_info->csum_root; + if (location->objectid == 
BTRFS_QUOTA_TREE_OBJECTID) + return fs_info->quota_root; + + BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID || + location->offset != (u64)-1); + + node = rb_search(&fs_info->fs_root_tree, (void *)&objectid, + btrfs_fs_roots_compare_objectids, NULL); + if (node) + return container_of(node, struct btrfs_root, rb_node); + + root = btrfs_read_fs_root_no_cache(fs_info, location); + if (IS_ERR(root)) + return root; + + ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node, + btrfs_fs_roots_compare_roots); + BUG_ON(ret); + return root; +} + +void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) +{ + free(fs_info->tree_root); + free(fs_info->extent_root); + free(fs_info->chunk_root); + free(fs_info->dev_root); + free(fs_info->csum_root); + free(fs_info->quota_root); + free(fs_info->free_space_root); + free(fs_info->super_copy); + free(fs_info->log_root_tree); + free(fs_info); +} + +struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) +{ + struct btrfs_fs_info *fs_info; + + fs_info = calloc(1, sizeof(struct btrfs_fs_info)); + if (!fs_info) + return NULL; + + fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->extent_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->dev_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->quota_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->free_space_root = calloc(1, sizeof(struct btrfs_root)); + fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); + + if (!fs_info->tree_root || !fs_info->extent_root || + !fs_info->chunk_root || !fs_info->dev_root || + !fs_info->csum_root || !fs_info->quota_root || + !fs_info->free_space_root || !fs_info->super_copy) + goto free_all; + + extent_io_tree_init(&fs_info->extent_cache); + extent_io_tree_init(&fs_info->free_space_cache); + extent_io_tree_init(&fs_info->block_group_cache); + extent_io_tree_init(&fs_info->pinned_extents); + extent_io_tree_init(&fs_info->pending_del); + extent_io_tree_init(&fs_info->extent_ins); + fs_info->excluded_extents = NULL; + + fs_info->fs_root_tree = RB_ROOT; + cache_tree_init(&fs_info->mapping_tree.cache_tree); + + mutex_init(&fs_info->fs_mutex); + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->space_info); + INIT_LIST_HEAD(&fs_info->recow_ebs); + + if (!writable) + fs_info->readonly = 1; + + fs_info->super_bytenr = sb_bytenr; + fs_info->data_alloc_profile = (u64)-1; + fs_info->metadata_alloc_profile = (u64)-1; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + return fs_info; +free_all: + btrfs_free_fs_info(fs_info); + return NULL; +} + +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable) +{ + u64 features; + + features = btrfs_super_incompat_flags(sb) & + ~BTRFS_FEATURE_INCOMPAT_SUPP; + if (features) { + printk("couldn't open because of unsupported " + "option features (%Lx).\n", + (unsigned long long)features); + return -ENOTSUP; + } + + features = btrfs_super_incompat_flags(sb); + if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + btrfs_set_super_incompat_flags(sb, features); + } + + features = btrfs_super_compat_ro_flags(sb) & + ~BTRFS_FEATURE_COMPAT_RO_SUPP; + if (writable && features) { + printk("couldn't open RDWR because of unsupported " + "option features (%Lx).\n", + (unsigned long long)features); + return -ENOTSUP; + } + return 0; +} + +static int 
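+/* note added in this import: scan the superblock's backup root slots and
+ * pick the highest-generation entry whose generation differs from the
+ * current super generation, i.e. the newest usable fallback tree root */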
find_best_backup_root(struct btrfs_super_block *super)
+{
+	struct btrfs_root_backup *backup;
+	u64 orig_gen = btrfs_super_generation(super);
+	u64 gen = 0;
+	int best_index = 0;
+	int i;
+
+	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+		backup = super->super_roots + i;
+		if (btrfs_backup_tree_root_gen(backup) != orig_gen &&
+		    btrfs_backup_tree_root_gen(backup) > gen) {
+			best_index = i;
+			gen = btrfs_backup_tree_root_gen(backup);
+		}
+	}
+	return best_index;
+}
+
+static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
+				      enum btrfs_open_ctree_flags flags,
+				      struct btrfs_root *info_root,
+				      u64 objectid, char *str)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	struct btrfs_root *root = fs_info->tree_root;
+	u32 leafsize = btrfs_super_leafsize(sb);
+	int ret;
+
+	ret = find_and_setup_root(root, fs_info, objectid, info_root);
+	if (ret) {
+		printk("Couldn't setup %s tree\n", str);
+		if (!(flags & OPEN_CTREE_PARTIAL))
+			return -EIO;
+		/*
+		 * Need a blank node here just so we don't screw up in the
+		 * millions of places that assume a root has a valid ->node
+		 */
+		info_root->node =
+			btrfs_find_create_tree_block(fs_info, 0, leafsize);
+		if (!info_root->node)
+			return -ENOMEM;
+		clear_extent_buffer_uptodate(NULL, info_root->node);
+	}
+
+	return 0;
+}
+
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr,
+			  enum btrfs_open_ctree_flags flags)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	u32 sectorsize;
+	u32 nodesize;
+	u32 leafsize;
+	u32 stripesize;
+	u64 generation;
+	u32 blocksize;
+	int ret;
+
+	nodesize = btrfs_super_nodesize(sb);
+	leafsize = btrfs_super_leafsize(sb);
+	sectorsize = btrfs_super_sectorsize(sb);
+	stripesize = btrfs_super_stripesize(sb);
+
+	root = fs_info->tree_root;
+	__setup_root(nodesize, leafsize, sectorsize, stripesize,
+		     root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
+	blocksize = btrfs_level_size(root, btrfs_super_root_level(sb));
+	generation = btrfs_super_generation(sb);
+
+	if (!root_tree_bytenr && !(flags & OPEN_CTREE_BACKUP_ROOT)) {
+		root_tree_bytenr = btrfs_super_root(sb);
+	} else if (flags & OPEN_CTREE_BACKUP_ROOT) {
+		struct btrfs_root_backup *backup;
+		int index = find_best_backup_root(sb);
+		if (index >= BTRFS_NUM_BACKUP_ROOTS) {
+			fprintf(stderr, "Invalid backup root number\n");
+			return -EIO;
+		}
+		backup = fs_info->super_copy->super_roots + index;
+		root_tree_bytenr = btrfs_backup_tree_root(backup);
+		generation = btrfs_backup_tree_root_gen(backup);
+	}
+
+	root->node = read_tree_block(root, root_tree_bytenr, blocksize,
+				     generation);
+	if (!extent_buffer_uptodate(root->node)) {
+		fprintf(stderr, "Couldn't read tree root\n");
+		return -EIO;
+	}
+
+	ret = setup_root_or_create_block(fs_info, flags, fs_info->extent_root,
+					 BTRFS_EXTENT_TREE_OBJECTID, "extent");
+	if (ret)
+		return ret;
+	fs_info->extent_root->track_dirty = 1;
+
+	ret = find_and_setup_root(root, fs_info, BTRFS_DEV_TREE_OBJECTID,
+				  fs_info->dev_root);
+	if (ret) {
+		printk("Couldn't setup device tree\n");
+		return -EIO;
+	}
+	fs_info->dev_root->track_dirty = 1;
+
+	ret = setup_root_or_create_block(fs_info, flags, fs_info->csum_root,
+					 BTRFS_CSUM_TREE_OBJECTID, "csum");
+	if (ret)
+		return ret;
+	fs_info->csum_root->track_dirty = 1;
+
+	ret = find_and_setup_root(root, fs_info, BTRFS_QUOTA_TREE_OBJECTID,
+				  fs_info->quota_root);
+	if (ret == 0)
+		fs_info->quota_enabled = 1;
+
+	if (btrfs_fs_compat_ro(fs_info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
+		ret =
find_and_setup_root(root, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID, + fs_info->free_space_root); + if (ret) { + printk("Couldn't read free space tree\n"); + return -EIO; + } + fs_info->free_space_root->track_dirty = 1; + } + + ret = find_and_setup_log_root(root, fs_info, sb); + if (ret) { + printk("Couldn't setup log root tree\n"); + if (!(flags & OPEN_CTREE_PARTIAL)) + return -EIO; + } + + fs_info->generation = generation; + fs_info->last_trans_committed = generation; + if (extent_buffer_uptodate(fs_info->extent_root->node) && + !(flags & OPEN_CTREE_NO_BLOCK_GROUPS)) + btrfs_read_block_groups(fs_info->tree_root); + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); + + if (IS_ERR(fs_info->fs_root)) + return -EIO; + return 0; +} + +void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) +{ + if (fs_info->free_space_root) + free_extent_buffer(fs_info->free_space_root->node); + if (fs_info->quota_root) + free_extent_buffer(fs_info->quota_root->node); + if (fs_info->csum_root) + free_extent_buffer(fs_info->csum_root->node); + if (fs_info->dev_root) + free_extent_buffer(fs_info->dev_root->node); + if (fs_info->extent_root) + free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root) + free_extent_buffer(fs_info->tree_root->node); + if (fs_info->log_root_tree) + free_extent_buffer(fs_info->log_root_tree->node); + if (fs_info->chunk_root) + free_extent_buffer(fs_info->chunk_root->node); +} + +static void free_map_lookup(struct cache_extent *ce) +{ + struct map_lookup *map; + + map = container_of(ce, struct map_lookup, ce); + kfree(map); +} + +FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); + +void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) +{ + while (!list_empty(&fs_info->recow_ebs)) { + struct extent_buffer *eb; + eb = list_first_entry(&fs_info->recow_ebs, + struct extent_buffer, recow); + list_del_init(&eb->recow); + free_extent_buffer(eb); + } + free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); + extent_io_tree_cleanup(&fs_info->extent_cache); + extent_io_tree_cleanup(&fs_info->free_space_cache); + extent_io_tree_cleanup(&fs_info->block_group_cache); + extent_io_tree_cleanup(&fs_info->pinned_extents); + extent_io_tree_cleanup(&fs_info->pending_del); + extent_io_tree_cleanup(&fs_info->extent_ins); +} + +int btrfs_scan_fs_devices(int fd, const char *path, + struct btrfs_fs_devices **fs_devices, + u64 sb_bytenr, int super_recover, + int skip_devices) +{ + u64 total_devs; + u64 dev_size; + off_t seek_ret; + int ret; + if (!sb_bytenr) + sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + + seek_ret = lseek(fd, 0, SEEK_END); + if (seek_ret < 0) + return -errno; + + dev_size = seek_ret; + lseek(fd, 0, SEEK_SET); + if (sb_bytenr > dev_size) { + fprintf(stderr, "Superblock bytenr is larger than device size\n"); + return -EINVAL; + } + + ret = btrfs_scan_one_device(fd, path, fs_devices, + &total_devs, sb_bytenr, super_recover); + if (ret) { + fprintf(stderr, "No valid Btrfs found on %s\n", path); + return ret; + } + + if (!skip_devices && total_devs != 1) { + ret = btrfs_scan_lblkid(); + if (ret) + return ret; + } + return 0; +} + +int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info, + u64 chunk_root_bytenr) +{ + struct btrfs_super_block *sb = fs_info->super_copy; + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; + u32 stripesize; + u64 generation; + int ret; + + nodesize = btrfs_super_nodesize(sb); + leafsize = 
btrfs_super_leafsize(sb); + sectorsize = btrfs_super_sectorsize(sb); + stripesize = btrfs_super_stripesize(sb); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + + ret = btrfs_read_sys_array(fs_info->chunk_root); + if (ret) + return ret; + + blocksize = btrfs_level_size(fs_info->chunk_root, + btrfs_super_chunk_root_level(sb)); + generation = btrfs_super_chunk_root_generation(sb); + + if (chunk_root_bytenr && !IS_ALIGNED(chunk_root_bytenr, + btrfs_super_sectorsize(sb))) { + warning("chunk_root_bytenr %llu is unaligned to %u, ignore it", + chunk_root_bytenr, btrfs_super_sectorsize(sb)); + chunk_root_bytenr = 0; + } + + if (!chunk_root_bytenr) + chunk_root_bytenr = btrfs_super_chunk_root(sb); + else + generation = 0; + + fs_info->chunk_root->node = read_tree_block(fs_info->chunk_root, + chunk_root_bytenr, + blocksize, generation); + if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { + if (fs_info->ignore_chunk_tree_error) { + warning("cannot read chunk root, continue anyway"); + fs_info->chunk_root = NULL; + return 0; + } else { + error("cannot read chunk root"); + return -EIO; + } + } + + if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) { + ret = btrfs_read_chunk_tree(fs_info->chunk_root); + if (ret) { + fprintf(stderr, "Couldn't read chunk tree\n"); + return ret; + } + } + return 0; +} + +static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, + u64 sb_bytenr, + u64 root_tree_bytenr, + u64 chunk_root_bytenr, + enum btrfs_open_ctree_flags flags) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_super_block *disk_super; + struct btrfs_fs_devices *fs_devices = NULL; + struct extent_buffer *eb; + int ret; + int oflags; + + if (sb_bytenr == 0) + sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + + /* try to drop all the caches */ + if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + + fs_info = btrfs_new_fs_info(flags & OPEN_CTREE_WRITES, sb_bytenr); + if (!fs_info) { + fprintf(stderr, "Failed to allocate memory for fs_info\n"); + return NULL; + } + if (flags & OPEN_CTREE_RESTORE) + fs_info->on_restoring = 1; + if (flags & OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS) + fs_info->suppress_check_block_errors = 1; + if (flags & OPEN_CTREE_IGNORE_FSID_MISMATCH) + fs_info->ignore_fsid_mismatch = 1; + if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) + fs_info->ignore_chunk_tree_error = 1; + + ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, + (flags & OPEN_CTREE_RECOVER_SUPER), + (flags & OPEN_CTREE_NO_DEVICES)); + if (ret) + goto out; + + fs_info->fs_devices = fs_devices; + if (flags & OPEN_CTREE_WRITES) + oflags = O_RDWR; + else + oflags = O_RDONLY; + + if (flags & OPEN_CTREE_EXCLUSIVE) + oflags |= O_EXCL; + + ret = btrfs_open_devices(fs_devices, oflags); + if (ret) + goto out; + + disk_super = fs_info->super_copy; + if (!(flags & OPEN_CTREE_RECOVER_SUPER)) + ret = btrfs_read_dev_super(fs_devices->latest_bdev, + disk_super, sb_bytenr, 1); + else + ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, 0); + if (ret) { + printk("No valid btrfs found\n"); + goto out_devices; + } + + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID && + !fs_info->ignore_fsid_mismatch) { + fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); + goto out_devices; + } + + memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE); + + ret = btrfs_check_fs_compatibility(fs_info->super_copy, + flags & OPEN_CTREE_WRITES); + if (ret) + goto 
out_devices; + + ret = btrfs_setup_chunk_tree_and_device_map(fs_info, chunk_root_bytenr); + if (ret) + goto out_chunk; + + /* Chunk tree root could not be read, return directly */ + if (!fs_info->chunk_root) + return fs_info; + + eb = fs_info->chunk_root->node; + read_extent_buffer(eb, fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(eb), + BTRFS_UUID_SIZE); + + ret = btrfs_setup_all_roots(fs_info, root_tree_bytenr, flags); + if (ret && !(flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) && + !fs_info->ignore_chunk_tree_error) + goto out_chunk; + + return fs_info; + +out_chunk: + btrfs_release_all_roots(fs_info); + btrfs_cleanup_all_caches(fs_info); +out_devices: + btrfs_close_devices(fs_devices); +out: + btrfs_free_fs_info(fs_info); + return NULL; +} + +struct btrfs_fs_info *open_ctree_fs_info(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + u64 chunk_root_bytenr, + enum btrfs_open_ctree_flags flags) +{ + int fp; + int ret; + struct btrfs_fs_info *info; + int oflags = O_CREAT | O_RDWR; + struct stat st; + + ret = stat(filename, &st); + if (ret < 0) { + error("cannot stat '%s': %s", filename, strerror(errno)); + return NULL; + } + if (!(((st.st_mode & S_IFMT) == S_IFREG) || ((st.st_mode & S_IFMT) == S_IFBLK))) { + error("not a regular file or block device: %s", filename); + return NULL; + } + + if (!(flags & OPEN_CTREE_WRITES)) + oflags = O_RDONLY; + + fp = open(filename, oflags, 0600); + if (fp < 0) { + error("cannot open '%s': %s", filename, strerror(errno)); + return NULL; + } + info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, + chunk_root_bytenr, flags); + close(fp); + return info; +} + +struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, + enum btrfs_open_ctree_flags flags) +{ + struct btrfs_fs_info *info; + + /* These flags may not return an fs_info with any valid root */ + BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + info = open_ctree_fs_info(filename, sb_bytenr, 0, 0, flags); + if (!info) + return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; + return info->fs_root; +} + +struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, + enum btrfs_open_ctree_flags flags) +{ + struct btrfs_fs_info *info; + + /* These flags may not return an fs_info with any valid root */ + BUG_ON(flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + info = __open_ctree_fd(fp, path, sb_bytenr, 0, 0, flags); + if (!info) + return NULL; + if (flags & __OPEN_CTREE_RETURN_CHUNK_ROOT) + return info->chunk_root; + return info->fs_root; +} + +/* + * Check if the super is valid: + * - nodesize/sectorsize - minimum, maximum, alignment + * - tree block starts - alignment + * - number of devices - something sane + * - sys array size - maximum + */ +static int check_super(struct btrfs_super_block *sb) +{ + char result[BTRFS_CSUM_SIZE]; + u32 crc; + u16 csum_type; + int csum_size; + + if (btrfs_super_magic(sb) != BTRFS_MAGIC) { + fprintf(stderr, "ERROR: superblock magic doesn't match\n"); + return -EIO; + } + + csum_type = btrfs_super_csum_type(sb); + if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { + fprintf(stderr, "ERROR: unsupported checksum algorithm %u\n", + csum_type); + return -EIO; + } + csum_size = btrfs_csum_sizes[csum_type]; + + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + + if (memcmp(result, sb->csum, csum_size)) { + fprintf(stderr, "ERROR: superblock checksum mismatch\n"); + return -EIO; + } + if 
(btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + fprintf(stderr, "ERROR: tree_root level too big: %d >= %d\n", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); + return -EIO; + } + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + fprintf(stderr, "ERROR: chunk_root level too big: %d >= %d\n", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); + return -EIO; + } + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + fprintf(stderr, "ERROR: log_root level too big: %d >= %d\n", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); + return -EIO; + } + + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { + fprintf(stderr, "ERROR: tree_root block unaligned: %llu\n", + btrfs_super_root(sb)); + return -EIO; + } + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { + fprintf(stderr, "ERROR: chunk_root block unaligned: %llu\n", + btrfs_super_chunk_root(sb)); + return -EIO; + } + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { + fprintf(stderr, "ERROR: log_root block unaligned: %llu\n", + btrfs_super_log_root(sb)); + return -EIO; + } + if (btrfs_super_nodesize(sb) < 4096) { + fprintf(stderr, "ERROR: nodesize too small: %u < 4096\n", + btrfs_super_nodesize(sb)); + return -EIO; + } + if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { + fprintf(stderr, "ERROR: nodesize unaligned: %u\n", + btrfs_super_nodesize(sb)); + return -EIO; + } + if (btrfs_super_sectorsize(sb) < 4096) { + fprintf(stderr, "ERROR: sectorsize too small: %u < 4096\n", + btrfs_super_sectorsize(sb)); + return -EIO; + } + if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { + fprintf(stderr, "ERROR: sectorsize unaligned: %u\n", + btrfs_super_sectorsize(sb)); + return -EIO; + } + + if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + char fsid[BTRFS_UUID_UNPARSED_SIZE]; + char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; + + uuid_unparse(sb->fsid, fsid); + uuid_unparse(sb->dev_item.fsid, dev_fsid); + printk(KERN_ERR + "ERROR: dev_item UUID does not match fsid: %s != %s\n", + dev_fsid, fsid); + return -EIO; + } + + /* + * Hint to catch really bogus numbers, bitflips or so + */ + if (btrfs_super_num_devices(sb) > (1UL << 31)) { + fprintf(stderr, "WARNING: suspicious number of devices: %llu\n", + btrfs_super_num_devices(sb)); + } + + if (btrfs_super_num_devices(sb) == 0) { + fprintf(stderr, "ERROR: number of devices is 0\n"); + return -EIO; + } + + /* + * Obvious sys_chunk_array corruptions, it must hold at least one key + * and one chunk + */ + if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { + fprintf(stderr, "BTRFS: system chunk array too big %u > %u\n", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + return -EIO; + } + if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)) { + fprintf(stderr, "BTRFS: system chunk array too small %u < %lu\n", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); + return -EIO; + } + + return 0; +} + +int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, + int super_recover) +{ + u8 fsid[BTRFS_FSID_SIZE]; + int fsid_is_initialized = 0; + char tmp[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; + int i; + int ret; + int max_super = super_recover ? 
BTRFS_SUPER_MIRROR_MAX : 1; + u64 transid = 0; + u64 bytenr; + + if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) { + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, sb_bytenr + partition_offset); + if (ret < BTRFS_SUPER_INFO_SIZE) + return -1; + + if (btrfs_super_bytenr(buf) != sb_bytenr) + return -1; + + if (check_super(buf)) + return -1; + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); + return 0; + } + + /* + * we would like to check all the supers, but that would make + * a btrfs mount succeed after a mkfs from a different FS. + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + + for (i = 0; i < max_super; i++) { + bytenr = btrfs_sb_offset(i); + ret = pread64(fd, buf, BTRFS_SUPER_INFO_SIZE, bytenr + partition_offset); + if (ret < BTRFS_SUPER_INFO_SIZE) + break; + + if (btrfs_super_bytenr(buf) != bytenr ) + continue; + /* if magic is NULL, the device was removed */ + if (btrfs_super_magic(buf) == 0 && i == 0) + break; + if (check_super(buf)) + continue; + + if (!fsid_is_initialized) { + memcpy(fsid, buf->fsid, sizeof(fsid)); + fsid_is_initialized = 1; + } else if (memcmp(fsid, buf->fsid, sizeof(fsid))) { + /* + * the superblocks (the original one and + * its backups) contain data of different + * filesystems -> the super cannot be trusted + */ + continue; + } + + if (btrfs_super_generation(buf) > transid) { + memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); + transid = btrfs_super_generation(buf); + } + } + + return transid > 0 ? 0 : -1; +} + +static int write_dev_supers(struct btrfs_root *root, + struct btrfs_super_block *sb, + struct btrfs_device *device) +{ + u64 bytenr; + u32 crc; + int i, ret; + + if (root->fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) { + btrfs_set_super_bytenr(sb, root->fs_info->super_bytenr); + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, (char *)&sb->csum[0]); + + /* + * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is + * zero filled, we can use it directly + */ + ret = pwrite64(device->fd, root->fs_info->super_copy, + BTRFS_SUPER_INFO_SIZE, + root->fs_info->super_bytenr); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; + return 0; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) + break; + + btrfs_set_super_bytenr(sb, bytenr); + + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, (char *)&sb->csum[0]); + + /* + * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is + * zero filled, we can use it directly + */ + ret = pwrite64(device->fd, root->fs_info->super_copy, + BTRFS_SUPER_INFO_SIZE, bytenr); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto write_err; + } + + return 0; + +write_err: + if (ret > 0) + fprintf(stderr, "WARNING: failed to write all sb data\n"); + else + fprintf(stderr, "WARNING: failed to write sb: %s\n", + strerror(errno)); + return ret; +} + +int write_all_supers(struct btrfs_root *root) +{ + struct list_head *cur; + struct list_head *head = &root->fs_info->fs_devices->devices; + struct btrfs_device *dev; + struct btrfs_super_block *sb; + struct btrfs_dev_item *dev_item; + int ret; + u64 flags; + + sb = root->fs_info->super_copy; + dev_item = &sb->dev_item; + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->writeable) + continue; + + 
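/* + * The dev_item embedded in the shared superblock copy is re-stamped + * below with this device's own identity (devid, uuid, sizes), so each + * writable device gets a super block that describes itself. + */ + 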
btrfs_set_stack_device_generation(dev_item, 0); + btrfs_set_stack_device_type(dev_item, dev->type); + btrfs_set_stack_device_id(dev_item, dev->devid); + btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_io_align(dev_item, dev->io_align); + btrfs_set_stack_device_io_width(dev_item, dev->io_width); + btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); + memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); + + flags = btrfs_super_flags(sb); + btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); + + ret = write_dev_supers(root, sb, dev); + BUG_ON(ret); + } + return 0; +} + +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_root *chunk_root = root->fs_info->chunk_root; + + if (root->fs_info->readonly) + return 0; + + btrfs_set_super_generation(root->fs_info->super_copy, + trans->transid); + btrfs_set_super_root(root->fs_info->super_copy, + tree_root->node->start); + btrfs_set_super_root_level(root->fs_info->super_copy, + btrfs_header_level(tree_root->node)); + btrfs_set_super_chunk_root(root->fs_info->super_copy, + chunk_root->node->start); + btrfs_set_super_chunk_root_level(root->fs_info->super_copy, + btrfs_header_level(chunk_root->node)); + btrfs_set_super_chunk_root_generation(root->fs_info->super_copy, + btrfs_header_generation(chunk_root->node)); + + ret = write_all_supers(root); + if (ret) + fprintf(stderr, "failed to write new super block err %d\n", ret); + return ret; +} + +int close_ctree_fs_info(struct btrfs_fs_info *fs_info) +{ + int ret; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->tree_root; + + if (fs_info->last_trans_committed != + fs_info->generation) { + BUG_ON(!root); + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + trans = btrfs_start_transaction(root, 1); + ret = commit_tree_roots(trans, fs_info); + BUG_ON(ret); + ret = __commit_transaction(trans, root); + BUG_ON(ret); + write_ctree_super(trans, root); + btrfs_free_transaction(root, trans); + } + btrfs_free_block_groups(fs_info); + + free_fs_roots_tree(&fs_info->fs_root_tree); + + btrfs_release_all_roots(fs_info); + btrfs_close_devices(fs_info->fs_devices); + btrfs_cleanup_all_caches(fs_info); + btrfs_free_fs_info(fs_info); + return 0; +} + +int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *eb) +{ + return clear_extent_buffer_dirty(eb); +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *eb) +{ + return 0; +} + +void btrfs_mark_buffer_dirty(struct extent_buffer *eb) +{ + set_extent_buffer_dirty(eb); +} + +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) +{ + int ret; + + ret = extent_buffer_uptodate(buf); + if (!ret) + return ret; + + ret = verify_parent_transid(buf->tree, buf, parent_transid, 1); + return !ret; +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *eb) +{ + return set_extent_buffer_uptodate(eb); +} diff --git a/tools/libfsimage/btrfs/disk-io.h b/tools/libfsimage/btrfs/disk-io.h new file mode 100644 index 0000000..b97b90b --- /dev/null +++ b/tools/libfsimage/btrfs/disk-io.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_DISK_IO_H__ +#define __BTRFS_DISK_IO_H__ + +#include "kerncompat.h" +#include "ctree.h" + +#define BTRFS_SUPER_INFO_OFFSET (64 * 1024) +#define BTRFS_SUPER_INFO_SIZE 4096 + +#define BTRFS_SUPER_MIRROR_MAX 3 +#define BTRFS_SUPER_MIRROR_SHIFT 12 + +enum btrfs_open_ctree_flags { + OPEN_CTREE_WRITES = (1 << 0), + OPEN_CTREE_PARTIAL = (1 << 1), + OPEN_CTREE_BACKUP_ROOT = (1 << 2), + OPEN_CTREE_RECOVER_SUPER = (1 << 3), + OPEN_CTREE_RESTORE = (1 << 4), + OPEN_CTREE_NO_BLOCK_GROUPS = (1 << 5), + OPEN_CTREE_EXCLUSIVE = (1 << 6), + OPEN_CTREE_NO_DEVICES = (1 << 7), + /* + * Don't print error messages if bytenr or checksums do not match in + * tree block headers. Turned on by OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS. + */ + OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS = (1 << 8), + /* Return chunk root */ + __OPEN_CTREE_RETURN_CHUNK_ROOT = (1 << 9), + OPEN_CTREE_CHUNK_ROOT_ONLY = OPEN_CTREE_PARTIAL + + OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS + + __OPEN_CTREE_RETURN_CHUNK_ROOT, + /* + * TODO: cleanup: Split the open_ctree_flags into more independent + * tree bits. + * E.g. split PARTIAL into SKIP_CSUM/SKIP_EXTENT + */ + + OPEN_CTREE_IGNORE_FSID_MISMATCH = (1 << 10), + + /* + * Allow open_ctree_fs_info() to return an incomplete fs_info with + * system chunks from the super block only. + * It's useful for the chunk corruption case. + * Makes no sense for open_ctree variants returning btrfs_root. 
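+ * + * For illustration, a hypothetical caller (error handling elided, + * path naming the device) could do: + * + * info = open_ctree_fs_info(path, 0, 0, 0, + * OPEN_CTREE_PARTIAL | + * OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR); + * + * and must then treat a NULL info->chunk_root as meaning that only + * the system chunk ranges from the super block are mapped.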
+ */ + OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR = (1 << 11) +}; + +static inline u64 btrfs_sb_offset(int mirror) +{ + u64 start = 16 * 1024; + if (mirror) + return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror); + return BTRFS_SUPER_INFO_OFFSET; +} + +struct btrfs_device; + +int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror); +struct extent_buffer* read_tree_block_fs_info( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize, + u64 parent_transid); +static inline struct extent_buffer* read_tree_block( + struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) +{ + return read_tree_block_fs_info(root->fs_info, bytenr, blocksize, + parent_transid); +} + +int read_extent_data(struct btrfs_root *root, char *data, u64 logical, + u64 *len, int mirror); +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid); +struct extent_buffer* btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, u64 bytenr, u32 blocksize); + +int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, + u32 stripesize, struct btrfs_root *root, + struct btrfs_fs_info *fs_info, u64 objectid); +int clean_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf); + +void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); +struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr); +int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable); +int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, + enum btrfs_open_ctree_flags flags); +void btrfs_release_all_roots(struct btrfs_fs_info *fs_info); +void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info); +int btrfs_scan_fs_devices(int fd, const char *path, + struct btrfs_fs_devices **fs_devices, u64 sb_bytenr, + int super_recover, int skip_devices); +int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info, + u64 chunk_root_bytenr); + +struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, + enum btrfs_open_ctree_flags flags); +struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, + enum btrfs_open_ctree_flags flags); +struct btrfs_fs_info *open_ctree_fs_info(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + u64 chunk_root_bytenr, + enum btrfs_open_ctree_flags flags); +int close_ctree_fs_info(struct btrfs_fs_info *fs_info); +static inline int close_ctree(struct btrfs_root *root) +{ + BUG_ON(!root); + return close_ctree_fs_info(root->fs_info); +} + +int write_all_supers(struct btrfs_root *root); +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, + int super_recover); +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *bh, + u64 logical); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize); +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); +int btrfs_free_fs_root(struct btrfs_root *root); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); +u32 
btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); +void btrfs_csum_final(u32 crc, char *result); + +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_open_device(struct btrfs_device *dev); +int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize, + int verify); +int verify_tree_block_csum_silent(struct extent_buffer *buf, u16 csum_size); +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); +int write_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb); +int write_and_map_eb(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *eb); + +/* raid6.c */ +void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs); + +#endif diff --git a/tools/libfsimage/btrfs/extent-cache.c b/tools/libfsimage/btrfs/extent-cache.c new file mode 100644 index 0000000..38bed8b --- /dev/null +++ b/tools/libfsimage/btrfs/extent-cache.c @@ -0,0 +1,341 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include <stdio.h> +#include <stdlib.h> +#include "kerncompat.h" +#include "extent-cache.h" +#include "rbtree-utils.h" + +struct cache_extent_search_range { + u64 objectid; + u64 start; + u64 size; +}; + +static int cache_tree_comp_range(struct rb_node *node, void *data) +{ + struct cache_extent *entry; + struct cache_extent_search_range *range; + + range = (struct cache_extent_search_range *)data; + entry = rb_entry(node, struct cache_extent, rb_node); + + if (entry->start + entry->size <= range->start) + return 1; + else if (range->start + range->size <= entry->start) + return -1; + else + return 0; +} + +static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2) +{ + struct cache_extent *entry; + struct cache_extent_search_range range; + + entry = rb_entry(node2, struct cache_extent, rb_node); + range.start = entry->start; + range.size = entry->size; + + return cache_tree_comp_range(node1, (void *)&range); +} + +static int cache_tree_comp_range2(struct rb_node *node, void *data) +{ + struct cache_extent *entry; + struct cache_extent_search_range *range; + + range = (struct cache_extent_search_range *)data; + entry = rb_entry(node, struct cache_extent, rb_node); + + if (entry->objectid < range->objectid) + return 1; + else if (entry->objectid > range->objectid) + return -1; + else if (entry->start + entry->size <= range->start) + return 1; + else if (range->start + range->size <= entry->start) + return -1; + else + return 0; +} + +static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2) +{ + struct cache_extent *entry; + struct cache_extent_search_range range; + + entry = rb_entry(node2, struct cache_extent, rb_node); + range.objectid = entry->objectid; + range.start = entry->start; + range.size = entry->size; + + return 
cache_tree_comp_range2(node1, (void *)&range); +} + +void cache_tree_init(struct cache_tree *tree) +{ + tree->root = RB_ROOT; +} + +static struct cache_extent * +alloc_cache_extent(u64 objectid, u64 start, u64 size) +{ + struct cache_extent *pe = malloc(sizeof(*pe)); + + if (!pe) + return pe; + + pe->objectid = objectid; + pe->start = start; + pe->size = size; + return pe; +} + +static int __add_cache_extent(struct cache_tree *tree, + u64 objectid, u64 start, u64 size) +{ + struct cache_extent *pe = alloc_cache_extent(objectid, start, size); + int ret; + + if (!pe) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + ret = insert_cache_extent(tree, pe); + if (ret) + free(pe); + + return ret; +} + +int add_cache_extent(struct cache_tree *tree, u64 start, u64 size) +{ + return __add_cache_extent(tree, 0, start, size); +} + +int add_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size) +{ + return __add_cache_extent(tree, objectid, start, size); +} + +int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe) +{ + return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes); +} + +int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe) +{ + return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes2); +} + +struct cache_extent *lookup_cache_extent(struct cache_tree *tree, + u64 start, u64 size) +{ + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.start = start; + range.size = size; + node = rb_search(&tree->root, &range, cache_tree_comp_range, NULL); + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *lookup_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size) +{ + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.objectid = objectid; + range.start = start; + range.size = size; + node = rb_search(&tree->root, &range, cache_tree_comp_range2, NULL); + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start) +{ + struct rb_node *next; + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.start = start; + range.size = 1; + node = rb_search(&tree->root, &range, cache_tree_comp_range, &next); + if (!node) + node = next; + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *search_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start) +{ + struct rb_node *next; + struct rb_node *node; + struct cache_extent *entry; + struct cache_extent_search_range range; + + range.objectid = objectid; + range.start = start; + range.size = 1; + node = rb_search(&tree->root, &range, cache_tree_comp_range2, &next); + if (!node) + node = next; + if (!node) + return NULL; + + entry = rb_entry(node, struct cache_extent, rb_node); + return entry; +} + +struct cache_extent *first_cache_extent(struct cache_tree *tree) +{ + struct rb_node *node = rb_first(&tree->root); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct cache_extent *last_cache_extent(struct cache_tree *tree) +{ + struct rb_node *node = rb_last(&tree->root); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct 
cache_extent *prev_cache_extent(struct cache_extent *pe) +{ + struct rb_node *node = rb_prev(&pe->rb_node); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +struct cache_extent *next_cache_extent(struct cache_extent *pe) +{ + struct rb_node *node = rb_next(&pe->rb_node); + + if (!node) + return NULL; + return rb_entry(node, struct cache_extent, rb_node); +} + +void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe) +{ + rb_erase(&pe->rb_node, &tree->root); +} + +void cache_tree_free_extents(struct cache_tree *tree, + free_cache_extent free_func) +{ + struct cache_extent *ce; + + while ((ce = first_cache_extent(tree))) { + remove_cache_extent(tree, ce); + free_func(ce); + } +} + +static void free_extent_cache(struct cache_extent *pe) +{ + free(pe); +} + +void free_extent_cache_tree(struct cache_tree *tree) +{ + cache_tree_free_extents(tree, free_extent_cache); +} + +int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size) +{ + struct cache_extent *cache; + struct cache_extent *next = NULL; + struct cache_extent *prev = NULL; + int next_merged = 0; + int prev_merged = 0; + int ret = 0; + + if (cache_tree_empty(tree)) + goto insert; + + cache = search_cache_extent(tree, start); + if (!cache) { + /* + * Either the tree is completely empty, or there is no range + * after start. + * Either way, the last cache_extent should be prev. + */ + prev = last_cache_extent(tree); + } else if (start <= cache->start) { + next = cache; + prev = prev_cache_extent(cache); + } else { + prev = cache; + next = next_cache_extent(cache); + } + + /* + * Ensure the range to be inserted won't overlap with existing ranges, + * or we would need an extra loop to do the merge + */ + BUG_ON(next && start + size > next->start); + BUG_ON(prev && prev->start + prev->size > start); + + if (next && start + size == next->start) { + next_merged = 1; + next->size = next->start + next->size - start; + next->start = start; + } + if (prev && prev->start + prev->size == start) { + prev_merged = 1; + if (next_merged) { + next->size = next->start + next->size - prev->start; + next->start = prev->start; + remove_cache_extent(tree, prev); + free(prev); + } else { + prev->size = start + size - prev->start; + } + } +insert: + if (!prev_merged && !next_merged) + ret = add_cache_extent(tree, start, size); + return ret; +} diff --git a/tools/libfsimage/btrfs/extent-cache.h b/tools/libfsimage/btrfs/extent-cache.h new file mode 100644 index 0000000..f031fbf --- /dev/null +++ b/tools/libfsimage/btrfs/extent-cache.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __BTRFS_EXTENT_CACHE_H__ +#define __BTRFS_EXTENT_CACHE_H__ + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#include "rbtree.h" +#else +#include <btrfs/kerncompat.h> +#include <btrfs/rbtree.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +struct cache_tree { + struct rb_root root; +}; + +struct cache_extent { + struct rb_node rb_node; + u64 objectid; + u64 start; + u64 size; +}; + +void cache_tree_init(struct cache_tree *tree); + +struct cache_extent *first_cache_extent(struct cache_tree *tree); +struct cache_extent *last_cache_extent(struct cache_tree *tree); +struct cache_extent *prev_cache_extent(struct cache_extent *pe); +struct cache_extent *next_cache_extent(struct cache_extent *pe); + +/* + * Find a cache_extent which covers start. + * + * If not found, return next cache_extent if possible. + */ +struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start); + +/* + * Find a cache_extent which strictly covers start. + * + * If not found, return NULL. + */ +struct cache_extent *lookup_cache_extent(struct cache_tree *tree, + u64 start, u64 size); + +/* + * Add a non-overlapping extent into the cache tree + * + * If [start, start+size) overlaps with an existing one, it will return -EEXIST. + */ +int add_cache_extent(struct cache_tree *tree, u64 start, u64 size); + +/* + * Same as add_cache_extent, but takes a cache_extent struct. + */ +int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe); +void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe); + +static inline int cache_tree_empty(struct cache_tree *tree) +{ + return RB_EMPTY_ROOT(&tree->root); +} + +typedef void (*free_cache_extent)(struct cache_extent *pe); + +void cache_tree_free_extents(struct cache_tree *tree, + free_cache_extent free_func); + +#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func) \ +static void free_##name##_tree(struct cache_tree *tree) \ +{ \ + cache_tree_free_extents(tree, free_func); \ +} + +void free_extent_cache_tree(struct cache_tree *tree); + +/* + * Search for a cache_extent with the same objectid which covers start. + * + * If not found, return next if possible. + */ +struct cache_extent *search_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start); +/* + * Search for a cache_extent with the same objectid which covers the range + * [start, start + size) + * + * If not found, return next cache_extent if possible. + */ +struct cache_extent *lookup_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size); +int add_cache_extent2(struct cache_tree *tree, + u64 objectid, u64 start, u64 size); +int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe); + +/* + * Insert a cache_extent range [start, start + size). + * + * This function may merge it with an existing cache_extent. + * NOTE: the caller must ensure the inserted range won't overlap with any + * existing range. + */ +int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size); +#endif diff --git a/tools/libfsimage/btrfs/extent-tree.c b/tools/libfsimage/btrfs/extent-tree.c new file mode 100644 index 0000000..b9b00f0 --- /dev/null +++ b/tools/libfsimage/btrfs/extent-tree.c @@ -0,0 +1,4122 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" +#include "crc32c.h" +#include "volumes.h" +#include "free-space-cache.h" +#include "utils.h" + +#define PENDING_EXTENT_INSERT 0 +#define PENDING_EXTENT_DELETE 1 +#define PENDING_BACKREF_UPDATE 2 + +struct pending_extent_op { + int type; + u64 bytenr; + u64 num_bytes; + u64 flags; + struct btrfs_disk_key key; + int level; +}; + +static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, struct btrfs_key *ins); +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop); +static int finish_current_insert(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner); + +static int remove_sb_from_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + struct extent_io_tree *free_space_cache; + + free_space_cache = &root->fs_info->free_space_cache; + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); + BUG_ON(ret); + while (nr--) { + clear_extent_dirty(free_space_cache, logical[nr], + logical[nr] + stripe_len - 1, GFP_NOFS); + } + kfree(logical); + } + return 0; +} + +static int cache_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct extent_buffer *leaf; + struct extent_io_tree *free_space_cache; + int slot; + u64 last; + u64 hole_size; + + if (!block_group) + return 0; + + root = root->fs_info->extent_root; + free_space_cache = &root->fs_info->free_space_cache; + + if (block_group->cached) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = 2; + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + key.objectid = last; + key.offset = 0; + key.type = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; + if (ret == 0) { + continue; + } else { + break; + } + } + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid < block_group->key.objectid) { + goto 
next; + } + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) { + break; + } + + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { + if (key.objectid > last) { + hole_size = key.objectid - last; + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, + GFP_NOFS); + } + if (key.type == BTRFS_METADATA_ITEM_KEY) + last = key.objectid + root->leafsize; + else + last = key.objectid + key.offset; + } +next: + path->slots[0]++; + } + + if (block_group->key.objectid + + block_group->key.offset > last) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, GFP_NOFS); + } + remove_sb_from_cache(root, block_group); + block_group->cached = 1; +err: + btrfs_free_path(path); + return 0; +} + +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + return block_group; +} + +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + if (block_group->key.objectid <= bytenr && bytenr < + block_group->key.objectid + block_group->key.offset) + return block_group; + return NULL; +} + +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + return (cache->flags & bits) == bits; +} + +static int noinline find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 *start_ret, int num, int data) +{ + int ret; + struct btrfs_block_group_cache *cache = *cache_ret; + u64 last = *start_ret; + u64 start = 0; + u64 end = 0; + u64 search_start = *start_ret; + int wrapped = 0; + + if (!cache) + goto out; +again: + ret = cache_block_group(root, cache); + if (ret) + goto out; + + last = max(search_start, cache->key.objectid); + if (cache->ro || !block_group_bits(cache, data)) + goto new_group; + + while(1) { + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) { + goto new_group; + } + + start = max(last, start); + last = end + 1; + if (last - start < num) { + continue; + } + if (start + num > cache->key.objectid + cache->key.offset) { + goto new_group; + } + *start_ret = start; + return 0; + } +out: + *start_ret = last; + cache = btrfs_lookup_block_group(root->fs_info, search_start); + if (!cache) { + printk("Unable to find block group for 
%llu\n", + (unsigned long long)search_start); + WARN_ON(1); + } + return -ENOSPC; + +new_group: + last = cache->key.objectid + cache->key.offset; +wrapped: + cache = btrfs_lookup_first_block_group(root->fs_info, last); + if (!cache) { + if (!wrapped) { + wrapped = 1; + last = search_start; + goto wrapped; + } + goto out; + } + *cache_ret = cache; + goto again; +} + +static int block_group_state_bits(u64 flags) +{ + int bits = 0; + if (flags & BTRFS_BLOCK_GROUP_DATA) + bits |= BLOCK_GROUP_DATA; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + bits |= BLOCK_GROUP_METADATA; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + bits |= BLOCK_GROUP_SYSTEM; + return bits; +} + +static struct btrfs_block_group_cache * +btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache + *hint, u64 search_start, int data, int owner) +{ + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *found_group = NULL; + struct btrfs_fs_info *info = root->fs_info; + u64 used; + u64 last = 0; + u64 hint_last; + u64 start; + u64 end; + u64 free_check; + u64 ptr; + int bit; + int ret; + int full_search = 0; + int factor = 10; + + block_group_cache = &info->block_group_cache; + + if (!owner) + factor = 10; + + bit = block_group_state_bits(data); + + if (search_start) { + struct btrfs_block_group_cache *shint; + shint = btrfs_lookup_block_group(info, search_start); + if (shint && !shint->ro && block_group_bits(shint, data)) { + used = btrfs_block_group_used(&shint->item); + if (used + shint->pinned < + div_factor(shint->key.offset, factor)) { + return shint; + } + } + } + if (hint && !hint->ro && block_group_bits(hint, data)) { + used = btrfs_block_group_used(&hint->item); + if (used + hint->pinned < + div_factor(hint->key.offset, factor)) { + return hint; + } + last = hint->key.objectid + hint->key.offset; + hint_last = last; + } else { + if (hint) + hint_last = max(hint->key.objectid, search_start); + else + hint_last = search_start; + + last = hint_last; + } +again: + while(1) { + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, bit); + if (ret) + break; + + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + last = cache->key.objectid + cache->key.offset; + used = btrfs_block_group_used(&cache->item); + + if (!cache->ro && block_group_bits(cache, data)) { + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, + factor); + + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; + } + } + cond_resched(); + } + if (!full_search) { + last = search_start; + full_search = 1; + goto again; + } +found: + return found_group; +} + +/* + * Back reference rules. Back refs have three main goals: + * + * 1) differentiate between all holders of references to an extent so that + * when a reference is dropped we can make sure it was a valid reference + * before freeing the extent. + * + * 2) Provide enough information to quickly find the holders of an extent + * if we notice a given block is corrupted or bad. + * + * 3) Make it easy to migrate blocks for FS shrinking or storage pool + * maintenance. This is actually the same as #2, but with a slightly + * different use case. + * + * There are two kinds of back refs. The implicit back refs is optimized + * for pointers in non-shared tree blocks. 
For a given pointer in a block, + * back refs of this kind provide information about the block's owner tree + * and the pointer's key. These information allow us to find the block by + * b-tree searching. The full back refs is for pointers in tree blocks not + * referenced by their owner trees. The location of tree block is recorded + * in the back refs. Actually the full back refs is generic, and can be + * used in all cases the implicit back refs is used. The major shortcoming + * of the full back refs is its overhead. Every time a tree block gets + * COWed, we have to update back refs entry for all pointers in it. + * + * For a newly allocated tree block, we use implicit back refs for + * pointers in it. This means most tree related operations only involve + * implicit back refs. For a tree block created in old transaction, the + * only way to drop a reference to it is COW it. So we can detect the + * event that tree block loses its owner tree's reference and do the + * back refs conversion. + * + * When a tree block is COW'd through a tree, there are four cases: + * + * The reference count of the block is one and the tree is the block's + * owner tree. Nothing to do in this case. + * + * The reference count of the block is one and the tree is not the + * block's owner tree. In this case, full back refs is used for pointers + * in the block. Remove these full back refs, add implicit back refs for + * every pointers in the new block. + * + * The reference count of the block is greater than one and the tree is + * the block's owner tree. In this case, implicit back refs is used for + * pointers in the block. Add full back refs for every pointers in the + * block, increase lower level extents' reference counts. The original + * implicit back refs are entailed to the new block. + * + * The reference count of the block is greater than one and the tree is + * not the block's owner tree. Add implicit back refs for every pointer in + * the new block, increase lower level extents' reference count. + * + * Back Reference Key composing: + * + * The key objectid corresponds to the first byte in the extent, + * The key type is used to differentiate between types of back refs. + * There are different meanings of the key offset for different types + * of back refs. + * + * File extents can be referenced by: + * + * - multiple snapshots, subvolumes, or different generations in one subvol + * - different files inside a single subvolume + * - different offsets inside a file (bookend extents in file.c) + * + * The extent ref structure for the implicit back refs has fields for: + * + * - Objectid of the subvolume root + * - objectid of the file holding the reference + * - original offset in the file + * - how many bookend extents + * + * The key offset for the implicit back refs is hash of the first + * three fields. + * + * The extent ref structure for the full back refs has field for: + * + * - number of pointers in the tree leaf + * + * The key offset for the implicit back refs is the first byte of + * the tree leaf + * + * When a file extent is allocated, The implicit back refs is used. 
+ * the fields are filled in: + * + * (root_key.objectid, inode objectid, offset in file, 1) + * + * When a file extent is removed file truncation, we find the + * corresponding implicit back refs and check the following fields: + * + * (btrfs_header_owner(leaf), inode objectid, offset in file) + * + * Btree extents can be referenced by: + * + * - Different subvolumes + * + * Both the implicit back refs and the full back refs for tree blocks + * only consist of key. The key offset for the implicit back refs is + * objectid of block's owner tree. The key offset for the full back refs + * is the first byte of parent block. + * + * When implicit back refs is used, information about the lowest key and + * level of the tree block are required. These information are stored in + * tree block info structure. + */ + +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 +static int convert_extent_item_v0(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 owner, u32 extra_size) +{ + struct btrfs_extent_item *item; + struct btrfs_extent_item_v0 *ei0; + struct btrfs_extent_ref_v0 *ref0; + struct btrfs_tree_block_info *bi; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + u32 new_size = sizeof(*item); + u64 refs; + int ret; + + leaf = path->nodes[0]; + BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ei0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item_v0); + refs = btrfs_extent_refs_v0(leaf, ei0); + + if (owner == (u64)-1) { + while (1) { + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + BUG_ON(ret > 0); + leaf = path->nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, + path->slots[0]); + BUG_ON(key.objectid != found_key.objectid); + if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { + path->slots[0]++; + continue; + } + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + owner = btrfs_ref_objectid_v0(leaf, ref0); + break; + } + } + btrfs_release_path(path); + + if (owner < BTRFS_FIRST_FREE_OBJECTID) + new_size += sizeof(*bi); + + new_size -= sizeof(*ei0); + ret = btrfs_search_slot(trans, root, &key, path, new_size, 1); + if (ret < 0) + return ret; + BUG_ON(ret); + + ret = btrfs_extend_item(trans, root, path, new_size); + BUG_ON(ret); + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + btrfs_set_extent_refs(leaf, item, refs); + /* FIXME: get real generation */ + btrfs_set_extent_generation(leaf, item, 0); + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + btrfs_set_extent_flags(leaf, item, + BTRFS_EXTENT_FLAG_TREE_BLOCK | + BTRFS_BLOCK_FLAG_FULL_BACKREF); + bi = (struct btrfs_tree_block_info *)(item + 1); + /* FIXME: get first key of the block */ + memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); + btrfs_set_tree_block_level(leaf, bi, (int)owner); + } else { + btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); + } + btrfs_mark_buffer_dirty(leaf); + return 0; +} +#endif + +static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) +{ + u32 high_crc = ~(u32)0; + u32 low_crc = ~(u32)0; + __le64 lenum; + + lenum = cpu_to_le64(root_objectid); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(owner); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(offset); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + + 
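/* + * Combine both CRCs into one 64-bit value: the root crc in the high + * bits, the owner/offset crc in the low bits. The shift by 31 (rather + * than 32) overlaps one bit, but the result is persisted on disk as + * the key offset of extent data ref items (see lookup_extent_data_ref + * below), so it cannot change without breaking existing filesystems. + */ + 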
return ((u64)high_crc << 31) ^ (u64)low_crc; +} + +static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, + struct btrfs_extent_data_ref *ref) +{ + return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), + btrfs_extent_data_ref_objectid(leaf, ref), + btrfs_extent_data_ref_offset(leaf, ref)); +} + +static int match_extent_data_ref(struct extent_buffer *leaf, + struct btrfs_extent_data_ref *ref, + u64 root_objectid, u64 owner, u64 offset) +{ + if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || + btrfs_extent_data_ref_objectid(leaf, ref) != owner || + btrfs_extent_data_ref_offset(leaf, ref) != offset) + return 0; + return 1; +} + +static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid, + u64 owner, u64 offset) +{ + struct btrfs_key key; + struct btrfs_extent_data_ref *ref; + struct extent_buffer *leaf; + u32 nritems; + int ret; + int recow; + int err = -ENOENT; + + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_DATA_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_EXTENT_DATA_REF_KEY; + key.offset = hash_extent_data_ref(root_objectid, + owner, offset); + } +again: + recow = 0; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto fail; + } + + if (parent) { + if (!ret) + return 0; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + key.type = BTRFS_EXTENT_REF_V0_KEY; + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto fail; + } + if (!ret) + return 0; +#endif + goto fail; + } + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + while (1) { + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + err = ret; + if (ret) + goto fail; + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + recow = 1; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_DATA_REF_KEY) + goto fail; + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + + if (match_extent_data_ref(leaf, ref, root_objectid, + owner, offset)) { + if (recow) { + btrfs_release_path(path); + goto again; + } + err = 0; + break; + } + path->slots[0]++; + } +fail: + return err; +} + +static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid, u64 owner, + u64 offset, int refs_to_add) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + u32 size; + u32 num_refs; + int ret; + + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_DATA_REF_KEY; + key.offset = parent; + size = sizeof(struct btrfs_shared_data_ref); + } else { + key.type = BTRFS_EXTENT_DATA_REF_KEY; + key.offset = hash_extent_data_ref(root_objectid, + owner, offset); + size = sizeof(struct btrfs_extent_data_ref); + } + + ret = btrfs_insert_empty_item(trans, root, path, &key, size); + if (ret && ret != -EEXIST) + goto fail; + + leaf = path->nodes[0]; + if (parent) { + struct btrfs_shared_data_ref *ref; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + if (ret == 0) { + btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); + } else { + num_refs = btrfs_shared_data_ref_count(leaf, ref); + num_refs += refs_to_add; + btrfs_set_shared_data_ref_count(leaf, ref, num_refs); + } + } else { + 
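/* + * Hash-keyed ref: distinct (root, owner, offset) tuples can collide + * on the same key offset, so on -EEXIST walk forward through + * consecutive offsets until the matching ref is found or a free + * slot is inserted. + */ + 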
struct btrfs_extent_data_ref *ref; + while (ret == -EEXIST) { + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + if (match_extent_data_ref(leaf, ref, root_objectid, + owner, offset)) + break; + btrfs_release_path(path); + + key.offset++; + ret = btrfs_insert_empty_item(trans, root, path, &key, + size); + if (ret && ret != -EEXIST) + goto fail; + + leaf = path->nodes[0]; + } + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + if (ret == 0) { + btrfs_set_extent_data_ref_root(leaf, ref, + root_objectid); + btrfs_set_extent_data_ref_objectid(leaf, ref, owner); + btrfs_set_extent_data_ref_offset(leaf, ref, offset); + btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); + } else { + num_refs = btrfs_extent_data_ref_count(leaf, ref); + num_refs += refs_to_add; + btrfs_set_extent_data_ref_count(leaf, ref, num_refs); + } + } + btrfs_mark_buffer_dirty(leaf); + ret = 0; +fail: + btrfs_release_path(path); + return ret; +} + +static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int refs_to_drop) +{ + struct btrfs_key key; + struct btrfs_extent_data_ref *ref1 = NULL; + struct btrfs_shared_data_ref *ref2 = NULL; + struct extent_buffer *leaf; + u32 num_refs = 0; + int ret = 0; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + num_refs = btrfs_extent_data_ref_count(leaf, ref1); + } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + ref2 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + num_refs = btrfs_ref_count_v0(leaf, ref0); +#endif + } else { + BUG(); + } + + BUG_ON(num_refs < refs_to_drop); + num_refs -= refs_to_drop; + + if (num_refs == 0) { + ret = btrfs_del_item(trans, root, path); + } else { + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) + btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); + else if (key.type == BTRFS_SHARED_DATA_REF_KEY) + btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + else { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + btrfs_set_ref_count_v0(leaf, ref0, num_refs); + } +#endif + btrfs_mark_buffer_dirty(leaf); + } + return ret; +} + +static noinline u32 extent_data_ref_count(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_data_ref *ref1; + struct btrfs_shared_data_ref *ref2; + u32 num_refs = 0; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (iref) { + if (btrfs_extent_inline_ref_type(leaf, iref) == + BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); + num_refs = btrfs_extent_data_ref_count(leaf, ref1); + } else { + ref2 = (struct btrfs_shared_data_ref *)(iref + 1); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); + } + } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + ref1 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_data_ref); + num_refs = btrfs_extent_data_ref_count(leaf, 
ref1); + } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + ref2 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_shared_data_ref); + num_refs = btrfs_shared_data_ref_count(leaf, ref2); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { + struct btrfs_extent_ref_v0 *ref0; + ref0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref_v0); + num_refs = btrfs_ref_count_v0(leaf, ref0); +#endif + } else { + BUG(); + } + return num_refs; +} + +static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid) +{ + struct btrfs_key key; + int ret; + + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_BLOCK_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root_objectid; + } + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (ret == -ENOENT && parent) { + btrfs_release_path(path); + key.type = BTRFS_EXTENT_REF_V0_KEY; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; + } +#endif + return ret; +} + +static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 root_objectid) +{ + struct btrfs_key key; + int ret; + + key.objectid = bytenr; + if (parent) { + key.type = BTRFS_SHARED_BLOCK_REF_KEY; + key.offset = parent; + } else { + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root_objectid; + } + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + + btrfs_release_path(path); + return ret; +} + +static inline int extent_ref_type(u64 parent, u64 owner) +{ + int type; + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + if (parent > 0) + type = BTRFS_SHARED_BLOCK_REF_KEY; + else + type = BTRFS_TREE_BLOCK_REF_KEY; + } else { + if (parent > 0) + type = BTRFS_SHARED_DATA_REF_KEY; + else + type = BTRFS_EXTENT_DATA_REF_KEY; + } + return type; +} + +static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref **ref_ret, + u64 bytenr, u64 num_bytes, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int insert) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + u64 flags; + u32 item_size; + unsigned long ptr; + unsigned long end; + int extra_size; + int type; + int want; + int ret; + int err = 0; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + + want = extent_ref_type(parent, owner); + if (insert) + extra_size = btrfs_extent_inline_ref_size(want); + else + extra_size = -1; + + if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) { + skinny_metadata = 1; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner; + } else if (skinny_metadata) { + skinny_metadata = 0; + } + +again: + ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); + if (ret < 0) { + err = ret; + goto out; + } + + /* + * We may be a newly converted file system which still has the old fat + * extent entries for metadata, so try and see if we have one of those. 
+ */ + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + if (ret) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + btrfs_release_path(path); + goto again; + } + } + + if (ret) { + printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset); + return -ENOENT; + } + + BUG_ON(ret); + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*ei)) { + if (!insert) { + err = -ENOENT; + goto out; + } + ret = convert_extent_item_v0(trans, root, path, owner, + extra_size); + if (ret < 0) { + err = ret; + goto out; + } + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + } +#endif + if (item_size < sizeof(*ei)) { + printf("Size is %u, needs to be %u, slot %d\n", + (unsigned)item_size, + (unsigned)sizeof(*ei), path->slots[0]); + btrfs_print_leaf(root, leaf); + return -EINVAL; + } + BUG_ON(item_size < sizeof(*ei)); + + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + + ptr = (unsigned long)(ei + 1); + end = (unsigned long)ei + item_size; + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { + ptr += sizeof(struct btrfs_tree_block_info); + BUG_ON(ptr > end); + } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + if (!(flags & BTRFS_EXTENT_FLAG_DATA)) { + return -EIO; + } + } + + err = -ENOENT; + while (1) { + if (ptr >= end) { + WARN_ON(ptr > end); + break; + } + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + if (want < type) + break; + if (want > type) { + ptr += btrfs_extent_inline_ref_size(type); + continue; + } + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *dref; + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + if (match_extent_data_ref(leaf, dref, root_objectid, + owner, offset)) { + err = 0; + break; + } + if (hash_extent_data_ref_item(leaf, dref) < + hash_extent_data_ref(root_objectid, owner, offset)) + break; + } else { + u64 ref_offset; + ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); + if (parent > 0) { + if (parent == ref_offset) { + err = 0; + break; + } + if (ref_offset < parent) + break; + } else { + if (root_objectid == ref_offset) { + err = 0; + break; + } + if (ref_offset < root_objectid) + break; + } + } + ptr += btrfs_extent_inline_ref_size(type); + } + if (err == -ENOENT && insert) { + if (item_size + extra_size >= + BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { + err = -EAGAIN; + goto out; + } + /* + * To add new inline back ref, we have to make sure + * there is no corresponding back ref item. + * For simplicity, we just do not add new inline back + * ref if there is any back ref item. 
+ */ + if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { + err = -EAGAIN; + goto out; + } + } + *ref_ret = (struct btrfs_extent_inline_ref *)ptr; +out: + return err; +} + +static int setup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add) +{ + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + unsigned long item_offset; + u64 refs; + int size; + int type; + int ret; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + item_offset = (unsigned long)iref - (unsigned long)ei; + + type = extent_ref_type(parent, owner); + size = btrfs_extent_inline_ref_size(type); + + ret = btrfs_extend_item(trans, root, path, size); + BUG_ON(ret); + + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, ei); + refs += refs_to_add; + btrfs_set_extent_refs(leaf, ei, refs); + + ptr = (unsigned long)ei + item_offset; + end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); + if (ptr < end - size) + memmove_extent_buffer(leaf, ptr + size, ptr, + end - size - ptr); + + iref = (struct btrfs_extent_inline_ref *)ptr; + btrfs_set_extent_inline_ref_type(leaf, iref, type); + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *dref; + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); + btrfs_set_extent_data_ref_objectid(leaf, dref, owner); + btrfs_set_extent_data_ref_offset(leaf, dref, offset); + btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + struct btrfs_shared_data_ref *sref; + sref = (struct btrfs_shared_data_ref *)(iref + 1); + btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); + btrfs_set_extent_inline_ref_offset(leaf, iref, parent); + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + btrfs_set_extent_inline_ref_offset(leaf, iref, parent); + } else { + btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); + } + btrfs_mark_buffer_dirty(leaf); + return 0; +} + +static int lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref **ref_ret, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) +{ + int ret; + + ret = lookup_inline_extent_backref(trans, root, path, ref_ret, + bytenr, num_bytes, parent, + root_objectid, owner, offset, 0); + if (ret != -ENOENT) + return ret; + + btrfs_release_path(path); + *ref_ret = NULL; + + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, + root_objectid); + } else { + ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, + root_objectid, owner, offset); + } + return ret; +} + +static int update_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_mod) +{ + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_data_ref *dref = NULL; + struct btrfs_shared_data_ref *sref = NULL; + unsigned long ptr; + unsigned long end; + u32 item_size; + int size; + int type; + int ret; + u64 refs; + + leaf = path->nodes[0]; + ei = 
btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, ei); + WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); + refs += refs_to_mod; + btrfs_set_extent_refs(leaf, ei, refs); + + type = btrfs_extent_inline_ref_type(leaf, iref); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + refs = btrfs_extent_data_ref_count(leaf, dref); + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + sref = (struct btrfs_shared_data_ref *)(iref + 1); + refs = btrfs_shared_data_ref_count(leaf, sref); + } else { + refs = 1; + BUG_ON(refs_to_mod != -1); + } + + BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); + refs += refs_to_mod; + + if (refs > 0) { + if (type == BTRFS_EXTENT_DATA_REF_KEY) + btrfs_set_extent_data_ref_count(leaf, dref, refs); + else + btrfs_set_shared_data_ref_count(leaf, sref, refs); + } else { + size = btrfs_extent_inline_ref_size(type); + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + if (ptr + size < end) + memmove_extent_buffer(leaf, ptr, ptr + size, + end - ptr - size); + item_size -= size; + ret = btrfs_truncate_item(trans, root, path, item_size, 1); + BUG_ON(ret); + } + btrfs_mark_buffer_dirty(leaf); + return 0; +} + +static int insert_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, + u64 offset, int refs_to_add) +{ + struct btrfs_extent_inline_ref *iref; + int ret; + + ret = lookup_inline_extent_backref(trans, root, path, &iref, + bytenr, num_bytes, parent, + root_objectid, owner, offset, 1); + if (ret == 0) { + BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); + ret = update_inline_extent_backref(trans, root, path, iref, + refs_to_add); + } else if (ret == -ENOENT) { + ret = setup_inline_extent_backref(trans, root, path, iref, + parent, root_objectid, + owner, offset, refs_to_add); + } + return ret; +} + +static int insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add) +{ + int ret; + + if (owner >= BTRFS_FIRST_FREE_OBJECTID) { + ret = insert_extent_data_ref(trans, root, path, bytenr, + parent, root_objectid, + owner, offset, refs_to_add); + } else { + BUG_ON(refs_to_add != 1); + ret = insert_tree_block_ref(trans, root, path, bytenr, + parent, root_objectid); + } + return ret; +} + +static int remove_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_drop, int is_data) +{ + int ret; + + BUG_ON(!is_data && refs_to_drop != 1); + if (iref) { + ret = update_inline_extent_backref(trans, root, path, iref, + -refs_to_drop); + } else if (is_data) { + ret = remove_extent_data_ref(trans, root, path, refs_to_drop); + } else { + ret = btrfs_del_item(trans, root, path); + } + return ret; +} + +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_extent_item *item; + u64 refs; + int ret; + int err = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = 1; + + ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, 
num_bytes, parent, + root_objectid, owner, offset, 1); + if (ret == 0) + goto out; + + if (ret != -EAGAIN) { + err = ret; + goto out; + } + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, item); + btrfs_set_extent_refs(leaf, item, refs + 1); + + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + path->reada = 1; + + /* now insert the actual backref */ + ret = insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, parent, root_objectid, + owner, offset, 1); + if (ret) + err = ret; +out: + btrfs_free_path(path); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + BUG_ON(err); + return err; +} + +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 offset, int metadata, u64 *refs, u64 *flags) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + u32 item_size; + u64 num_refs; + u64 extent_flags; + + if (metadata && + !btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + offset = root->leafsize; + metadata = 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; + + key.objectid = bytenr; + key.offset = offset; + if (metadata) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, + 0, 0); + if (ret < 0) + goto out; + + /* + * Deal with the fact that we may have mixed SKINNY and normal refs. If + * we didn't find what we wanted check and see if we have a normal ref + * right next to us, or re-search if we are on the edge of the leaf just + * to make sure. 
+ */ + if (ret > 0 && metadata) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + + if (ret) { + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + metadata = 0; + goto again; + } + } + + if (ret != 0) { + ret = -EIO; + goto out; + } + + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); + if (item_size >= sizeof(*item)) { + item = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_item); + num_refs = btrfs_extent_refs(l, item); + extent_flags = btrfs_extent_flags(l, item); + } else { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + struct btrfs_extent_item_v0 *ei0; + BUG_ON(item_size != sizeof(*ei0)); + ei0 = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_item_v0); + num_refs = btrfs_extent_refs_v0(l, ei0); + /* FIXME: this isn't correct for data */ + extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; +#else + BUG(); +#endif + } + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + if (refs) + *refs = num_refs; + if (flags) + *flags = extent_flags; +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_set_block_flags(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, int level, u64 flags) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + u32 item_size; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; + + key.objectid = bytenr; + if (skinny_metadata) { + key.offset = level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = root->leafsize; + key.type = BTRFS_EXTENT_ITEM_KEY; + } + +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, + 0, 0); + if (ret < 0) + goto out; + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.offset == root->leafsize && + key.type == BTRFS_EXTENT_ITEM_KEY) + ret = 0; + } + if (ret) { + btrfs_release_path(path); + key.offset = root->leafsize; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } + + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", + (unsigned long long)bytenr); + BUG(); + } + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*item)) { + ret = convert_extent_item_v0(trans, root->fs_info->extent_root, + path, (u64)-1, 0); + if (ret < 0) + goto out; + + l = path->nodes[0]; + item_size = btrfs_item_size_nr(l, path->slots[0]); + } +#endif + BUG_ON(item_size < sizeof(*item)); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + flags |= btrfs_extent_flags(l, item); + btrfs_set_extent_flags(l, item, flags); +out: + btrfs_free_path(path); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return ret; +} + +static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + int record_parent, int inc) +{ + u64 bytenr; + u64 num_bytes; + u64 parent; + u64 ref_root; + u32 nritems; + struct 
btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int level; + int ret = 0; + int (*process_func)(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64, u64, u64, u64, u64, u64); + + ref_root = btrfs_header_owner(buf); + nritems = btrfs_header_nritems(buf); + level = btrfs_header_level(buf); + + if (!root->ref_cows && level == 0) + return 0; + + if (inc) + process_func = btrfs_inc_extent_ref; + else + process_func = btrfs_free_extent; + + if (record_parent) + parent = buf->start; + else + parent = 0; + + for (i = 0; i < nritems; i++) { + cond_resched(); + if (level == 0) { + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) + continue; + + num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); + key.offset -= btrfs_file_extent_offset(buf, fi); + ret = process_func(trans, root, bytenr, num_bytes, + parent, ref_root, key.objectid, + key.offset); + if (ret) { + WARN_ON(1); + goto fail; + } + } else { + bytenr = btrfs_node_blockptr(buf, i); + num_bytes = btrfs_level_size(root, level - 1); + ret = process_func(trans, root, bytenr, num_bytes, + parent, ref_root, level - 1, 0); + if (ret) { + WARN_ON(1); + goto fail; + } + } + } + return 0; +fail: + WARN_ON(1); + return ret; +} + +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent) +{ + return __btrfs_mod_ref(trans, root, buf, record_parent, 1); +} + +int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int record_parent) +{ + return __btrfs_mod_ref(trans, root, buf, record_parent, 0); +} + +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + unsigned long bi; + struct extent_buffer *leaf; + + ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + if (ret < 0) + goto fail; + BUG_ON(ret); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); +fail: + finish_current_insert(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; + +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *cache; + int ret; + struct btrfs_path *path; + u64 last = 0; + u64 start; + u64 end; + u64 ptr; + + block_group_cache = &root->fs_info->block_group_cache; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, BLOCK_GROUP_DIRTY); + if (ret) { + if (last == 0) + break; + last = 0; + continue; + } + + last = end + 1; + ret = get_state_private(block_group_cache, start, &ptr); + BUG_ON(ret); + + clear_extent_bits(block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); + + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + ret = write_one_cache_group(trans, 
root, path, cache); + } + btrfs_free_path(path); + return 0; +} + +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) +{ + struct btrfs_space_info *found; + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + list_for_each_entry(found, &info->space_info, list) { + if (found->flags & flags) + return found; + } + return NULL; + +} + +static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + /* only support free block group which is empty */ + if (bytes_used) + return -ENOTEMPTY; + + found = __find_space_info(fs_info, flags); + if (!found) + return -ENOENT; + if (found->total_bytes < total_bytes) { + fprintf(stderr, + "WARNING: bad space info to free %llu only have %llu\n", + total_bytes, found->total_bytes); + return -EINVAL; + } + found->total_bytes -= total_bytes; + if (space_info) + *space_info = found; + return 0; +} + +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + if (found->total_bytes < found->bytes_used) { + fprintf(stderr, "warning, bad space info total_bytes " + "%llu used %llu\n", + (unsigned long long)found->total_bytes, + (unsigned long long)found->bytes_used); + } + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + + +static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_DUP); + if (extra_flags) { + if (flags & BTRFS_BLOCK_GROUP_DATA) + fs_info->avail_data_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + fs_info->avail_metadata_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + fs_info->avail_system_alloc_bits |= extra_flags; + } +} + +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags) +{ + struct btrfs_space_info *space_info; + u64 thresh; + u64 start; + u64 num_bytes; + int ret; + + space_info = __find_space_info(extent_root->fs_info, flags); + if (!space_info) { + ret = update_space_info(extent_root->fs_info, flags, + 0, 0, &space_info); + BUG_ON(ret); + } + BUG_ON(!space_info); + + if (space_info->full) + return 0; + + thresh = div_factor(space_info->total_bytes, 7); + if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + thresh) + return 0; + + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, + space_info->flags); + if (ret == -ENOSPC) { + space_info->full = 1; + return 0; + } + + BUG_ON(ret); + + ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); + BUG_ON(ret); + return 0; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int alloc, + int 
mark_free) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num_bytes; + u64 old_val; + u64 byte_in_group; + u64 start; + u64 end; + + /* block accounting for super block */ + old_val = btrfs_super_bytes_used(info->super_copy); + if (alloc) + old_val += num_bytes; + else + old_val -= num_bytes; + btrfs_set_super_bytes_used(info->super_copy, old_val); + + /* block accounting for root item */ + old_val = btrfs_root_used(&root->root_item); + if (alloc) + old_val += num_bytes; + else + old_val -= num_bytes; + btrfs_set_root_used(&root->root_item, old_val); + + while(total) { + cache = btrfs_lookup_block_group(info, bytenr); + if (!cache) { + return -1; + } + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + set_extent_bits(&info->block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); + + old_val = btrfs_block_group_used(&cache->item); + num_bytes = min(total, cache->key.offset - byte_in_group); + + if (alloc) { + old_val += num_bytes; + cache->space_info->bytes_used += num_bytes; + } else { + old_val -= num_bytes; + cache->space_info->bytes_used -= num_bytes; + if (mark_free) { + set_extent_dirty(&info->free_space_cache, + bytenr, bytenr + num_bytes - 1, + GFP_NOFS); + } + } + btrfs_set_block_group_used(&cache->item, old_val); + total -= num_bytes; + bytenr += num_bytes; + } + return 0; +} + +static int update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (pin) { + set_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } else { + clear_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) { + len = min((u64)root->sectorsize, num); + goto next; + } + WARN_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (pin) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + fs_info->total_pinned += len; + } else { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + fs_info->total_pinned -= len; + } +next: + bytenr += len; + num -= len; + } + return 0; +} + +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_io_tree *unpin) +{ + u64 start; + u64 end; + int ret; + struct extent_io_tree *free_space_cache; + free_space_cache = &root->fs_info->free_space_cache; + + while(1) { + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + update_pinned_extents(root, start, end + 1 - start, 0); + clear_extent_dirty(unpin, start, end, GFP_NOFS); + set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + } + return 0; +} + +static int extent_root_pending_ops(struct btrfs_fs_info *info) +{ + u64 start; + u64 end; + int ret; + + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (!ret) { + ret = find_first_extent_bit(&info->pending_del, 0, &start, &end, + EXTENT_LOCKED); + } + return ret == 0; + +} +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root) +{ + u64 start; + u64 end; + u64 priv; + struct btrfs_fs_info *info = extent_root->fs_info; + struct pending_extent_op *extent_op; + struct btrfs_key key; + int ret; + int 
skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + while(1) { + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (ret) + break; + + ret = get_state_private(&info->extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + if (extent_op->type == PENDING_EXTENT_INSERT) { + key.objectid = start; + if (skinny_metadata) { + key.offset = extent_op->level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = extent_op->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + } + ret = alloc_reserved_tree_block(trans, extent_root, + extent_root->root_key.objectid, + trans->transid, + extent_op->flags, + &extent_op->key, + extent_op->level, &key); + BUG_ON(ret); + } else { + BUG_ON(1); + } + + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, + GFP_NOFS); + kfree(extent_op); + } + return 0; +} + +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int is_data) +{ + int err = 0; + struct extent_buffer *buf; + + if (is_data) + goto pinit; + + buf = btrfs_find_tree_block(root, bytenr, num_bytes); + if (!buf) + goto pinit; + + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ + if (btrfs_buffer_uptodate(buf, 0)) { + u64 header_owner = btrfs_header_owner(buf); + u64 header_transid = btrfs_header_generation(buf); + if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_transid == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + clean_tree_block(NULL, root, buf); + free_extent_buffer(buf); + return 1; + } + } + free_extent_buffer(buf); +pinit: + update_pinned_extents(root, bytenr, num_bytes, 1); + + BUG_ON(err < 0); + return 0; +} + +void btrfs_pin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1); +} + +void btrfs_unpin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 0); +} + +/* + * remove an extent from the root, returns 0 on success + */ +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop) +{ + + struct btrfs_key key; + struct btrfs_path *path; + struct btrfs_extent_ops *ops = root->fs_info->extent_ops; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + int ret; + int is_data; + int extent_slot = 0; + int found_extent = 0; + int num_to_del = 1; + u32 item_size; + u64 refs; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + if (root->fs_info->free_extent_hook) { + root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes, + parent, root_objectid, owner_objectid, + owner_offset, refs_to_drop); + + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = 1; + + is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; + if (is_data) + skinny_metadata = 0; + BUG_ON(!is_data && refs_to_drop != 1); + + ret = lookup_extent_backref(trans, extent_root, path, &iref, + bytenr, num_bytes, parent, + 
root_objectid, owner_objectid, + owner_offset); + if (ret == 0) { + extent_slot = path->slots[0]; + while (extent_slot >= 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + extent_slot); + if (key.objectid != bytenr) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) { + found_extent = 1; + break; + } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.offset == owner_objectid) { + found_extent = 1; + break; + } + if (path->slots[0] - extent_slot > 5) + break; + extent_slot--; + } +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); + if (found_extent && item_size < sizeof(*ei)) + found_extent = 0; +#endif + if (!found_extent) { + BUG_ON(iref); + ret = remove_extent_backref(trans, extent_root, path, + NULL, refs_to_drop, + is_data); + BUG_ON(ret); + btrfs_release_path(path); + + key.objectid = bytenr; + + if (skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner_objectid; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + } + + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + if (ret > 0 && skinny_metadata && path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], + &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + btrfs_release_path(path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + } + + if (ret) { + printk(KERN_ERR "umm, got %d back from search" + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); + btrfs_print_leaf(extent_root, path->nodes[0]); + } + BUG_ON(ret); + extent_slot = path->slots[0]; + } + } else { + printk(KERN_ERR "btrfs unable to find ref byte nr %llu " + "parent %llu root %llu owner %llu offset %llu\n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root_objectid, + (unsigned long long)owner_objectid, + (unsigned long long)owner_offset); + ret = -EIO; + goto fail; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, extent_slot); +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (item_size < sizeof(*ei)) { + BUG_ON(found_extent || extent_slot != path->slots[0]); + ret = convert_extent_item_v0(trans, extent_root, path, + owner_objectid, 0); + BUG_ON(ret < 0); + + btrfs_release_path(path); + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + + ret = btrfs_search_slot(trans, extent_root, &key, path, + -1, 1); + if (ret) { + printk(KERN_ERR "umm, got %d back from search" + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); + btrfs_print_leaf(extent_root, path->nodes[0]); + } + BUG_ON(ret); + extent_slot = path->slots[0]; + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, extent_slot); + } +#endif + BUG_ON(item_size < sizeof(*ei)); + ei = btrfs_item_ptr(leaf, extent_slot, + struct btrfs_extent_item); + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && + key.type == BTRFS_EXTENT_ITEM_KEY) { + struct btrfs_tree_block_info *bi; + BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); + bi = (struct btrfs_tree_block_info *)(ei + 1); + WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); + } + + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs < refs_to_drop); + refs -= refs_to_drop; + + if (refs > 0) { + /* + * In the case of inline back ref, reference count will + 
* be updated by remove_extent_backref + */ + if (iref) { + BUG_ON(!found_extent); + } else { + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + } + if (found_extent) { + ret = remove_extent_backref(trans, extent_root, path, + iref, refs_to_drop, + is_data); + BUG_ON(ret); + } + } else { + int mark_free = 0; + int pin = 1; + + if (found_extent) { + BUG_ON(is_data && refs_to_drop != + extent_data_ref_count(root, path, iref)); + if (iref) { + BUG_ON(path->slots[0] != extent_slot); + } else { + BUG_ON(path->slots[0] != extent_slot + 1); + path->slots[0] = extent_slot; + num_to_del = 2; + } + } + + if (ops && ops->free_extent) { + ret = ops->free_extent(root, bytenr, num_bytes); + if (ret > 0) { + pin = 0; + mark_free = 0; + } + } + + if (pin) { + ret = pin_down_bytes(trans, root, bytenr, num_bytes, + is_data); + if (ret > 0) + mark_free = 1; + BUG_ON(ret < 0); + } + + ret = btrfs_del_items(trans, extent_root, path, path->slots[0], + num_to_del); + BUG_ON(ret); + btrfs_release_path(path); + + if (is_data) { + ret = btrfs_del_csums(trans, root, bytenr, num_bytes); + BUG_ON(ret); + } + + update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); + } +fail: + btrfs_free_path(path); + finish_current_insert(trans, extent_root); + return ret; +} + +/* + * find all the blocks marked as pending in the radix tree and remove + * them from the extent map + */ +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) +{ + int ret; + int err = 0; + u64 start; + u64 end; + u64 priv; + struct extent_io_tree *pending_del; + struct extent_io_tree *extent_ins; + struct pending_extent_op *extent_op; + + extent_ins = &extent_root->fs_info->extent_ins; + pending_del = &extent_root->fs_info->pending_del; + + while(1) { + ret = find_first_extent_bit(pending_del, 0, &start, &end, + EXTENT_LOCKED); + if (ret) + break; + + ret = get_state_private(pending_del, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + GFP_NOFS); + + if (!test_range_bit(extent_ins, start, end, + EXTENT_LOCKED, 0)) { + ret = __free_extent(trans, extent_root, + start, end + 1 - start, 0, + extent_root->root_key.objectid, + extent_op->level, 0, 1); + kfree(extent_op); + } else { + kfree(extent_op); + ret = get_state_private(extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + + clear_extent_bits(extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + if (extent_op->type == PENDING_BACKREF_UPDATE) + BUG_ON(1); + + kfree(extent_op); + } + if (ret) + err = ret; + } + return err; +} + +/* + * remove an extent from the root, returns 0 on success + */ + +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + int pending_ret; + int ret; + + WARN_ON(num_bytes < root->sectorsize); + if (root == extent_root) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_DELETE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->level = (int)owner; + + set_extent_bits(&root->fs_info->pending_del, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->pending_del, + bytenr, (unsigned 
long)extent_op);
+		return 0;
+	}
+	ret = __free_extent(trans, root, bytenr, num_bytes, parent,
+			    root_objectid, owner, offset, 1);
+	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
+	return ret ? ret : pending_ret;
+}
+
+static u64 stripe_align(struct btrfs_root *root, u64 val)
+{
+	u64 mask = ((u64)root->stripesize - 1);
+	u64 ret = (val + mask) & ~mask;
+	return ret;
+}
+
+/*
+ * walks the btree of allocated extents and finds a hole of a given size.
+ * The key ins is changed to record the hole:
+ * ins->objectid == block start
+ * ins->flags = BTRFS_EXTENT_ITEM_KEY
+ * ins->offset == number of blocks
+ * Any available blocks before search_start are skipped.
+ */
+static int noinline find_free_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *orig_root,
+				     u64 num_bytes, u64 empty_size,
+				     u64 search_start, u64 search_end,
+				     u64 hint_byte, struct btrfs_key *ins,
+				     u64 exclude_start, u64 exclude_nr,
+				     int data)
+{
+	int ret;
+	u64 orig_search_start = search_start;
+	struct btrfs_root * root = orig_root->fs_info->extent_root;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 total_needed = num_bytes;
+	struct btrfs_block_group_cache *block_group;
+	int full_scan = 0;
+	int wrapped = 0;
+
+	WARN_ON(num_bytes < root->sectorsize);
+	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
+
+	search_start = stripe_align(root, search_start);
+
+	if (hint_byte) {
+		block_group = btrfs_lookup_first_block_group(info, hint_byte);
+		if (!block_group)
+			hint_byte = search_start;
+		block_group = btrfs_find_block_group(root, block_group,
+						     hint_byte, data, 1);
+	} else {
+		block_group = btrfs_find_block_group(root,
+						     trans->block_group,
+						     search_start, data, 1);
+	}
+
+	total_needed += empty_size;
+
+check_failed:
+	search_start = stripe_align(root, search_start);
+	if (!block_group) {
+		block_group = btrfs_lookup_first_block_group(info,
+							     search_start);
+		if (!block_group)
+			block_group = btrfs_lookup_first_block_group(info,
+						       orig_search_start);
+	}
+	ret = find_search_start(root, &block_group, &search_start,
+				total_needed, data);
+	if (ret)
+		goto new_group;
+
+	ins->objectid = search_start;
+	ins->offset = num_bytes;
+
+	if (ins->objectid + num_bytes >
+	    block_group->key.objectid + block_group->key.offset) {
+		search_start = block_group->key.objectid +
+			block_group->key.offset;
+		goto new_group;
+	}
+
+	if (test_range_bit(&info->extent_ins, ins->objectid,
+			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
+		search_start = ins->objectid + num_bytes;
+		goto new_group;
+	}
+
+	if (test_range_bit(&info->pinned_extents, ins->objectid,
+			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
+		search_start = ins->objectid + num_bytes;
+		goto new_group;
+	}
+
+	if (info->excluded_extents &&
+	    test_range_bit(info->excluded_extents, ins->objectid,
+			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
+		search_start = ins->objectid + num_bytes;
+		goto new_group;
+	}
+
+	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
+	    ins->objectid < exclude_start + exclude_nr)) {
+		search_start = exclude_start + exclude_nr;
+		goto new_group;
+	}
+
+	if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
+		if (check_crossing_stripes(ins->objectid, num_bytes)) {
+			search_start = round_down(ins->objectid + num_bytes,
+						  BTRFS_STRIPE_LEN);
+			goto new_group;
+		}
+		block_group = btrfs_lookup_block_group(info, ins->objectid);
+		if (block_group)
+			trans->block_group = block_group;
+	}
+	ins->offset = num_bytes;
+	return 0;
+
+new_group:
+	block_group = btrfs_lookup_first_block_group(info, search_start);
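+	/*
+	 * No block group covers search_start: wrap back to the original
+	 * search start once; on the next failed pass give back the
+	 * empty_size padding and fall into a full scan, and only after
+	 * that return -ENOSPC.
+	 */
+	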
if (!block_group) { + search_start = orig_search_start; + if (full_scan) { + ret = -ENOSPC; + goto error; + } + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; + full_scan = 1; + } else + wrapped = 1; + } + cond_resched(); + block_group = btrfs_find_block_group(root, block_group, + search_start, data, 0); + goto check_failed; + +error: + return ret; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 empty_size, + u64 hint_byte, u64 search_end, + struct btrfs_key *ins, int data) +{ + int ret; + u64 search_start = 0; + u64 alloc_profile; + struct btrfs_fs_info *info = root->fs_info; + + if (info->extent_ops) { + struct btrfs_extent_ops *ops = info->extent_ops; + ret = ops->alloc_extent(root, num_bytes, hint_byte, ins, !data); + BUG_ON(ret); + goto found; + } + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if ((info->system_allocs > 0 || root == info->chunk_root) && + info->system_allocs >= 0) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + if (root->ref_cows) { + if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, + BTRFS_BLOCK_GROUP_METADATA); + BUG_ON(ret); + } + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes + 2 * 1024 * 1024, data); + BUG_ON(ret); + } + + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, + trans->alloc_exclude_start, + trans->alloc_exclude_nr, data); + BUG_ON(ret); +found: + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return ret; +} + +static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, struct btrfs_key *ins) +{ + int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_extent_item *extent_item; + struct btrfs_tree_block_info *block_info; + struct btrfs_extent_inline_ref *iref; + struct btrfs_path *path; + struct extent_buffer *leaf; + u32 size = sizeof(*extent_item) + sizeof(*iref); + int skinny_metadata = + btrfs_fs_incompat(fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + if (!skinny_metadata) + size += sizeof(*block_info); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, + ins, size); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + btrfs_set_extent_refs(leaf, extent_item, 1); + btrfs_set_extent_generation(leaf, extent_item, generation); + btrfs_set_extent_flags(leaf, extent_item, + flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); + + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + } else { + block_info = (struct btrfs_tree_block_info *)(extent_item + 1); + btrfs_set_tree_block_key(leaf, block_info, key); + btrfs_set_tree_block_level(leaf, block_info, level); + iref = (struct btrfs_extent_inline_ref *)(block_info + 1); + } + + btrfs_set_extent_inline_ref_type(leaf, iref, 
BTRFS_TREE_BLOCK_REF_KEY); + btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); + + btrfs_mark_buffer_dirty(leaf); + btrfs_free_path(path); + + ret = update_block_group(trans, root, ins->objectid, root->leafsize, + 1, 0); + return ret; +} + +static int alloc_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 num_bytes, + u64 root_objectid, u64 generation, + u64 flags, struct btrfs_disk_key *key, + int level, u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins) +{ + int ret; + ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size, + hint_byte, search_end, ins, 0); + BUG_ON(ret); + + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_INSERT; + extent_op->bytenr = ins->objectid; + extent_op->num_bytes = ins->offset; + extent_op->level = level; + extent_op->flags = flags; + memcpy(&extent_op->key, key, sizeof(*key)); + + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, + ins->objectid + ins->offset - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + ins->objectid, (unsigned long)extent_op); + } else { + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + ins->offset = level; + ins->type = BTRFS_METADATA_ITEM_KEY; + } + ret = alloc_reserved_tree_block(trans, root, root_objectid, + generation, flags, + key, level, ins); + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + } + return ret; +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, u64 root_objectid, + struct btrfs_disk_key *key, int level, + u64 hint, u64 empty_size) +{ + struct btrfs_key ins; + int ret; + struct extent_buffer *buf; + + ret = alloc_tree_block(trans, root, blocksize, root_objectid, + trans->transid, 0, key, level, + empty_size, hint, (u64)-1, &ins); + if (ret) { + BUG_ON(ret > 0); + return ERR_PTR(ret); + } + + buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid, + blocksize); + if (!buf) { + btrfs_free_extent(trans, root, ins.objectid, ins.offset, + 0, root->root_key.objectid, level, 0); + BUG_ON(1); + return ERR_PTR(-ENOMEM); + } + btrfs_set_buffer_uptodate(buf); + trans->blocks_used++; + + return buf; +} + +#if 0 + +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) +{ + u64 leaf_owner; + u64 leaf_generation; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int nritems; + int ret; + + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); + leaf_owner = btrfs_header_owner(leaf); + leaf_generation = btrfs_header_generation(leaf); + + for (i = 0; i < nritems; i++) { + u64 disk_bytenr; + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) + continue; + ret = btrfs_free_extent(trans, root, disk_bytenr, + 
btrfs_file_extent_disk_num_bytes(leaf, fi), + leaf->start, leaf_owner, leaf_generation, + key.objectid, 0); + BUG_ON(ret); + } + return 0; +} + +static void noinline reada_walk_down(struct btrfs_root *root, + struct extent_buffer *node, + int slot) +{ + u64 bytenr; + u64 last = 0; + u32 nritems; + u32 refs; + u32 blocksize; + int ret; + int i; + int level; + int skipped = 0; + + nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node); + if (level) + return; + + for (i = slot; i < nritems && skipped < 32; i++) { + bytenr = btrfs_node_blockptr(node, i); + if (last && ((bytenr > last && bytenr - last > 32 * 1024) || + (last > bytenr && last - bytenr > 32 * 1024))) { + skipped++; + continue; + } + blocksize = btrfs_level_size(root, level - 1); + if (i != slot) { + ret = btrfs_lookup_extent_ref(NULL, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + if (refs != 1) { + skipped++; + continue; + } + } + mutex_unlock(&root->fs_info->fs_mutex); + ret = readahead_tree_block(root, bytenr, blocksize, + btrfs_node_ptr_generation(node, i)); + last = bytenr + blocksize; + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + if (ret) + break; + } +} + +/* + * helper function for drop_snapshot, this walks down the tree dropping ref + * counts as it goes. + */ +static int noinline walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) +{ + u64 root_owner; + u64 root_gen; + u64 bytenr; + u64 ptr_gen; + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u32 blocksize; + int ret; + u32 refs; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + ret = btrfs_lookup_extent_ref(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); + BUG_ON(ret); + if (refs > 1) + goto out; + + /* + * walk down to the last node level and free all the leaves + */ + while(*level >= 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (btrfs_header_level(cur) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(cur)) + break; + if (*level == 0) { + ret = drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + break; + } + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, + &refs); + BUG_ON(ret); + if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + path->slots[*level]++; + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + parent->start, root_owner, + root_gen, *level - 1, 1); + BUG_ON(ret); + continue; + } + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { + free_extent_buffer(next); + reada_walk_down(root, cur, path->slots[*level]); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); + mutex_lock(&root->fs_info->fs_mutex); + if (!extent_buffer_uptodate(next)) { + if (IS_ERR(next)) + ret = PTR_ERR(next); + else + ret = -EIO; + break; + } + } + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + free_extent_buffer(path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(next); + path->slots[*level] = 0; + } +out: + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if 
(path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(parent); + } + + root_gen = btrfs_header_generation(parent); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, parent->start, + root_owner, root_gen, *level, 1); + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + BUG_ON(ret); + return 0; +} + +/* + * helper for dropping snapshots. This walks back up the tree in the path + * to find the first node higher up where we haven't yet gone through + * all the slots + */ +static int noinline walk_up_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) +{ + u64 root_owner; + u64 root_gen; + struct btrfs_root_item *root_item = &root->root_item; + int i; + int slot; + int ret; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; + path->slots[i]++; + *level = i; + WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); + memcpy(&root_item->drop_progress, + &disk_key, sizeof(disk_key)); + root_item->drop_level = i; + return 0; + } else { + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, + parent->start, root_owner, + root_gen, *level, 1); + BUG_ON(ret); + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +#endif + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + struct btrfs_space_info *sinfo; + struct btrfs_block_group_cache *cache; + u64 start; + u64 end; + u64 ptr; + int ret; + + while(1) { + ret = find_first_extent_bit(&info->block_group_cache, 0, + &start, &end, (unsigned int)-1); + if (ret) + break; + ret = get_state_private(&info->block_group_cache, start, &ptr); + if (!ret) { + cache = u64_to_ptr(ptr); + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + kfree(cache); + } + clear_extent_bits(&info->block_group_cache, start, + end, (unsigned int)-1, GFP_NOFS); + } + while(1) { + ret = find_first_extent_bit(&info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) + break; + clear_extent_dirty(&info->free_space_cache, start, + end, GFP_NOFS); + } + + while (!list_empty(&info->space_info)) { + sinfo = list_entry(info->space_info.next, + struct btrfs_space_info, list); + list_del_init(&sinfo->list); + kfree(sinfo); + } + return 0; +} + +static int find_first_block_group(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_key *key) +{ + int ret; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + slot = path->slots[0]; + leaf = path->nodes[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid >= 
key->objectid && + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) + return 0; + path->slots[0]++; + } + ret = -ENOENT; +error: + return ret; +} + +static void account_super_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&fs_info->mapping_tree, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + } + + kfree(logical); + } +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + int bit; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *space_info; + struct extent_io_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + key.objectid = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = find_first_block_group(root, path, &key); + if (ret > 0) { + ret = 0; + goto error; + } + if (ret != 0) { + goto error; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) { + ret = -ENOMEM; + goto error; + } + + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + cache->cached = 0; + cache->pinned = 0; + key.objectid = found_key.objectid + found_key.offset; + btrfs_release_path(path); + cache->flags = btrfs_block_group_flags(&cache->item); + bit = 0; + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + bit = BLOCK_GROUP_METADATA; + } + set_avail_alloc_bits(info, cache->flags); + if (btrfs_chunk_readonly(root, cache->key.objectid)) + cache->ro = 1; + + account_super_bytes(info, cache); + + ret = update_space_info(info, cache->flags, found_key.offset, + btrfs_block_group_used(&cache->item), + &space_info); + BUG_ON(ret); + cache->space_info = space_info; + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, found_key.objectid, + (unsigned long)cache); + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_objectid, u64 chunk_offset, u64 size) +{ + int ret; + int 
bit = 0; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + block_group_cache = &fs_info->block_group_cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + cache->key.objectid = chunk_offset; + cache->key.offset = size; + + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + btrfs_set_block_group_used(&cache->item, bytes_used); + btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + cache->flags = type; + btrfs_set_block_group_flags(&cache->item, type); + + account_super_bytes(fs_info, cache); + ret = update_space_info(fs_info, cache->flags, size, bytes_used, + &cache->space_info); + BUG_ON(ret); + + bit = block_group_state_bits(type); + ret = set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + BUG_ON(ret); + + ret = set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); + BUG_ON(ret); + set_avail_alloc_bits(fs_info, type); + + return cache; +}
+ +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_objectid, u64 chunk_offset, + u64 size) +{ + int ret; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + + cache = btrfs_add_block_group(root->fs_info, bytes_used, type, + chunk_objectid, chunk_offset, size); + extent_root = root->fs_info->extent_root; + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + ret = finish_current_insert(trans, extent_root); + BUG_ON(ret); + ret = del_pending_extents(trans, extent_root); + BUG_ON(ret); + + return 0; +}
+ +/* + * This is for converter use only. + * + * In that case, we don't know where the free blocks are located. + * Therefore all block group cache entries must be set up properly + * before doing any block allocation.
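+ *
+ * (Editorial note, not part of the btrfs-progs import: a minimal
+ * sketch of the expected call order, assuming the transaction helpers
+ * declared in transaction.h, would be
+ *
+ *	trans = btrfs_start_transaction(root, 1);
+ *	ret = btrfs_make_block_groups(trans, root);
+ *	... allocate tree blocks, record file extents ...
+ *	btrfs_commit_transaction(trans, root);
+ *
+ * so that every block group cache entry exists before the first
+ * allocation.)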
+ */ +int btrfs_make_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + u64 total_bytes; + u64 cur_start; + u64 group_type; + u64 group_size; + u64 group_align; + u64 total_data = 0; + u64 total_metadata = 0; + u64 chunk_objectid; + int ret; + int bit; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + extent_root = root->fs_info->extent_root; + block_group_cache = &root->fs_info->block_group_cache; + chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + group_align = 64 * root->sectorsize; + + cur_start = 0; + while (cur_start < total_bytes) { + group_size = total_bytes / 12; + group_size = min_t(u64, group_size, total_bytes - cur_start); + if (cur_start == 0) { + bit = BLOCK_GROUP_SYSTEM; + group_type = BTRFS_BLOCK_GROUP_SYSTEM; + group_size /= 4; + group_size &= ~(group_align - 1); + group_size = max_t(u64, group_size, 8 * 1024 * 1024); + group_size = min_t(u64, group_size, 32 * 1024 * 1024); + } else { + group_size &= ~(group_align - 1); + if (total_data >= total_metadata * 2) { + group_type = BTRFS_BLOCK_GROUP_METADATA; + group_size = min_t(u64, group_size, + 1ULL * 1024 * 1024 * 1024); + total_metadata += group_size; + } else { + group_type = BTRFS_BLOCK_GROUP_DATA; + group_size = min_t(u64, group_size, + 5ULL * 1024 * 1024 * 1024); + total_data += group_size; + } + if ((total_bytes - cur_start) * 4 < group_size * 5) + group_size = total_bytes - cur_start; + } + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + cache->key.objectid = cur_start; + cache->key.offset = group_size; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + btrfs_set_block_group_used(&cache->item, 0); + btrfs_set_block_group_chunk_objectid(&cache->item, + chunk_objectid); + btrfs_set_block_group_flags(&cache->item, group_type); + + cache->flags = group_type; + + ret = update_space_info(root->fs_info, group_type, group_size, + 0, &cache->space_info); + BUG_ON(ret); + set_avail_alloc_bits(extent_root->fs_info, group_type); + + set_extent_bits(block_group_cache, cur_start, + cur_start + group_size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, cur_start, + (unsigned long)cache); + cur_start += group_size; + } + /* then insert all the items */ + cur_start = 0; + while(cur_start < total_bytes) { + cache = btrfs_lookup_block_group(root->fs_info, cur_start); + BUG_ON(!cache); + + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + finish_current_insert(trans, extent_root); + ret = del_pending_extents(trans, extent_root); + BUG_ON(ret); + + cur_start = cache->key.objectid + cache->key.offset; + } + return 0; +} + +int btrfs_update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int alloc, + int mark_free) +{ + return update_block_group(trans, root, bytenr, num_bytes, + alloc, mark_free); +} + +/* + * Just remove a block group item in extent tree + * Caller should ensure the block group is empty and all space is pinned. + * Or new tree block/data may be allocated into it. 
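+ *
+ * (Editorial note: a hedged sketch of the intended calling pattern,
+ * mirroring btrfs_free_block_group() later in this file:
+ *
+ *	btrfs_pin_extent(fs_info, bytenr, len);
+ *	ret = free_block_group_item(trans, fs_info, bytenr, len);
+ *	if (ret < 0)
+ *		btrfs_unpin_extent(fs_info, bytenr, len);
+ *
+ * i.e. the range is pinned before the item is deleted and unpinned
+ * again on failure.)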
+ */ +static int free_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->extent_root; + int ret = 0; + + key.objectid = bytenr; + key.offset = len; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_dev_extent_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 devid, u64 dev_offset) +{ + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = devid; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = dev_offset; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} + +static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 chunk_offset) +{ + struct btrfs_chunk *chunk = NULL; + struct btrfs_root *root= fs_info->chunk_root; + struct btrfs_path *path; + struct btrfs_key key; + u16 num_stripes; + int i; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk); + for (i = 0; i < num_stripes; i++) { + ret = free_dev_extent_item(trans, fs_info, + btrfs_stripe_devid_nr(path->nodes[0], chunk, i), + btrfs_stripe_offset_nr(path->nodes[0], chunk, i)); + if (ret < 0) + goto out; + } +out: + btrfs_free_path(path); + return ret; +} + +static int free_system_chunk_item(struct btrfs_super_block *super, + struct btrfs_key *key) +{ + struct btrfs_disk_key *disk_key; + struct btrfs_key cpu_key; + u32 array_size = btrfs_super_sys_array_size(super); + char *ptr = (char *)super->sys_chunk_array; + int cur = 0; + int ret = -ENOENT; + + while (cur < btrfs_super_sys_array_size(super)) { + struct btrfs_chunk *chunk; + u32 num_stripes; + u32 chunk_len; + + disk_key = (struct btrfs_disk_key *)(ptr + cur); + btrfs_disk_key_to_cpu(&cpu_key, disk_key); + if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) { + /* just in case */ + ret = -EIO; + goto out; + } + + chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key)); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + chunk_len = btrfs_chunk_item_size(num_stripes) + + sizeof(*disk_key); + + if (key->objectid == cpu_key.objectid && + key->offset == cpu_key.offset && + key->type == cpu_key.type) { + memmove(ptr + cur, ptr + cur + chunk_len, + array_size - cur - chunk_len); + array_size -= chunk_len; + btrfs_set_super_sys_array_size(super, array_size); + ret = 0; + goto out; + } + + cur += chunk_len; + } +out: + return ret; +} + +static int free_chunk_item(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ 
+ struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_chunk *chunk; + u64 chunk_type; + int ret; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = bytenr; + key.type = BTRFS_CHUNK_ITEM_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(path->nodes[0], chunk); + + ret = btrfs_del_item(trans, root, path); + if (ret < 0) + goto out; + + if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + ret = free_system_chunk_item(fs_info->super_copy, &key); +out: + btrfs_free_path(path); + return ret; +}
+ +static u64 get_dev_extent_len(struct map_lookup *map) +{ + int div; + + switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* Single */ + case BTRFS_BLOCK_GROUP_DUP: + case BTRFS_BLOCK_GROUP_RAID1: + div = 1; + break; + case BTRFS_BLOCK_GROUP_RAID5: + div = (map->num_stripes - 1); + break; + case BTRFS_BLOCK_GROUP_RAID6: + div = (map->num_stripes - 2); + break; + case BTRFS_BLOCK_GROUP_RAID10: + div = (map->num_stripes / map->sub_stripes); + break; + default: + /* normally, the read chunk security hook should have handled it */ + BUG_ON(1); + } + return map->ce.size / div; +}
+ +/* free block group/chunk related caches */ +static int free_block_group_cache(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 len) +{ + struct btrfs_block_group_cache *cache; + struct cache_extent *ce; + struct map_lookup *map; + int ret; + int i; + u64 flags; + + /* Free block group cache first */ + cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) + return -ENOENT; + flags = cache->flags; + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len, + (unsigned int)-1, GFP_NOFS); + ret = free_space_info(fs_info, flags, len, 0, NULL); + if (ret < 0) + goto out; + kfree(cache); + + /* Then free mapping info and dev usage info */ + ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr); + if (!ce || ce->start != bytenr) { + ret = -ENOENT; + goto out; + } + map = container_of(ce, struct map_lookup, ce); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *device; + + device = map->stripes[i].dev; + device->bytes_used -= get_dev_extent_len(map); + ret = btrfs_update_device(trans, device); + if (ret < 0) + goto out; + } + remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce); + free(map); +out: + return ret; +}
+ +int btrfs_free_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 len) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_block_group_item *bgi; + struct btrfs_key key; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = len; + + /* Double check the block group to ensure it's empty */ + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + bgi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_block_group_item); + if (btrfs_disk_block_group_used(path->nodes[0], bgi)) { + fprintf(stderr, + "WARNING: block group [%llu,%llu) is not empty\n", + bytenr, bytenr + len); + ret = -EINVAL; + goto out; + } + btrfs_release_path(path); + + /* + * Now pin all space in the block group to prevent any further + * transaction from allocating space out of it. Every operation that + * allocates space must go through a transaction, so the pin covers + * them all. + */ + btrfs_pin_extent(fs_info, bytenr, len); + + /* delete block group item and chunk item */ + ret = free_block_group_item(trans, fs_info, bytenr, len); + if (ret < 0) { + fprintf(stderr, + "failed to free block group item for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + ret = free_chunk_dev_extent_items(trans, fs_info, bytenr); + if (ret < 0) { + fprintf(stderr, + "failed to free dev extents belonging to [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + ret = free_chunk_item(trans, fs_info, bytenr, len); + if (ret < 0) { + fprintf(stderr, + "failed to free chunk for [%llu,%llu)\n", + bytenr, bytenr + len); + btrfs_unpin_extent(fs_info, bytenr, len); + goto out; + } + + /* Now release the block_group_cache */ + ret = free_block_group_cache(trans, fs_info, bytenr, len); + btrfs_unpin_extent(fs_info, bytenr, len); + +out: + btrfs_free_path(path); + return ret; +}
+ +/* + * Fixup block accounting. The initial block accounting created by + * make_block_groups isn't accurate in this case. + */ +int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int slot; + u64 start = 0; + u64 bytes_used = 0; + struct btrfs_path path; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + root = root->fs_info->extent_root; + + while(extent_root_pending_ops(fs_info)) { + ret = finish_current_insert(trans, root); + if (ret) + return ret; + ret = del_pending_extents(trans, root); + if (ret) + return ret; + } + + while(1) { + cache = btrfs_lookup_first_block_group(fs_info, start); + if (!cache) + break; + start = cache->key.objectid + cache->key.offset; + btrfs_set_block_group_used(&cache->item, 0); + cache->space_info->bytes_used = 0; + set_extent_bits(&root->fs_info->block_group_cache, + cache->key.objectid, + cache->key.objectid + cache->key.offset -1, + BLOCK_GROUP_DIRTY, GFP_NOFS); + } + + btrfs_init_path(&path); + key.offset = 0; + key.objectid = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, &path, 0, 0); + if (ret < 0) + return ret; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + return ret; + if (ret > 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bytes_used += key.offset; + ret = btrfs_update_block_group(trans, root, + key.objectid, key.offset, 1, 0); + BUG_ON(ret); + } else if (key.type == BTRFS_METADATA_ITEM_KEY) { + bytes_used += root->leafsize; + ret = btrfs_update_block_group(trans, root, + key.objectid, root->leafsize, 1, 0); + BUG_ON(ret); + } + path.slots[0]++; + } + btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used); + btrfs_release_path(&path); + return 0; +}
+ +/* + * Record a file extent. Do all the required work, such as inserting the + * file extent item, inserting the extent and backref items into the + * extent tree, and updating block accounting. + */ +int btrfs_record_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode, + u64 file_pos, u64 disk_bytenr, + u64 num_bytes) +{ + int ret; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key ins_key; + struct btrfs_path path; + struct btrfs_extent_item *ei; + u64 nbytes; + + if (disk_bytenr == 0) { + ret = btrfs_insert_file_extent(trans, root, objectid, + file_pos, disk_bytenr, + num_bytes, num_bytes); + return ret; + } + + btrfs_init_path(&path); + + ins_key.objectid = objectid; + ins_key.offset = file_pos; + btrfs_set_key_type(&ins_key, BTRFS_EXTENT_DATA_KEY); + ret = btrfs_insert_empty_item(trans, root, &path, &ins_key, + sizeof(*fi)); + if (ret) + goto fail; + leaf = path.nodes[0]; + fi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_compression(leaf, fi, 0); + btrfs_set_file_extent_encryption(leaf, fi, 0); + btrfs_set_file_extent_other_encoding(leaf, fi, 0); + btrfs_mark_buffer_dirty(leaf); + + nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes; + btrfs_set_stack_inode_nbytes(inode, nbytes); + + btrfs_release_path(&path); + + ins_key.objectid = disk_bytenr; + ins_key.offset = num_bytes; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_insert_empty_item(trans, extent_root, &path, + &ins_key, sizeof(*ei)); + if (ret == 0) { + leaf = path.nodes[0]; + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, 0); + btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA); + + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_update_block_group(trans, root, disk_bytenr, + num_bytes, 1, 0); + if (ret) + goto fail; + } else if (ret != -EEXIST) { + goto fail; + } + btrfs_extent_post_op(trans, extent_root); + + ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0, + root->root_key.objectid, + objectid, file_pos); + if (ret) + goto fail; + ret = 0; +fail: + btrfs_release_path(&path); + return ret; +}
+ + +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) +{ + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE, GFP_NOFS); + return 0; +} + +void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; + + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->pinned_extents, + start, end, EXTENT_UPTODATE, GFP_NOFS); +} + +int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+ cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, cache->key.objectid, + stripe_len); + if (ret) + return ret; + } + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + u64 start, len; + + if (logical[nr] > cache->key.objectid + + cache->key.offset) + continue; + + if (logical[nr] + stripe_len <= cache->key.objectid) + continue; + + start = logical[nr]; + if (start < cache->key.objectid) { + start = cache->key.objectid; + len = (logical[nr] + stripe_len) - start; + } else { + len = min_t(u64, stripe_len, + cache->key.objectid + + cache->key.offset - start); + } + + cache->bytes_super += len; + ret = add_excluded_extent(root, start, len); + if (ret) { + kfree(logical); + return ret; + } + } + + kfree(logical); + } + return 0; +} + +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size, total_added = 0; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY | EXTENT_UPTODATE); + if (ret) + break; + + if (extent_start <= start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, + start, size); + BUG_ON(ret); /* -ENOMEM or logic error */ + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + total_added += size; + ret = btrfs_add_free_space(block_group->free_space_ctl, start, + size); + BUG_ON(ret); /* -ENOMEM or logic error */ + } + + return total_added; +} diff --git a/tools/libfsimage/btrfs/extent_io.c b/tools/libfsimage/btrfs/extent_io.c new file mode 100644 index 0000000..23532db --- /dev/null +++ b/tools/libfsimage/btrfs/extent_io.c @@ -0,0 +1,895 @@ + +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include "kerncompat.h" +#include "extent_io.h" +#include "list.h" +#include "ctree.h" +#include "volumes.h" +#include "internal.h" + +extern uint64_t partition_offset; + +void extent_io_tree_init(struct extent_io_tree *tree) +{ + cache_tree_init(&tree->state); + cache_tree_init(&tree->cache); + INIT_LIST_HEAD(&tree->lru); + tree->cache_size = 0; +}
+ +static struct extent_state *alloc_extent_state(void) +{ + struct extent_state *state; + + state = malloc(sizeof(*state)); + if (!state) + return NULL; + state->cache_node.objectid = 0; + state->refs = 1; + state->state = 0; + state->xprivate = 0; + return state; +} + +static void btrfs_free_extent_state(struct extent_state *state) +{ + state->refs--; + BUG_ON(state->refs < 0); + if (state->refs == 0) + free(state); +} + +static void free_extent_state_func(struct cache_extent *cache) +{ + struct extent_state *es; + + es = container_of(cache, struct extent_state, cache_node); + btrfs_free_extent_state(es); +}
+ +void extent_io_tree_cleanup(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + + while(!list_empty(&tree->lru)) { + eb = list_entry(tree->lru.next, struct extent_buffer, lru); + fprintf(stderr, "extent buffer leak: " + "start %llu len %u\n", + (unsigned long long)eb->start, eb->len); + free_extent_buffer(eb); + } + + cache_tree_free_extents(&tree->state, free_extent_state_func); +} + +static inline void update_extent_state(struct extent_state *state) +{ + state->cache_node.start = state->start; + state->cache_node.size = state->end + 1 - state->start; +}
+ +/* + * Utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IOBITS set in their state + * field are not merged. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct cache_extent *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = prev_cache_extent(&state->cache_node); + if (other_node) { + other = container_of(other_node, struct extent_state, + cache_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + update_extent_state(state); + remove_cache_extent(&tree->state, &other->cache_node); + btrfs_free_extent_state(other); + } + } + other_node = next_cache_extent(&state->cache_node); + if (other_node) { + other = container_of(other_node, struct extent_state, + cache_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + update_extent_state(other); + remove_cache_extent(&tree->state, &state->cache_node); + btrfs_free_extent_state(state); + } + } + return 0; +}
+ +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + int ret; + + BUG_ON(end < start); + state->state |= bits; + state->start = start; + state->end = end; + update_extent_state(state); + ret = insert_cache_extent(&tree->state, &state->cache_node); + BUG_ON(ret); + merge_state(tree, state); + return 0; +}
+ +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created first half.
'split' indicates an + * offset inside 'orig' where it should be split. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + int ret; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + update_extent_state(prealloc); + orig->start = split; + update_extent_state(orig); + ret = insert_cache_extent(&tree->state, &prealloc->cache_node); + BUG_ON(ret); + return 0; +} + +/* + * clear some bits on a range in the tree. + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits) +{ + int ret = state->state & bits; + + state->state &= ~bits; + if (state->state == 0) { + remove_cache_extent(&tree->state, &state->cache_node); + btrfs_free_extent_state(state); + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. + */ +int clear_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, int bits, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct cache_extent *node; + u64 last_end; + int err; + int set = 0; + +again: + if (!prealloc) { + prealloc = alloc_extent_state(); + if (!prealloc) + return -ENOMEM; + } + + /* + * this search will find the extents that end after + * our range starts + */ + node = search_cache_extent(&tree->state, start); + if (!node) + goto out; + state = container_of(node, struct extent_state, cache_node); + if (state->start > end) + goto out; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. + */ + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set |= clear_state_bit(tree, state, bits); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set |= clear_state_bit(tree, prealloc, bits); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + goto search_again; +out: + if (prealloc) + btrfs_free_extent_state(prealloc); + return set; + +search_again: + if (start > end) + goto out; + goto again; +} + +/* + * set some bits on a range in the tree. 
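+ *
+ * (Editorial illustration with made-up values: on an empty tree,
+ *
+ *	set_extent_bits(tree, 0, 4095, EXTENT_DIRTY, GFP_NOFS);
+ *	set_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, GFP_NOFS);
+ *
+ * ends with a single state covering [0, 8191], because merge_state()
+ * joins adjacent states with identical bits and EXTENT_DIRTY is not
+ * part of EXTENT_IOBITS.)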
+ */ +int set_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, int bits, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct cache_extent *node; + int err = 0; + u64 last_start; + u64 last_end; +again: + if (!prealloc) { + prealloc = alloc_extent_state(); + if (!prealloc) + return -ENOMEM; + } + + /* + * this search will find the extents that end after + * our range starts + */ + node = search_cache_extent(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + BUG_ON(err == -EEXIST); + prealloc = NULL; + goto out; + } + + state = container_of(node, struct extent_state, cache_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + state->state |= bits; + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. + */ + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | ---------- state ---------- | + * We need to split the extent, and set the bit + * on the first half + */ + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + state->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; +out: + if (prealloc) + btrfs_free_extent_state(prealloc); + return err; +search_again: + if (start > end) + goto out; + goto again; +} + +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask); +} + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask); +} + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 1; + + /* + * this search will find all the extents that end after + * our range starts. 
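+ * Put differently (editorial summary of the loop below): return 0 and
+ * fill *start_ret/*end_ret from the first state that ends at or after
+ * 'start' and has any bit from 'bits' set; return 1 if none exists.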
+ */ + node = search_cache_extent(&tree->state, start); + if (!node) + goto out; + + while(1) { + state = container_of(node, struct extent_state, cache_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = next_cache_extent(node); + if (!node) + break; + } +out: + return ret; +} + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct cache_extent *node; + int bitset = 0; + + node = search_cache_extent(&tree->state, start); + while (node && start <= end) { + state = container_of(node, struct extent_state, cache_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + if (state->start > end) + break; + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = next_cache_extent(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + return bitset; +} + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 0; + + node = search_cache_extent(&tree->state, start); + if (!node) { + ret = -ENOENT; + goto out; + } + state = container_of(node, struct extent_state, cache_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->xprivate = private; +out: + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct cache_extent *node; + struct extent_state *state; + int ret = 0; + + node = search_cache_extent(&tree->state, start); + if (!node) { + ret = -ENOENT; + goto out; + } + state = container_of(node, struct extent_state, cache_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->xprivate; +out: + return ret; +} + +static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb; + + eb = calloc(1, sizeof(struct extent_buffer) + blocksize); + if (!eb) { + BUG(); + return NULL; + } + + eb->start = bytenr; + eb->len = blocksize; + eb->refs = 1; + eb->flags = 0; + eb->tree = tree; + eb->fd = -1; + eb->dev_bytenr = (u64)-1; + eb->cache_node.start = bytenr; + eb->cache_node.size = blocksize; + INIT_LIST_HEAD(&eb->recow); + + return eb; +} + +struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) +{ + struct extent_buffer *new; + + new = __alloc_extent_buffer(NULL, src->start, src->len); + if (new == NULL) + return NULL; + + copy_extent_buffer(new, src, 0, 0, src->len); + new->flags |= EXTENT_BUFFER_DUMMY; + + return new; +} + +void free_extent_buffer(struct extent_buffer *eb) +{ + if (!eb || IS_ERR(eb)) + return; + + eb->refs--; + BUG_ON(eb->refs < 0); + if (eb->refs == 0) { + struct extent_io_tree *tree = eb->tree; + BUG_ON(eb->flags & EXTENT_DIRTY); + list_del_init(&eb->lru); + list_del_init(&eb->recow); + if (!(eb->flags & EXTENT_BUFFER_DUMMY)) { + BUG_ON(tree->cache_size < eb->len); + remove_cache_extent(&tree->cache, &eb->cache_node); + tree->cache_size -= eb->len; + } + free(eb); + } +} + +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb = NULL; + struct cache_extent *cache; + + cache = lookup_cache_extent(&tree->cache, bytenr, blocksize); + if (cache && cache->start == bytenr && + cache->size == 
blocksize) { + eb = container_of(cache, struct extent_buffer, cache_node); + list_move_tail(&eb->lru, &tree->lru); + eb->refs++; + } + return eb; +} + +struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, + u64 start) +{ + struct extent_buffer *eb = NULL; + struct cache_extent *cache; + + cache = search_cache_extent(&tree->cache, start); + if (cache) { + eb = container_of(cache, struct extent_buffer, cache_node); + list_move_tail(&eb->lru, &tree->lru); + eb->refs++; + } + return eb; +} + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *eb; + struct cache_extent *cache; + + cache = lookup_cache_extent(&tree->cache, bytenr, blocksize); + if (cache && cache->start == bytenr && + cache->size == blocksize) { + eb = container_of(cache, struct extent_buffer, cache_node); + list_move_tail(&eb->lru, &tree->lru); + eb->refs++; + } else { + int ret; + + if (cache) { + eb = container_of(cache, struct extent_buffer, + cache_node); + free_extent_buffer(eb); + } + eb = __alloc_extent_buffer(tree, bytenr, blocksize); + if (!eb) + return NULL; + ret = insert_cache_extent(&tree->cache, &eb->cache_node); + if (ret) { + free(eb); + return NULL; + } + list_add_tail(&eb->lru, &tree->lru); + tree->cache_size += blocksize; + } + return eb; +} + +int read_extent_from_disk(struct extent_buffer *eb, + unsigned long offset, unsigned long len) +{ + int ret; + ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr + partition_offset); + if (ret < 0) { + ret = -errno; + goto out; + } + if (ret != len) { + ret = -EIO; + goto out; + } + ret = 0; +out: + return ret; +} + +int write_extent_to_disk(struct extent_buffer *eb) +{ + int ret; + ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr); + if (ret < 0) + goto out; + if (ret != eb->len) { + ret = -EIO; + goto out; + } + ret = 0; +out: + return ret; +} + +int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror) +{ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + u64 bytes_left = bytes; + u64 read_len; + u64 total_read = 0; + int ret; + + while (bytes_left) { + read_len = bytes_left; + ret = btrfs_map_block(&info->mapping_tree, READ, offset, + &read_len, &multi, mirror, NULL); + if (ret) { + fprintf(stderr, "Couldn't map the block %Lu\n", + offset); + return -EIO; + } + device = multi->stripes[0].dev; + + read_len = min(bytes_left, read_len); + if (device->fd <= 0) { + kfree(multi); + return -EIO; + } + + ret = pread(device->fd, buf + total_read, read_len, + multi->stripes[0].physical + partition_offset); + kfree(multi); + if (ret < 0) { + fprintf(stderr, "Error reading %Lu, %d\n", offset, + ret); + return ret; + } + if (ret != read_len) { + fprintf(stderr, "Short read for %Lu, read %d, " + "read_len %Lu\n", offset, ret, read_len); + return -EIO; + } + + bytes_left -= read_len; + offset += read_len; + total_read += read_len; + } + + return 0; +} + +int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror) +{ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + u64 bytes_left = bytes; + u64 this_len; + u64 total_write = 0; + u64 *raid_map = NULL; + u64 dev_bytenr; + int dev_nr; + int ret = 0; + + while (bytes_left > 0) { + this_len = bytes_left; + dev_nr = 0; + + ret = btrfs_map_block(&info->mapping_tree, WRITE, offset, + &this_len, &multi, mirror, &raid_map); + if (ret) { + fprintf(stderr, "Couldn't map the block %Lu\n", + offset); + return 
-EIO; + } + + if (raid_map) { + struct extent_buffer *eb; + u64 stripe_len = this_len; + + this_len = min(this_len, bytes_left); + this_len = min(this_len, (u64)info->tree_root->leafsize); + + eb = malloc(sizeof(struct extent_buffer) + this_len); + BUG_ON(!eb); + + memset(eb, 0, sizeof(struct extent_buffer) + this_len); + eb->start = offset; + eb->len = this_len; + + memcpy(eb->data, buf + total_write, this_len); + ret = write_raid56_with_parity(info, eb, multi, + stripe_len, raid_map); + BUG_ON(ret); + + free(eb); + kfree(raid_map); + raid_map = NULL; + } else while (dev_nr < multi->num_stripes) { + device = multi->stripes[dev_nr].dev; + if (device->fd <= 0) { + kfree(multi); + return -EIO; + } + + dev_bytenr = multi->stripes[dev_nr].physical; + this_len = min(this_len, bytes_left); + dev_nr++; + + ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr); + if (ret != this_len) { + if (ret < 0) { + fprintf(stderr, "Error writing to " + "device %d\n", errno); + ret = errno; + kfree(multi); + return ret; + } else { + fprintf(stderr, "Short write\n"); + kfree(multi); + return -EIO; + } + } + } + + BUG_ON(bytes_left < this_len); + + bytes_left -= this_len; + offset += this_len; + total_write += this_len; + + kfree(multi); + multi = NULL; + } + return 0; +} + +int set_extent_buffer_dirty(struct extent_buffer *eb) +{ + struct extent_io_tree *tree = eb->tree; + if (!(eb->flags & EXTENT_DIRTY)) { + eb->flags |= EXTENT_DIRTY; + set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0); + extent_buffer_get(eb); + } + return 0; +} + +int clear_extent_buffer_dirty(struct extent_buffer *eb) +{ + struct extent_io_tree *tree = eb->tree; + if (eb->flags & EXTENT_DIRTY) { + eb->flags &= ~EXTENT_DIRTY; + clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0); + free_extent_buffer(eb); + } + return 0; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len) +{ + return memcmp(eb->data + start, ptrv, len); +} + +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, unsigned long len) +{ + memcpy(dst, eb->data + start, len); +} + +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len) +{ + memcpy(eb->data + start, src, len); +} + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + memcpy(dst->data + dst_offset, src->data + src_offset, len); +} + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + memmove(dst->data + dst_offset, dst->data + src_offset, len); +} + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + memset(eb->data + start, c, len); +} + +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long nr) +{ + return test_bit(nr, (unsigned long *)(eb->data + start)); +} diff --git a/tools/libfsimage/btrfs/extent_io.h b/tools/libfsimage/btrfs/extent_io.h new file mode 100644 index 0000000..a9a7353 --- /dev/null +++ b/tools/libfsimage/btrfs/extent_io.h @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_EXTENT_IO_H__ +#define __BTRFS_EXTENT_IO_H__ + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#include "extent-cache.h" +#include "list.h" +#else +#include <btrfs/kerncompat.h> +#include <btrfs/extent-cache.h> +#include <btrfs/list.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_BAD_TRANSID (1 << 10) +#define EXTENT_BUFFER_DUMMY (1 << 11) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_SYSTEM EXTENT_NEW + +#define BLOCK_GROUP_DIRTY EXTENT_DIRTY + +struct btrfs_fs_info; + +struct extent_io_tree { + struct cache_tree state; + struct cache_tree cache; + struct list_head lru; + u64 cache_size; +}; + +struct extent_state { + struct cache_extent cache_node; + u64 start; + u64 end; + int refs; + unsigned long state; + u64 xprivate; +}; + +struct extent_buffer { + struct cache_extent cache_node; + u64 start; + u64 dev_bytenr; + u32 len; + struct extent_io_tree *tree; + struct list_head lru; + struct list_head recow; + int refs; + int flags; + int fd; + char data[]; +}; + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + eb->refs++; +} + +void extent_io_tree_init(struct extent_io_tree *tree); +void extent_io_tree_cleanup(struct extent_io_tree *tree); +int set_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, int bits, gfp_t mask); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, int bits, gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); +static inline int set_extent_buffer_uptodate(struct extent_buffer *eb) +{ + eb->flags |= EXTENT_UPTODATE; + return 0; +} + +static inline int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + eb->flags &= ~EXTENT_UPTODATE; + return 0; +} + +static inline int extent_buffer_uptodate(struct extent_buffer *eb) +{ + if (!eb || IS_ERR(eb)) + return 0; + if (eb->flags & EXTENT_UPTODATE) + return 1; + return 0; +} + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate); +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize); +struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, + u64 start); +struct extent_buffer 
*alloc_extent_buffer(struct extent_io_tree *tree, + u64 bytenr, u32 blocksize); +struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_from_disk(struct extent_buffer *eb, + unsigned long offset, unsigned long len); +int write_extent_to_disk(struct extent_buffer *eb); +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long nr); +int set_extent_buffer_dirty(struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_buffer *eb); +int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror); +int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror); +#endif diff --git a/tools/libfsimage/btrfs/file-item.c b/tools/libfsimage/btrfs/file-item.c new file mode 100644 index 0000000..7a3bbf3 --- /dev/null +++ b/tools/libfsimage/btrfs/file-item.c @@ -0,0 +1,487 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "crc32c.h" +#include "internal.h" + +#define MAX_CSUM_ITEMS(r,size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) * 2) / \ + size) - 1)) +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 pos, u64 offset, + u64 disk_num_bytes, u64 num_bytes) +{ + int ret = 0; + struct btrfs_file_extent_item *item; + struct btrfs_key file_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + + path = btrfs_alloc_path(); + BUG_ON(!path); + file_key.objectid = objectid; + file_key.offset = pos; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + + ret = btrfs_insert_empty_item(trans, root, path, &file_key, + sizeof(*item)); + if (ret < 0) + goto out; + BUG_ON(ret); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_compression(leaf, item, 0); + btrfs_set_file_extent_encryption(leaf, item, 0); + btrfs_set_file_extent_other_encoding(leaf, item, 0); + btrfs_mark_buffer_dirty(leaf); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + u64 offset, char *buffer, size_t size) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + unsigned long ptr; + struct btrfs_file_extent_item *ei; + u32 datasize; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = objectid; + key.offset = offset; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + + datasize = btrfs_file_extent_calc_inline_size(size); + ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); + if (ret) { + err = ret; + goto fail; + } + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); + btrfs_set_file_extent_ram_bytes(leaf, ei, size); + btrfs_set_file_extent_compression(leaf, ei, 0); + btrfs_set_file_extent_encryption(leaf, ei, 0); + btrfs_set_file_extent_other_encoding(leaf, ei, 0); + + ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; + write_extent_buffer(leaf, buffer, ptr, size); + btrfs_mark_buffer_dirty(leaf); +fail: + btrfs_free_path(path); + return err; +} + +static struct btrfs_csum_item * +btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, int cow) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_key found_key; + struct btrfs_csum_item *item; + struct extent_buffer *leaf; + u64 csum_offset = 0; + u16 csum_size = + btrfs_super_csum_size(root->fs_info->super_copy); + int csums_in_item; + + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + file_key.offset = bytenr; + btrfs_set_key_type(&file_key, 
BTRFS_EXTENT_CSUM_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); + if (ret < 0) + goto fail; + leaf = path->nodes[0]; + if (ret > 0) { + ret = 1; + if (path->slots[0] == 0) + goto fail; + path->slots[0]--; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY) + goto fail; + + csum_offset = (bytenr - found_key.offset) / root->sectorsize; + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); + csums_in_item /= csum_size; + + if (csum_offset >= csums_in_item) { + ret = -EFBIG; + goto fail; + } + } + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); + item = (struct btrfs_csum_item *)((unsigned char *)item + + csum_offset * csum_size); + return item; +fail: + if (ret > 0) + ret = -ENOENT; + return ERR_PTR(ret); +} + +int btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 alloc_end, + u64 bytenr, char *data, size_t len) +{ + int ret = 0; + struct btrfs_key file_key; + struct btrfs_key found_key; + u64 next_offset = (u64)-1; + int found_next = 0; + struct btrfs_path *path; + struct btrfs_csum_item *item; + struct extent_buffer *leaf = NULL; + u64 csum_offset; + u32 csum_result = ~(u32)0; + u32 nritems; + u32 ins_size; + u16 csum_size = + btrfs_super_csum_size(root->fs_info->super_copy); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + file_key.offset = bytenr; + file_key.type = BTRFS_EXTENT_CSUM_KEY; + + item = btrfs_lookup_csum(trans, root, path, bytenr, 1); + if (!IS_ERR(item)) { + leaf = path->nodes[0]; + ret = 0; + goto found; + } + ret = PTR_ERR(item); + if (ret == -EFBIG) { + u32 item_size; + /* we found one, but it isn't big enough yet */ + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if ((item_size / csum_size) >= MAX_CSUM_ITEMS(root, csum_size)) { + /* already at max size, make a new one */ + goto insert; + } + } else { + int slot = path->slots[0] + 1; + /* we didn't find a csum item, insert one */ + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems - 1) { + ret = btrfs_next_leaf(root, path); + if (ret == 1) + found_next = 1; + if (ret != 0) + goto insert; + slot = 0; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); + if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + found_key.type != BTRFS_EXTENT_CSUM_KEY) { + found_next = 1; + goto insert; + } + next_offset = found_key.offset; + found_next = 1; + goto insert; + } + + /* + * at this point, we know the tree has an item, but it isn't big + * enough yet to put our csum in. 
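+	 * (As a rough, hypothetical sizing example: with 4K leaves and
+	 * 4-byte crc32c csums, MAX_CSUM_ITEMS() above comes to just under
+	 * a thousand csums, so a single item can describe close to 4MB of
+	 * data at a 4K sectorsize.)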
Grow it + */ + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &file_key, path, + csum_size, 1); + if (ret < 0) + goto fail; + if (ret == 0) { + BUG(); + } + if (path->slots[0] == 0) { + goto insert; + } + path->slots[0]--; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + csum_offset = (file_key.offset - found_key.offset) / root->sectorsize; + if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + found_key.type != BTRFS_EXTENT_CSUM_KEY || + csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { + goto insert; + } + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / + csum_size) { + u32 diff = (csum_offset + 1) * csum_size; + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); + if (diff != csum_size) + goto insert; + ret = btrfs_extend_item(trans, root, path, diff); + BUG_ON(ret); + goto csum; + } + +insert: + btrfs_release_path(path); + csum_offset = 0; + if (found_next) { + u64 tmp = min(alloc_end, next_offset); + tmp -= file_key.offset; + tmp /= root->sectorsize; + tmp = max((u64)1, tmp); + tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size)); + ins_size = csum_size * tmp; + } else { + ins_size = csum_size; + } + ret = btrfs_insert_empty_item(trans, root, path, &file_key, + ins_size); + if (ret < 0) + goto fail; + if (ret != 0) { + WARN_ON(1); + goto fail; + } +csum: + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); + ret = 0; + item = (struct btrfs_csum_item *)((unsigned char *)item + + csum_offset * csum_size); +found: + csum_result = btrfs_csum_data(root, data, csum_result, len); + btrfs_csum_final(csum_result, (char *)&csum_result); + if (csum_result == 0) { + printk("csum result is 0 for block %llu\n", + (unsigned long long)bytenr); + } + + write_extent_buffer(leaf, &csum_result, (unsigned long)item, + csum_size); + btrfs_mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_free_path(path); + return ret; +} + +/* + * helper function for csum removal, this expects the + * key to describe the csum pointed to by the path, and it expects + * the csum to overlap the range [bytenr, len] + * + * The csum should not be entirely contained in the range and the + * range should not be entirely contained in the csum. + * + * This calls btrfs_truncate_item with the correct args based on the + * overlap, and fixes up the key as required. 
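+ *
+ * (An illustrative example with hypothetical numbers: for an item at
+ * key.offset 0 covering 16 sectors, a call with bytenr at sector 12 and
+ * len of 4 sectors keeps the key and truncates the item to its first
+ * 12 csums, while a call with bytenr at sector 0 and len of 4 sectors
+ * drops the first 4 csums and moves key.offset forward to sector 4.)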
+ */ +static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 bytenr, u64 len) +{ + struct extent_buffer *leaf; + u16 csum_size = + btrfs_super_csum_size(root->fs_info->super_copy); + u64 csum_end; + u64 end_byte = bytenr + len; + u32 blocksize = root->sectorsize; + int ret; + + leaf = path->nodes[0]; + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end *= root->sectorsize; + csum_end += key->offset; + + if (key->offset < bytenr && csum_end <= end_byte) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * A simple truncate off the end of the item + */ + u32 new_size = (bytenr - key->offset) / blocksize; + new_size *= csum_size; + ret = btrfs_truncate_item(trans, root, path, new_size, 1); + BUG_ON(ret); + } else if (key->offset >= bytenr && csum_end > end_byte && + end_byte > key->offset) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * we need to truncate from the beginning of the csum + */ + u32 new_size = (csum_end - end_byte) / blocksize; + new_size *= csum_size; + + ret = btrfs_truncate_item(trans, root, path, new_size, 0); + BUG_ON(ret); + + key->offset = end_byte; + ret = btrfs_set_item_key_safe(root, path, key); + BUG_ON(ret); + } else { + BUG(); + } + return 0; +} + +/* + * deletes the csum items from the csum tree for a given + * range of bytes. + */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + u64 end_byte = bytenr + len; + u64 csum_end; + struct extent_buffer *leaf; + int ret; + u16 csum_size = + btrfs_super_csum_size(root->fs_info->super_copy); + int blocksize = root->sectorsize; + + root = root->fs_info->csum_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while (1) { + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = end_byte - 1; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) { + break; + } + + if (key.offset >= end_byte) + break; + + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end *= blocksize; + csum_end += key.offset; + + /* this csum ends before we start, we're done */ + if (csum_end <= bytenr) + break; + + /* delete the entire item, it is inside our range */ + if (key.offset >= bytenr && csum_end <= end_byte) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else if (key.offset < bytenr && csum_end > end_byte) { + unsigned long offset; + unsigned long shift_len; + unsigned long item_offset; + /* + * [ bytenr - len ] + * [csum ] + * + * Our bytes are in the middle of the csum, + * we need to split this item and insert a new one. + * + * But we can't drop the path because the + * csum could change, get removed, extended etc. + * + * The trick here is the max size of a csum item leaves + * enough room in the tree block for a single + * item header. So, we split the item in place, + * adding a new header pointing to the existing + * bytes. Then we loop around again and we have + * a nicely formed csum item that we can neatly + * truncate. 
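+			 *
+			 * (Illustrative sketch with hypothetical numbers:
+			 * for a csum item covering sectors 0-15 and a
+			 * deleted range covering sectors 4-7, the csum
+			 * bytes for sectors 4-7 are zeroed, the item is
+			 * split in front of sector 4, and the next pass
+			 * of the loop sees the piece starting at sector 4
+			 * as a simple head overlap for
+			 * truncate_one_csum().)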
+ */ + offset = (bytenr - key.offset) / blocksize; + offset *= csum_size; + + shift_len = (len / blocksize) * csum_size; + + item_offset = btrfs_item_ptr_offset(leaf, + path->slots[0]); + + memset_extent_buffer(leaf, 0, item_offset + offset, + shift_len); + key.offset = bytenr; + + /* + * btrfs_split_item returns -EAGAIN when the + * item changed size or key + */ + ret = btrfs_split_item(trans, root, path, &key, offset); + BUG_ON(ret && ret != -EAGAIN); + + key.offset = end_byte - 1; + } else { + ret = truncate_one_csum(trans, root, path, + &key, bytenr, len); + BUG_ON(ret); + } + btrfs_release_path(path); + } +out: + btrfs_free_path(path); + return 0; +} diff --git a/tools/libfsimage/btrfs/free-space-cache.c b/tools/libfsimage/btrfs/free-space-cache.c new file mode 100644 index 0000000..357d69e --- /dev/null +++ b/tools/libfsimage/btrfs/free-space-cache.c @@ -0,0 +1,879 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "kerncompat.h" +#include "ctree.h" +#include "free-space-cache.h" +#include "transaction.h" +#include "disk-io.h" +#include "extent_io.h" +#include "crc32c.h" +#include "bitops.h" +#include "internal.h" + +/* + * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have + * anything like that in userspace and have to get the value from the + * filesystem + */ +#define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8) +#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info); +static void merge_space_tree(struct btrfs_free_space_ctl *ctl); + +struct io_ctl { + void *cur, *orig; + void *buffer; + struct btrfs_root *root; + unsigned long size; + u64 total_size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino, + struct btrfs_root *root) +{ + memset(io_ctl, 0, sizeof(struct io_ctl)); + io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize; + io_ctl->buffer = kzalloc(size, GFP_NOFS); + if (!io_ctl->buffer) + return -ENOMEM; + io_ctl->total_size = size; + io_ctl->root = root; + if (ino != BTRFS_FREE_INO_OBJECTID) + io_ctl->check_crcs = 1; + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->buffer); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + BUG_ON(io_ctl->index >= io_ctl->num_pages); + io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * io_ctl->root->sectorsize); + io_ctl->orig = io_ctl->cur; + io_ctl->size = io_ctl->root->sectorsize; + if (clear) + memset(io_ctl->cur, 0, io_ctl->root->sectorsize); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + io_ctl_unmap_page(io_ctl); +} + +static int io_ctl_prepare_pages(struct 
io_ctl *io_ctl, struct btrfs_root *root, + struct btrfs_path *path, u64 ino) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, len; + u64 total_read = 0; + int ret = 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + fprintf(stderr, + "Couldn't find file extent item for free space inode" + " %Lu\n", ino); + btrfs_release_path(path); + return -EINVAL; + } + + while (total_read < io_ctl->total_size) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = -EINVAL; + break; + } + } + leaf = path->nodes[0]; + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino) { + ret = -EINVAL; + break; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) { + ret = -EINVAL; + break; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) { + fprintf(stderr, "Not the file extent type we wanted\n"); + ret = -EINVAL; + break; + } + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) + + btrfs_file_extent_offset(leaf, fi); + len = btrfs_file_extent_num_bytes(leaf, fi); + ret = read_data_from_disk(root->fs_info, + io_ctl->buffer + key.offset, bytenr, + len, 0); + if (ret) + break; + total_read += len; + path->slots[0]++; + } + + btrfs_release_path(path); + return ret; +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + __le64 *gen; + + /* + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk("btrfs: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); + return 0; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + tmp = io_ctl->buffer; + tmp += index; + val = *tmp; + + io_ctl_map_page(io_ctl, 0); + crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk("btrfs: csum mismatch on free space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + int ret; + + if (!io_ctl->cur) { + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + } + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + return 0; +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) 
+{ + int ret; + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize); + io_ctl_unmap_page(io_ctl); + + return 0; +} + + +static int __load_free_space_cache(struct btrfs_root *root, + struct btrfs_free_space_ctl *ctl, + struct btrfs_path *path, u64 offset) +{ + struct btrfs_free_space_header *header; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct io_ctl io_ctl; + struct btrfs_key key; + struct btrfs_key inode_location; + struct btrfs_disk_key disk_key; + struct btrfs_free_space *e, *n; + struct list_head bitmaps; + u64 num_entries; + u64 num_bitmaps; + u64 generation; + u64 inode_size; + u8 type; + int ret = 0; + + INIT_LIST_HEAD(&bitmaps); + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = offset; + key.type = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return 0; + } else if (ret > 0) { + btrfs_release_path(path); + return 0; + } + + leaf = path->nodes[0]; + header = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_header); + num_entries = btrfs_free_space_entries(leaf, header); + num_bitmaps = btrfs_free_space_bitmaps(leaf, header); + generation = btrfs_free_space_generation(leaf, header); + btrfs_free_space_key(leaf, header, &disk_key); + btrfs_disk_key_to_cpu(&inode_location, &disk_key); + btrfs_release_path(path); + + ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0); + if (ret) { + fprintf(stderr, "Couldn't find free space inode %d\n", ret); + return 0; + } + + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode_size = btrfs_inode_size(leaf, inode_item); + if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) { + btrfs_release_path(path); + return 0; + } + + if (btrfs_inode_generation(leaf, inode_item) != generation) { + fprintf(stderr, + "free space inode generation (%llu) did not match " + "free space cache generation (%llu)\n", + (unsigned long long)btrfs_inode_generation(leaf, + inode_item), + (unsigned long long)generation); + btrfs_release_path(path); + return 0; + } + + btrfs_release_path(path); + + if (!num_entries) + return 0; + + ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root); + if (ret) + return ret; + + ret = io_ctl_prepare_pages(&io_ctl, root, path, + inode_location.objectid); + if (ret) + goto out; + + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; + + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; + + while (num_entries) { + e = calloc(1, sizeof(*e)); + if (!e) + goto free_cache; + + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + free(e); + goto free_cache; + } + + if (!e->bytes) { + free(e); + goto free_cache; + } + + if (type == BTRFS_FREE_SPACE_EXTENT) { + ret = link_free_space(ctl, e); + if (ret) { + fprintf(stderr, + "Duplicate entries in free space cache\n"); + free(e); + goto free_cache; + } + } else { + BUG_ON(!num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS); + if (!e->bitmap) { + free(e); + goto free_cache; + } + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + if (ret) { + fprintf(stderr, + "Duplicate entries in free space cache\n"); + free(e->bitmap); + free(e); + goto free_cache; + } + list_add_tail(&e->list, &bitmaps); + } + + num_entries--; + } + + io_ctl_unmap_page(&io_ctl); + + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are 
added to the cache. + */ + list_for_each_entry_safe(e, n, &bitmaps, list) { + list_del_init(&e->list); + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; + } + + io_ctl_drop_pages(&io_ctl); + merge_space_tree(ctl); + ret = 1; +out: + io_ctl_free(&io_ctl); + return ret; +free_cache: + io_ctl_drop_pages(&io_ctl); + __btrfs_remove_free_space_cache(ctl); + goto out; +} + +int load_free_space_cache(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_path *path; + u64 used = btrfs_block_group_used(&block_group->item); + int ret = 0; + int matched; + + path = btrfs_alloc_path(); + if (!path) + return 0; + + ret = __load_free_space_cache(fs_info->tree_root, ctl, path, + block_group->key.objectid); + btrfs_free_path(path); + + matched = (ctl->free_space == (block_group->key.offset - used - + block_group->bytes_super)); + if (ret == 1 && !matched) { + __btrfs_remove_free_space_cache(ctl); + fprintf(stderr, + "block group %llu has wrong amount of free space\n", + block_group->key.objectid); + ret = -1; + } + + if (ret < 0) { + ret = 0; + + fprintf(stderr, + "failed to load free space cache for block group %llu\n", + block_group->key.objectid); + } + + return ret; +} + +static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, + u64 offset) +{ + BUG_ON(offset < bitmap_start); + offset -= bitmap_start; + return (unsigned long)(offset / unit); +} + +static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) +{ + return (unsigned long)(bytes / unit); +} + +static int tree_insert_offset(struct rb_root *root, u64 offset, + struct rb_node *node, int bitmap) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, offset_index); + + if (offset < info->offset) { + p = &(*p)->rb_left; + } else if (offset > info->offset) { + p = &(*p)->rb_right; + } else { + /* + * we could have a bitmap entry and an extent entry + * share the same offset. If this is the case, we want + * the extent entry to always be found first if we do a + * linear search through the tree, since we want to have + * the quickest allocation time, and allocating from an + * extent is faster than allocating from a bitmap. So + * if we're inserting a bitmap and we find an entry at + * this offset, we want to go right, or after this entry + * logically. If we are inserting an extent and we've + * found a bitmap, we want to go left, or before + * logically. + */ + if (bitmap) { + if (info->bitmap) + return -EEXIST; + p = &(*p)->rb_right; + } else { + if (!info->bitmap) + return -EEXIST; + p = &(*p)->rb_left; + } + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +/* + * searches the tree for the given offset. + * + * fuzzy - If this is set, then we are trying to make an allocation, and we just + * want a section that has at least bytes size and comes at or after the given + * offset. 
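+ *
+ * bitmap_only - If this is set, only a bitmap entry at exactly this
+ * offset is returned; an extent entry sharing the offset is skipped.
+ *
+ * (A hypothetical example: with an extent entry covering [0, 8K) and a
+ * bitmap entry starting at 8K, a non-fuzzy lookup of offset 4K returns
+ * the extent entry, while a fuzzy lookup past both walks forward to the
+ * first entry whose end lies beyond the requested offset, returning
+ * NULL if none does.)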
+ */ +static struct btrfs_free_space * +tree_search_offset(struct btrfs_free_space_ctl *ctl, + u64 offset, int bitmap_only, int fuzzy) +{ + struct rb_node *n = ctl->free_space_offset.rb_node; + struct btrfs_free_space *entry, *prev = NULL; + u32 sectorsize = ctl->sectorsize; + + /* find entry that is closest to the 'offset' */ + while (1) { + if (!n) { + entry = NULL; + break; + } + + entry = rb_entry(n, struct btrfs_free_space, offset_index); + prev = entry; + + if (offset < entry->offset) + n = n->rb_left; + else if (offset > entry->offset) + n = n->rb_right; + else + break; + } + + if (bitmap_only) { + if (!entry) + return NULL; + if (entry->bitmap) + return entry; + + /* + * bitmap entry and extent entry may share same offset, + * in that case, bitmap entry comes after extent entry. + */ + n = rb_next(n); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + if (entry->offset != offset) + return NULL; + + WARN_ON(!entry->bitmap); + return entry; + } else if (entry) { + if (entry->bitmap) { + /* + * if previous extent entry covers the offset, + * we should return it instead of the bitmap entry + */ + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + entry = prev; + } + } + return entry; + } + + if (!prev) + return NULL; + + /* find last entry before the 'offset' */ + entry = prev; + if (entry->offset > offset) { + n = rb_prev(&entry->offset_index); + if (n) { + entry = rb_entry(n, struct btrfs_free_space, + offset_index); + BUG_ON(entry->offset > offset); + } else { + if (fuzzy) + return entry; + else + return NULL; + } + } + + if (entry->bitmap) { + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + return prev; + } + if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset) + return entry; + } else if (entry->offset + entry->bytes > offset) + return entry; + + if (!fuzzy) + return NULL; + + while (1) { + if (entry->bitmap) { + if (entry->offset + BITS_PER_BITMAP(sectorsize) * + ctl->unit > offset) + break; + } else { + if (entry->offset + entry->bytes > offset) + break; + } + + n = rb_next(&entry->offset_index); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + } + return entry; +} + +void unlink_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + rb_erase(&info->offset_index, &ctl->free_space_offset); + ctl->free_extents--; + ctl->free_space -= info->bytes; +} + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + int ret = 0; + + BUG_ON(!info->bitmap && !info->bytes); + ret = tree_insert_offset(&ctl->free_space_offset, info->offset, + &info->offset_index, (info->bitmap != NULL)); + if (ret) + return ret; + + ctl->free_space += info->bytes; + ctl->free_extents++; + return ret; +} + +static int search_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *bitmap_info, u64 *offset, + u64 *bytes) +{ + unsigned long found_bits = 0; + unsigned long bits, i; + unsigned long next_zero; + u32 sectorsize = ctl->sectorsize; + + i = offset_to_bit(bitmap_info->offset, ctl->unit, + max_t(u64, *offset, bitmap_info->offset)); + bits = bytes_to_bits(*bytes, ctl->unit); + + for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) { + next_zero = 
find_next_zero_bit(bitmap_info->bitmap, + BITS_PER_BITMAP(sectorsize), i); + if ((next_zero - i) >= bits) { + found_bits = next_zero - i; + break; + } + i = next_zero; + } + + if (found_bits) { + *offset = (u64)(i * ctl->unit) + bitmap_info->offset; + *bytes = (u64)(found_bits) * ctl->unit; + return 0; + } + + return -1; +} + +struct btrfs_free_space * +btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes) +{ + return tree_search_offset(ctl, offset, 0, 0); +} + +static void try_merge_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct btrfs_free_space *left_info; + struct btrfs_free_space *right_info; + u64 offset = info->offset; + u64 bytes = info->bytes; + + /* + * first we want to see if there is free space adjacent to the range we + * are adding, if there is remove that struct and add a new one to + * cover the entire range + */ + right_info = tree_search_offset(ctl, offset + bytes, 0, 0); + if (right_info && rb_prev(&right_info->offset_index)) + left_info = rb_entry(rb_prev(&right_info->offset_index), + struct btrfs_free_space, offset_index); + else + left_info = tree_search_offset(ctl, offset - 1, 0, 0); + + if (right_info && !right_info->bitmap) { + unlink_free_space(ctl, right_info); + info->bytes += right_info->bytes; + free(right_info); + } + + if (left_info && !left_info->bitmap && + left_info->offset + left_info->bytes == offset) { + unlink_free_space(ctl, left_info); + info->offset = left_info->offset; + info->bytes += left_info->bytes; + free(left_info); + } +} + +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *info; + struct rb_node *n; + int count = 0; + + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes && !block_group->ro) + count++; + printk("entry offset %llu, bytes %llu, bitmap %s\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (info->bitmap) ? 
"yes" : "no"); + } + printk("%d blocks of free space at or bigger than bytes is \n", count); +} + +int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group, + int sectorsize) +{ + struct btrfs_free_space_ctl *ctl; + + ctl = calloc(1, sizeof(*ctl)); + if (!ctl) + return -ENOMEM; + + ctl->sectorsize = sectorsize; + ctl->unit = sectorsize; + ctl->start = block_group->key.objectid; + ctl->private = block_group; + block_group->free_space_ctl = ctl; + + return 0; +} + +void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + while ((node = rb_last(&ctl->free_space_offset)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + unlink_free_space(ctl, info); + free(info->bitmap); + free(info); + } +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + __btrfs_remove_free_space_cache(block_group->free_space_ctl); +} + +int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + info = calloc(1, sizeof(*info)); + if (!info) + return -ENOMEM; + + info->offset = offset; + info->bytes = bytes; + + try_merge_free_space(ctl, info); + + ret = link_free_space(ctl, info); + if (ret) { + printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); + BUG_ON(ret == -EEXIST); + } + + return ret; +} + +/* + * Merges all the free space cache and kills the bitmap entries since we just + * want to use the free space cache to verify it's correct, no reason to keep + * the bitmaps around to confuse things. + */ +static void merge_space_tree(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *e, *prev = NULL; + struct rb_node *n; + int ret; + u32 sectorsize = ctl->sectorsize; + +again: + prev = NULL; + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + e = rb_entry(n, struct btrfs_free_space, offset_index); + if (e->bitmap) { + u64 offset = e->offset, bytes = ctl->unit; + u64 end; + + end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit); + + unlink_free_space(ctl, e); + while (!(search_bitmap(ctl, e, &offset, &bytes))) { + ret = btrfs_add_free_space(ctl, offset, + bytes); + BUG_ON(ret); + offset += bytes; + if (offset >= end) + break; + bytes = ctl->unit; + } + free(e->bitmap); + free(e); + goto again; + } + if (!prev) + goto next; + if (prev->offset + prev->bytes == e->offset) { + unlink_free_space(ctl, prev); + unlink_free_space(ctl, e); + prev->bytes += e->bytes; + free(e); + link_free_space(ctl, prev); + goto again; + } +next: + prev = e; + } +} diff --git a/tools/libfsimage/btrfs/free-space-cache.h b/tools/libfsimage/btrfs/free-space-cache.h new file mode 100644 index 0000000..9214077 --- /dev/null +++ b/tools/libfsimage/btrfs/free-space-cache.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2009 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_CACHE_H__
+#define __BTRFS_FREE_SPACE_CACHE_H__
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "rbtree.h"
+
+struct btrfs_free_space {
+	struct rb_node offset_index;
+	u64 offset;
+	u64 bytes;
+	unsigned long *bitmap;
+	struct list_head list;
+};
+
+struct btrfs_free_space_ctl {
+	struct rb_root free_space_offset;
+	u64 free_space;
+	int extents_thresh;
+	int free_extents;
+	int total_bitmaps;
+	int unit;
+	u64 start;
+	void *private;
+	u32 sectorsize;
+};
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group);
+
+void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+				   *block_group);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes);
+struct btrfs_free_space *
+btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes);
+int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
+			      int sectorsize);
+void unlink_free_space(struct btrfs_free_space_ctl *ctl,
+		       struct btrfs_free_space *info);
+int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
+			 u64 bytes);
+#endif
diff --git a/tools/libfsimage/btrfs/fsys_btrfs.c b/tools/libfsimage/btrfs/fsys_btrfs.c
new file mode 100644
index 0000000..164e46e
--- /dev/null
+++ b/tools/libfsimage/btrfs/fsys_btrfs.c
@@ -0,0 +1,862 @@
+/*
+ * Copyright (C) 2016 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later. See the file COPYING for more details.
+ */
+
+#include <fsimage_plugin.h>
+
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#ifdef HAVE_LZO1X
+#include <lzo/lzoconf.h>
+#include <lzo/lzo1x.h>
+#endif
+#include <zlib.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "internal.h"
+#include "volumes.h"
+
+/* #define DDEBUG */
+
+#ifdef DDEBUG
+__attribute__ ((format (printf, 1, 2)))
+static inline void debug(const char *fmt, ...)
+{
+	va_list args;
+	int saved_errno = errno;
+
+	va_start(args, fmt);
+	vfprintf(stdout, fmt, args);
+	va_end(args);
+
+	errno = saved_errno;
+}
+#else
+__attribute__ ((format (printf, 1, 2)))
+static inline void debug(const char *fmt, ...)
+{
+}
+#endif
+
+struct btrfs_file {
+	struct btrfs_key key;
+	struct btrfs_root *root;
+	size_t size;
+	uint64_t filepos;
+};
+
+/*
+ * To avoid significant modifications to the imported btrfs files, use a
+ * global offset variable which is added to every pread() call so that a
+ * partition within a disk image can be used.
+ */
+uint64_t partition_offset;
+
+/*
+ * This code only supports opening a single disk image due to apparent
+ * limitations in the btrfs code when doing a sequence of open_ctree(),
+ * open_ctree(), close_ctree(), close_ctree().
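+ * Consequently, btrfs_mount() below accepts a repeated mount only for
+ * the same device/options/offset combination and fails anything else
+ * with ENOTSUP rather than reopening the tree.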
+ */
+static struct btrfs_fs_info *fs_info;
+static char *mount_dev;
+static char *mount_options;
+
+static int
+btrfs_mount(fsi_t *fsi, const char *dev, const char *options)
+{
+	u64 bytenr;
+	uint64_t offset;
+	int i;
+
+	offset = fsip_fs_offset(fsi);
+	if (!options)
+		options = "";
+
+	if (fs_info) {
+		if (strcmp(dev, mount_dev) || strcmp(options, mount_options) ||
+		    offset != partition_offset) {
+			errno = ENOTSUP;
+			return -1;
+		}
+		return 0;
+	}
+
+	partition_offset = offset;
+	debug("Mount %s, offset %lu\n", dev, partition_offset);
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		fs_info = open_ctree_fs_info(dev, bytenr, 0, 0, OPEN_CTREE_PARTIAL);
+		if (fs_info)
+			break;
+		debug("Could not open root, trying backup super\n");
+	}
+
+	if (!fs_info)
+		return -1;
+
+	mount_dev = strdup(dev);
+	/* options was defaulted to "" above, so no NULL check is needed. */
+	mount_options = strdup(options);
+
+	return 0;
+}
+
+static int
+btrfs_umount(fsi_t *fsi)
+{
+	return 0;
+}
+
+static int
+symlink_lookup(struct btrfs_root *root, struct btrfs_key key,
+	       char *symlink_target)
+{
+	struct btrfs_path path;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *extent_item;
+	u32 len;
+	u32 name_offset;
+	int ret;
+
+	btrfs_init_path(&path);
+
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret) {
+		/* Do not report success if the extent item was not found. */
+		ret = ret < 0 ? ret : -ENOENT;
+		goto out;
+	}
+
+	leaf = path.nodes[0];
+	if (!leaf) {
+		debug("Error getting leaf for symlink\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	extent_item = btrfs_item_ptr(leaf, path.slots[0],
+				     struct btrfs_file_extent_item);
+
+	len = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(path.slots[0]));
+	if (len >= PATH_MAX) {
+		debug("Symlink target length %d is longer than PATH_MAX\n", len);
+		ret = -ENAMETOOLONG;
+		goto out;
+	}
+
+	name_offset = (unsigned long) extent_item +
+		offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	read_extent_buffer(leaf, symlink_target, name_offset, len);
+
+	symlink_target[len] = 0;
+
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+static char *
+skip_slashes(char *s)
+{
+	while (*s && *s == '/')
+		s++;
+	return s;
+}
+
+/* Returns 0 on success, or -errno on failure.
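+ *
+ * Resolution sketch (with a hypothetical path): the walk starts at the
+ * root directory of the "default" subvolume and resolves one component
+ * at a time, e.g.
+ *
+ *   lookup_inode(file, "/boot/vmlinuz");
+ *     "boot"    -> dir item (followed into the subvolume if it is one)
+ *     "vmlinuz" -> inode item; file->size is taken from it
+ *
+ * Symlinks splice their target into the walk, capped at 40 levels.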
*/ +static int +lookup_inode(struct btrfs_file *file, char *search_name) +{ + struct btrfs_path path; + struct btrfs_key *key = &file->key; + struct btrfs_dir_item *di; + struct btrfs_inode_item *inode_item; + int ret, link = 0; + char *ptr; + char *name = skip_slashes(search_name); + u64 dir; + struct btrfs_root *root; + +again: + btrfs_init_path(&path); + + /* Lookup default subvolume */ + dir = btrfs_super_root_dir(file->root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, file->root, &path, dir, "default", 7, 0); + if (!di) { + btrfs_release_path(&path); + return -ENOENT; + } + + /* Find fs_root for default subvolume */ + btrfs_dir_item_key_to_cpu(path.nodes[0], di, key); + btrfs_release_path(&path); + key->type = BTRFS_ROOT_ITEM_KEY; + key->offset = (u64)-1; + root = btrfs_read_fs_root(file->root->fs_info, key); + if (IS_ERR(root)) { + debug("Error reading subvolume %lu\n", PTR_ERR(root)); + return PTR_ERR(root); + } + dir = btrfs_root_dirid(&root->root_item); + + for (;; ) { + ptr = strchr(name, '/'); + if (ptr) + *ptr = '\0'; + + debug("lookup component %s\n", name); + + di = btrfs_lookup_dir_item(NULL, root, &path, dir, name, strlen(name), 0); + if (IS_ERR(di)) + return PTR_ERR(di); + else if (!di) + return -ENOENT; + + btrfs_dir_item_key_to_cpu(path.nodes[0], di, key); + btrfs_release_path(&path); + + /* Follow subvolume if needed */ + if (key->type == BTRFS_ROOT_ITEM_KEY) { + key->offset = (u64)-1; + root = btrfs_read_fs_root(file->root->fs_info, key); + if (IS_ERR(root)) { + debug("Error reading subvolume %lu\n", PTR_ERR(root)); + return PTR_ERR(root); + } + dir = btrfs_root_dirid(&root->root_item); + goto next; + } + + ret = btrfs_lookup_inode(NULL, root, &path, key, 0); + if (ret < 0) + return ret; + + inode_item = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_inode_item); + file->size = btrfs_inode_size(path.nodes[0], inode_item); + + /* Follow symlink if needed */ + if ((btrfs_inode_mode(path.nodes[0], inode_item) & S_IFMT) == S_IFLNK) { + char symlink_buf[PATH_MAX]; + + btrfs_release_path(&path); + if (++link == 40) + return -ELOOP; + + ret = symlink_lookup(root, *key, symlink_buf); + if (ret < 0) + return ret; + + debug("symlink lookup %s\n", symlink_buf); + + /* Restart lookup if symlink is absolute */ + if (*symlink_buf == '/') { + strcpy(search_name, symlink_buf); + goto again; + } + + if (((name - search_name) + strlen(symlink_buf)) > (PATH_MAX - 1)) + return -ENAMETOOLONG; + strcpy(name, symlink_buf); + continue; + } + + dir = key->objectid; + btrfs_release_path(&path); + +next: + if (!ptr) + break; + + name = skip_slashes(ptr + 1); + } + + file->root = root; + return 0; +} + +/* Copied from btrfs-progs/cmds-restore.c */ +#define LZO_LEN 4 +#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) + +static int decompress_zlib(char *inbuf, char *outbuf, u64 compress_len, + u64 decompress_len) +{ + z_stream strm; + int ret; + + memset(&strm, 0, sizeof(strm)); + ret = inflateInit(&strm); + if (ret != Z_OK) { + debug("zlib init returned %d", ret); + return -EIO; + } + + strm.avail_in = compress_len; + strm.next_in = (unsigned char *)inbuf; + strm.avail_out = decompress_len; + strm.next_out = (unsigned char *)outbuf; + ret = inflate(&strm, Z_NO_FLUSH); + if (ret != Z_STREAM_END) { + (void)inflateEnd(&strm); + debug("zlib inflate failed: %d", ret); + return -EIO; + } + + (void)inflateEnd(&strm); + return 0; +} + +/* Copied from btrfs-progs/cmds-restore.c */ +static inline size_t read_compress_length(unsigned char *buf) +{ + __le32 dlen; + 
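/*
+	 * The first LZO_LEN bytes hold a little-endian length: the total
+	 * compressed stream length for the leading header, then one such
+	 * header per compressed segment (see decompress_lzo() below).
+	 */
+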
memcpy(&dlen, buf, LZO_LEN); + return le32_to_cpu(dlen); +} + +#ifdef HAVE_LZO1X +/* Copied from btrfs-progs/cmds-restore.c */ +static int decompress_lzo(struct btrfs_root *root, unsigned char *inbuf, + char *outbuf, u64 compress_len, u64 *decompress_len) +{ + size_t new_len; + size_t in_len; + size_t out_len = 0; + size_t tot_len; + size_t tot_in; + int ret; + + ret = lzo_init(); + if (ret != LZO_E_OK) { + debug("lzo init returned %d", ret); + return -EIO; + } + + tot_len = read_compress_length(inbuf); + inbuf += LZO_LEN; + tot_in = LZO_LEN; + + while (tot_in < tot_len) { + size_t mod_page; + size_t rem_page; + in_len = read_compress_length(inbuf); + + if ((tot_in + LZO_LEN + in_len) > tot_len) { + debug("bad compress length %lu", (unsigned long)in_len); + return -EIO; + } + + inbuf += LZO_LEN; + tot_in += LZO_LEN; + new_len = lzo1x_worst_compress(root->sectorsize); + ret = lzo1x_decompress_safe((const unsigned char *)inbuf, in_len, + (unsigned char *)outbuf, + (void *)&new_len, NULL); + if (ret != LZO_E_OK) { + debug("lzo decompress failed: %d", ret); + return -EIO; + } + out_len += new_len; + outbuf += new_len; + inbuf += in_len; + tot_in += in_len; + + /* + * If the 4 byte header does not fit to the rest of the page we + * have to move to the next one, unless we read some garbage + */ + mod_page = tot_in % root->sectorsize; + rem_page = root->sectorsize - mod_page; + if (rem_page < LZO_LEN) { + inbuf += rem_page; + tot_in += rem_page; + } + } + + *decompress_len = out_len; + + return 0; +} +#else +static int decompress_lzo(struct btrfs_root *root, unsigned char *inbuf, + char *outbuf, u64 compress_len, u64 *decompress_len) +{ + return -ENOTSUP; +} +#endif + +/* Copied from btrfs-progs/cmds-restore.c */ +static int decompress(struct btrfs_root *root, char *inbuf, char *outbuf, + u64 compress_len, u64 *decompress_len, int compress) +{ + switch (compress) { + case BTRFS_COMPRESS_ZLIB: + return decompress_zlib(inbuf, outbuf, compress_len, + *decompress_len); + case BTRFS_COMPRESS_LZO: + return decompress_lzo(root, (unsigned char *)inbuf, outbuf, + compress_len, decompress_len); + default: + break; + } + + debug("invalid compression type: %d", compress); + return -EIO; +} + +/* Copied from btrfs-progs/cmds-restore.c */ +static int next_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + int slot; + int level = 1; + int offset = 1; + struct extent_buffer *c; + struct extent_buffer *next = NULL; + +again: + for (; level < BTRFS_MAX_LEVEL; level++) { + if (path->nodes[level]) + break; + } + + if (level >= BTRFS_MAX_LEVEL) + return 1; + + slot = path->slots[level] + 1; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level] + offset; + c = path->nodes[level]; + if (slot >= btrfs_header_nritems(c)) { + level++; + if (level == BTRFS_MAX_LEVEL) + return 1; + offset = 1; + continue; + } + + if (path->reada) + reada_for_search(root, path, level, slot, 0); + + next = read_node_slot(root, c, slot); + if (extent_buffer_uptodate(next)) + break; + offset++; + } + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + free_extent_buffer(c); + path->nodes[level] = next; + path->slots[level] = 0; + if (!level) + break; + if (path->reada) + reada_for_search(root, path, level, 0, 0); + next = read_node_slot(root, next, 0); + if (!extent_buffer_uptodate(next)) + goto again; + } + return 0; +} + +/* + * Copies data from the input buffer to the output buffer where they overlap. 
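+ *
+ * (A hypothetical example: for an extent holding file bytes [0, 16K)
+ * (in_offset 0, in_len 16K) and a caller asking for 4K at file offset
+ * 8K (out_offset 8K, out_len 4K), delta is 8K and the 4K starting at
+ * in_buf + 8K is copied to the start of out_buf, returning 4K.)
+ *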
+ * in_buf: Input buffer (read from disk) + * in_len: Number of bytes in input buffer + * in_offset: Offset of input buffer in file + * out_buf: Ouput buffer (returned to the caller) + * out_len: Number of available bytes in output buffer + * out_offset: Offset of output buffer in file + * file_size: Length of file + */ +static u64 +fill_buffer(char *in_buf, u64 in_len, u64 in_offset, + char *out_buf, u64 out_len, u64 out_offset, + u64 file_size) +{ + u64 amount, delta; + + if (in_offset + in_len <= out_offset) + return 0; + if (in_offset >= (out_offset + out_len)) + return 0; + + if (in_len + in_offset > file_size) + in_len = file_size - in_offset; + if (in_offset >= out_offset) { + delta = in_offset - out_offset; + amount = min(in_len, out_len - delta); + memcpy(out_buf + delta, in_buf, amount); + } else { + delta = out_offset - in_offset; + amount = min(in_len - delta, out_len); + memcpy(out_buf, in_buf + delta, amount); + } + return amount; +} + +/* Adapted from btrfs-progs/cmds-restore.c */ +static ssize_t +read_inline(struct btrfs_root *root, struct btrfs_path *path, + u64 file_size, u64 in_offset, + char *out_buf, u64 out_len, u64 out_offset) +{ + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_file_extent_item *fi; + unsigned long ptr; + char in_buf[4096]; + int len; + int inline_item_len; + int compress; + char *comp_buf; + u64 ram_size; + ssize_t ret; + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + ptr = btrfs_file_extent_inline_start(fi); + len = btrfs_file_extent_inline_len(leaf, path->slots[0], fi); + inline_item_len = btrfs_file_extent_inline_item_len(leaf, + btrfs_item_nr(path->slots[0])); + read_extent_buffer(leaf, in_buf, ptr, inline_item_len); + + compress = btrfs_file_extent_compression(leaf, fi); + if (compress == BTRFS_COMPRESS_NONE) + return fill_buffer(in_buf, len, in_offset, + out_buf, out_len, out_offset, + file_size); + + ram_size = btrfs_file_extent_ram_bytes(leaf, fi); + comp_buf = calloc(1, ram_size); + if (!comp_buf) { + debug("not enough memory"); + return -ENOMEM; + } + + ret = decompress(root, in_buf, comp_buf, len, &ram_size, compress); + if (ret) { + free(comp_buf); + return ret; + } + + ret = fill_buffer(comp_buf, len, in_offset, + out_buf, out_len, out_offset, + file_size); + free(comp_buf); + + return ret; +} + +/* Adapted from btrfs-progs/cmds-restore.c */ +static ssize_t read_extent(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_file_extent_item *fi, + u64 file_size, u64 in_offset, + char *out_buf, u64 out_len, u64 out_offset) +{ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + char *in_buf, *comp_buf = NULL; + u64 bytenr; + u64 ram_size; + u64 disk_size; + u64 num_bytes; + u64 length; + u64 size_left; + u64 dev_bytenr; + u64 offset; + u64 count = 0; + int compress; + ssize_t ret; + int dev_fd; + + compress = btrfs_file_extent_compression(leaf, fi); + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + disk_size = btrfs_file_extent_disk_num_bytes(leaf, fi); + ram_size = btrfs_file_extent_ram_bytes(leaf, fi); + offset = btrfs_file_extent_offset(leaf, fi); + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + size_left = disk_size; + if (compress == BTRFS_COMPRESS_NONE) + bytenr += offset; + + /* we found a hole */ + if (disk_size == 0) + return 0; + + in_buf = malloc(size_left); + if (!in_buf) { + debug("not enough memory\n"); + return -ENOMEM; + } + + if (compress != BTRFS_COMPRESS_NONE) { + comp_buf = calloc(1, ram_size); + if (!comp_buf) { + debug("not 
enough memory"); + free(in_buf); + return -ENOMEM; + } + } + + while (size_left) { + length = size_left; + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &length, &multi, 1, NULL); + if (ret) { + debug("cannot map block logical %llu length %llu: %ld", + bytenr, length, ret); + goto out; + } + device = multi->stripes[0].dev; + dev_fd = device->fd; + dev_bytenr = multi->stripes[0].physical; + kfree(multi); + + if (size_left < length) + length = size_left; + + ret = pread(dev_fd, in_buf + count, length, + dev_bytenr + partition_offset); + /* Need both checks, or we miss negative values due to u64 conversion */ + if (ret < 0 || ret < length) { + debug("read error\n"); + goto out; + } + + size_left -= length; + count += length; + bytenr += length; + } + + if (compress == BTRFS_COMPRESS_NONE) { + ret = fill_buffer(in_buf, num_bytes, in_offset, + out_buf, out_len, out_offset, + file_size); + goto out; + } + + ret = decompress(root, in_buf, comp_buf, disk_size, &ram_size, compress); + if (ret) + goto out; + + ret = fill_buffer(comp_buf, num_bytes, in_offset, + out_buf, out_len, out_offset, + file_size); +out: + free(in_buf); + free(comp_buf); + return ret; +} + +/* + * Adapted from btrfs-progs/cmds-restore.c + * Returns the number of bytes written on success. + * On failure, returns -1 and sets errno. + * */ +static ssize_t +read_file(struct btrfs_file *file, char *buf, size_t len, uint64_t offset) +{ + struct extent_buffer *leaf; + struct btrfs_path path; + struct btrfs_file_extent_item *fi; + struct btrfs_root *root = file->root; + struct btrfs_key key = file->key; + struct btrfs_key found_key; + ssize_t written = 0; + int ret; + int extent_type; + int compression; + + debug("read %lu bytes from offset %lu\n", len, offset); + + /* Zero buffer in case of holes */ + memset(buf, 0, len); + + btrfs_init_path(&path); + + key.offset = 0; + key.type = BTRFS_EXTENT_DATA_KEY; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + debug("searching extent data returned %d", ret); + goto out; + } + + leaf = path.nodes[0]; + while (!leaf) { + ret = next_leaf(root, &path); + if (ret < 0) { + debug("cannot get next leaf: %d", ret); + goto out; + } else if (ret > 0) { + /* No more leaves to search */ + ret = 0; + goto out; + } + leaf = path.nodes[0]; + } + + for (;;) { + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + do { + ret = next_leaf(root, &path); + if (ret < 0) { + debug("Error searching %d\n", ret); + goto out; + } else if (ret) { + /* No more leaves to search */ + goto done; + } + leaf = path.nodes[0]; + } while (!leaf); + continue; + } + + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); + if (found_key.objectid != key.objectid) + break; + if (found_key.type != key.type) + break; + fi = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); + compression = btrfs_file_extent_compression(leaf, fi); + if (compression >= BTRFS_COMPRESS_LAST) { + debug("compression type %d not supported", compression); + ret = -EIO; + goto out; + } + + if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) + goto next; + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + ret = read_inline(root, &path, file->size, found_key.offset, + buf, len, offset); + if (ret < 0) + goto out; + written += ret; + } else if (extent_type == BTRFS_FILE_EXTENT_REG) { + ret = read_extent(root, leaf, fi, file->size, found_key.offset, + buf, len, offset); + if (ret < 0) + goto out; + written += ret; + } else { + debug("weird extent type 
%d", extent_type); + } +next: + path.slots[0]++; + } + +done: + return written; + +out: + errno = -ret; + return -1; +} + +fsi_file_t * +btrfs_open(fsi_t *fsi, const char *path) +{ + struct btrfs_file *f; + char s[PATH_MAX]; + int ret; + + debug("open %s\n", path); + + if (strlen(path) > (PATH_MAX - 1)) { + errno = EINVAL; + return NULL; + } + strcpy(s, path); + + f = calloc(1, sizeof (*f)); + if (!f) { + errno = ENOMEM; + return NULL; + } + + f->root = fs_info->tree_root; + ret = lookup_inode(f, s); + if (ret < 0) { + errno = -ret; + free(f); + return NULL; + } + + return fsip_file_alloc(fsi, f); +} + +ssize_t +btrfs_pread(fsi_file_t *file, void *buf, size_t nbytes, uint64_t off) +{ + struct btrfs_file *f = fsip_file_data(file); + + debug("pread: %lu %lu\n", nbytes, off); + + return read_file(f, buf, nbytes, off); +} + +ssize_t +btrfs_read(fsi_file_t *file, void *buf, size_t nbytes) +{ + struct btrfs_file *f = fsip_file_data(file); + ssize_t ret = btrfs_pread(file, buf, nbytes, f->filepos); + + if (ret > 0) + f->filepos += ret; + + return ret; +} + +int +btrfs_close(fsi_file_t *file) +{ + struct btrfs_file *f = fsip_file_data(file); + free(f); + return 0; +} + +fsi_plugin_ops_t * +fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name) +{ + static fsi_plugin_ops_t ops = { + FSIMAGE_PLUGIN_VERSION, + .fpo_mount = btrfs_mount, + .fpo_umount = btrfs_umount, + .fpo_open = btrfs_open, + .fpo_read = btrfs_read, + .fpo_pread = btrfs_pread, + .fpo_close = btrfs_close + }; + + *name = "btrfs"; + return &ops; +} diff --git a/tools/libfsimage/btrfs/hash.h b/tools/libfsimage/btrfs/hash.h new file mode 100644 index 0000000..ac4c411 --- /dev/null +++ b/tools/libfsimage/btrfs/hash.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_HASH_H__ +#define __BTRFS_HASH_H__ + +#include "crc32c.h" + +static inline u64 btrfs_name_hash(const char *name, int len) +{ + return crc32c((u32)~1, name, len); +} +#endif diff --git a/tools/libfsimage/btrfs/inode-item.c b/tools/libfsimage/btrfs/inode-item.c new file mode 100644 index 0000000..522d25a --- /dev/null +++ b/tools/libfsimage/btrfs/inode-item.c @@ -0,0 +1,460 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "crc32c.h" + +static int find_name_in_backref(struct btrfs_path *path, const char * name, + int name_len, struct btrfs_inode_ref **ref_ret) +{ + struct extent_buffer *leaf; + struct btrfs_inode_ref *ref; + unsigned long ptr; + unsigned long name_ptr; + u32 item_size; + u32 cur_offset = 0; + int len; + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + while (cur_offset < item_size) { + ref = (struct btrfs_inode_ref *)(ptr + cur_offset); + len = btrfs_inode_ref_name_len(leaf, ref); + name_ptr = (unsigned long)(ref + 1); + cur_offset += len + sizeof(*ref); + if (len != name_len) + continue; + if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) { + *ref_ret = ref; + return 1; + } + } + return 0; +} + +int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, u64 index) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_ref *ref; + unsigned long ptr; + int ret; + int ins_len = name_len + sizeof(*ref); + + key.objectid = inode_objectid; + key.offset = ref_objectid; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + ins_len); + if (ret == -EEXIST) { + u32 old_size; + + if (find_name_in_backref(path, name, name_len, &ref)) + goto out; + + old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); + ret = btrfs_extend_item(trans, root, path, ins_len); + BUG_ON(ret); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); + ptr = (unsigned long)(ref + 1); + ret = 0; + } else if (ret < 0) { + if (ret == -EOVERFLOW) + ret = -EMLINK; + goto out; + } else { + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); + ptr = (unsigned long)(ref + 1); + } + write_extent_buffer(path->nodes[0], name, ptr, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + +out: + btrfs_free_path(path); + + if (ret == -EMLINK) { + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)) + ret = btrfs_insert_inode_extref(trans, root, name, + name_len, + inode_objectid, + ref_objectid, index); + } + return ret; +} + +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod) +{ + int ins_len = mod < 0 ? 
-1 : 0; + int cow = mod != 0; + int ret; + int slot; + struct extent_buffer *leaf; + struct btrfs_key found_key; + + ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); + if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && + location->offset == (u64)-1 && path->slots[0] != 0) { + slot = path->slots[0] - 1; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid == location->objectid && + btrfs_key_type(&found_key) == btrfs_key_type(location)) { + path->slots[0]--; + return 0; + } + } + return ret; +} + +int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 objectid, struct btrfs_inode_item + *inode_item) +{ + int ret; + struct btrfs_key key; + + key.objectid = objectid; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_insert_item(trans, root, &key, inode_item, + sizeof(*inode_item)); + return ret; +} + +struct btrfs_inode_ref *btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + const char *name, int namelen, u64 ino, u64 parent_ino, + u64 index, int ins_len) +{ + struct btrfs_key key; + struct btrfs_inode_ref *ret_inode_ref = NULL; + int ret = 0; + + key.objectid = ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = parent_ino; + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, + ins_len ? 1 : 0); + if (ret) + goto out; + + find_name_in_backref(path, name, namelen, &ret_inode_ref); +out: + if (ret < 0) + return ERR_PTR(ret); + else + return ret_inode_ref; +} + +static inline u64 btrfs_extref_hash(u64 parent_ino, const char *name, + int namelen) +{ + return (u64)btrfs_crc32c(parent_ino, name, namelen); +} + +static int btrfs_find_name_in_ext_backref(struct btrfs_path *path, + u64 parent_ino, const char *name, int namelen, + struct btrfs_inode_extref **extref_ret) +{ + struct extent_buffer *node; + struct btrfs_inode_extref *extref; + unsigned long ptr; + unsigned long name_ptr; + u32 item_size; + u32 cur_offset = 0; + int ref_name_len; + int slot; + + node = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(node, slot); + ptr = btrfs_item_ptr_offset(node, slot); + + /* + * Search all extended backrefs in this item. We're only looking + * through any collisions so most of the time this is just going to + * compare against one buffer. If all is well, we'll return success and + * the inode ref object. + */ + while (cur_offset < item_size) { + extref = (struct btrfs_inode_extref *) (ptr + cur_offset); + name_ptr = (unsigned long)(&extref->name); + ref_name_len = btrfs_inode_extref_name_len(node, extref); + + if (ref_name_len == namelen && + btrfs_inode_extref_parent(node, extref) == parent_ino && + (memcmp_extent_buffer(node, name, name_ptr, namelen) == 0)) + { + if (extref_ret) + *extref_ret = extref; + return 1; + } + + cur_offset += ref_name_len + sizeof(*extref); + } + + return 0; +} + +struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_trans_handle + *trans, struct btrfs_path *path, struct btrfs_root *root, + u64 ino, u64 parent_ino, u64 index, const char *name, + int namelen, int ins_len) +{ + struct btrfs_key key; + struct btrfs_inode_extref *extref; + int ret = 0; + + key.objectid = ino; + key.type = BTRFS_INODE_EXTREF_KEY; + key.offset = btrfs_extref_hash(parent_ino, name, namelen); + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, + ins_len ? 
1 : 0); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return NULL; + if (!btrfs_find_name_in_ext_backref(path, parent_ino, name, + namelen, &extref)) + return NULL; + + return extref; +} + +int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, + u64 *index) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + int ret; + int del_len = name_len + sizeof(*extref); + unsigned long ptr; + unsigned long item_start; + u32 item_size; + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_EXTREF_KEY; + key.offset = btrfs_extref_hash(ref_objectid, name, name_len); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; + + /* + * Sanity check - did we find the right item for this name? This + * should always succeed so error here will make the FS readonly. + */ + if (!btrfs_find_name_in_ext_backref(path, ref_objectid, + name, name_len, &extref)) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if (index) + *index = btrfs_inode_extref_index(leaf, extref); + + if (del_len == item_size) { + /* + * Common case only one ref in the item, remove the whole item. + */ + ret = btrfs_del_item(trans, root, path); + goto out; + } + + ptr = (unsigned long)extref; + item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); + + memmove_extent_buffer(leaf, ptr, ptr + del_len, + item_size - (ptr + del_len - item_start)); + + btrfs_truncate_item(trans, root, path, item_size - del_len, 1); + +out: + btrfs_free_path(path); + + return ret; +} + +/* + * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree. + * + * The caller must have checked against BTRFS_LINK_MAX already. 
+ */ +int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, u64 index) +{ + struct btrfs_inode_extref *extref; + int ret; + int ins_len = name_len + sizeof(*extref); + unsigned long ptr; + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_item *item; + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_EXTREF_KEY; + key.offset = btrfs_extref_hash(ref_objectid, name, name_len); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + ins_len); + if (ret == -EEXIST) { + if (btrfs_find_name_in_ext_backref(path, ref_objectid, + name, name_len, NULL)) + goto out; + + btrfs_extend_item(trans, root, path, ins_len); + ret = 0; + } + + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + item = btrfs_item_nr(path->slots[0]); + ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); + ptr += btrfs_item_size(leaf, item) - ins_len; + extref = (struct btrfs_inode_extref *)ptr; + + btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); + btrfs_set_inode_extref_index(path->nodes[0], extref, index); + btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); + + ptr = (unsigned long)&extref->name; + write_extent_buffer(path->nodes[0], name, ptr, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + +out: + btrfs_free_path(path); + + return ret; +} + +int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, int name_len, + u64 ino, u64 parent_ino, u64 *index) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_ref *ref; + struct extent_buffer *leaf; + unsigned long ptr; + unsigned long item_start; + u32 item_size; + u32 sub_item_len; + int ret; + int search_ext_refs = 0; + int del_len = name_len + sizeof(*ref); + + key.objectid = ino; + key.offset = parent_ino; + key.type = BTRFS_INODE_REF_KEY; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + search_ext_refs = 1; + goto out; + } else if (ret < 0) { + goto out; + } + if (!find_name_in_backref(path, name, name_len, &ref)) { + ret = -ENOENT; + search_ext_refs = 1; + goto out; + } + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + if (index) + *index = btrfs_inode_ref_index(leaf, ref); + + if (del_len == item_size) { + ret = btrfs_del_item(trans, root, path); + goto out; + } + ptr = (unsigned long)ref; + sub_item_len = name_len + sizeof(*ref); + item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, + item_size - (ptr + sub_item_len - item_start)); + btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_free_path(path); + + if (search_ext_refs && + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)) { + /* + * No refs were found, or we could not find the name in our ref + * array. Find and remove the extended inode ref then. 
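+		 * Only filesystems with the EXTENDED_IREF incompat bit can
+		 * carry such an item at all, hence the btrfs_fs_incompat()
+		 * guard on this branch; without the feature the earlier
+		 * -ENOENT is returned unchanged.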
+ */ + return btrfs_del_inode_extref(trans, root, name, name_len, + ino, parent_ino, index); + } + + return ret; +} diff --git a/tools/libfsimage/btrfs/inode.c b/tools/libfsimage/btrfs/inode.c new file mode 100644 index 0000000..be03a52 --- /dev/null +++ b/tools/libfsimage/btrfs/inode.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2014 Fujitsu. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +/* + * Unlike inode.c in kernel, which can use most of the kernel infrastructure + * like inode/dentry things, in user-land, we can only use inode number to + * do directly operation on extent buffer, which may cause extra searching, + * but should not be a huge problem since progs is less performence sensitive. + */ +#include <sys/stat.h> + +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "time.h" + +/* + * Find a free inode index for later btrfs_add_link(). + * Currently just search from the largest dir_index and +1. + */ +static int btrfs_find_free_dir_index(struct btrfs_root *root, u64 dir_ino, + u64 *ret_ino) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key found_key; + u64 ret_val = 2; + int ret = 0; + + if (!ret_ino) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = dir_ino; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + ret = 0; + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + /* + * This shouldn't happen since there must be a leaf + * containing the DIR_ITEM. + * Can only happen when the previous leaf is corrupted. + */ + ret = -EIO; + goto out; + } + } else { + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); + if (found_key.objectid != dir_ino || + found_key.type != BTRFS_DIR_INDEX_KEY) + goto out; + ret_val = found_key.offset + 1; +out: + btrfs_free_path(path); + if (ret == 0) + *ret_ino = ret_val; + return ret; +} + +/* Check the dir_item/index conflicts before insert */ +int check_dir_conflict(struct btrfs_root *root, char *name, int namelen, + u64 dir, u64 index) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_item *inode_item; + struct btrfs_dir_item *dir_item; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* Given dir exists? */ + key.objectid = dir; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + /* Is it a dir? */ + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + if (!(btrfs_inode_mode(path->nodes[0], inode_item) & S_IFDIR)) { + ret = -ENOTDIR; + goto out; + } + btrfs_release_path(path); + + /* Name conflicting? 
*/ + dir_item = btrfs_lookup_dir_item(NULL, root, path, dir, name, + namelen, 0); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (dir_item) { + ret = -EEXIST; + goto out; + } + btrfs_release_path(path); + + /* Index conflicting? */ + dir_item = btrfs_lookup_dir_index(NULL, root, path, dir, name, + namelen, index, 0); + if (IS_ERR(dir_item) && PTR_ERR(dir_item) == -ENOENT) + dir_item = NULL; + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (dir_item) { + ret = -EEXIST; + goto out; + } + +out: + btrfs_free_path(path); + return ret; +} + +/* + * Add dir_item/index for 'parent_ino' if add_backref is true, also insert a + * backref from the ino to parent dir and update the nlink(Kernel version does + * not do this thing) + * + * Currently only supports adding link from an inode to another inode. + */ +int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u64 parent_ino, char *name, int namelen, + u8 type, u64 *index, int add_backref) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_item *inode_item; + u32 nlink; + u64 inode_size; + u64 ret_index = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + if (index && *index) { + ret_index = *index; + } else { + ret = btrfs_find_free_dir_index(root, parent_ino, &ret_index); + if (ret < 0) + goto out; + } + + ret = check_dir_conflict(root, name, namelen, parent_ino, ret_index); + if (ret < 0) + goto out; + + /* Add inode ref */ + if (add_backref) { + ret = btrfs_insert_inode_ref(trans, root, name, namelen, + ino, parent_ino, ret_index); + if (ret < 0) + goto out; + + /* Update nlinks for the inode */ + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nlink = btrfs_inode_nlink(path->nodes[0], inode_item); + nlink++; + btrfs_set_inode_nlink(path->nodes[0], inode_item, nlink); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + } + + /* Add dir_item and dir_index */ + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_insert_dir_item(trans, root, name, namelen, parent_ino, + &key, type, ret_index); + if (ret < 0) + goto out; + + /* Update inode size of the parent inode */ + key.objectid = parent_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + if (ret) + goto out; + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + inode_size = btrfs_inode_size(path->nodes[0], inode_item); + inode_size += namelen * 2; + btrfs_set_inode_size(path->nodes[0], inode_item, inode_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + +out: + btrfs_free_path(path); + if (ret == 0 && index) + *index = ret_index; + return ret; +} + +int btrfs_add_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 ino) +{ + struct btrfs_key key; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = ino; + + return btrfs_insert_empty_item(trans, root, path, &key, 0); +} + +/* + * Unlink an inode, which will remove its backref and corresponding dir_index/ + * dir_item if any of them exists. 
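+ *
+ * The ref, dir_item and dir_index are looked up independently before
+ * anything is deleted, so a name that is only partially present (for
+ * instance, an interrupted operation left just one of the three
+ * behind) can still be cleaned up; only if none of them exists does
+ * the function fail with -ENOENT.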
+ * + * If an inode's nlink is reduced to 0 and 'add_orphan' is true, it will be + * added to orphan inode and wairing to be deleted by next kernel mount. + */ +int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u64 parent_ino, u64 index, const char *name, + int namelen, int add_orphan) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_item *inode_item; + struct btrfs_inode_ref *inode_ref; + struct btrfs_dir_item *dir_item; + u64 inode_size; + u32 nlinks; + int del_inode_ref = 0; + int del_dir_item = 0; + int del_dir_index = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* check the ref and backref exists */ + inode_ref = btrfs_lookup_inode_ref(trans, root, path, name, namelen, + ino, parent_ino, index, 0); + if (IS_ERR(inode_ref)) { + ret = PTR_ERR(inode_ref); + goto out; + } + if (inode_ref) + del_inode_ref = 1; + btrfs_release_path(path); + + dir_item = btrfs_lookup_dir_item(NULL, root, path, parent_ino, + name, namelen, 0); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (dir_item) + del_dir_item = 1; + btrfs_release_path(path); + + dir_item = btrfs_lookup_dir_index(NULL, root, path, parent_ino, + name, namelen, index, 0); + /* + * Since lookup_dir_index() will return -ENOENT when not found, + * we need to do extra check. + */ + if (IS_ERR(dir_item) && PTR_ERR(dir_item) == -ENOENT) + dir_item = NULL; + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (dir_item) + del_dir_index = 1; + btrfs_release_path(path); + + if (!del_inode_ref && !del_dir_item && !del_dir_index) { + /* All not found, shouldn't happen */ + ret = -ENOENT; + goto out; + } + + if (del_inode_ref) { + /* Only decrease nlink when deleting inode_ref */ + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto out; + } + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nlinks = btrfs_inode_nlink(path->nodes[0], inode_item); + if (nlinks > 0) + nlinks--; + btrfs_set_inode_nlink(path->nodes[0], inode_item, nlinks); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + + /* For nlinks == 0, add it to orphan list if needed */ + if (nlinks == 0 && add_orphan) { + ret = btrfs_add_orphan_item(trans, root, path, ino); + if (ret < 0) + goto out; + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + } + + ret = btrfs_del_inode_ref(trans, root, name, namelen, ino, + parent_ino, &index); + if (ret < 0) + goto out; + } + + if (del_dir_index) { + dir_item = btrfs_lookup_dir_index(trans, root, path, + parent_ino, name, namelen, + index, -1); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (!dir_item) { + ret = -ENOENT; + goto out; + } + ret = btrfs_delete_one_dir_name(trans, root, path, dir_item); + if (ret) + goto out; + btrfs_release_path(path); + + /* Update inode size of the parent inode */ + key.objectid = parent_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + if (ret) + goto out; + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + inode_size = btrfs_inode_size(path->nodes[0], inode_item); + if (inode_size >= namelen) + inode_size -= namelen; + btrfs_set_inode_size(path->nodes[0], inode_item, inode_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + 
btrfs_release_path(path); + } + + if (del_dir_item) { + dir_item = btrfs_lookup_dir_item(trans, root, path, parent_ino, + name, namelen, -1); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + if (!dir_item) { + ret = -ENOENT; + goto out; + } + ret = btrfs_delete_one_dir_name(trans, root, path, dir_item); + if (ret < 0) + goto out; + btrfs_release_path(path); + + /* Update inode size of the parent inode */ + key.objectid = parent_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + if (ret) + goto out; + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + inode_size = btrfs_inode_size(path->nodes[0], inode_item); + if (inode_size >= namelen) + inode_size -= namelen; + btrfs_set_inode_size(path->nodes[0], inode_item, inode_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + } + +out: + btrfs_free_path(path); + return ret; +} + +/* Fill inode item with 'mode'. Uid/gid to root/root */ +static void fill_inode_item(struct btrfs_trans_handle *trans, + struct btrfs_inode_item *inode_item, + u32 mode, u32 nlink) +{ + time_t now = time(NULL); + + btrfs_set_stack_inode_generation(inode_item, trans->transid); + btrfs_set_stack_inode_uid(inode_item, 0); + btrfs_set_stack_inode_gid(inode_item, 0); + btrfs_set_stack_inode_size(inode_item, 0); + btrfs_set_stack_inode_mode(inode_item, mode); + btrfs_set_stack_inode_nlink(inode_item, nlink); + btrfs_set_stack_timespec_sec(&inode_item->atime, now); + btrfs_set_stack_timespec_nsec(&inode_item->atime, 0); + btrfs_set_stack_timespec_sec(&inode_item->mtime, now); + btrfs_set_stack_timespec_nsec(&inode_item->mtime, 0); + btrfs_set_stack_timespec_sec(&inode_item->ctime, now); + btrfs_set_stack_timespec_nsec(&inode_item->ctime, 0); +} + +/* + * Unlike kernel btrfs_new_inode(), we only create the INODE_ITEM, without + * its backref. + * The backref is added by btrfs_add_link(). + */ +int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 ino, u32 mode) +{ + struct btrfs_inode_item inode_item = {0}; + int ret = 0; + + fill_inode_item(trans, &inode_item, mode, 0); + ret = btrfs_insert_inode(trans, root, ino, &inode_item); + return ret; +} + +/* + * Make a dir under the parent inode 'parent_ino' with 'name' + * and 'mode', The owner will be root/root. + */ +int btrfs_mkdir(struct btrfs_trans_handle *trans, struct btrfs_root *root, + char *name, int namelen, u64 parent_ino, u64 *ino, int mode) +{ + struct btrfs_dir_item *dir_item; + struct btrfs_path *path; + u64 ret_ino = 0; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + if (ino && *ino) + ret_ino = *ino; + + dir_item = btrfs_lookup_dir_item(NULL, root, path, parent_ino, + name, namelen, 0); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + + if (dir_item) { + struct btrfs_key found_key; + + /* + * Already have conflicting name, check if it is a dir. + * Either way, no need to continue. + */ + btrfs_dir_item_key_to_cpu(path->nodes[0], dir_item, &found_key); + ret_ino = found_key.objectid; + if (btrfs_dir_type(path->nodes[0], dir_item) != BTRFS_FT_DIR) + ret = -EEXIST; + goto out; + } + + if (!ret_ino) + /* + * This is *UNSAFE* if some leaf is corrupted, + * only used as a fallback method. Caller should either + * ensure the fs is OK or pass ino with unused inode number. 
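+		 * A caller that already knows a safe number passes it in
+		 * via *ino and this search is skipped, e.g. (values are
+		 * purely illustrative):
+		 *
+		 *   u64 ino = 257;   // any objectid known to be unused
+		 *   btrfs_mkdir(trans, root, "dir", 3, parent_ino, &ino,
+		 *               0755);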
+ */ + ret = btrfs_find_free_objectid(NULL, root, parent_ino, + &ret_ino); + if (ret) + goto out; + ret = btrfs_new_inode(trans, root, ret_ino, mode | S_IFDIR); + if (ret) + goto out; + ret = btrfs_add_link(trans, root, ret_ino, parent_ino, name, namelen, + BTRFS_FT_DIR, NULL, 1); + if (ret) + goto out; +out: + btrfs_free_path(path); + if (ret == 0 && ino) + *ino = ret_ino; + return ret; +} diff --git a/tools/libfsimage/btrfs/internal.h b/tools/libfsimage/btrfs/internal.h new file mode 100644 index 0000000..d5ea998 --- /dev/null +++ b/tools/libfsimage/btrfs/internal.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __INTERNAL_H__ +#define __INTERNAL_H__ + +/* + * max/min macro + */ +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define min_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) +#define max_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) + +#endif diff --git a/tools/libfsimage/btrfs/ioctl.h b/tools/libfsimage/btrfs/ioctl.h new file mode 100644 index 0000000..7c80807 --- /dev/null +++ b/tools/libfsimage/btrfs/ioctl.h @@ -0,0 +1,729 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __BTRFS_IOCTL_H__ +#define __BTRFS_IOCTL_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <asm/types.h> +#include <linux/ioctl.h> + +#ifndef __user +#define __user +#endif + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) +#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3) + +#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \ + (BTRFS_SUBVOL_CREATE_ASYNC | \ + BTRFS_SUBVOL_RDONLY | \ + BTRFS_SUBVOL_QGROUP_INHERIT | \ + BTRFS_DEVICE_SPEC_BY_ID) + +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_referenced; + __u64 max_exclusive; + __u64 rsv_referenced; + __u64 rsv_exclusive; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +#define BTRFS_SUBVOL_NAME_MAX 4039 +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; + union { + char name[BTRFS_SUBVOL_NAME_MAX + 1]; + __u64 devid; + }; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. 
*/ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT -1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS 3 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u32 nodesize; /* out */ + __u32 sectorsize; /* out */ + __u32 clone_alignment; /* out */ + __u32 reserved32; + __u64 reserved[122]; /* pad to 1k */ +}; + +struct btrfs_ioctl_feature_flags { + __u64 compat_flags; + __u64 compat_ro_flags; + __u64 incompat_flags; +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 +#define BTRFS_BALANCE_CTL_RESUME 3 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + + /* + * usage filter + * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' + * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max + */ + union { + __u64 usage; + struct { + __u32 usage_min; + __u32 usage_max; + }; + }; + + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' + * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum + * and maximum + */ + union { + __u64 limit; /* limit 
number of processed chunks */ + struct { + __u32 limit_min; + __u32 limit_max; + }; + }; + __u32 stripes_min; + __u32 stripes_max; + __u64 unused[6]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +struct btrfs_ioctl_search_key { + /* which root are we searching. 0 is the tree of tree roots */ + __u64 tree_id; + + /* keys returned will be >= min and <= max */ + __u64 min_objectid; + __u64 max_objectid; + + /* keys returned will be >= min and <= max */ + __u64 min_offset; + __u64 max_offset; + + /* max and min transids to search for */ + __u64 min_transid; + __u64 max_transid; + + /* keys returned will be >= min and <= max */ + __u32 min_type; + __u32 max_type; + + /* + * how many items did userland ask for, and how many are we + * returning + */ + __u32 nr_items; + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +} __attribute__((may_alias)); + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +/* + * Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes. + * The allocated size of the buffer is set in buf_size. + */ +struct btrfs_ioctl_search_args_v2 { + struct btrfs_ioctl_search_key key; /* in/out - search parameters */ + __u64 buf_size; /* in - size of buffer + * out - on EOVERFLOW: needed size + * to store item */ + __u64 buf[0]; /* out - found items */ +}; + +/* With a @src_length of zero, the range from @src_offset->EOF is cloned! 
*/ +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 + +#define BTRFS_SAME_DATA_DIFFERS 1 +/* For extent-same ioctl */ +struct btrfs_ioctl_same_extent_info { + __s64 fd; /* in - destination file */ + __u64 logical_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file */ + /* status of this dedupe operation: + * 0 if dedup succeeds + * < 0 for error + * == BTRFS_SAME_DATA_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; /* in - start of extent in source */ + __u64 length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; + __u32 reserved2; + struct btrfs_ioctl_same_extent_info info[0]; +}; + +struct btrfs_ioctl_defrag_range_args { + /* start of the defrag operation */ + __u64 start; + + /* number of bytes to defrag, use (u64)-1 to say all */ + __u64 len; + + /* + * flags for the operation, which can include turning + * on compression for this one defrag + */ + __u64 flags; + + /* + * any extent bigger than this will be considered + * already defragged. Use 0 to take the kernel default + * Use 1 to say every single extent must be rewritten + */ + __u32 extent_thresh; + + /* + * which compression method to use if turning on compression + * for this defrag operation. If unspecified, zlib will + * be used + */ + __u32 compress_type; + + /* spare for later */ + __u32 unused[4]; +}; + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 
values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ +}; + +/* BTRFS_IOC_SNAP_CREATE is no longer used by the btrfs command */ +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +/* 3 has formerly been reserved for BTRFS_QUOTA_CTL_RESCAN */ +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_quota_rescan_args { + __u64 flags; + __u64 progress; + __u64 reserved[6]; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +/* + * Caller doesn't want file data in the send stream, even if the + * search of clone sources doesn't find an extent. UPDATE_EXTENT + * commands will be sent instead of WRITE commands. + */ +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 + +/* + * Do not add the leading stream header. Used when multiple snapshots + * are sent back to back. + */ +#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER 0x2 + +/* + * Omit the command at the end of the stream that indicated the end + * of the stream. This option is used when multiple snapshots are + * sent back to back. + */ +#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4 + +#define BTRFS_SEND_FLAG_MASK \ + (BTRFS_SEND_FLAG_NO_FILE_DATA | \ + BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ + BTRFS_SEND_FLAG_OMIT_END_CMD) + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +/* Error codes as returned by the kernel */ +enum btrfs_err_code { + notused, + BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, + BTRFS_ERROR_DEV_TGT_REPLACE, + BTRFS_ERROR_DEV_MISSING_NOT_FOUND, + BTRFS_ERROR_DEV_ONLY_WRITABLE, + BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS +}; + +/* An error code to error string mapping for the kernel +* error codes +*/ +static inline char *btrfs_err_str(enum btrfs_err_code err_code) +{ + switch (err_code) { + case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET: + return "unable to go below two devices on raid1"; + case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET: + return "unable to go below four devices on raid10"; + case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET: + return "unable to go below two devices on raid5"; + case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET: + return "unable to go below three devices on raid6"; + case BTRFS_ERROR_DEV_TGT_REPLACE: + return "unable to remove the dev_replace target dev"; + case BTRFS_ERROR_DEV_MISSING_NOT_FOUND: + return "no missing devices found to remove"; + case BTRFS_ERROR_DEV_ONLY_WRITABLE: + return "unable to remove the only writeable device"; + case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS: + return "add/delete/balance/replace/resize operation " + "in progress"; + default: + return NULL; + } +} + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE 
_IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args_v2) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct 
btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46) +#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ + char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ + char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \ + struct btrfs_ioctl_same_args) +#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags) +#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags[2]) +#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags[3]) +#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \ + struct btrfs_ioctl_vol_args_v2) +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/libfsimage/btrfs/kerncompat.h b/tools/libfsimage/btrfs/kerncompat.h new file mode 100644 index 0000000..ee65aa7 --- /dev/null +++ b/tools/libfsimage/btrfs/kerncompat.h @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __KERNCOMPAT_H__ +#define __KERNCOMPAT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> +#include <assert.h> +#include <stddef.h> +#include <linux/types.h> +#include <stdint.h> + +#include <features.h> + +#ifndef __GLIBC__ +#ifndef BTRFS_DISABLE_BACKTRACE +#define BTRFS_DISABLE_BACKTRACE +#endif +#define __always_inline __inline __attribute__ ((__always_inline__)) +#endif + +#ifndef BTRFS_DISABLE_BACKTRACE +#include <execinfo.h> +#endif + +#define ptr_to_u64(x) ((u64)(uintptr_t)x) +#define u64_to_ptr(x) ((void *)(uintptr_t)x) + +#ifndef READ +#define READ 0 +#define WRITE 1 +#define READA 2 +#endif + +#define gfp_t int +#define get_cpu_var(p) (p) +#define __get_cpu_var(p) (p) +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#define __GFP_BITS_SHIFT 20 +#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) +#define GFP_KERNEL 0 +#define GFP_NOFS 0 +#define __read_mostly +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#ifndef ULONG_MAX +#define ULONG_MAX (~0UL) +#endif + +#ifndef BTRFS_DISABLE_BACKTRACE +#define MAX_BACKTRACE 16 +static inline void print_trace(void) +{ + void *array[MAX_BACKTRACE]; + size_t size; + + size = backtrace(array, MAX_BACKTRACE); + backtrace_symbols_fd(array, size, 2); +} + +static inline void assert_trace(const char *assertion, const char *filename, + const char *func, unsigned line, int val) +{ + if (val) + return; + if (assertion) + fprintf(stderr, "%s:%d: %s: Assertion `%s` failed.\n", + filename, line, func, assertion); + else + fprintf(stderr, "%s:%d: %s: Assertion failed.\n", filename, + line, func); + print_trace(); + exit(1); +} + +#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 0) +#else +#define BUG() assert(0) +#endif + +#ifdef __CHECKER__ +#define __force __attribute__((force)) +#define __bitwise__ __attribute__((bitwise)) +#else +#define __force +#define __bitwise__ +#endif + +#ifndef __CHECKER__ +/* + * Since we're using primitive definitions from kernel-space, we need to + * define __KERNEL__ so that system header files know which definitions + * to use. + */ +#define __KERNEL__ +#include <asm/types.h> +typedef __u32 u32; +typedef __u64 u64; +typedef __u16 u16; +typedef __u8 u8; +typedef __s64 s64; +typedef __s32 s32; + +/* + * Continuing to define __KERNEL__ breaks others parts of the code, so + * we can just undefine it now that we have the correct headers... 
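+ *
+ * (The define only has to cover the <asm/types.h> include above; the
+ * typedefs it selected remain valid after the undef.)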
+ */ +#undef __KERNEL__ +#else +typedef unsigned int u32; +typedef unsigned int __u32; +typedef unsigned long long u64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef long long s64; +typedef int s32; +#endif + + +struct vma_shared { int prio_tree_node; }; +struct vm_area_struct { + unsigned long vm_pgoff; + unsigned long vm_start; + unsigned long vm_end; + struct vma_shared shared; +}; + +struct page { + unsigned long index; +}; + +struct mutex { + unsigned long lock; +}; + +#define mutex_init(m) \ +do { \ + (m)->lock = 1; \ +} while (0) + +static inline void mutex_lock(struct mutex *m) +{ + m->lock--; +} + +static inline void mutex_unlock(struct mutex *m) +{ + m->lock++; +} + +static inline int mutex_is_locked(struct mutex *m) +{ + return (m->lock != 1); +} + +#define cond_resched() do { } while (0) +#define preempt_enable() do { } while (0) +#define preempt_disable() do { } while (0) + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +#ifndef __attribute_const__ +#define __attribute_const__ __attribute__((__const__)) +#endif + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/* + * error pointer + */ +#define MAX_ERRNO 4095 +#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) + +static inline void *ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +/* + * printk + */ +#define printk(fmt, args...) 
fprintf(stderr, fmt, ##args) +#define KERN_CRIT "" +#define KERN_ERR "" + +/* + * kmalloc/kfree + */ +#define kmalloc(x, y) malloc(x) +#define kzalloc(x, y) calloc(1, x) +#define kstrdup(x, y) strdup(x) +#define kfree(x) free(x) +#define vmalloc(x) malloc(x) +#define vfree(x) free(x) + +#ifndef BTRFS_DISABLE_BACKTRACE +#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, !(c)) +#else +#define BUG_ON(c) assert(!(c)) +#endif + +#define WARN_ON(c) BUG_ON(c) + +#ifndef BTRFS_DISABLE_BACKTRACE +#define ASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, (c)) +#else +#define ASSERT(c) assert(c) +#endif + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#ifdef __CHECKER__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + +/* Alignment check */ +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +static inline int is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +typedef u16 __bitwise __le16; +typedef u16 __bitwise __be16; +typedef u32 __bitwise __le32; +typedef u32 __bitwise __be32; +typedef u64 __bitwise __le64; +typedef u64 __bitwise __be64; + +/* Macros to generate set/get funcs for the struct fields + * assume there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define cpu_to_le64(x) ((__force __le64)(u64)(bswap_64(x))) +#define le64_to_cpu(x) ((__force u64)(__le64)(bswap_64(x))) +#define cpu_to_le32(x) ((__force __le32)(u32)(bswap_32(x))) +#define le32_to_cpu(x) ((__force u32)(__le32)(bswap_32(x))) +#define cpu_to_le16(x) ((__force __le16)(u16)(bswap_16(x))) +#define le16_to_cpu(x) ((__force u16)(__le16)(bswap_16(x))) +#else +#define cpu_to_le64(x) ((__force __le64)(u64)(x)) +#define le64_to_cpu(x) ((__force u64)(__le64)(x)) +#define cpu_to_le32(x) ((__force __le32)(u32)(x)) +#define le32_to_cpu(x) ((__force u32)(__le32)(x)) +#define cpu_to_le16(x) ((__force __le16)(u16)(x)) +#define le16_to_cpu(x) ((__force u16)(__le16)(x)) +#endif + +struct __una_u16 { __le16 x; } __attribute__((__packed__)); +struct __una_u32 { __le32 x; } __attribute__((__packed__)); +struct __una_u64 { __le64 x; } __attribute__((__packed__)); + +#define get_unaligned_le8(p) (*((u8 *)(p))) +#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val)) +#define get_unaligned_le16(p) le16_to_cpu(((const struct __una_u16 *)(p))->x) +#define put_unaligned_le16(val,p) (((struct __una_u16 *)(p))->x = cpu_to_le16(val)) +#define get_unaligned_le32(p) le32_to_cpu(((const struct __una_u32 *)(p))->x) +#define put_unaligned_le32(val,p) (((struct __una_u32 *)(p))->x = cpu_to_le32(val)) +#define get_unaligned_le64(p) le64_to_cpu(((const struct __una_u64 *)(p))->x) +#define put_unaligned_le64(val,p) (((struct __una_u64 *)(p))->x = cpu_to_le64(val)) + +#ifndef true +#define true 1 +#define false 0 +#endif + +#ifndef noinline +#define noinline +#endif + +#endif diff --git a/tools/libfsimage/btrfs/list.h b/tools/libfsimage/btrfs/list.h new file mode 100644 index 0000000..db7a58c --- /dev/null +++ b/tools/libfsimage/btrfs/list.h @@ -0,0 +1,486 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +#define LIST_POISON1 ((struct list_head *) 0x00100100) +#define LIST_POISON2 ((struct list_head *) 0x00200200) + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_add(struct list_head *xnew, + struct list_head *prev, + struct list_head *next) +{ + next->prev = xnew; + xnew->next = next; + xnew->prev = prev; + prev->next = xnew; +} +#else +extern void __list_add(struct list_head *xnew, + struct list_head *prev, + struct list_head *next); +#endif + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_add(struct list_head *xnew, struct list_head *head) +{ + __list_add(xnew, head, head->next); +} +#else +extern void list_add(struct list_head *xnew, struct list_head *head); +#endif + + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *xnew, struct list_head *head) +{ + __list_add(xnew, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +#else +extern void list_del(struct list_head *entry); +#endif + +/** + * list_replace - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * Note: if 'old' was empty, it will be overwritten. 
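+ *
+ * @old itself is left untouched and its pointers still refer into the
+ * list, so reinitialise it (or use list_replace_init()) before any
+ * reuse, e.g.:
+ *
+ *   list_replace(&old_node->list, &new_node->list);
+ *   INIT_LIST_HEAD(&old_node->list);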
+ */ +static inline void list_replace(struct list_head *old, + struct list_head *xnew) +{ + xnew->next = old->next; + xnew->next->prev = xnew; + xnew->prev = old->prev; + xnew->prev->next = xnew; +} + +static inline void list_replace_init(struct list_head *old, + struct list_head *xnew) +{ + list_replace(old, xnew); + INIT_LIST_HEAD(old); +} +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, + const struct list_head *head) +{ + return list->next == head; +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * list_empty_careful - tests whether a list is empty and not being modified + * @head: the list to test + * + * Description: + * tests whether a list is empty _and_ checks that no other CPU might be + * in the process of modifying either member (next or prev) + * + * NOTE: using list_empty_careful() without synchronization + * can only be safe if the only activity that can happen + * to the list entry is list_del_init(). Eg. it cannot be used + * if another CPU could re-list_add() it. + */ +static inline int list_empty_careful(const struct list_head *head) +{ + struct list_head *next = head->next; + return (next == head) && (next == head->prev); +} + +static inline void __list_splice(const struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head, head->next); +} + +/** + * list_splice_tail - join two lists, each list being a queue + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. 
+ * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head, head->next); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_tail_init - join two lists and reinitialise the emptied list + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void list_splice_tail_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head->prev, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * list_first_entry - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/** + * list_next_entry - get the next element from a list + * @ptr: the list head to take the element from. + * @member: the name of the list_struct within the struct. + * + * Note, that next is expected to be not null. + */ +#define list_next_entry(ptr, member) \ + list_entry((ptr)->member.next, typeof(*ptr), member) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); \ + pos = pos->next) + +/** + * __list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This variant differs from list_for_each() in that it's the + * simplest possible list iteration code, no prefetching is done. + * Use this for code that knows the list to be very short (empty + * or 1 entry) most of the time. + */ +#define __list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; pos != (head); \ + pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. 
+ * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) + +/** + * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue + * @pos: the type * to use as a start point + * @head: the head of the list + * @member: the name of the list_struct within the struct. + * + * Prepares a pos entry for use as a start point in list_for_each_entry_continue. + */ +#define list_prepare_entry(pos, head, member) \ + ((pos) ? : list_entry(head, typeof(*pos), member)) + +/** + * list_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_for_each_entry_continue(pos, head, member) \ + for (pos = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_from - iterate over list of given type from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type, continuing from current position. + */ +#define list_for_each_entry_from(pos, head, member) \ + for (; &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_continue + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type, continuing after current point, + * safe against removal of list entry. + */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = list_entry(pos->member.next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_from + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. 
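For reviewers who have not used the kernel list idiom before: the struct list_head is embedded in the user's own type, and list_entry()/container_of() recovers the container, so one set of link/unlink primitives serves every list. A minimal, self-contained usage sketch (the widget type is invented, not something the patch adds):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

/* list.h picks up container_of() from kerncompat.h; define it here so
 * the sketch stands alone. */
#define container_of(ptr, type, member) ({                       \
        const typeof(((type *)0)->member) *__mptr = (ptr);       \
        (type *)((char *)__mptr - offsetof(type, member)); })
#include "list.h"

struct widget {                 /* invented example type */
        int id;
        struct list_head list;  /* embedded linkage */
};

int main(void)
{
        LIST_HEAD(widgets);     /* empty list: head points at itself */
        struct widget *w, *tmp;
        int i;

        for (i = 0; i < 3; i++) {
                w = malloc(sizeof(*w));
                w->id = i;
                list_add_tail(&w->list, &widgets);   /* queue order */
        }

        list_for_each_entry(w, &widgets, list)
                printf("widget %d\n", w->id);        /* 0, 1, 2 */

        /* The _safe variant caches the next pointer, so entries may be
         * unlinked and freed while iterating. */
        list_for_each_entry_safe(w, tmp, &widgets, list) {
                list_del(&w->list);
                free(w);
        }
        return 0;
}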
+ */ +#define list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_reverse + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate backwards over list of given type, safe against removal + * of list entry. + */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +#endif diff --git a/tools/libfsimage/btrfs/print-tree.h b/tools/libfsimage/btrfs/print-tree.h new file mode 100644 index 0000000..f0153c1 --- /dev/null +++ b/tools/libfsimage/btrfs/print-tree.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __PRINT_TREE_H__ +#define __PRINT_TREE_H__ + +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t, int follow); +void btrfs_print_key(struct btrfs_disk_key *disk_key); +void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk); +void print_extent_item(struct extent_buffer *eb, int slot, int metadata); +#endif diff --git a/tools/libfsimage/btrfs/radix-tree.h b/tools/libfsimage/btrfs/radix-tree.h new file mode 100644 index 0000000..bf96d83 --- /dev/null +++ b/tools/libfsimage/btrfs/radix-tree.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +/* + * Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _LINUX_RADIX_TREE_H +#define _LINUX_RADIX_TREE_H + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#define RADIX_TREE_MAX_TAGS 2 + +/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ +struct radix_tree_root { + unsigned int height; + gfp_t gfp_mask; + struct radix_tree_node *rnode; +}; + +#define RADIX_TREE_INIT(mask) { \ + .height = 0, \ + .gfp_mask = (mask), \ + .rnode = NULL, \ +} + +#define RADIX_TREE(name, mask) \ + struct radix_tree_root name = RADIX_TREE_INIT(mask) + +#define INIT_RADIX_TREE(root, mask) \ +do { \ + (root)->height = 0; \ + (root)->gfp_mask = (mask); \ + (root)->rnode = NULL; \ +} while (0) + +int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); +void *radix_tree_lookup(struct radix_tree_root *, unsigned long); +void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); +void *radix_tree_delete(struct radix_tree_root *, unsigned long); +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items); +int radix_tree_preload(gfp_t gfp_mask); +void radix_tree_init(void); +void *radix_tree_tag_set(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +void *radix_tree_tag_clear(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +int radix_tree_tag_get(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +unsigned int +radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items, + unsigned int tag); +int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); + +static inline void radix_tree_preload_end(void) +{ + preempt_enable(); +} + +#endif /* _LINUX_RADIX_TREE_H */ diff --git a/tools/libfsimage/btrfs/raid6.c b/tools/libfsimage/btrfs/raid6.c new file mode 100644 index 0000000..a6ee483 --- /dev/null +++ b/tools/libfsimage/btrfs/raid6.c @@ -0,0 +1,101 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int1.c + * + * 1-way unrolled portable integer math RAID-6 instruction set + * + * This file was postprocessed using unroll.pl and then ported to userspace + */ +#include <stdint.h> +#include <unistd.h> +#include "kerncompat.h" +#include "ctree.h" +#include "disk-io.h" + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... 
*/ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE 8 +# define NSHIFT 3 +typedef uint64_t unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE 4 +# define NSHIFT 2 +typedef uint32_t unative_t; +#endif + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + + +void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + uint8_t **dptr = (uint8_t **)ptrs; + uint8_t *p, *q; + int d, z, z0; + + unative_t wd0, wq0, wp0, w10, w20; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*1 ) { + wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd0 = *(unative_t *)&dptr[z][d+0*NSIZE]; + wp0 ^= wd0; + w20 = MASK(wq0); + w10 = SHLBYTE(wq0); + w20 &= NBYTES(0x1d); + w10 ^= w20; + wq0 = w10 ^ wd0; + } + *(unative_t *)&p[d+NSIZE*0] = wp0; + *(unative_t *)&q[d+NSIZE*0] = wq0; + } +} + diff --git a/tools/libfsimage/btrfs/rbtree-utils.c b/tools/libfsimage/btrfs/rbtree-utils.c new file mode 100644 index 0000000..7371bbb --- /dev/null +++ b/tools/libfsimage/btrfs/rbtree-utils.c @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2014 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
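To make the unrolled loop concrete: in raid6_gen_syndrome(disks, bytes, ptrs), ptrs[0..disks-3] are data blocks, ptrs[disks-2] receives the XOR parity P, and ptrs[disks-1] the Reed-Solomon syndrome Q. SHLBYTE()/MASK() together form a branch-free GF(2^8) multiply-by-2 (reduction polynomial 0x11d, hence the 0x1d constant), so each byte of Q accumulates the data bytes weighted by powers of the generator. A small, hedged C11 test harness, not part of the patch, that links against raid6.c and checks the P half of that claim:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);

int main(void)
{
        enum { DISKS = 4, BYTES = 16 };   /* two data disks + P + Q */
        uint8_t *blk[DISKS];
        void *ptrs[DISKS];
        int i, j;

        for (i = 0; i < DISKS; i++) {
                /* The generator reads whole machine words, so keep the
                 * buffers word aligned. */
                blk[i] = aligned_alloc(8, BYTES);
                memset(blk[i], 0, BYTES);
                ptrs[i] = blk[i];
        }
        for (j = 0; j < BYTES; j++) {
                blk[0][j] = j;            /* data disk 0 */
                blk[1][j] = 0xa5;         /* data disk 1 */
        }

        raid6_gen_syndrome(DISKS, BYTES, ptrs);

        /* P must equal the plain XOR of the data blocks. */
        for (j = 0; j < BYTES; j++) {
                if (blk[2][j] != (uint8_t)(blk[0][j] ^ blk[1][j])) {
                        fprintf(stderr, "P mismatch at byte %d\n", j);
                        return 1;
                }
        }
        printf("P ok; Q starts %02x %02x %02x %02x\n",
               blk[3][0], blk[3][1], blk[3][2], blk[3][3]);
        return 0;
}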
+ */ + +#include "rbtree-utils.h" + +int rb_insert(struct rb_root *root, struct rb_node *node, + rb_compare_nodes comp) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + int ret; + + while(*p) { + parent = *p; + + ret = comp(parent, node); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return 0; +} + +struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp, + struct rb_node **next_ret) +{ + struct rb_node *n = root->rb_node; + struct rb_node *parent = NULL; + int ret = 0; + + while(n) { + parent = n; + + ret = comp(n, key); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return n; + } + + if (!next_ret) + return NULL; + + if (parent && ret > 0) + parent = rb_next(parent); + + *next_ret = parent; + return NULL; +} + +void rb_free_nodes(struct rb_root *root, rb_free_node free_node) +{ + struct rb_node *node; + + while ((node = rb_first(root))) { + rb_erase(node, root); + free_node(node); + } +} diff --git a/tools/libfsimage/btrfs/rbtree-utils.h b/tools/libfsimage/btrfs/rbtree-utils.h new file mode 100644 index 0000000..718581f --- /dev/null +++ b/tools/libfsimage/btrfs/rbtree-utils.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __RBTREE_UTILS__ +#define __RBTREE_UTILS__ + +#include "rbtree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* The common insert/search/free functions */ +typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2); +typedef int (*rb_compare_keys)(struct rb_node *node, void *key); +typedef void (*rb_free_node)(struct rb_node *node); + +int rb_insert(struct rb_root *root, struct rb_node *node, + rb_compare_nodes comp); +/* + * In some cases, we need to return the next node if we don't find the node + * we are looking for; next_ret can be used in that case.
+ */ +struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp, + struct rb_node **next_ret); +void rb_free_nodes(struct rb_root *root, rb_free_node free_node); + +#define FREE_RB_BASED_TREE(name, free_func) \ +static void free_##name##_tree(struct rb_root *root) \ +{ \ + rb_free_nodes(root, free_func); \ +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/libfsimage/btrfs/rbtree.c b/tools/libfsimage/btrfs/rbtree.c new file mode 100644 index 0000000..92590a5 --- /dev/null +++ b/tools/libfsimage/btrfs/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@xxxxxxx> + (C) 2002 David Woodhouse <dwmw2@xxxxxxxxxxxxx> + (C) 2012 Michel Lespinasse <walken@xxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include "rbtree_augmented.h" + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. 
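The rb_insert()/rb_search() wrappers above only need a comparator, and their convention is worth spelling out: the walk descends left when the comparator returns a negative value, so returning the sign of (new key - parent key) keeps smaller keys on the left and makes an in-order walk ascending. A hedged, self-contained sketch (the keyed type is invented; assumes the sketch is compiled with -DBTRFS_FLAT_INCLUDES=1 and linked against rbtree.c and rbtree-utils.c from this patch):

#include <stdio.h>
#include <stdlib.h>
#include "rbtree-utils.h"

struct keyed {                  /* invented example type */
        struct rb_node node;
        int key;
};

static int keyed_cmp(struct rb_node *parent, struct rb_node *new_node)
{
        int pk = rb_entry(parent, struct keyed, node)->key;
        int nk = rb_entry(new_node, struct keyed, node)->key;

        /* Negative return descends left, so smaller keys go left. */
        return nk < pk ? -1 : (nk > pk ? 1 : 0);
}

/* Same sign convention, but against a bare key, for rb_search(). */
static int keyed_cmp_key(struct rb_node *n, void *key)
{
        int nk = rb_entry(n, struct keyed, node)->key;
        int k = *(int *)key;

        return k < nk ? -1 : (k > nk ? 1 : 0);
}

int main(void)
{
        struct rb_root root = RB_ROOT;
        int keys[] = { 3, 1, 2 }, want = 2, i;
        struct rb_node *hit;

        for (i = 0; i < 3; i++) {
                struct keyed *k = calloc(1, sizeof(*k));

                k->key = keys[i];
                if (rb_insert(&root, &k->node, keyed_cmp))
                        fprintf(stderr, "duplicate key %d\n", k->key);
        }

        hit = rb_search(&root, &want, keyed_cmp_key, NULL);
        if (hit)
                printf("found %d\n", rb_entry(hit, struct keyed, node)->key);
        return 0;
}

The teardown sketch after rb_erase() below reuses this struct keyed type.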
+ */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. 
+ */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - left rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - right rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ +
____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. 
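Given rb_first() and rb_next(), an in-order walk needs no auxiliary stack, and rb_erase() makes the erase-from-the-front teardown loop (the same pattern rb_free_nodes() uses above) safe even though each erase rebalances the tree. A hedged continuation of the struct keyed sketch from earlier, with the same includes:

static void dump_ascending(struct rb_root *root)
{
        struct rb_node *n;

        /* Leftmost node first, then in-order successors: 1 2 3 */
        for (n = rb_first(root); n; n = rb_next(n))
                printf("%d\n", rb_entry(n, struct keyed, node)->key);
}

static void destroy_tree(struct rb_root *root)
{
        struct rb_node *n;

        /* Re-fetch rb_first() on every pass; rb_erase() may rebalance,
         * and the erased node's links are no longer meaningful. */
        while ((n = rb_first(root))) {
                rb_erase(n, root);
                free(rb_entry(n, struct keyed, node));
        }
}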
+ */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/libfsimage/btrfs/rbtree.h b/tools/libfsimage/btrfs/rbtree.h new file mode 100644 index 0000000..47b662a --- /dev/null +++ b/tools/libfsimage/btrfs/rbtree.h @@ -0,0 +1,118 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@xxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This avoids the use of callbacks, which would otherwise hurt performance + dramatically. It's not the cleanest way, but it is how C (as opposed to + C++) gets both performance and genericity... + + See Documentation/rbtree.txt for documentation and samples.
+*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#ifdef __cplusplus +extern "C" { +#endif + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new_node, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, + struct rb_node ** rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + +/** + * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of + * given type safe against removal of rb_node entry + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. + */ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + +#ifdef __cplusplus +} +#endif + +#endif /* _LINUX_RBTREE_H */ diff --git a/tools/libfsimage/btrfs/rbtree_augmented.h b/tools/libfsimage/btrfs/rbtree_augmented.h new file mode 100644 index 0000000..5d26978 --- /dev/null +++ b/tools/libfsimage/btrfs/rbtree_augmented.h @@ -0,0 +1,249 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@xxxxxxx> + (C) 2002 David Woodhouse <dwmw2@xxxxxxxxxxxxx> + (C) 2012 Michel Lespinasse <walken@xxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _LINUX_RBTREE_AUGMENTED_H +#define _LINUX_RBTREE_AUGMENTED_H + +#include "rbtree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_insert_augmented() instead of the usual rb_insert_color() call. + * If rb_insert_augmented() rebalances the rbtree, it will call back into + * a user-provided function to update the augmented information on the + * affected subtrees.
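As a concrete reading aid for this callback machinery: the classic use is caching a per-subtree aggregate. The hedged sketch below (the struct aug type is invented and not used anywhere in this patch) keeps the maximum value found in each subtree, generates the three callbacks with the RB_DECLARE_CALLBACKS() helper defined just below, and maintains the aggregate on the way down before handing off to rb_insert_augmented():

#include "rbtree_augmented.h"

struct aug {                    /* invented example type */
        struct rb_node rb;
        long value;             /* sort key */
        long subtree_max;       /* cached max(value) over this subtree */
};

static long aug_compute_max(struct aug *n)
{
        long m = n->value;
        struct aug *c;

        if (n->rb.rb_left) {
                c = rb_entry(n->rb.rb_left, struct aug, rb);
                if (c->subtree_max > m)
                        m = c->subtree_max;
        }
        if (n->rb.rb_right) {
                c = rb_entry(n->rb.rb_right, struct aug, rb);
                if (c->subtree_max > m)
                        m = c->subtree_max;
        }
        return m;
}

RB_DECLARE_CALLBACKS(static, aug_cb, struct aug, rb,
                     long, subtree_max, aug_compute_max)

static void aug_insert(struct rb_root *root, struct aug *node)
{
        struct rb_node **link = &root->rb_node, *parent = NULL;

        node->subtree_max = node->value;
        while (*link) {
                struct aug *p;

                parent = *link;
                p = rb_entry(parent, struct aug, rb);
                /* Every ancestor's aggregate must cover the new value. */
                if (p->subtree_max < node->value)
                        p->subtree_max = node->value;
                link = node->value < p->value ? &parent->rb_left
                                              : &parent->rb_right;
        }
        rb_link_node(&node->rb, parent, link);
        rb_insert_augmented(&node->rb, root, &aug_cb);
}

If a rotation happens during rebalancing, the generated aug_cb_rotate() recomputes subtree_max for the demoted node and copies the old aggregate to the promoted one, which is exactly the contract described in the comment above.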
+ */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? 
parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/libfsimage/btrfs/repair.c b/tools/libfsimage/btrfs/repair.c new file mode 100644 index 0000000..4f74742 --- /dev/null +++ b/tools/libfsimage/btrfs/repair.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "ctree.h" +#include "extent-cache.h" +#include "utils.h" +#include "repair.h" + +int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info, + struct btrfs_key *first_key, + u64 start, u64 len, int level) + +{ + int ret = 0; + struct btrfs_corrupt_block *corrupt; + + if (!info->corrupt_blocks) + return 0; + + corrupt = malloc(sizeof(*corrupt)); + if (!corrupt) + return -ENOMEM; + + memcpy(&corrupt->key, first_key, sizeof(*first_key)); + corrupt->cache.start = start; + corrupt->cache.size = len; + corrupt->level = level; + + ret = insert_cache_extent(info->corrupt_blocks, &corrupt->cache); + if (ret) + free(corrupt); + BUG_ON(ret && ret != -EEXIST); + return ret; +} + diff --git a/tools/libfsimage/btrfs/repair.h b/tools/libfsimage/btrfs/repair.h new file mode 100644 index 0000000..3fc0e8b --- /dev/null +++ b/tools/libfsimage/btrfs/repair.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2012 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_REPAIR_H__ +#define __BTRFS_REPAIR_H__ + +#include "ctree.h" + +struct btrfs_corrupt_block { + struct cache_extent cache; + struct btrfs_key key; + int level; +}; + +int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info, + struct btrfs_key *first_key, + u64 start, u64 len, int level); + +#endif diff --git a/tools/libfsimage/btrfs/root-tree.c b/tools/libfsimage/btrfs/root-tree.c new file mode 100644 index 0000000..934d02e --- /dev/null +++ b/tools/libfsimage/btrfs/root-tree.c @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "print-tree.h" + +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key) +{ + struct btrfs_path *path; + struct btrfs_key search_key; + struct btrfs_key found_key; + struct extent_buffer *l; + int ret; + int slot; + + search_key.objectid = objectid; + search_key.type = BTRFS_ROOT_ITEM_KEY; + search_key.offset = (u64)-1; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto out; + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + + BUG_ON(ret == 0); + l = path->nodes[0]; + slot = path->slots[0] - 1; + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { + ret = -ENOENT; + goto out; + } + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) +{ + struct btrfs_path *path; + struct extent_buffer *l; + int ret; + int slot; + unsigned long ptr; + u32 old_len; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + if (ret < 0) + goto out; + BUG_ON(ret != 0); + l = path->nodes[0]; + slot = path->slots[0]; + ptr = btrfs_item_ptr_offset(l, slot); + old_len = btrfs_item_size_nr(l, slot); + + /* + * If this is the first time we update the root item which originated + * from an older kernel, we need to enlarge the item size to make room + * for the added fields. + */ + if (old_len < sizeof(*item)) { + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, key, path, + -1, 1); + if (ret < 0) { + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret < 0) { + goto out; + } + btrfs_release_path(path); + ret = btrfs_insert_empty_item(trans, root, path, + key, sizeof(*item)); + if (ret < 0) { + goto out; + } + l = path->nodes[0]; + slot = path->slots[0]; + ptr = btrfs_item_ptr_offset(l, slot); + } + + /* + * Update generation_v2 so at the next mount we know the new root + * fields are valid. + */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); + + write_extent_buffer(l, item, ptr, sizeof(*item)); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) +{ + int ret; + + /* + * Make sure generation v1 and v2 match. See update_root for details. + */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); + ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); + return ret; +} + +/* + * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY + * or BTRFS_ROOT_BACKREF_KEY. + * + * The dirid, sequence, name and name_len refer to the directory entry + * that is referencing the root. + * + * For a forward ref, the root_id is the id of the tree referencing + * the root and ref_id is the id of the subvol or snapshot. + * + * For a back ref the root_id is the id of the subvol or snapshot and + * ref_id is the id of the tree referencing it. 
+ */ +int btrfs_add_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, + u64 root_id, u8 type, u64 ref_id, + u64 dirid, u64 sequence, + const char *name, int name_len) +{ + struct btrfs_key key; + int ret; + struct btrfs_path *path; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; + unsigned long ptr; + + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = root_id; + key.type = type; + key.offset = ref_id; + + ret = btrfs_insert_empty_item(trans, tree_root, path, &key, + sizeof(*ref) + name_len); + BUG_ON(ret); + + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + btrfs_set_root_ref_dirid(leaf, ref, dirid); + btrfs_set_root_ref_sequence(leaf, ref, sequence); + btrfs_set_root_ref_name_len(leaf, ref, name_len); + ptr = (unsigned long)(ref + 1); + write_extent_buffer(leaf, name, ptr, name_len); + btrfs_mark_buffer_dirty(leaf); + + btrfs_free_path(path); + return ret; +} diff --git a/tools/libfsimage/btrfs/transaction.h b/tools/libfsimage/btrfs/transaction.h new file mode 100644 index 0000000..13e09a6 --- /dev/null +++ b/tools/libfsimage/btrfs/transaction.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_TRANSACTION_H__ +#define __BTRFS_TRANSACTION_H__ + +#include "kerncompat.h" +#include "ctree.h" + +struct btrfs_trans_handle { + u64 transid; + u64 alloc_exclude_start; + u64 alloc_exclude_nr; + unsigned long blocks_reserved; + unsigned long blocks_used; + struct btrfs_block_group_cache *block_group; +}; + +static inline struct btrfs_trans_handle * +btrfs_start_transaction(struct btrfs_root *root, int num_blocks) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_trans_handle *h = malloc(sizeof(*h)); + + BUG_ON(!h); + BUG_ON(root->commit_root); + BUG_ON(fs_info->running_transaction); + fs_info->running_transaction = h; + fs_info->generation++; + h->transid = fs_info->generation; + h->alloc_exclude_start = 0; + h->alloc_exclude_nr = 0; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + h->block_group = NULL; + root->last_trans = h->transid; + root->commit_root = root->node; + extent_buffer_get(root->node); + return h; +} + +static inline void btrfs_free_transaction(struct btrfs_root *root, + struct btrfs_trans_handle *handle) +{ + memset(handle, 0, sizeof(*handle)); + free(handle); +} + +#endif diff --git a/tools/libfsimage/btrfs/utils.c b/tools/libfsimage/btrfs/utils.c new file mode 100644 index 0000000..7e45702 --- /dev/null +++ b/tools/libfsimage/btrfs/utils.c @@ -0,0 +1,3242 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2008 Morey Roof. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <uuid/uuid.h> +#include <fcntl.h> +#include <unistd.h> +#include <mntent.h> +#include <ctype.h> +#include <linux/loop.h> +#include <linux/major.h> +#include <linux/kdev_t.h> +#include <limits.h> +#include <blkid/blkid.h> +#include <sys/vfs.h> +#include <sys/statfs.h> +#include <linux/magic.h> +#include <getopt.h> + +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "crc32c.h" +#include "utils.h" +#include "volumes.h" +#include "ioctl.h" +#include "commands.h" + +#ifndef BLKDISCARD +#define BLKDISCARD _IO(0x12,119) +#endif + +static int btrfs_scan_done = 0; + +static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs"; + +const char *get_argv0_buf(void) +{ + return argv0_buf; +} + +void fixup_argv0(char **argv, const char *token) +{ + int len = strlen(argv0_buf); + + snprintf(argv0_buf + len, sizeof(argv0_buf) - len, " %s", token); + argv[0] = argv0_buf; +} + +void set_argv0(char **argv) +{ + strncpy(argv0_buf, argv[0], sizeof(argv0_buf)); + argv0_buf[sizeof(argv0_buf) - 1] = 0; +} + +int check_argc_exact(int nargs, int expected) +{ + if (nargs < expected) + fprintf(stderr, "%s: too few arguments\n", argv0_buf); + if (nargs > expected) + fprintf(stderr, "%s: too many arguments\n", argv0_buf); + + return nargs != expected; +} + +int check_argc_min(int nargs, int expected) +{ + if (nargs < expected) { + fprintf(stderr, "%s: too few arguments\n", argv0_buf); + return 1; + } + + return 0; +} + +int check_argc_max(int nargs, int expected) +{ + if (nargs > expected) { + fprintf(stderr, "%s: too many arguments\n", argv0_buf); + return 1; + } + + return 0; +} + + +/* + * Discard the given range in one go + */ +static int discard_range(int fd, u64 start, u64 len) +{ + u64 range[2] = { start, len }; + + if (ioctl(fd, BLKDISCARD, &range) < 0) + return errno; + return 0; +} + +/* + * Discard blocks in the given range in 1G chunks, the process is interruptible + */ +static int discard_blocks(int fd, u64 start, u64 len) +{ + while (len > 0) { + /* 1G granularity */ + u64 chunk_size = min_t(u64, len, 1*1024*1024*1024); + int ret; + + ret = discard_range(fd, start, chunk_size); + if (ret) + return ret; + len -= chunk_size; + start += chunk_size; + } + + return 0; +} + +static u64 reference_root_table[] = { + [1] = BTRFS_ROOT_TREE_OBJECTID, + [2] = BTRFS_EXTENT_TREE_OBJECTID, + [3] = BTRFS_CHUNK_TREE_OBJECTID, + [4] = BTRFS_DEV_TREE_OBJECTID, + [5] = BTRFS_FS_TREE_OBJECTID, + [6] = BTRFS_CSUM_TREE_OBJECTID, +}; + +int test_uuid_unique(char *fs_uuid) +{ + int unique = 1; + blkid_dev_iterate iter = NULL; + blkid_dev dev = NULL; + blkid_cache cache = NULL; + + if (blkid_get_cache(&cache, NULL) < 0) { + printf("ERROR: lblkid 
cache get failed\n"); + return 1; + } + blkid_probe_all(cache); + iter = blkid_dev_iterate_begin(cache); + blkid_dev_set_search(iter, "UUID", fs_uuid); + + while (blkid_dev_next(iter, &dev) == 0) { + dev = blkid_verify(cache, dev); + if (dev) { + unique = 0; + break; + } + } + + blkid_dev_iterate_end(iter); + blkid_put_cache(cache); + + return unique; +} + +/* + * @fs_uuid - if NULL, generates a UUID, returns back the new filesystem UUID + */ +int make_btrfs(int fd, struct btrfs_mkfs_config *cfg) +{ + struct btrfs_super_block super; + struct extent_buffer *buf; + struct btrfs_root_item root_item; + struct btrfs_disk_key disk_key; + struct btrfs_extent_item *extent_item; + struct btrfs_inode_item *inode_item; + struct btrfs_chunk *chunk; + struct btrfs_dev_item *dev_item; + struct btrfs_dev_extent *dev_extent; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + u8 *ptr; + int i; + int ret; + u32 itemoff; + u32 nritems = 0; + u64 first_free; + u64 ref_root; + u32 array_size; + u32 item_size; + int skinny_metadata = !!(cfg->features & + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + u64 num_bytes; + + buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize)); + if (!buf) + return -ENOMEM; + + first_free = BTRFS_SUPER_INFO_OFFSET + cfg->sectorsize * 2 - 1; + first_free &= ~((u64)cfg->sectorsize - 1); + + memset(&super, 0, sizeof(super)); + + num_bytes = (cfg->num_bytes / cfg->sectorsize) * cfg->sectorsize; + if (cfg->fs_uuid && *cfg->fs_uuid) { + if (uuid_parse(cfg->fs_uuid, super.fsid) != 0) { + fprintf(stderr, "could not parse UUID: %s\n", + cfg->fs_uuid); + ret = -EINVAL; + goto out; + } + if (!test_uuid_unique(cfg->fs_uuid)) { + fprintf(stderr, "non-unique UUID: %s\n", cfg->fs_uuid); + ret = -EBUSY; + goto out; + } + } else { + uuid_generate(super.fsid); + if (cfg->fs_uuid) + uuid_unparse(super.fsid, cfg->fs_uuid); + } + uuid_generate(super.dev_item.uuid); + uuid_generate(chunk_tree_uuid); + + btrfs_set_super_bytenr(&super, cfg->blocks[0]); + btrfs_set_super_num_devices(&super, 1); + btrfs_set_super_magic(&super, BTRFS_MAGIC); + btrfs_set_super_generation(&super, 1); + btrfs_set_super_root(&super, cfg->blocks[1]); + btrfs_set_super_chunk_root(&super, cfg->blocks[3]); + btrfs_set_super_total_bytes(&super, num_bytes); + btrfs_set_super_bytes_used(&super, 6 * cfg->nodesize); + btrfs_set_super_sectorsize(&super, cfg->sectorsize); + btrfs_set_super_leafsize(&super, cfg->nodesize); + btrfs_set_super_nodesize(&super, cfg->nodesize); + btrfs_set_super_stripesize(&super, cfg->stripesize); + btrfs_set_super_csum_type(&super, BTRFS_CSUM_TYPE_CRC32); + btrfs_set_super_chunk_root_generation(&super, 1); + btrfs_set_super_cache_generation(&super, -1); + btrfs_set_super_incompat_flags(&super, cfg->features); + if (cfg->label) + __strncpy_null(super.label, cfg->label, BTRFS_LABEL_SIZE - 1); + + /* create the tree of root objects */ + memset(buf->data, 0, cfg->nodesize); + buf->len = cfg->nodesize; + btrfs_set_header_bytenr(buf, cfg->blocks[1]); + btrfs_set_header_nritems(buf, 4); + btrfs_set_header_generation(buf, 1); + btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(buf, BTRFS_ROOT_TREE_OBJECTID); + write_extent_buffer(buf, super.fsid, btrfs_header_fsid(), + BTRFS_FSID_SIZE); + + write_extent_buffer(buf, chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(buf), + BTRFS_UUID_SIZE); + + /* create the items for the root tree */ + memset(&root_item, 0, sizeof(root_item)); + inode_item = &root_item.inode; + btrfs_set_stack_inode_generation(inode_item, 1); + 
btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, cfg->nodesize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); + btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_used(&root_item, cfg->nodesize); + btrfs_set_root_generation(&root_item, 1); + + memset(&disk_key, 0, sizeof(disk_key)); + btrfs_set_disk_key_type(&disk_key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_disk_key_offset(&disk_key, 0); + nritems = 0; + + itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) - sizeof(root_item); + btrfs_set_root_bytenr(&root_item, cfg->blocks[2]); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + sizeof(root_item)); + write_extent_buffer(buf, &root_item, btrfs_item_ptr_offset(buf, + nritems), sizeof(root_item)); + nritems++; + + itemoff = itemoff - sizeof(root_item); + btrfs_set_root_bytenr(&root_item, cfg->blocks[4]); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_TREE_OBJECTID); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + sizeof(root_item)); + write_extent_buffer(buf, &root_item, + btrfs_item_ptr_offset(buf, nritems), + sizeof(root_item)); + nritems++; + + itemoff = itemoff - sizeof(root_item); + btrfs_set_root_bytenr(&root_item, cfg->blocks[5]); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_FS_TREE_OBJECTID); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + sizeof(root_item)); + write_extent_buffer(buf, &root_item, + btrfs_item_ptr_offset(buf, nritems), + sizeof(root_item)); + nritems++; + + itemoff = itemoff - sizeof(root_item); + btrfs_set_root_bytenr(&root_item, cfg->blocks[6]); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_CSUM_TREE_OBJECTID); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + sizeof(root_item)); + write_extent_buffer(buf, &root_item, + btrfs_item_ptr_offset(buf, nritems), + sizeof(root_item)); + nritems++; + + + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[1]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + + /* create the items for the extent tree */ + memset(buf->data + sizeof(struct btrfs_header), 0, + cfg->nodesize - sizeof(struct btrfs_header)); + nritems = 0; + itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize); + for (i = 1; i < 7; i++) { + item_size = sizeof(struct btrfs_extent_item); + if (!skinny_metadata) + item_size += sizeof(struct btrfs_tree_block_info); + + BUG_ON(cfg->blocks[i] < first_free); + BUG_ON(cfg->blocks[i] < cfg->blocks[i - 1]); + + /* create extent item */ + itemoff -= item_size; + btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[i]); + if (skinny_metadata) { + btrfs_set_disk_key_type(&disk_key, + BTRFS_METADATA_ITEM_KEY); + btrfs_set_disk_key_offset(&disk_key, 0); + } else { + btrfs_set_disk_key_type(&disk_key, + BTRFS_EXTENT_ITEM_KEY); + btrfs_set_disk_key_offset(&disk_key, cfg->nodesize); + } + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), + itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + item_size); + extent_item = btrfs_item_ptr(buf, nritems, + struct btrfs_extent_item); + btrfs_set_extent_refs(buf, extent_item, 1); + btrfs_set_extent_generation(buf, extent_item, 1); + btrfs_set_extent_flags(buf, extent_item, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + nritems++; + + /* create extent ref */ + ref_root = reference_root_table[i]; + btrfs_set_disk_key_objectid(&disk_key, cfg->blocks[i]); + btrfs_set_disk_key_offset(&disk_key, ref_root); + btrfs_set_disk_key_type(&disk_key, BTRFS_TREE_BLOCK_REF_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), + itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), 0); + nritems++; + } + btrfs_set_header_bytenr(buf, cfg->blocks[2]); + btrfs_set_header_owner(buf, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_header_nritems(buf, nritems); + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[2]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + + /* create the chunk tree */ + memset(buf->data + sizeof(struct btrfs_header), 0, + cfg->nodesize - sizeof(struct btrfs_header)); + nritems = 0; + item_size = sizeof(*dev_item); + itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) - item_size; + + /* first device 1 (there is no device 0) */ + btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID); + btrfs_set_disk_key_offset(&disk_key, 1); + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size); + + dev_item = btrfs_item_ptr(buf, nritems, struct btrfs_dev_item); + btrfs_set_device_id(buf, dev_item, 1); + btrfs_set_device_generation(buf, dev_item, 0); + btrfs_set_device_total_bytes(buf, dev_item, num_bytes); + btrfs_set_device_bytes_used(buf, dev_item, + BTRFS_MKFS_SYSTEM_GROUP_SIZE); + btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize); + btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize); + btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize); + btrfs_set_device_type(buf, dev_item, 0); + + write_extent_buffer(buf, super.dev_item.uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + write_extent_buffer(buf, super.fsid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_UUID_SIZE); + read_extent_buffer(buf, &super.dev_item, (unsigned long)dev_item, + sizeof(*dev_item)); + + nritems++; + item_size = btrfs_chunk_item_size(1); + itemoff = itemoff - item_size; + + /* then we have chunk 0 */ + btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_offset(&disk_key, 0); + btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), item_size); + + chunk = btrfs_item_ptr(buf, nritems, struct btrfs_chunk); + btrfs_set_chunk_length(buf, chunk, BTRFS_MKFS_SYSTEM_GROUP_SIZE); + btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024); + btrfs_set_chunk_type(buf, chunk, BTRFS_BLOCK_GROUP_SYSTEM); + btrfs_set_chunk_io_align(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_io_width(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_sector_size(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_num_stripes(buf, chunk, 1); + btrfs_set_stripe_devid_nr(buf, chunk, 0, 1); + btrfs_set_stripe_offset_nr(buf, chunk, 0, 0); + nritems++; + + write_extent_buffer(buf, super.dev_item.uuid, + (unsigned long)btrfs_stripe_dev_uuid(&chunk->stripe), + BTRFS_UUID_SIZE); + + /* copy the key for the chunk to the system array */ + ptr = super.sys_chunk_array; + array_size = sizeof(disk_key); + + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + + /* copy the chunk to the system array */ + read_extent_buffer(buf, ptr, (unsigned long)chunk, item_size); + array_size += item_size; + ptr += item_size; + btrfs_set_super_sys_array_size(&super, array_size); + + btrfs_set_header_bytenr(buf, cfg->blocks[3]); + btrfs_set_header_owner(buf, BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_header_nritems(buf, nritems); + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[3]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + + /* create the device tree */ + memset(buf->data + sizeof(struct btrfs_header), 0, + cfg->nodesize - sizeof(struct btrfs_header)); + nritems = 0; + itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize) - + sizeof(struct btrfs_dev_extent); + + btrfs_set_disk_key_objectid(&disk_key, 1); + btrfs_set_disk_key_offset(&disk_key, 0); + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY); + btrfs_set_item_key(buf, &disk_key, nritems); + btrfs_set_item_offset(buf, btrfs_item_nr(nritems), itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(nritems), + sizeof(struct btrfs_dev_extent)); + dev_extent = btrfs_item_ptr(buf, nritems, struct btrfs_dev_extent); + btrfs_set_dev_extent_chunk_tree(buf, dev_extent, + BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_dev_extent_chunk_objectid(buf, dev_extent, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_dev_extent_chunk_offset(buf, dev_extent, 0); + + write_extent_buffer(buf, chunk_tree_uuid, + (unsigned long)btrfs_dev_extent_chunk_tree_uuid(dev_extent), + BTRFS_UUID_SIZE); + + btrfs_set_dev_extent_length(buf, dev_extent, + BTRFS_MKFS_SYSTEM_GROUP_SIZE); + nritems++; + + btrfs_set_header_bytenr(buf, cfg->blocks[4]); + btrfs_set_header_owner(buf, BTRFS_DEV_TREE_OBJECTID); + btrfs_set_header_nritems(buf, nritems); + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[4]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? -errno : -EIO); + goto out; + } + + /* create the FS root */ + memset(buf->data + sizeof(struct btrfs_header), 0, + cfg->nodesize - sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(buf, cfg->blocks[5]); + btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_nritems(buf, 0); + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[5]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? -errno : -EIO); + goto out; + } + /* finally create the csum root */ + memset(buf->data + sizeof(struct btrfs_header), 0, + cfg->nodesize - sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(buf, cfg->blocks[6]); + btrfs_set_header_owner(buf, BTRFS_CSUM_TREE_OBJECTID); + btrfs_set_header_nritems(buf, 0); + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, cfg->nodesize, cfg->blocks[6]); + if (ret != cfg->nodesize) { + ret = (ret < 0 ? -errno : -EIO); + goto out; + } + + /* and write out the super block */ + BUG_ON(sizeof(super) > cfg->sectorsize); + memset(buf->data, 0, BTRFS_SUPER_INFO_SIZE); + memcpy(buf->data, &super, sizeof(super)); + buf->len = BTRFS_SUPER_INFO_SIZE; + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + ret = pwrite(fd, buf->data, BTRFS_SUPER_INFO_SIZE, cfg->blocks[0]); + if (ret != BTRFS_SUPER_INFO_SIZE) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + + ret = 0; + +out: + free(buf); + return ret; +} + +static const struct btrfs_fs_feature { + const char *name; + u64 flag; + const char *desc; +} mkfs_features[] = { + { "mixed-bg", BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS, + "mixed data and metadata block groups" }, + { "extref", BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF, + "increased hardlink limit per file to 65536" }, + { "raid56", BTRFS_FEATURE_INCOMPAT_RAID56, + "raid56 extended format" }, + { "skinny-metadata", BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA, + "reduced-size metadata extent refs" }, + { "no-holes", BTRFS_FEATURE_INCOMPAT_NO_HOLES, + "no explicit hole extents for files" }, + /* Keep this one last */ + { "list-all", BTRFS_FEATURE_LIST_ALL, NULL } +}; + +static int parse_one_fs_feature(const char *name, u64 *flags) +{ + int i; + int found = 0; + + for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) { + if (name[0] == '^' && + !strcmp(mkfs_features[i].name, name + 1)) { + *flags &= ~ mkfs_features[i].flag; + found = 1; + } else if (!strcmp(mkfs_features[i].name, name)) { + *flags |= mkfs_features[i].flag; + found = 1; + } + } + + return !found; +} + +void btrfs_parse_features_to_string(char *buf, u64 flags) +{ + int i; + + buf[0] = 0; + + for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) { + if (flags & mkfs_features[i].flag) { + if (*buf) + strcat(buf, ", "); + strcat(buf, mkfs_features[i].name); + } + } +} + +void btrfs_process_fs_features(u64 flags) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mkfs_features); i++) { + if (flags & mkfs_features[i].flag) { + printf("Turning ON incompat feature '%s': %s\n", + mkfs_features[i].name, + mkfs_features[i].desc); + } + } +} + +void btrfs_list_all_fs_features(u64 mask_disallowed) +{ + int i; + + fprintf(stderr, "Filesystem features available:\n"); + for (i = 0; i < ARRAY_SIZE(mkfs_features) - 1; i++) { + char *is_default = ""; + + if (mkfs_features[i].flag & mask_disallowed) + continue; + if (mkfs_features[i].flag & BTRFS_MKFS_DEFAULT_FEATURES) + is_default = ", default"; + fprintf(stderr, "%-20s- %s (0x%llx%s)\n", + mkfs_features[i].name, + mkfs_features[i].desc, + mkfs_features[i].flag, + is_default); + } +} + +/* + * Return NULL if all features were parsed fine, otherwise return the name of + * the first unparsed. 
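+ *
+ * A minimal usage sketch (editor's illustration, not from the
+ * import): strtok_r() modifies the list in place, so the caller
+ * must pass writable memory:
+ *
+ *	u64 flags = BTRFS_MKFS_DEFAULT_FEATURES;
+ *	char list[] = "extref,^mixed-bg";
+ *	char *bad = btrfs_parse_fs_features(list, &flags);
+ *	if (bad)
+ *		fprintf(stderr, "unknown feature: %s\n", bad);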
+ */ +char* btrfs_parse_fs_features(char *namelist, u64 *flags) +{ + char *this_char; + char *save_ptr = NULL; /* Satisfy static checkers */ + + for (this_char = strtok_r(namelist, ",", &save_ptr); + this_char != NULL; + this_char = strtok_r(NULL, ",", &save_ptr)) { + if (parse_one_fs_feature(this_char, flags)) + return this_char; + } + + return NULL; +} + +u64 btrfs_device_size(int fd, struct stat *st) +{ + u64 size; + if (S_ISREG(st->st_mode)) { + return st->st_size; + } + if (!S_ISBLK(st->st_mode)) { + return 0; + } + if (ioctl(fd, BLKGETSIZE64, &size) >= 0) { + return size; + } + return 0; +} + +static int zero_blocks(int fd, off_t start, size_t len) +{ + char *buf = malloc(len); + int ret = 0; + ssize_t written; + + if (!buf) + return -ENOMEM; + memset(buf, 0, len); + written = pwrite(fd, buf, len, start); + if (written != len) + ret = -EIO; + free(buf); + return ret; +} + +#define ZERO_DEV_BYTES (2 * 1024 * 1024) + +/* don't write outside the device by clamping the region to the device size */ +static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size) +{ + off_t end = max(start, start + len); + +#ifdef __sparc__ + /* and don't overwrite the disk labels on sparc */ + start = max(start, 1024); + end = max(end, 1024); +#endif + + start = min_t(u64, start, dev_size); + end = min_t(u64, end, dev_size); + + return zero_blocks(fd, start, end - start); +} + +int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int fd, char *path, + u64 device_total_bytes, u32 io_width, u32 io_align, + u32 sectorsize) +{ + struct btrfs_super_block *disk_super; + struct btrfs_super_block *super = root->fs_info->super_copy; + struct btrfs_device *device; + struct btrfs_dev_item *dev_item; + char *buf = NULL; + u64 fs_total_bytes; + u64 num_devs; + int ret; + + device_total_bytes = (device_total_bytes / sectorsize) * sectorsize; + + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) + goto err_nomem; + buf = kzalloc(sectorsize, GFP_NOFS); + if (!buf) + goto err_nomem; + BUG_ON(sizeof(*disk_super) > sectorsize); + + disk_super = (struct btrfs_super_block *)buf; + dev_item = &disk_super->dev_item; + + uuid_generate(device->uuid); + device->devid = 0; + device->type = 0; + device->io_width = io_width; + device->io_align = io_align; + device->sector_size = sectorsize; + device->fd = fd; + device->writeable = 1; + device->total_bytes = device_total_bytes; + device->bytes_used = 0; + device->total_ios = 0; + device->dev_root = root->fs_info->dev_root; + device->name = strdup(path); + if (!device->name) + goto err_nomem; + + INIT_LIST_HEAD(&device->dev_list); + ret = btrfs_add_device(trans, root, device); + BUG_ON(ret); + + fs_total_bytes = btrfs_super_total_bytes(super) + device_total_bytes; + btrfs_set_super_total_bytes(super, fs_total_bytes); + + num_devs = btrfs_super_num_devices(super) + 1; + btrfs_set_super_num_devices(super, num_devs); + + memcpy(disk_super, super, sizeof(*disk_super)); + + btrfs_set_super_bytenr(disk_super, BTRFS_SUPER_INFO_OFFSET); + btrfs_set_stack_device_id(dev_item, device->devid); + btrfs_set_stack_device_type(dev_item, device->type); + btrfs_set_stack_device_io_align(dev_item, device->io_align); + btrfs_set_stack_device_io_width(dev_item, device->io_width); + btrfs_set_stack_device_sector_size(dev_item, device->sector_size); + btrfs_set_stack_device_total_bytes(dev_item, device->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, device->bytes_used); + memcpy(&dev_item->uuid, device->uuid, BTRFS_UUID_SIZE); + + ret = 
pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET); + BUG_ON(ret != sectorsize); + + kfree(buf); + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + device->fs_devices = root->fs_info->fs_devices; + return 0; + +err_nomem: + kfree(device); + kfree(buf); + return -ENOMEM; +} + +static int btrfs_wipe_existing_sb(int fd) +{ + const char *off = NULL; + size_t len = 0; + loff_t offset; + char buf[BUFSIZ]; + int ret = 0; + blkid_probe pr = NULL; + + pr = blkid_new_probe(); + if (!pr) + return -1; + + if (blkid_probe_set_device(pr, fd, 0, 0)) { + ret = -1; + goto out; + } + + ret = blkid_probe_lookup_value(pr, "SBMAGIC_OFFSET", &off, NULL); + if (!ret) + ret = blkid_probe_lookup_value(pr, "SBMAGIC", NULL, &len); + + if (ret || len == 0 || off == NULL) { + /* + * If lookup fails, the probe did not find any values, eg. for + * a file image or a loop device. Soft error. + */ + ret = 1; + goto out; + } + + offset = strtoll(off, NULL, 10); + if (len > sizeof(buf)) + len = sizeof(buf); + + memset(buf, 0, len); + ret = pwrite(fd, buf, len, offset); + if (ret < 0) { + error("cannot wipe existing superblock: %s", strerror(errno)); + ret = -1; + } else if (ret != len) { + error("cannot wipe existing superblock: wrote %d of %zd", ret, len); + ret = -1; + } + fsync(fd); + +out: + blkid_free_probe(pr); + return ret; +} + +int btrfs_prepare_device(int fd, const char *file, int zero_end, + u64 *block_count_ret, u64 max_block_count, int discard) +{ + u64 block_count; + struct stat st; + int i, ret; + + ret = fstat(fd, &st); + if (ret < 0) { + error("unable to stat %s: %s", file, strerror(errno)); + return 1; + } + + block_count = btrfs_device_size(fd, &st); + if (block_count == 0) { + error("unable to determine size of %s", file); + return 1; + } + if (max_block_count) + block_count = min(block_count, max_block_count); + + if (discard) { + /* + * We intentionally ignore errors from the discard ioctl. It + * is not necessary for the mkfs functionality but just an + * optimization. 
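+ *
+ * The zero-length discard_range(fd, 0, 0) call below only probes
+ * whether the BLKDISCARD ioctl is supported at all; the actual trim
+ * is then done by discard_blocks() in interruptible 1GiB chunks.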
+ */ + if (discard_range(fd, 0, 0) == 0) { + printf("Performing full device TRIM (%s) ...\n", + pretty_size(block_count)); + discard_blocks(fd, 0, block_count); + } + } + + ret = zero_dev_clamped(fd, 0, ZERO_DEV_BYTES, block_count); + for (i = 0 ; !ret && i < BTRFS_SUPER_MIRROR_MAX; i++) + ret = zero_dev_clamped(fd, btrfs_sb_offset(i), + BTRFS_SUPER_INFO_SIZE, block_count); + if (!ret && zero_end) + ret = zero_dev_clamped(fd, block_count - ZERO_DEV_BYTES, + ZERO_DEV_BYTES, block_count); + + if (ret < 0) { + error("failed to zero device '%s': %s", file, strerror(-ret)); + return 1; + } + + ret = btrfs_wipe_existing_sb(fd); + if (ret < 0) { + error("cannot wipe superblocks on %s", file); + return 1; + } + + *block_count_ret = block_count; + return 0; +} + +int btrfs_make_root_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid) +{ + int ret; + struct btrfs_inode_item inode_item; + time_t now = time(NULL); + + memset(&inode_item, 0, sizeof(inode_item)); + btrfs_set_stack_inode_generation(&inode_item, trans->transid); + btrfs_set_stack_inode_size(&inode_item, 0); + btrfs_set_stack_inode_nlink(&inode_item, 1); + btrfs_set_stack_inode_nbytes(&inode_item, root->nodesize); + btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755); + btrfs_set_stack_timespec_sec(&inode_item.atime, now); + btrfs_set_stack_timespec_nsec(&inode_item.atime, 0); + btrfs_set_stack_timespec_sec(&inode_item.ctime, now); + btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0); + btrfs_set_stack_timespec_sec(&inode_item.mtime, now); + btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0); + btrfs_set_stack_timespec_sec(&inode_item.otime, 0); + btrfs_set_stack_timespec_nsec(&inode_item.otime, 0); + + if (root->fs_info->tree_root == root) + btrfs_set_super_root_dir(root->fs_info->super_copy, objectid); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + if (ret) + goto error; + + ret = btrfs_insert_inode_ref(trans, root, "..", 2, objectid, objectid, 0); + if (ret) + goto error; + + btrfs_set_root_dirid(&root->root_item, objectid); + ret = 0; +error: + return ret; +} + +/* + * checks if a path is a block device node + * Returns negative errno on failure, otherwise + * returns 1 for blockdev, 0 for not-blockdev + */ +int is_block_device(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf) < 0) + return -errno; + + return !!S_ISBLK(statbuf.st_mode); +} + +/* + * check if given path is a mount point + * return 1 if yes. 0 if no. 
-1 for error + */ +int is_mount_point(const char *path) +{ + FILE *f; + struct mntent *mnt; + int ret = 0; + + f = setmntent("/proc/self/mounts", "r"); + if (f == NULL) + return -1; + + while ((mnt = getmntent(f)) != NULL) { + if (strcmp(mnt->mnt_dir, path)) + continue; + ret = 1; + break; + } + endmntent(f); + return ret; +} + +static int is_reg_file(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf) < 0) + return -errno; + return S_ISREG(statbuf.st_mode); +} + +/* + * This function checks if the given input parameter is + * an uuid or a path + * return <0 : some error in the given input + * return BTRFS_ARG_UNKNOWN: unknown input + * return BTRFS_ARG_UUID: given input is uuid + * return BTRFS_ARG_MNTPOINT: given input is path + * return BTRFS_ARG_REG: given input is regular file + * return BTRFS_ARG_BLKDEV: given input is block device + */ +int check_arg_type(const char *input) +{ + uuid_t uuid; + char path[PATH_MAX]; + + if (!input) + return -EINVAL; + + if (realpath(input, path)) { + if (is_block_device(path) == 1) + return BTRFS_ARG_BLKDEV; + + if (is_mount_point(path) == 1) + return BTRFS_ARG_MNTPOINT; + + if (is_reg_file(path)) + return BTRFS_ARG_REG; + + return BTRFS_ARG_UNKNOWN; + } + + if (strlen(input) == (BTRFS_UUID_UNPARSED_SIZE - 1) && + !uuid_parse(input, uuid)) + return BTRFS_ARG_UUID; + + return BTRFS_ARG_UNKNOWN; +} + +/* + * Find the mount point for a mounted device. + * On success, returns 0 with mountpoint in *mp. + * On failure, returns -errno (not mounted yields -EINVAL) + * Is noisy on failures, expects to be given a mounted device. + */ +int get_btrfs_mount(const char *dev, char *mp, size_t mp_size) +{ + int ret; + int fd = -1; + + ret = is_block_device(dev); + if (ret <= 0) { + if (!ret) { + error("not a block device: %s", dev); + ret = -EINVAL; + } else { + error("cannot check %s: %s", dev, strerror(-ret)); + } + goto out; + } + + fd = open(dev, O_RDONLY); + if (fd < 0) { + ret = -errno; + error("cannot open %s: %s", dev, strerror(errno)); + goto out; + } + + ret = check_mounted_where(fd, dev, mp, mp_size, NULL); + if (!ret) { + ret = -EINVAL; + } else { /* mounted, all good */ + ret = 0; + } +out: + if (fd != -1) + close(fd); + return ret; +} + +/* + * Given a pathname, return a filehandle to: + * the original pathname or, + * if the pathname is a mounted btrfs device, to its mountpoint. + * + * On error, return -1, errno should be set. 
+ */ +int open_path_or_dev_mnt(const char *path, DIR **dirstream, int verbose) +{ + char mp[PATH_MAX]; + int ret; + + if (is_block_device(path)) { + ret = get_btrfs_mount(path, mp, sizeof(mp)); + if (ret < 0) { + /* not a mounted btrfs dev */ + error_on(verbose, "'%s' is not a mounted btrfs device", + path); + errno = EINVAL; + return -1; + } + ret = open_file_or_dir(mp, dirstream); + error_on(verbose && ret < 0, "can't access '%s': %s", + path, strerror(errno)); + } else { + ret = btrfs_open_dir(path, dirstream, 1); + } + + return ret; +} + +/* + * Do the following checks before calling open_file_or_dir(): + * 1: path is in a btrfs filesystem + * 2: path is a directory + */ +int btrfs_open_dir(const char *path, DIR **dirstream, int verbose) +{ + struct statfs stfs; + struct stat st; + int ret; + + if (statfs(path, &stfs) != 0) { + error_on(verbose, "cannot access '%s': %s", path, + strerror(errno)); + return -1; + } + + if (stfs.f_type != BTRFS_SUPER_MAGIC) { + error_on(verbose, "not a btrfs filesystem: %s", path); + return -2; + } + + if (stat(path, &st) != 0) { + error_on(verbose, "cannot access '%s': %s", path, + strerror(errno)); + return -1; + } + + if (!S_ISDIR(st.st_mode)) { + error_on(verbose, "not a directory: %s", path); + return -3; + } + + ret = open_file_or_dir(path, dirstream); + if (ret < 0) { + error_on(verbose, "cannot access '%s': %s", path, + strerror(errno)); + } + + return ret; +} + +/* checks if a device is a loop device */ +static int is_loop_device (const char* device) { + struct stat statbuf; + + if(stat(device, &statbuf) < 0) + return -errno; + + return (S_ISBLK(statbuf.st_mode) && + MAJOR(statbuf.st_rdev) == LOOP_MAJOR); +} + +/* + * Takes a loop device path (e.g. /dev/loop0) and returns + * the associated file (e.g. /images/my_btrfs.img) using + * loopdev API + */ +static int resolve_loop_device_with_loopdev(const char* loop_dev, char* loop_file) +{ + int fd; + int ret; + struct loop_info64 lo64; + + fd = open(loop_dev, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return -errno; + ret = ioctl(fd, LOOP_GET_STATUS64, &lo64); + if (ret < 0) { + ret = -errno; + goto out; + } + + memcpy(loop_file, lo64.lo_file_name, sizeof(lo64.lo_file_name)); + loop_file[sizeof(lo64.lo_file_name)] = 0; + +out: + close(fd); + + return ret; +} + +/* Takes a loop device path (e.g. /dev/loop0) and returns + * the associated file (e.g. /images/my_btrfs.img) */ +static int resolve_loop_device(const char* loop_dev, char* loop_file, + int max_len) +{ + int ret; + FILE *f; + char fmt[20]; + char p[PATH_MAX]; + char real_loop_dev[PATH_MAX]; + + if (!realpath(loop_dev, real_loop_dev)) + return -errno; + snprintf(p, PATH_MAX, "/sys/block/%s/loop/backing_file", strrchr(real_loop_dev, '/')); + if (!(f = fopen(p, "r"))) { + if (errno == ENOENT) + /* + * It's possibly a partitioned loop device, which is + * resolvable with loopdev API. + */ + return resolve_loop_device_with_loopdev(loop_dev, loop_file); + return -errno; + } + + snprintf(fmt, 20, "%%%i[^\n]", max_len-1); + ret = fscanf(f, fmt, loop_file); + fclose(f); + if (ret == EOF) + return -errno; + + return 0; +} + +/* + * Checks whether a and b are identical or device + * files associated with the same block device + */ +static int is_same_blk_file(const char* a, const char* b) +{ + struct stat st_buf_a, st_buf_b; + char real_a[PATH_MAX]; + char real_b[PATH_MAX]; + + if (!realpath(a, real_a)) + strncpy_null(real_a, a); + + if (!realpath(b, real_b)) + strncpy_null(real_b, b); + + /* Identical path? 
*/ + if (strcmp(real_a, real_b) == 0) + return 1; + + if (stat(a, &st_buf_a) < 0 || stat(b, &st_buf_b) < 0) { + if (errno == ENOENT) + return 0; + return -errno; + } + + /* Same blockdevice? */ + if (S_ISBLK(st_buf_a.st_mode) && S_ISBLK(st_buf_b.st_mode) && + st_buf_a.st_rdev == st_buf_b.st_rdev) { + return 1; + } + + /* Hardlink? */ + if (st_buf_a.st_dev == st_buf_b.st_dev && + st_buf_a.st_ino == st_buf_b.st_ino) { + return 1; + } + + return 0; +} + +/* checks if a and b are identical or device + * files associated with the same block device or + * if one file is a loop device that uses the other + * file. + */ +static int is_same_loop_file(const char* a, const char* b) +{ + char res_a[PATH_MAX]; + char res_b[PATH_MAX]; + const char* final_a = NULL; + const char* final_b = NULL; + int ret; + + /* Resolve a if it is a loop device */ + if((ret = is_loop_device(a)) < 0) { + if (ret == -ENOENT) + return 0; + return ret; + } else if (ret) { + ret = resolve_loop_device(a, res_a, sizeof(res_a)); + if (ret < 0) { + if (errno != EPERM) + return ret; + } else { + final_a = res_a; + } + } else { + final_a = a; + } + + /* Resolve b if it is a loop device */ + if ((ret = is_loop_device(b)) < 0) { + if (ret == -ENOENT) + return 0; + return ret; + } else if (ret) { + ret = resolve_loop_device(b, res_b, sizeof(res_b)); + if (ret < 0) { + if (errno != EPERM) + return ret; + } else { + final_b = res_b; + } + } else { + final_b = b; + } + + return is_same_blk_file(final_a, final_b); +} + +/* Checks if a file exists and is a block or regular file*/ +static int is_existing_blk_or_reg_file(const char* filename) +{ + struct stat st_buf; + + if(stat(filename, &st_buf) < 0) { + if(errno == ENOENT) + return 0; + else + return -errno; + } + + return (S_ISBLK(st_buf.st_mode) || S_ISREG(st_buf.st_mode)); +} + +/* Checks if a file is used (directly or indirectly via a loop device) + * by a device in fs_devices + */ +static int blk_file_in_dev_list(struct btrfs_fs_devices* fs_devices, + const char* file) +{ + int ret; + struct list_head *head; + struct list_head *cur; + struct btrfs_device *device; + + head = &fs_devices->devices; + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + + if((ret = is_same_loop_file(device->name, file))) + return ret; + } + + return 0; +} + +/* + * Resolve a pathname to a device mapper node to /dev/mapper/<name> + * Returns NULL on invalid input or malloc failure; Other failures + * will be handled by the caller using the input pathame. + */ +char *canonicalize_dm_name(const char *ptname) +{ + FILE *f; + size_t sz; + char path[PATH_MAX], name[PATH_MAX], *res = NULL; + + if (!ptname || !*ptname) + return NULL; + + snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptname); + if (!(f = fopen(path, "r"))) + return NULL; + + /* read <name>\n from sysfs */ + if (fgets(name, sizeof(name), f) && (sz = strlen(name)) > 1) { + name[sz - 1] = '\0'; + snprintf(path, sizeof(path), "/dev/mapper/%s", name); + + if (access(path, F_OK) == 0) + res = strdup(path); + } + fclose(f); + return res; +} + +/* + * Resolve a pathname to a canonical device node, e.g. /dev/sda1 or + * to a device mapper pathname. + * Returns NULL on invalid input or malloc failure; Other failures + * will be handled by the caller using the input pathame. 
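+ *
+ * For example (hypothetical names): "/dev/mapper/vg-root" may come
+ * back unchanged, while a path that realpath() resolves to
+ * "/dev/dm-0" is translated back to its /dev/mapper alias via
+ * canonicalize_dm_name().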
+ */ +char *canonicalize_path(const char *path) +{ + char *canonical, *p; + + if (!path || !*path) + return NULL; + + canonical = realpath(path, NULL); + if (!canonical) + return strdup(path); + p = strrchr(canonical, '/'); + if (p && strncmp(p, "/dm-", 4) == 0 && isdigit(*(p + 4))) { + char *dm = canonicalize_dm_name(p + 1); + + if (dm) { + free(canonical); + return dm; + } + } + return canonical; +} + +/* + * returns 1 if the device was mounted, < 0 on error or 0 if everything + * is safe to continue. + */ +int check_mounted(const char* file) +{ + int fd; + int ret; + + fd = open(file, O_RDONLY); + if (fd < 0) { + error("mount check: cannot open %s: %s", file, + strerror(errno)); + return -errno; + } + + ret = check_mounted_where(fd, file, NULL, 0, NULL); + close(fd); + + return ret; +} + +int check_mounted_where(int fd, const char *file, char *where, int size, + struct btrfs_fs_devices **fs_dev_ret) +{ + int ret; + u64 total_devs = 1; + int is_btrfs; + struct btrfs_fs_devices *fs_devices_mnt = NULL; + FILE *f; + struct mntent *mnt; + + /* scan the initial device */ + ret = btrfs_scan_one_device(fd, file, &fs_devices_mnt, + &total_devs, BTRFS_SUPER_INFO_OFFSET, 0); + is_btrfs = (ret >= 0); + + /* scan other devices */ + if (is_btrfs && total_devs > 1) { + ret = btrfs_scan_lblkid(); + if (ret) + return ret; + } + + /* iterate over the list of currently mountes filesystems */ + if ((f = setmntent ("/proc/self/mounts", "r")) == NULL) + return -errno; + + while ((mnt = getmntent (f)) != NULL) { + if(is_btrfs) { + if(strcmp(mnt->mnt_type, "btrfs") != 0) + continue; + + ret = blk_file_in_dev_list(fs_devices_mnt, mnt->mnt_fsname); + } else { + /* ignore entries in the mount table that are not + associated with a file*/ + if((ret = is_existing_blk_or_reg_file(mnt->mnt_fsname)) < 0) + goto out_mntloop_err; + else if(!ret) + continue; + + ret = is_same_loop_file(file, mnt->mnt_fsname); + } + + if(ret < 0) + goto out_mntloop_err; + else if(ret) + break; + } + + /* Did we find an entry in mnt table? */ + if (mnt && size && where) { + strncpy(where, mnt->mnt_dir, size); + where[size-1] = 0; + } + if (fs_dev_ret) + *fs_dev_ret = fs_devices_mnt; + + ret = (mnt != NULL); + +out_mntloop_err: + endmntent (f); + + return ret; +} + +struct pending_dir { + struct list_head list; + char name[PATH_MAX]; +}; + +int btrfs_register_one_device(const char *fname) +{ + struct btrfs_ioctl_vol_args args; + int fd; + int ret; + + fd = open("/dev/btrfs-control", O_RDWR); + if (fd < 0) { + warning( + "failed to open /dev/btrfs-control, skipping device registration: %s", + strerror(errno)); + return -errno; + } + memset(&args, 0, sizeof(args)); + strncpy_null(args.name, fname); + ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args); + if (ret < 0) { + error("device scan failed on '%s': %s", fname, + strerror(errno)); + ret = -errno; + } + close(fd); + return ret; +} + +/* + * Register all devices in the fs_uuid list created in the user + * space. Ensure btrfs_scan_lblkid() is called before this func. 
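+ *
+ * Returns the number of devices that failed to register (0 on
+ * complete success), not a negative errno.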
+ */ +int btrfs_register_all_devices(void) +{ + int err = 0; + int ret = 0; + struct btrfs_fs_devices *fs_devices; + struct btrfs_device *device; + struct list_head *all_uuids; + + all_uuids = btrfs_scanned_uuids(); + + list_for_each_entry(fs_devices, all_uuids, list) { + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (*device->name) + err = btrfs_register_one_device(device->name); + + if (err) + ret++; + } + } + + return ret; +} + +int btrfs_device_already_in_root(struct btrfs_root *root, int fd, + int super_offset) +{ + struct btrfs_super_block *disk_super; + char *buf; + int ret = 0; + + buf = malloc(BTRFS_SUPER_INFO_SIZE); + if (!buf) { + ret = -ENOMEM; + goto out; + } + ret = pread(fd, buf, BTRFS_SUPER_INFO_SIZE, super_offset); + if (ret != BTRFS_SUPER_INFO_SIZE) + goto brelse; + + ret = 0; + disk_super = (struct btrfs_super_block *)buf; + if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) + goto brelse; + + if (!memcmp(disk_super->fsid, root->fs_info->super_copy->fsid, + BTRFS_FSID_SIZE)) + ret = 1; +brelse: + free(buf); +out: + return ret; +} + +/* + * Note: this function uses a static per-thread buffer. Do not call this + * function more than 10 times within one argument list! + */ +const char *pretty_size_mode(u64 size, unsigned mode) +{ + static __thread int ps_index = 0; + static __thread char ps_array[10][32]; + char *ret; + + ret = ps_array[ps_index]; + ps_index++; + ps_index %= 10; + (void)pretty_size_snprintf(size, ret, 32, mode); + + return ret; +} + +static const char* unit_suffix_binary[] = + { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"}; +static const char* unit_suffix_decimal[] = + { "B", "kB", "MB", "GB", "TB", "PB", "EB"}; + +int pretty_size_snprintf(u64 size, char *str, size_t str_size, unsigned unit_mode) +{ + int num_divs; + float fraction; + u64 base = 0; + int mult = 0; + const char** suffix = NULL; + u64 last_size; + + if (str_size == 0) + return 0; + + if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_RAW) { + snprintf(str, str_size, "%llu", size); + return 0; + } + + if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_BINARY) { + base = 1024; + mult = 1024; + suffix = unit_suffix_binary; + } else if ((unit_mode & ~UNITS_MODE_MASK) == UNITS_DECIMAL) { + base = 1000; + mult = 1000; + suffix = unit_suffix_decimal; + } + + /* Unknown mode */ + if (!base) { + fprintf(stderr, "INTERNAL ERROR: unknown unit base, mode %d\n", + unit_mode); + assert(0); + return -1; + } + + num_divs = 0; + last_size = size; + switch (unit_mode & UNITS_MODE_MASK) { + case UNITS_TBYTES: base *= mult; num_divs++; + case UNITS_GBYTES: base *= mult; num_divs++; + case UNITS_MBYTES: base *= mult; num_divs++; + case UNITS_KBYTES: num_divs++; + break; + case UNITS_BYTES: + base = 1; + num_divs = 0; + break; + default: + while (size >= mult) { + last_size = size; + size /= mult; + num_divs++; + } + /* + * If the value is smaller than base, we didn't do any + * division, in that case, base should be 1, not original + * base, or the unit will be wrong + */ + if (num_divs == 0) + base = 1; + } + + if (num_divs >= ARRAY_SIZE(unit_suffix_binary)) { + str[0] = '\0'; + printf("INTERNAL ERROR: unsupported unit suffix, index %d\n", + num_divs); + assert(0); + return -1; + } + fraction = (float)last_size / base; + + return snprintf(str, str_size, "%.2f%s", fraction, suffix[num_divs]); +} + +/* + * __strncpy_null - strncpy with null termination + * @dest: the target array + * @src: the source string + * @n: maximum bytes to copy (size of *dest) + * + * Like strncpy, but ensures destination is 
null-terminated. + * + * Copies the string pointed to by src, including the terminating null + * byte ('\0'), to the buffer pointed to by dest, up to a maximum + * of n bytes. Then ensure that dest is null-terminated. + */ +char *__strncpy_null(char *dest, const char *src, size_t n) +{ + strncpy(dest, src, n); + if (n > 0) + dest[n - 1] = '\0'; + return dest; +} + +/* + * Checks to make sure that the label matches our requirements. + * Returns: + 0 if everything is safe and usable + -1 if the label is too long + */ +static int check_label(const char *input) +{ + int len = strlen(input); + + if (len > BTRFS_LABEL_SIZE - 1) { + fprintf(stderr, "ERROR: Label %s is too long (max %d)\n", + input, BTRFS_LABEL_SIZE - 1); + return -1; + } + + return 0; +} + +static int set_label_unmounted(const char *dev, const char *label) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + int ret; + + ret = check_mounted(dev); + if (ret < 0) { + fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + return -1; + } + if (ret > 0) { + fprintf(stderr, "ERROR: dev %s is mounted, use mount point\n", + dev); + return -1; + } + + /* Open the super_block at the default location + * and as read-write. + */ + root = open_ctree(dev, 0, OPEN_CTREE_WRITES); + if (!root) /* errors are printed by open_ctree() */ + return -1; + + trans = btrfs_start_transaction(root, 1); + __strncpy_null(root->fs_info->super_copy->label, label, BTRFS_LABEL_SIZE - 1); + + btrfs_commit_transaction(trans, root); + + /* Now we close it since we are done. */ + close_ctree(root); + return 0; +} + +static int set_label_mounted(const char *mount_path, const char *labelp) +{ + int fd; + char label[BTRFS_LABEL_SIZE]; + + fd = open(mount_path, O_RDONLY | O_NOATIME); + if (fd < 0) { + fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path); + return -1; + } + + memset(label, 0, sizeof(label)); + __strncpy_null(label, labelp, BTRFS_LABEL_SIZE - 1); + if (ioctl(fd, BTRFS_IOC_SET_FSLABEL, label) < 0) { + fprintf(stderr, "ERROR: unable to set label %s\n", + strerror(errno)); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +int get_label_unmounted(const char *dev, char *label) +{ + struct btrfs_root *root; + int ret; + + ret = check_mounted(dev); + if (ret < 0) { + fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + return -1; + } + + /* Open the super_block at the default location + * and as read-only. + */ + root = open_ctree(dev, 0, 0); + if(!root) + return -1; + + __strncpy_null(label, root->fs_info->super_copy->label, + BTRFS_LABEL_SIZE - 1); + + /* Now we close it since we are done. */ + close_ctree(root); + return 0; +} + +/* + * If a partition is mounted, try to get the filesystem label via its + * mounted path rather than device. Return the corresponding error + * the user specified the device path. 
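+ * (In this copy that means: an open(2) failure prints a message and
+ * returns -1; an ioctl failure returns -errno, with a message unless
+ * errno is ENOTTY.)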
+ */
+int get_label_mounted(const char *mount_path, char *labelp)
+{
+	char label[BTRFS_LABEL_SIZE];
+	int fd;
+	int ret;
+
+	fd = open(mount_path, O_RDONLY | O_NOATIME);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path);
+		return -1;
+	}
+
+	memset(label, '\0', sizeof(label));
+	ret = ioctl(fd, BTRFS_IOC_GET_FSLABEL, label);
+	if (ret < 0) {
+		if (errno != ENOTTY)
+			fprintf(stderr, "ERROR: unable to get label: %s\n",
+				strerror(errno));
+		ret = -errno;
+		close(fd);
+		return ret;
+	}
+
+	__strncpy_null(labelp, label, BTRFS_LABEL_SIZE - 1);
+	close(fd);
+	return 0;
+}
+
+int get_label(const char *btrfs_dev, char *label)
+{
+	int ret;
+
+	ret = is_existing_blk_or_reg_file(btrfs_dev);
+	if (!ret)
+		ret = get_label_mounted(btrfs_dev, label);
+	else if (ret > 0)
+		ret = get_label_unmounted(btrfs_dev, label);
+
+	return ret;
+}
+
+int set_label(const char *btrfs_dev, const char *label)
+{
+	int ret;
+
+	if (check_label(label))
+		return -1;
+
+	ret = is_existing_blk_or_reg_file(btrfs_dev);
+	if (!ret)
+		ret = set_label_mounted(btrfs_dev, label);
+	else if (ret > 0)
+		ret = set_label_unmounted(btrfs_dev, label);
+
+	return ret;
+}
+
+/*
+ * A not-so-good version of fls64. No fancy optimization here, since
+ * nothing but parse_size() uses it.
+ */
+static int fls64(u64 x)
+{
+	int i;
+
+	for (i = 0; i < 64; i++)
+		if (x << i & (1ULL << 63))
+			return 64 - i;
+	return 64 - i;
+}
+
+u64 parse_size(char *s)
+{
+	char c;
+	char *endptr;
+	u64 mult = 1;
+	u64 ret;
+
+	if (!s) {
+		fprintf(stderr, "ERROR: Size value is empty\n");
+		exit(1);
+	}
+	if (s[0] == '-') {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is negative\n", s);
+		exit(1);
+	}
+	ret = strtoull(s, &endptr, 10);
+	if (endptr == s) {
+		fprintf(stderr, "ERROR: Size value '%s' is invalid\n", s);
+		exit(1);
+	}
+	if (endptr[0] && endptr[1]) {
+		fprintf(stderr, "ERROR: Illegal size suffix: unexpected character '%c' after the unit\n",
+			endptr[1]);
+		exit(1);
+	}
+	/*
+	 * strtoull returns ULLONG_MAX and sets errno to ERANGE on
+	 * overflow; reject such values as too large for u64
+	 */
+	if (errno == ERANGE && ret == ULLONG_MAX) {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is too large for u64\n", s);
+		exit(1);
+	}
+	if (endptr[0]) {
+		c = tolower(endptr[0]);
+		switch (c) {
+		case 'e':
+			mult *= 1024;
+			/* fallthrough */
+		case 'p':
+			mult *= 1024;
+			/* fallthrough */
+		case 't':
+			mult *= 1024;
+			/* fallthrough */
+		case 'g':
+			mult *= 1024;
+			/* fallthrough */
+		case 'm':
+			mult *= 1024;
+			/* fallthrough */
+		case 'k':
+			mult *= 1024;
+			/* fallthrough */
+		case 'b':
+			break;
+		default:
+			fprintf(stderr, "ERROR: Unknown size descriptor '%c'\n",
+				c);
+			exit(1);
+		}
+	}
+	/* Check whether ret * mult overflows */
+	if (fls64(ret) + fls64(mult) - 1 > 64) {
+		fprintf(stderr,
+			"ERROR: Size value '%s' is too large for u64\n", s);
+		exit(1);
+	}
+	ret *= mult;
+	return ret;
+}
+
+u64 parse_qgroupid(const char *p)
+{
+	char *s = strchr(p, '/');
+	const char *ptr_src_end = p + strlen(p);
+	char *ptr_parse_end = NULL;
+	u64 level;
+	u64 id;
+	int fd;
+	int ret = 0;
+
+	if (p[0] == '/')
+		goto path;
+
+	/* Numeric format like '0/257' is the primary case */
+	if (!s) {
+		id = strtoull(p, &ptr_parse_end, 10);
+		if (ptr_parse_end != ptr_src_end)
+			goto path;
+		return id;
+	}
+	level = strtoull(p, &ptr_parse_end, 10);
+	if (ptr_parse_end != s)
+		goto path;
+
+	id = strtoull(s + 1, &ptr_parse_end, 10);
+	if (ptr_parse_end != ptr_src_end)
+		goto path;
+
+	return (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
+
+path:
+	/* Path format like subv
at 'my_subvol' is the fallback case */ + ret = test_issubvolume(p); + if (ret < 0 || !ret) + goto err; + fd = open(p, O_RDONLY); + if (fd < 0) + goto err; + ret = lookup_ino_rootid(fd, &id); + close(fd); + if (ret < 0) + goto err; + return id; + +err: + fprintf(stderr, "ERROR: invalid qgroupid or subvolume path: %s\n", p); + exit(-1); +} + +int open_file_or_dir3(const char *fname, DIR **dirstream, int open_flags) +{ + int ret; + struct stat st; + int fd; + + ret = stat(fname, &st); + if (ret < 0) { + return -1; + } + if (S_ISDIR(st.st_mode)) { + *dirstream = opendir(fname); + if (!*dirstream) + return -1; + fd = dirfd(*dirstream); + } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { + fd = open(fname, open_flags); + } else { + /* + * we set this on purpose, in case the caller output + * strerror(errno) as success + */ + errno = EINVAL; + return -1; + } + if (fd < 0) { + fd = -1; + if (*dirstream) { + closedir(*dirstream); + *dirstream = NULL; + } + } + return fd; +} + +int open_file_or_dir(const char *fname, DIR **dirstream) +{ + return open_file_or_dir3(fname, dirstream, O_RDWR); +} + +void close_file_or_dir(int fd, DIR *dirstream) +{ + if (dirstream) + closedir(dirstream); + else if (fd >= 0) + close(fd); +} + +int get_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args) +{ + int ret; + + di_args->devid = devid; + memset(&di_args->uuid, '\0', sizeof(di_args->uuid)); + + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); + return ret < 0 ? -errno : 0; +} + +static u64 find_max_device_id(struct btrfs_ioctl_search_args *search_args, + int nr_items) +{ + struct btrfs_dev_item *dev_item; + char *buf = search_args->buf; + + buf += (nr_items - 1) * (sizeof(struct btrfs_ioctl_search_header) + + sizeof(struct btrfs_dev_item)); + buf += sizeof(struct btrfs_ioctl_search_header); + + dev_item = (struct btrfs_dev_item *)buf; + + return btrfs_stack_device_id(dev_item); +} + +static int search_chunk_tree_for_fs_info(int fd, + struct btrfs_ioctl_fs_info_args *fi_args) +{ + int ret; + int max_items; + u64 start_devid = 1; + struct btrfs_ioctl_search_args search_args; + struct btrfs_ioctl_search_key *search_key = &search_args.key; + + fi_args->num_devices = 0; + + max_items = BTRFS_SEARCH_ARGS_BUFSIZE + / (sizeof(struct btrfs_ioctl_search_header) + + sizeof(struct btrfs_dev_item)); + + search_key->tree_id = BTRFS_CHUNK_TREE_OBJECTID; + search_key->min_objectid = BTRFS_DEV_ITEMS_OBJECTID; + search_key->max_objectid = BTRFS_DEV_ITEMS_OBJECTID; + search_key->min_type = BTRFS_DEV_ITEM_KEY; + search_key->max_type = BTRFS_DEV_ITEM_KEY; + search_key->min_transid = 0; + search_key->max_transid = (u64)-1; + search_key->nr_items = max_items; + search_key->max_offset = (u64)-1; + +again: + search_key->min_offset = start_devid; + + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args); + if (ret < 0) + return -errno; + + fi_args->num_devices += (u64)search_key->nr_items; + + if (search_key->nr_items == max_items) { + start_devid = find_max_device_id(&search_args, + search_key->nr_items) + 1; + goto again; + } + + /* get the lastest max_id to stay consistent with the num_devices */ + if (search_key->nr_items == 0) + /* + * last tree_search returns an empty buf, use the devid of + * the last dev_item of the previous tree_search + */ + fi_args->max_id = start_devid - 1; + else + fi_args->max_id = find_max_device_id(&search_args, + search_key->nr_items); + + return 0; +} + +/* + * For a given path, fill in the ioctl fs_ and info_ args. 
+ * If the path is a btrfs mountpoint, fill info for all devices. + * If the path is a btrfs device, fill in only that device. + * + * The path provided must be either on a mounted btrfs fs, + * or be a mounted btrfs device. + * + * Returns 0 on success, or a negative errno. + */ +int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args *fi_args, + struct btrfs_ioctl_dev_info_args **di_ret) +{ + int fd = -1; + int ret = 0; + int ndevs = 0; + int i = 0; + int replacing = 0; + struct btrfs_fs_devices *fs_devices_mnt = NULL; + struct btrfs_ioctl_dev_info_args *di_args; + struct btrfs_ioctl_dev_info_args tmp; + char mp[PATH_MAX]; + DIR *dirstream = NULL; + + memset(fi_args, 0, sizeof(*fi_args)); + + if (is_block_device(path) == 1) { + struct btrfs_super_block *disk_super; + char buf[BTRFS_SUPER_INFO_SIZE]; + u64 devid; + + /* Ensure it's mounted, then set path to the mountpoint */ + fd = open(path, O_RDONLY); + if (fd < 0) { + ret = -errno; + error("cannot open %s: %s", path, strerror(errno)); + goto out; + } + ret = check_mounted_where(fd, path, mp, sizeof(mp), + &fs_devices_mnt); + if (!ret) { + ret = -EINVAL; + goto out; + } + if (ret < 0) + goto out; + path = mp; + /* Only fill in this one device */ + fi_args->num_devices = 1; + + disk_super = (struct btrfs_super_block *)buf; + ret = btrfs_read_dev_super(fd, disk_super, + BTRFS_SUPER_INFO_OFFSET, 0); + if (ret < 0) { + ret = -EIO; + goto out; + } + devid = btrfs_stack_device_id(&disk_super->dev_item); + + fi_args->max_id = devid; + i = devid; + + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); + close(fd); + } + + /* at this point path must not be for a block device */ + fd = open_file_or_dir(path, &dirstream); + if (fd < 0) { + ret = -errno; + goto out; + } + + /* fill in fi_args if not just a single device */ + if (fi_args->num_devices != 1) { + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); + if (ret < 0) { + ret = -errno; + goto out; + } + + /* + * The fs_args->num_devices does not include seed devices + */ + ret = search_chunk_tree_for_fs_info(fd, fi_args); + if (ret) + goto out; + + /* + * search_chunk_tree_for_fs_info() will lacks the devid 0 + * so manual probe for it here. + */ + ret = get_device_info(fd, 0, &tmp); + if (!ret) { + fi_args->num_devices++; + ndevs++; + replacing = 1; + if (i == 0) + i++; + } + } + + if (!fi_args->num_devices) + goto out; + + di_args = *di_ret = malloc((fi_args->num_devices) * sizeof(*di_args)); + if (!di_args) { + ret = -errno; + goto out; + } + + if (replacing) + memcpy(di_args, &tmp, sizeof(tmp)); + for (; i <= fi_args->max_id; ++i) { + ret = get_device_info(fd, i, &di_args[ndevs]); + if (ret == -ENODEV) + continue; + if (ret) + goto out; + ndevs++; + } + + /* + * only when the only dev we wanted to find is not there then + * let any error be returned + */ + if (fi_args->num_devices != 1) { + BUG_ON(ndevs == 0); + ret = 0; + } + +out: + close_file_or_dir(fd, dirstream); + return ret; +} + +#define isoctal(c) (((c) & ~7) == '0') + +static inline void translate(char *f, char *t) +{ + while (*f != '\0') { + if (*f == '\\' && + isoctal(f[1]) && isoctal(f[2]) && isoctal(f[3])) { + *t++ = 64*(f[1] & 7) + 8*(f[2] & 7) + (f[3] & 7); + f += 4; + } else + *t++ = *f++; + } + *t = '\0'; + return; +} + +/* + * Checks if the swap device. + * Returns 1 if swap device, < 0 on error or 0 if not swap device. 
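+ *
+ * Works by scanning /proc/swaps, whose entries look roughly like
+ * (illustrative): "/dev/sda2  partition  1048572  0  -2"; only the
+ * first field is used, after translate() decodes octal escapes such
+ * as "\040" back into spaces.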
+ */ +static int is_swap_device(const char *file) +{ + FILE *f; + struct stat st_buf; + dev_t dev; + ino_t ino = 0; + char tmp[PATH_MAX]; + char buf[PATH_MAX]; + char *cp; + int ret = 0; + + if (stat(file, &st_buf) < 0) + return -errno; + if (S_ISBLK(st_buf.st_mode)) + dev = st_buf.st_rdev; + else if (S_ISREG(st_buf.st_mode)) { + dev = st_buf.st_dev; + ino = st_buf.st_ino; + } else + return 0; + + if ((f = fopen("/proc/swaps", "r")) == NULL) + return 0; + + /* skip the first line */ + if (fgets(tmp, sizeof(tmp), f) == NULL) + goto out; + + while (fgets(tmp, sizeof(tmp), f) != NULL) { + if ((cp = strchr(tmp, ' ')) != NULL) + *cp = '\0'; + if ((cp = strchr(tmp, '\t')) != NULL) + *cp = '\0'; + translate(tmp, buf); + if (stat(buf, &st_buf) != 0) + continue; + if (S_ISBLK(st_buf.st_mode)) { + if (dev == st_buf.st_rdev) { + ret = 1; + break; + } + } else if (S_ISREG(st_buf.st_mode)) { + if (dev == st_buf.st_dev && ino == st_buf.st_ino) { + ret = 1; + break; + } + } + } + +out: + fclose(f); + + return ret; +} + +/* + * Check for existing filesystem or partition table on device. + * Returns: + * 1 for existing fs or partition + * 0 for nothing found + * -1 for internal error + */ +static int check_overwrite(const char *device) +{ + const char *type; + blkid_probe pr = NULL; + int ret; + blkid_loff_t size; + + if (!device || !*device) + return 0; + + ret = -1; /* will reset on success of all setup calls */ + + pr = blkid_new_probe_from_filename(device); + if (!pr) + goto out; + + size = blkid_probe_get_size(pr); + if (size < 0) + goto out; + + /* nothing to overwrite on a 0-length device */ + if (size == 0) { + ret = 0; + goto out; + } + + ret = blkid_probe_enable_partitions(pr, 1); + if (ret < 0) + goto out; + + ret = blkid_do_fullprobe(pr); + if (ret < 0) + goto out; + + /* + * Blkid returns 1 for nothing found and 0 when it finds a signature, + * but we want the exact opposite, so reverse the return value here. + * + * In addition print some useful diagnostics about what actually is + * on the device. 
+ */ + if (ret) { + ret = 0; + goto out; + } + + if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) { + fprintf(stderr, + "%s appears to contain an existing " + "filesystem (%s).\n", device, type); + } else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) { + fprintf(stderr, + "%s appears to contain a partition " + "table (%s).\n", device, type); + } else { + fprintf(stderr, + "%s appears to contain something weird " + "according to blkid\n", device); + } + ret = 1; + +out: + if (pr) + blkid_free_probe(pr); + if (ret == -1) + fprintf(stderr, + "probe of %s failed, cannot detect " + "existing filesystem.\n", device); + return ret; +} + +static int group_profile_devs_min(u64 flag) +{ + switch (flag & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ + case BTRFS_BLOCK_GROUP_DUP: + return 1; + case BTRFS_BLOCK_GROUP_RAID0: + case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID5: + return 2; + case BTRFS_BLOCK_GROUP_RAID6: + return 3; + case BTRFS_BLOCK_GROUP_RAID10: + return 4; + default: + return -1; + } +} + +int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile, + u64 dev_cnt, int mixed, int ssd) +{ + u64 allowed = 0; + + switch (dev_cnt) { + default: + case 4: + allowed |= BTRFS_BLOCK_GROUP_RAID10; + case 3: + allowed |= BTRFS_BLOCK_GROUP_RAID6; + case 2: + allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5; + case 1: + allowed |= BTRFS_BLOCK_GROUP_DUP; + } + + if (dev_cnt > 1 && + ((metadata_profile | data_profile) & BTRFS_BLOCK_GROUP_DUP)) { + warning("DUP is not recommended on filesystem with multiple devices"); + } + if (metadata_profile & ~allowed) { + fprintf(stderr, + "ERROR: unable to create FS with metadata profile %s " + "(have %llu devices but %d devices are required)\n", + btrfs_group_profile_str(metadata_profile), dev_cnt, + group_profile_devs_min(metadata_profile)); + return 1; + } + if (data_profile & ~allowed) { + fprintf(stderr, + "ERROR: unable to create FS with data profile %s " + "(have %llu devices but %d devices are required)\n", + btrfs_group_profile_str(data_profile), dev_cnt, + group_profile_devs_min(data_profile)); + return 1; + } + + warning_on(!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP) && ssd, + "DUP may not actually lead to 2 copies on the device, see manual page"); + + return 0; +} + +int group_profile_max_safe_loss(u64 flags) +{ + switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ + case BTRFS_BLOCK_GROUP_DUP: + case BTRFS_BLOCK_GROUP_RAID0: + return 0; + case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID5: + case BTRFS_BLOCK_GROUP_RAID10: + return 1; + case BTRFS_BLOCK_GROUP_RAID6: + return 2; + default: + return -1; + } +} + +/* + * Check if a device is suitable for btrfs + * returns: + * 1: something is wrong, an error is printed + * 0: all is fine + */ +int test_dev_for_mkfs(const char *file, int force_overwrite) +{ + int ret, fd; + struct stat st; + + ret = is_swap_device(file); + if (ret < 0) { + fprintf(stderr, "ERROR: checking status of %s: %s\n", file, + strerror(-ret)); + return 1; + } + if (ret == 1) { + fprintf(stderr, "ERROR: %s is a swap device\n", file); + return 1; + } + if (!force_overwrite) { + if (check_overwrite(file)) { + fprintf(stderr, "Use the -f option to force overwrite.\n"); + return 1; + } + } + ret = check_mounted(file); + if (ret < 0) { + fprintf(stderr, "ERROR: checking mount status of %s: %s\n", + file, strerror(-ret)); + return 1; + } + if (ret == 1) { + fprintf(stderr, "ERROR: %s is mounted\n", file); + return 1; 
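+ /*
+ * (Illustrative note: the open below is the usual block-device busy
+ * probe; with O_RDWR|O_EXCL the kernel fails the open with EBUSY
+ * while anything else, such as a mount or device-mapper, holds the
+ * device.)
+ */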
+ } + /* check if the device is busy */ + fd = open(file, O_RDWR|O_EXCL); + if (fd < 0) { + fprintf(stderr, "ERROR: unable to open %s: %s\n", file, + strerror(errno)); + return 1; + } + if (fstat(fd, &st)) { + fprintf(stderr, "ERROR: unable to stat %s: %s\n", file, + strerror(errno)); + close(fd); + return 1; + } + if (!S_ISBLK(st.st_mode)) { + fprintf(stderr, "ERROR: %s is not a block device\n", file); + close(fd); + return 1; + } + close(fd); + return 0; +} + +int btrfs_scan_lblkid(void) +{ + int fd = -1; + int ret; + u64 num_devices; + struct btrfs_fs_devices *tmp_devices; + blkid_dev_iterate iter = NULL; + blkid_dev dev = NULL; + blkid_cache cache = NULL; + char path[PATH_MAX]; + + if (btrfs_scan_done) + return 0; + + if (blkid_get_cache(&cache, NULL) < 0) { + printf("ERROR: lblkid cache get failed\n"); + return 1; + } + blkid_probe_all(cache); + iter = blkid_dev_iterate_begin(cache); + blkid_dev_set_search(iter, "TYPE", "btrfs"); + while (blkid_dev_next(iter, &dev) == 0) { + dev = blkid_verify(cache, dev); + if (!dev) + continue; + /* if we are here its definitely a btrfs disk*/ + strncpy_null(path, blkid_dev_devname(dev)); + + fd = open(path, O_RDONLY); + if (fd < 0) { + printf("ERROR: could not open %s\n", path); + continue; + } + ret = btrfs_scan_one_device(fd, path, &tmp_devices, + &num_devices, BTRFS_SUPER_INFO_OFFSET, 0); + if (ret) { + printf("ERROR: could not scan %s\n", path); + close (fd); + continue; + } + + close(fd); + } + blkid_dev_iterate_end(iter); + blkid_put_cache(cache); + + btrfs_scan_done = 1; + + return 0; +} + +int is_vol_small(const char *file) +{ + int fd = -1; + int e; + struct stat st; + u64 size; + + fd = open(file, O_RDONLY); + if (fd < 0) + return -errno; + if (fstat(fd, &st) < 0) { + e = -errno; + close(fd); + return e; + } + size = btrfs_device_size(fd, &st); + if (size == 0) { + close(fd); + return -1; + } + if (size < BTRFS_MKFS_SMALL_VOLUME_SIZE) { + close(fd); + return 1; + } else { + close(fd); + return 0; + } +} + +/* + * This reads a line from the stdin and only returns non-zero if the + * first whitespace delimited token is a case insensitive match with yes + * or y. 
+ */ +int ask_user(const char *question) +{ + char buf[30] = {0,}; + char *saveptr = NULL; + char *answer; + + printf("%s [y/N]: ", question); + + return fgets(buf, sizeof(buf) - 1, stdin) && + (answer = strtok_r(buf, " \t\n\r", &saveptr)) && + (!strcasecmp(answer, "yes") || !strcasecmp(answer, "y")); +} + +/* + * For a given: + * - file or directory return the containing tree root id + * - subvolume return its own tree id + * - BTRFS_EMPTY_SUBVOL_DIR_OBJECTID (directory with ino == 2) the result is + * undefined and function returns -1 + */ +int lookup_ino_rootid(int fd, u64 *rootid) +{ + struct btrfs_ioctl_ino_lookup_args args; + int ret; + + memset(&args, 0, sizeof(args)); + args.treeid = 0; + args.objectid = BTRFS_FIRST_FREE_OBJECTID; + + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to lookup root id - %s\n", + strerror(errno)); + return ret; + } + + *rootid = args.treeid; + + return 0; +} + +/* + * return 0 if a btrfs mount point is found + * return 1 if a mount point is found but not btrfs + * return <0 if something goes wrong + */ +int find_mount_root(const char *path, char **mount_root) +{ + FILE *mnttab; + int fd; + struct mntent *ent; + int len; + int ret; + int not_btrfs = 1; + int longest_matchlen = 0; + char *longest_match = NULL; + + fd = open(path, O_RDONLY | O_NOATIME); + if (fd < 0) + return -errno; + close(fd); + + mnttab = setmntent("/proc/self/mounts", "r"); + if (!mnttab) + return -errno; + + while ((ent = getmntent(mnttab))) { + len = strlen(ent->mnt_dir); + if (strncmp(ent->mnt_dir, path, len) == 0) { + /* match found and use the latest match */ + if (longest_matchlen <= len) { + free(longest_match); + longest_matchlen = len; + longest_match = strdup(ent->mnt_dir); + not_btrfs = strcmp(ent->mnt_type, "btrfs"); + } + } + } + endmntent(mnttab); + + if (!longest_match) + return -ENOENT; + if (not_btrfs) { + free(longest_match); + return 1; + } + + ret = 0; + *mount_root = realpath(longest_match, NULL); + if (!*mount_root) + ret = -errno; + + free(longest_match); + return ret; +} + +int test_minimum_size(const char *file, u32 nodesize) +{ + int fd; + struct stat statbuf; + + fd = open(file, O_RDONLY); + if (fd < 0) + return -errno; + if (stat(file, &statbuf) < 0) { + close(fd); + return -errno; + } + if (btrfs_device_size(fd, &statbuf) < btrfs_min_dev_size(nodesize)) { + close(fd); + return 1; + } + close(fd); + return 0; +} + + +/* + * Test if path is a directory + * Returns: + * 0 - path exists but it is not a directory + * 1 - path exists and it is a directory + * < 0 - error + */ +int test_isdir(const char *path) +{ + struct stat st; + int ret; + + ret = stat(path, &st); + if (ret < 0) + return -errno; + + return !!S_ISDIR(st.st_mode); +} + +void units_set_mode(unsigned *units, unsigned mode) +{ + unsigned base = *units & UNITS_MODE_MASK; + + *units = base | mode; +} + +void units_set_base(unsigned *units, unsigned base) +{ + unsigned mode = *units & ~UNITS_MODE_MASK; + + *units = base | mode; +} + +int find_next_key(struct btrfs_path *path, struct btrfs_key *key) +{ + int level; + + for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + if (!path->nodes[level]) + break; + if (path->slots[level] + 1 >= + btrfs_header_nritems(path->nodes[level])) + continue; + if (level == 0) + btrfs_item_key_to_cpu(path->nodes[level], key, + path->slots[level] + 1); + else + btrfs_node_key_to_cpu(path->nodes[level], key, + path->slots[level] + 1); + return 0; + } + return 1; +} + +const char* btrfs_group_type_str(u64 flag) +{ + u64 
mask = BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_SPACE_INFO_GLOBAL_RSV; + + switch (flag & mask) { + case BTRFS_BLOCK_GROUP_DATA: + return "Data"; + case BTRFS_BLOCK_GROUP_SYSTEM: + return "System"; + case BTRFS_BLOCK_GROUP_METADATA: + return "Metadata"; + case BTRFS_BLOCK_GROUP_DATA|BTRFS_BLOCK_GROUP_METADATA: + return "Data+Metadata"; + case BTRFS_SPACE_INFO_GLOBAL_RSV: + return "GlobalReserve"; + default: + return "unknown"; + } +} + +const char* btrfs_group_profile_str(u64 flag) +{ + switch (flag & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: + return "single"; + case BTRFS_BLOCK_GROUP_RAID0: + return "RAID0"; + case BTRFS_BLOCK_GROUP_RAID1: + return "RAID1"; + case BTRFS_BLOCK_GROUP_RAID5: + return "RAID5"; + case BTRFS_BLOCK_GROUP_RAID6: + return "RAID6"; + case BTRFS_BLOCK_GROUP_DUP: + return "DUP"; + case BTRFS_BLOCK_GROUP_RAID10: + return "RAID10"; + default: + return "unknown"; + } +} + +u64 disk_size(const char *path) +{ + struct statfs sfs; + + if (statfs(path, &sfs) < 0) + return 0; + else + return sfs.f_bsize * sfs.f_blocks; +} + +u64 get_partition_size(const char *dev) +{ + u64 result; + int fd = open(dev, O_RDONLY); + + if (fd < 0) + return 0; + if (ioctl(fd, BLKGETSIZE64, &result) < 0) { + close(fd); + return 0; + } + close(fd); + + return result; +} + +int btrfs_tree_search2_ioctl_supported(int fd) +{ + struct btrfs_ioctl_search_args_v2 *args2; + struct btrfs_ioctl_search_key *sk; + int args2_size = 1024; + char args2_buf[args2_size]; + int ret; + static int v2_supported = -1; + + if (v2_supported != -1) + return v2_supported; + + args2 = (struct btrfs_ioctl_search_args_v2 *)args2_buf; + sk = &(args2->key); + + /* + * Search for the extent tree item in the root tree. + */ + sk->tree_id = BTRFS_ROOT_TREE_OBJECTID; + sk->min_objectid = BTRFS_EXTENT_TREE_OBJECTID; + sk->max_objectid = BTRFS_EXTENT_TREE_OBJECTID; + sk->min_type = BTRFS_ROOT_ITEM_KEY; + sk->max_type = BTRFS_ROOT_ITEM_KEY; + sk->min_offset = 0; + sk->max_offset = (u64)-1; + sk->min_transid = 0; + sk->max_transid = (u64)-1; + sk->nr_items = 1; + args2->buf_size = args2_size - sizeof(struct btrfs_ioctl_search_args_v2); + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args2); + if (ret == -EOPNOTSUPP) + v2_supported = 0; + else if (ret == 0) + v2_supported = 1; + else + return ret; + + return v2_supported; +} + +int btrfs_check_nodesize(u32 nodesize, u32 sectorsize, u64 features) +{ + if (nodesize < sectorsize) { + fprintf(stderr, + "ERROR: Illegal nodesize %u (smaller than %u)\n", + nodesize, sectorsize); + return -1; + } else if (nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { + fprintf(stderr, + "ERROR: Illegal nodesize %u (larger than %u)\n", + nodesize, BTRFS_MAX_METADATA_BLOCKSIZE); + return -1; + } else if (nodesize & (sectorsize - 1)) { + fprintf(stderr, + "ERROR: Illegal nodesize %u (not aligned to %u)\n", + nodesize, sectorsize); + return -1; + } else if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS && + nodesize != sectorsize) { + fprintf(stderr, + "ERROR: Illegal nodesize %u (not equal to %u for mixed block group)\n", + nodesize, sectorsize); + return -1; + } + return 0; +} + +/* + * Copy a path argument from SRC to DEST and check the SRC length if it's at + * most PATH_MAX and fits into DEST. DESTLEN is supposed to be exact size of + * the buffer. + * The destination buffer is zero terminated. + * Return < 0 for error, 0 otherwise. 
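+ *
+ * A hypothetical caller copying an argv entry into a fixed buffer:
+ * char dest[PATH_MAX];
+ * if (arg_copy_path(dest, argv[optind], sizeof(dest)) < 0)
+ * ... reject the overlong argument (-ENAMETOOLONG) ...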
+ */ +int arg_copy_path(char *dest, const char *src, int destlen) +{ + size_t len = strlen(src); + + if (len >= PATH_MAX || len >= destlen) + return -ENAMETOOLONG; + + __strncpy_null(dest, src, destlen); + + return 0; +} + +unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode) +{ + unsigned int unit_mode = UNITS_DEFAULT; + int arg_i; + int arg_end; + + for (arg_i = 0; arg_i < *argc; arg_i++) { + if (!strcmp(argv[arg_i], "--")) + break; + + if (!strcmp(argv[arg_i], "--raw")) { + unit_mode = UNITS_RAW; + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "--human-readable")) { + unit_mode = UNITS_HUMAN_BINARY; + argv[arg_i] = NULL; + continue; + } + + if (!strcmp(argv[arg_i], "--iec")) { + units_set_mode(&unit_mode, UNITS_BINARY); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "--si")) { + units_set_mode(&unit_mode, UNITS_DECIMAL); + argv[arg_i] = NULL; + continue; + } + + if (!strcmp(argv[arg_i], "--kbytes")) { + units_set_base(&unit_mode, UNITS_KBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "--mbytes")) { + units_set_base(&unit_mode, UNITS_MBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "--gbytes")) { + units_set_base(&unit_mode, UNITS_GBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "--tbytes")) { + units_set_base(&unit_mode, UNITS_TBYTES); + argv[arg_i] = NULL; + continue; + } + + if (!df_mode) + continue; + + if (!strcmp(argv[arg_i], "-b")) { + unit_mode = UNITS_RAW; + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-h")) { + unit_mode = UNITS_HUMAN_BINARY; + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-H")) { + unit_mode = UNITS_HUMAN_DECIMAL; + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-k")) { + units_set_base(&unit_mode, UNITS_KBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-m")) { + units_set_base(&unit_mode, UNITS_MBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-g")) { + units_set_base(&unit_mode, UNITS_GBYTES); + argv[arg_i] = NULL; + continue; + } + if (!strcmp(argv[arg_i], "-t")) { + units_set_base(&unit_mode, UNITS_TBYTES); + argv[arg_i] = NULL; + continue; + } + } + + for (arg_i = 0, arg_end = 0; arg_i < *argc; arg_i++) { + if (!argv[arg_i]) + continue; + argv[arg_end] = argv[arg_i]; + arg_end++; + } + + *argc = arg_end; + + return unit_mode; +} + +int string_is_numerical(const char *str) +{ + if (!(*str >= '0' && *str <= '9')) + return 0; + while (*str >= '0' && *str <= '9') + str++; + if (*str != '\0') + return 0; + return 1; +} + +/* + * Preprocess @argv with getopt_long to reorder options and consume the "--" + * option separator. + * Unknown short and long options are reported, optionally the @usage is printed + * before exit. 
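+ *
+ * E.g. for a hypothetical "cmd subcmd -- --odd-name" invocation, the "--"
+ * separator is consumed here, so afterwards argv[optind] is "--odd-name"
+ * and callers can treat it as a plain argument.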
+ */ +void clean_args_no_options(int argc, char *argv[], const char * const *usagestr) +{ + static const struct option long_options[] = { + {NULL, 0, NULL, 0} + }; + + while (1) { + int c = getopt_long(argc, argv, "", long_options, NULL); + + if (c < 0) + break; + + switch (c) { + default: + if (usagestr) + usage(usagestr); + } + } +} + +/* Subvolume helper functions */ +/* + * test if name is a correct subvolume name + * this function return + * 0-> name is not a correct subvolume name + * 1-> name is a correct subvolume name + */ +int test_issubvolname(const char *name) +{ + return name[0] != '\0' && !strchr(name, '/') && + strcmp(name, ".") && strcmp(name, ".."); +} + +/* + * Test if path is a subvolume + * Returns: + * 0 - path exists but it is not a subvolume + * 1 - path exists and it is a subvolume + * < 0 - error + */ +int test_issubvolume(const char *path) +{ + struct stat st; + struct statfs stfs; + int res; + + res = stat(path, &st); + if (res < 0) + return -errno; + + if (st.st_ino != BTRFS_FIRST_FREE_OBJECTID || !S_ISDIR(st.st_mode)) + return 0; + + res = statfs(path, &stfs); + if (res < 0) + return -errno; + + return (int)stfs.f_type == BTRFS_SUPER_MAGIC; +} + +const char *subvol_strip_mountpoint(const char *mnt, const char *full_path) +{ + int len = strlen(mnt); + if (!len) + return full_path; + + if (mnt[len - 1] != '/') + len += 1; + + return full_path + len; +} + +/* + * Returns + * <0: Std error + * 0: All fine + * 1: Error; and error info printed to the terminal. Fixme. + * 2: If the fullpath is root tree instead of subvol tree + */ +int get_subvol_info(const char *fullpath, struct root_info *get_ri) +{ + u64 sv_id; + int ret = 1; + int fd = -1; + int mntfd = -1; + char *mnt = NULL; + const char *svpath = NULL; + DIR *dirstream1 = NULL; + DIR *dirstream2 = NULL; + + ret = test_issubvolume(fullpath); + if (ret < 0) + return ret; + if (!ret) { + error("not a subvolume: %s", fullpath); + return 1; + } + + ret = find_mount_root(fullpath, &mnt); + if (ret < 0) + return ret; + if (ret > 0) { + error("%s doesn't belong to btrfs mount point", fullpath); + return 1; + } + ret = 1; + svpath = subvol_strip_mountpoint(mnt, fullpath); + + fd = btrfs_open_dir(fullpath, &dirstream1, 1); + if (fd < 0) + goto out; + + ret = btrfs_list_get_path_rootid(fd, &sv_id); + if (ret) { + error("can't get rootid for '%s'", fullpath); + goto out; + } + + mntfd = btrfs_open_dir(mnt, &dirstream2, 1); + if (mntfd < 0) + goto out; + + if (sv_id == BTRFS_FS_TREE_OBJECTID) { + ret = 2; + /* + * So that caller may decide if thats an error or just fine. + */ + goto out; + } + + memset(get_ri, 0, sizeof(*get_ri)); + get_ri->root_id = sv_id; + + ret = btrfs_get_subvol(mntfd, get_ri); + if (ret) + error("can't find '%s': %d", svpath, ret); + +out: + close_file_or_dir(mntfd, dirstream2); + close_file_or_dir(fd, dirstream1); + free(mnt); + + return ret; +} diff --git a/tools/libfsimage/btrfs/utils.h b/tools/libfsimage/btrfs/utils.h new file mode 100644 index 0000000..9e890f9 --- /dev/null +++ b/tools/libfsimage/btrfs/utils.h @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_UTILS_H__ +#define __BTRFS_UTILS_H__ + +#include <sys/stat.h> +#include "ctree.h" +#include <dirent.h> +#include <stdarg.h> +#include "internal.h" +#include "btrfs-list.h" + +#define BTRFS_MKFS_SYSTEM_GROUP_SIZE (4 * 1024 * 1024) +#define BTRFS_MKFS_SMALL_VOLUME_SIZE (1024 * 1024 * 1024) +#define BTRFS_MKFS_DEFAULT_NODE_SIZE 16384 +#define BTRFS_MKFS_DEFAULT_FEATURES \ + (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF \ + | BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) + +/* + * Avoid multi-device features (RAID56) and mixed block groups + */ +#define BTRFS_CONVERT_ALLOWED_FEATURES \ + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF \ + | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL \ + | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO \ + | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 \ + | BTRFS_FEATURE_INCOMPAT_BIG_METADATA \ + | BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF \ + | BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA \ + | BTRFS_FEATURE_INCOMPAT_NO_HOLES) + +#define BTRFS_FEATURE_LIST_ALL (1ULL << 63) + +#define BTRFS_SCAN_MOUNTED (1ULL << 0) +#define BTRFS_SCAN_LBLKID (1ULL << 1) + +#define BTRFS_UPDATE_KERNEL 1 + +#define BTRFS_ARG_UNKNOWN 0 +#define BTRFS_ARG_MNTPOINT 1 +#define BTRFS_ARG_UUID 2 +#define BTRFS_ARG_BLKDEV 3 +#define BTRFS_ARG_REG 4 + +#define BTRFS_UUID_UNPARSED_SIZE 37 + +#define ARGV0_BUF_SIZE PATH_MAX + +#define GETOPT_VAL_SI 256 +#define GETOPT_VAL_IEC 257 +#define GETOPT_VAL_RAW 258 +#define GETOPT_VAL_HUMAN_READABLE 259 +#define GETOPT_VAL_KBYTES 260 +#define GETOPT_VAL_MBYTES 261 +#define GETOPT_VAL_GBYTES 262 +#define GETOPT_VAL_TBYTES 263 + +#define GETOPT_VAL_HELP 270 + +int check_argc_exact(int nargs, int expected); +int check_argc_min(int nargs, int expected); +int check_argc_max(int nargs, int expected); + +void fixup_argv0(char **argv, const char *token); +void set_argv0(char **argv); + +/* + * Output modes of size + */ +#define UNITS_RESERVED (0) +#define UNITS_BYTES (1) +#define UNITS_KBYTES (2) +#define UNITS_MBYTES (3) +#define UNITS_GBYTES (4) +#define UNITS_TBYTES (5) +#define UNITS_RAW (1U << UNITS_MODE_SHIFT) +#define UNITS_BINARY (2U << UNITS_MODE_SHIFT) +#define UNITS_DECIMAL (3U << UNITS_MODE_SHIFT) +#define UNITS_MODE_MASK ((1U << UNITS_MODE_SHIFT) - 1) +#define UNITS_MODE_SHIFT (8) +#define UNITS_HUMAN_BINARY (UNITS_BINARY) +#define UNITS_HUMAN_DECIMAL (UNITS_DECIMAL) +#define UNITS_HUMAN (UNITS_HUMAN_BINARY) +#define UNITS_DEFAULT (UNITS_HUMAN) + +void units_set_mode(unsigned *units, unsigned mode); +void units_set_base(unsigned *units, unsigned base); + +void btrfs_list_all_fs_features(u64 mask_disallowed); +char* btrfs_parse_fs_features(char *namelist, u64 *flags); +void btrfs_process_fs_features(u64 flags); +void btrfs_parse_features_to_string(char *buf, u64 flags); + +struct btrfs_mkfs_config { + char *label; + char *fs_uuid; + char *chunk_uuid; + u64 blocks[8]; + u64 num_bytes; + u32 nodesize; + u32 sectorsize; + u32 stripesize; + u64 features; + + /* Super bytenr after make_btrfs */ + u64 super_bytenr; +}; + +int make_btrfs(int fd, struct btrfs_mkfs_config *cfg); +int btrfs_make_root_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid); +int btrfs_prepare_device(int fd, const char *file, int zero_end, + u64 *block_count_ret, u64 max_block_count, int discard); +int 
btrfs_add_to_fsid(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int fd, char *path, + u64 block_count, u32 io_width, u32 io_align, + u32 sectorsize); +int btrfs_scan_for_fsid(int run_ioctls); +int btrfs_register_one_device(const char *fname); +int btrfs_register_all_devices(void); +char *canonicalize_dm_name(const char *ptname); +char *canonicalize_path(const char *path); +int check_mounted(const char *devicename); +int check_mounted_where(int fd, const char *file, char *where, int size, + struct btrfs_fs_devices **fs_devices_mnt); +int btrfs_device_already_in_root(struct btrfs_root *root, int fd, + int super_offset); + +int pretty_size_snprintf(u64 size, char *str, size_t str_bytes, unsigned unit_mode); +#define pretty_size(size) pretty_size_mode(size, UNITS_DEFAULT) +const char *pretty_size_mode(u64 size, unsigned mode); + +int get_mountpt(char *dev, char *mntpt, size_t size); +u64 parse_size(char *s); +u64 parse_qgroupid(const char *p); +u64 arg_strtou64(const char *str); +int arg_copy_path(char *dest, const char *src, int destlen); +int open_file_or_dir(const char *fname, DIR **dirstream); +int open_file_or_dir3(const char *fname, DIR **dirstream, int open_flags); +void close_file_or_dir(int fd, DIR *dirstream); +int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args *fi_args, + struct btrfs_ioctl_dev_info_args **di_ret); +int get_label(const char *btrfs_dev, char *label); +int set_label(const char *btrfs_dev, const char *label); + +char *__strncpy_null(char *dest, const char *src, size_t n); +int is_block_device(const char *file); +int is_mount_point(const char *file); +int check_arg_type(const char *input); +int open_path_or_dev_mnt(const char *path, DIR **dirstream, int verbose); +int btrfs_open_dir(const char *path, DIR **dirstream, int verbose); +u64 btrfs_device_size(int fd, struct stat *st); +/* Helper to always get proper size of the destination string */ +#define strncpy_null(dest, src) __strncpy_null(dest, src, sizeof(dest)) +int test_dev_for_mkfs(const char *file, int force_overwrite); +int get_label_mounted(const char *mount_path, char *labelp); +int get_label_unmounted(const char *dev, char *label); +int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile, + u64 dev_cnt, int mixed, int ssd); +int group_profile_max_safe_loss(u64 flags); +int is_vol_small(const char *file); +int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify); +int ask_user(const char *question); +int lookup_ino_rootid(int fd, u64 *rootid); +int btrfs_scan_lblkid(void); +int get_btrfs_mount(const char *dev, char *mp, size_t mp_size); +int find_mount_root(const char *path, char **mount_root); +int get_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args); +int test_uuid_unique(char *fs_uuid); +u64 disk_size(const char *path); +int get_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args); +u64 get_partition_size(const char *dev); + +int test_minimum_size(const char *file, u32 leafsize); +int test_issubvolname(const char *name); +int test_issubvolume(const char *path); +int test_isdir(const char *path); + +const char *subvol_strip_mountpoint(const char *mnt, const char *full_path); +int get_subvol_info(const char *fullpath, struct root_info *get_ri); + +/* + * Btrfs minimum size calculation is complicated, it should include at least: + * 1. system group size + * 2. minimum global block reserve + * 3. metadata used at mkfs + * 4. space reservation to create uuid for first mount. 
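+ * E.g. with the default 16KiB nodesize, the helpers below evaluate to:
+ * reserve = 16KiB << 10 = 16MiB, so the per-device minimum is
+ * 2 * (4MiB system group + 16MiB reserve) = 40MiB.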
+ * Also, raid factor should also be taken into consideration. + * To avoid the overkill calculation, (system group + global block rsv) * 2 + * for *EACH* device should be good enough. + */ +static inline u64 btrfs_min_global_blk_rsv_size(u32 leafsize) +{ + return leafsize << 10; +} +static inline u64 btrfs_min_dev_size(u32 leafsize) +{ + return 2 * (BTRFS_MKFS_SYSTEM_GROUP_SIZE + + btrfs_min_global_blk_rsv_size(leafsize)); +} + +int find_next_key(struct btrfs_path *path, struct btrfs_key *key); +const char* btrfs_group_type_str(u64 flag); +const char* btrfs_group_profile_str(u64 flag); + +/* + * Get the length of the string converted from a u64 number. + * + * Result is equal to log10(num) + 1, but without the use of math library. + */ +static inline int count_digits(u64 num) +{ + int ret = 0; + + if (num == 0) + return 1; + while (num > 0) { + ret++; + num /= 10; + } + return ret; +} + +static inline u64 div_factor(u64 num, int factor) +{ + if (factor == 10) + return num; + num *= factor; + num /= 10; + return num; +} + +int btrfs_tree_search2_ioctl_supported(int fd); +int btrfs_check_nodesize(u32 nodesize, u32 sectorsize, u64 features); + +const char *get_argv0_buf(void); + +#define HELPINFO_UNITS_LONG \ + "--raw raw numbers in bytes", \ + "--human-readable human friendly numbers, base 1024 (default)", \ + "--iec use 1024 as a base (KiB, MiB, GiB, TiB)", \ + "--si use 1000 as a base (kB, MB, GB, TB)", \ + "--kbytes show sizes in KiB, or kB with --si", \ + "--mbytes show sizes in MiB, or MB with --si", \ + "--gbytes show sizes in GiB, or GB with --si", \ + "--tbytes show sizes in TiB, or TB with --si" + +#define HELPINFO_UNITS_SHORT_LONG \ + "-b|--raw raw numbers in bytes", \ + "-h|--human-readable", \ + " human friendly numbers, base 1024 (default)", \ + "-H human friendly numbers, base 1000", \ + "--iec use 1024 as a base (KiB, MiB, GiB, TiB)", \ + "--si use 1000 as a base (kB, MB, GB, TB)", \ + "-k|--kbytes show sizes in KiB, or kB with --si", \ + "-m|--mbytes show sizes in MiB, or MB with --si", \ + "-g|--gbytes show sizes in GiB, or GB with --si", \ + "-t|--tbytes show sizes in TiB, or TB with --si" + +unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode); +void clean_args_no_options(int argc, char *argv[], const char * const *usage); +int string_is_numerical(const char *str); + +__attribute__ ((format (printf, 1, 2))) +static inline void warning(const char *fmt, ...) +{ + va_list args; + + fputs("WARNING: ", stderr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); +} + +__attribute__ ((format (printf, 1, 2))) +static inline void error(const char *fmt, ...) +{ + va_list args; + + fputs("ERROR: ", stderr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); +} + +__attribute__ ((format (printf, 2, 3))) +static inline int warning_on(int condition, const char *fmt, ...) +{ + va_list args; + + if (!condition) + return 0; + + fputs("WARNING: ", stderr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); + + return 1; +} + +__attribute__ ((format (printf, 2, 3))) +static inline int error_on(int condition, const char *fmt, ...) 
+{ + va_list args; + + if (!condition) + return 0; + + fputs("ERROR: ", stderr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); + + return 1; +} + +#endif diff --git a/tools/libfsimage/btrfs/volumes.c b/tools/libfsimage/btrfs/volumes.c new file mode 100644 index 0000000..4d22db2 --- /dev/null +++ b/tools/libfsimage/btrfs/volumes.c @@ -0,0 +1,2131 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <uuid/uuid.h> +#include <fcntl.h> +#include <unistd.h> +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "volumes.h" +#include "utils.h" + +struct stripe { + struct btrfs_device *dev; + u64 physical; +}; + +static inline int nr_parity_stripes(struct map_lookup *map) +{ + if (map->type & BTRFS_BLOCK_GROUP_RAID5) + return 1; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + return 2; + else + return 0; +} + +static inline int nr_data_stripes(struct map_lookup *map) +{ + return map->num_stripes - nr_parity_stripes(map); +} + +#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) ) + +static LIST_HEAD(fs_uuids); + +static struct btrfs_device *__find_device(struct list_head *head, u64 devid, + u8 *uuid) +{ + struct btrfs_device *dev; + struct list_head *cur; + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid && + !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) { + return dev; + } + } + return NULL; +} + +static struct btrfs_fs_devices *find_fsid(u8 *fsid) +{ + struct list_head *cur; + struct btrfs_fs_devices *fs_devices; + + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + return NULL; +} + +static int device_list_add(const char *path, + struct btrfs_super_block *disk_super, + u64 devid, struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices; + u64 found_transid = btrfs_super_generation(disk_super); + + fs_devices = find_fsid(disk_super->fsid); + if (!fs_devices) { + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) + return -ENOMEM; + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + fs_devices->lowest_devid = (u64)-1; + device = NULL; + } else { + device = __find_device(&fs_devices->devices, devid, + disk_super->dev_item.uuid); + } + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + 
return -ENOMEM; + } + device->fd = -1; + device->devid = devid; + device->generation = found_transid; + memcpy(device->uuid, disk_super->dev_item.uuid, + BTRFS_UUID_SIZE); + device->name = kstrdup(path, GFP_NOFS); + if (!device->name) { + kfree(device); + return -ENOMEM; + } + device->label = kstrdup(disk_super->label, GFP_NOFS); + if (!device->label) { + kfree(device->name); + kfree(device); + return -ENOMEM; + } + device->total_devs = btrfs_super_num_devices(disk_super); + device->super_bytes_used = btrfs_super_bytes_used(disk_super); + device->total_bytes = + btrfs_stack_device_total_bytes(&disk_super->dev_item); + device->bytes_used = + btrfs_stack_device_bytes_used(&disk_super->dev_item); + list_add(&device->dev_list, &fs_devices->devices); + device->fs_devices = fs_devices; + } else if (!device->name || strcmp(device->name, path)) { + char *name = strdup(path); + if (!name) + return -ENOMEM; + kfree(device->name); + device->name = name; + } + + + if (found_transid > fs_devices->latest_trans) { + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + } + if (fs_devices->lowest_devid > devid) { + fs_devices->lowest_devid = devid; + } + *fs_devices_ret = fs_devices; + return 0; +} + +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + struct btrfs_fs_devices *seed_devices; + struct btrfs_device *device; + +again: + while (!list_empty(&fs_devices->devices)) { + device = list_entry(fs_devices->devices.next, + struct btrfs_device, dev_list); + if (device->fd != -1) { + fsync(device->fd); + if (posix_fadvise(device->fd, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + close(device->fd); + device->fd = -1; + } + device->writeable = 0; + list_del(&device->dev_list); + /* free the memory */ + free(device->name); + free(device->label); + free(device); + } + + seed_devices = fs_devices->seed; + fs_devices->seed = NULL; + if (seed_devices) { + struct btrfs_fs_devices *orig; + + orig = fs_devices; + fs_devices = seed_devices; + list_del(&orig->list); + free(orig); + goto again; + } else { + list_del(&fs_devices->list); + free(fs_devices); + } + + return 0; +} + +void btrfs_close_all_devices(void) +{ + struct btrfs_fs_devices *fs_devices; + + while (!list_empty(&fs_uuids)) { + fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices, + list); + btrfs_close_devices(fs_devices); + } +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags) +{ + int fd; + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + int ret; + + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->name) { + printk("no name for device %llu, skip it now\n", device->devid); + continue; + } + + fd = open(device->name, flags); + if (fd < 0) { + ret = -errno; + goto fail; + } + + if (posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + + if (device->devid == fs_devices->latest_devid) + fs_devices->latest_bdev = fd; + if (device->devid == fs_devices->lowest_devid) + fs_devices->lowest_bdev = fd; + device->fd = fd; + if (flags & O_RDWR) + device->writeable = 1; + } + return 0; +fail: + btrfs_close_devices(fs_devices); + return ret; +} + +int btrfs_scan_one_device(int fd, const char *path, + struct btrfs_fs_devices **fs_devices_ret, + u64 *total_devs, u64 super_offset, int super_recover) +{ + struct btrfs_super_block *disk_super; + char buf[BTRFS_SUPER_INFO_SIZE]; + int ret; + u64 devid; + 
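+ /*
+ * Read the superblock at super_offset, pull out this device's devid
+ * and hand the result to device_list_add() above; the registry is
+ * keyed by fsid, so scanning several members of one filesystem
+ * accumulates them on a single btrfs_fs_devices list.
+ */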
+ disk_super = (struct btrfs_super_block *)buf; + ret = btrfs_read_dev_super(fd, disk_super, super_offset, super_recover); + if (ret < 0) + return -EIO; + devid = btrfs_stack_device_id(&disk_super->dev_item); + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP) + *total_devs = 1; + else + *total_devs = btrfs_super_num_devices(disk_super); + + ret = device_list_add(path, disk_super, devid, fs_devices_ret); + + return ret; +} + +/* + * this uses a pretty simple search, the expectation is that it is + * called very infrequently and that a given device has a small number + * of extents + */ +static int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + struct btrfs_path *path, + u64 num_bytes, u64 *start) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + u64 hole_size = 0; + u64 last_byte = 0; + u64 search_start = root->fs_info->alloc_start; + u64 search_end = device->total_bytes; + int ret; + int slot = 0; + int start_found; + struct extent_buffer *l; + + start_found = 0; + path->reada = 2; + + /* FIXME use last free of some kind */ + + /* we don't want to overwrite the superblock on the drive, + * so we make sure to start at an offset of at least 1MB + */ + search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start); + + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + + key.objectid = device->devid; + key.offset = search_start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto error; + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto error; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; +no_more_items: + if (!start_found) { + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + *start = search_start; + start_found = 1; + goto check_pending; + } + *start = last_byte > search_start ? 
+ last_byte : search_start; + if (search_end <= *start) { + ret = -ENOSPC; + goto error; + } + goto check_pending; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + goto no_more_items; + + if (key.offset >= search_start && key.offset > last_byte && + start_found) { + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.offset - last_byte; + if (key.offset > last_byte && + hole_size >= num_bytes) { + *start = last_byte; + goto check_pending; + } + } + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + goto next; + } + + start_found = 1; + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); +next: + path->slots[0]++; + cond_resched(); + } +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + btrfs_release_path(path); + BUG_ON(*start < search_start); + + if (*start + num_bytes > search_end) { + ret = -ENOSPC; + goto error; + } + /* check for pending inserts here */ + return 0; + +error: + btrfs_release_path(path); + return ret; +} + +static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, + u64 num_bytes, u64 *start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *extent; + struct extent_buffer *leaf; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_free_dev_extent(trans, device, path, num_bytes, start); + if (ret) { + goto err; + } + + key.objectid = device->devid; + key.offset = *start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*extent)); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); + btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); + btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); + + write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), + BTRFS_UUID_SIZE); + + btrfs_set_dev_extent_length(leaf, extent, num_bytes); + btrfs_mark_buffer_dirty(leaf); +err: + btrfs_free_path(path); + return ret; +} + +static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_chunk *chunk; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = objectid; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); + if (ret) { + *offset = 0; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != objectid) + *offset = 0; + else { + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + *offset = found_key.offset + + btrfs_chunk_length(path->nodes[0], chunk); + } + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, + u64 *objectid) +{ + int ret; + 
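+ /*
+ * Same idiom as find_next_chunk() above: search for the impossible
+ * key (DEV_ITEMS_OBJECTID, DEV_ITEM_KEY, (u64)-1) and step back with
+ * btrfs_previous_item() to the highest existing devid; e.g. devids
+ * 1 and 2 on disk (hypothetical contents) yield *objectid == 3.
+ */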
struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + BTRFS_DEV_ITEM_KEY); + if (ret) { + *objectid = 1; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.offset + 1; + } + ret = 0; +error: + btrfs_release_path(path); + return ret; +} + +/* + * the device information is stored in the chunk root + * the btrfs_device struct should be fully filled in + */ +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + u64 free_devid = 0; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_next_devid(root, path, &free_devid); + if (ret) + goto out; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = free_devid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*dev_item)); + if (ret) + goto out; + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + device->devid = free_devid; + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_generation(leaf, dev_item, 0); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_set_device_group(leaf, dev_item, 0); + btrfs_set_device_seek_speed(leaf, dev_item, 0); + btrfs_set_device_bandwidth(leaf, dev_item, 0); + btrfs_set_device_start_offset(leaf, dev_item, 0); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); + ptr = (unsigned long)btrfs_device_fsid(dev_item); + write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + ret = 0; + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + + root = device->dev_root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + 
btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+ btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+ btrfs_mark_buffer_dirty(leaf);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *key,
+ struct btrfs_chunk *chunk, int item_size)
+{
+ struct btrfs_super_block *super_copy = root->fs_info->super_copy;
+ struct btrfs_disk_key disk_key;
+ u32 array_size;
+ u8 *ptr;
+
+ array_size = btrfs_super_sys_array_size(super_copy);
+ if (array_size + item_size + sizeof(disk_key)
+ > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
+ return -EFBIG;
+
+ ptr = super_copy->sys_chunk_array + array_size;
+ btrfs_cpu_key_to_disk(&disk_key, key);
+ memcpy(ptr, &disk_key, sizeof(disk_key));
+ ptr += sizeof(disk_key);
+ memcpy(ptr, chunk, item_size);
+ item_size += sizeof(disk_key);
+ btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
+ return 0;
+}
+
+static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
+ int sub_stripes)
+{
+ if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
+ return calc_size;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ return calc_size * (num_stripes / sub_stripes);
+ else if (type & BTRFS_BLOCK_GROUP_RAID5)
+ return calc_size * (num_stripes - 1);
+ else if (type & BTRFS_BLOCK_GROUP_RAID6)
+ return calc_size * (num_stripes - 2);
+ else
+ return calc_size * num_stripes;
+}
+
+static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
+{
+ /* TODO: add a way to store the preferred stripe size */
+ return BTRFS_STRIPE_LEN;
+}
+
+/*
+ * btrfs_device_avail_bytes - count bytes available for alloc_chunk
+ *
+ * It is not equal to "device->total_bytes - device->bytes_used".
+ * No chunk is allocated in the first 1MiB of the device, and none may be
+ * allocated before alloc_start when it is specified,
+ * so search holes from max(1M, alloc_start) to device->total_bytes.
+ */ +static int btrfs_device_avail_bytes(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 *avail_bytes) +{ + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_key key; + struct btrfs_dev_extent *dev_extent = NULL; + struct extent_buffer *l; + u64 search_start = root->fs_info->alloc_start; + u64 search_end = device->total_bytes; + u64 extent_end = 0; + u64 free_bytes = 0; + int ret; + int slot = 0; + + search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = device->devid; + key.offset = root->fs_info->alloc_start; + key.type = BTRFS_DEV_EXTENT_KEY; + + path->reada = 2; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto error; + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto error; + + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + if (key.objectid > device->devid) + break; + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + goto next; + if (key.offset > search_end) + break; + if (key.offset > search_start) + free_bytes += key.offset - search_start; + + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + extent_end = key.offset + btrfs_dev_extent_length(l, + dev_extent); + if (extent_end > search_start) + search_start = extent_end; + if (search_start > search_end) + break; +next: + path->slots[0]++; + cond_resched(); + } + + if (search_start < search_end) + free_bytes += search_end - search_start; + + *avail_bytes = free_bytes; + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ + - sizeof(struct btrfs_item) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u64 type) +{ + u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_root *chunk_root = info->chunk_root; + struct btrfs_stripe *stripes; + struct btrfs_device *device = NULL; + struct btrfs_chunk *chunk; + struct list_head private_devs; + struct list_head *dev_list = &info->fs_devices->devices; + struct list_head *cur; + struct map_lookup *map; + int min_stripe_size = 1 * 1024 * 1024; + u64 calc_size = 8 * 1024 * 1024; + u64 min_free; + u64 max_chunk_size = 4 * calc_size; + u64 avail = 0; + u64 max_avail = 0; + u64 percent_max; + int num_stripes = 1; + int max_stripes = 0; + int min_stripes = 1; + int sub_stripes = 0; + int looped = 0; + int ret; + int index; + int stripe_len = BTRFS_STRIPE_LEN; + struct btrfs_key key; + u64 offset; + + if (list_empty(dev_list)) { + return -ENOSPC; + } + + if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) { + if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + calc_size = 8 * 1024 * 1024; + max_chunk_size = calc_size * 2; + min_stripe_size = 1 * 1024 * 1024; + max_stripes = BTRFS_MAX_DEVS_SYS_CHUNK; + } else 
if (type & BTRFS_BLOCK_GROUP_DATA) { + calc_size = 1024 * 1024 * 1024; + max_chunk_size = 10 * calc_size; + min_stripe_size = 64 * 1024 * 1024; + max_stripes = BTRFS_MAX_DEVS(chunk_root); + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { + calc_size = 1024 * 1024 * 1024; + max_chunk_size = 4 * calc_size; + min_stripe_size = 32 * 1024 * 1024; + max_stripes = BTRFS_MAX_DEVS(chunk_root); + } + } + if (type & BTRFS_BLOCK_GROUP_RAID1) { + num_stripes = min_t(u64, 2, + btrfs_super_num_devices(info->super_copy)); + if (num_stripes < 2) + return -ENOSPC; + min_stripes = 2; + } + if (type & BTRFS_BLOCK_GROUP_DUP) { + num_stripes = 2; + min_stripes = 2; + } + if (type & (BTRFS_BLOCK_GROUP_RAID0)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes > max_stripes) + num_stripes = max_stripes; + min_stripes = 2; + } + if (type & (BTRFS_BLOCK_GROUP_RAID10)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes > max_stripes) + num_stripes = max_stripes; + if (num_stripes < 4) + return -ENOSPC; + num_stripes &= ~(u32)1; + sub_stripes = 2; + min_stripes = 4; + } + if (type & (BTRFS_BLOCK_GROUP_RAID5)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes > max_stripes) + num_stripes = max_stripes; + if (num_stripes < 2) + return -ENOSPC; + min_stripes = 2; + stripe_len = find_raid56_stripe_len(num_stripes - 1, + btrfs_super_stripesize(info->super_copy)); + } + if (type & (BTRFS_BLOCK_GROUP_RAID6)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes > max_stripes) + num_stripes = max_stripes; + if (num_stripes < 3) + return -ENOSPC; + min_stripes = 3; + stripe_len = find_raid56_stripe_len(num_stripes - 2, + btrfs_super_stripesize(info->super_copy)); + } + + /* we don't want a chunk larger than 10% of the FS */ + percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1); + max_chunk_size = min(percent_max, max_chunk_size); + +again: + if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) > + max_chunk_size) { + calc_size = max_chunk_size; + calc_size /= num_stripes; + calc_size /= stripe_len; + calc_size *= stripe_len; + } + /* we don't want tiny stripes */ + calc_size = max_t(u64, calc_size, min_stripe_size); + + calc_size /= stripe_len; + calc_size *= stripe_len; + INIT_LIST_HEAD(&private_devs); + cur = dev_list->next; + index = 0; + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_free = calc_size * 2; + else + min_free = calc_size; + + /* build a private list of devices we will allocate from */ + while(index < num_stripes) { + device = list_entry(cur, struct btrfs_device, dev_list); + ret = btrfs_device_avail_bytes(trans, device, &avail); + if (ret) + return ret; + cur = cur->next; + if (avail >= min_free) { + list_move_tail(&device->dev_list, &private_devs); + index++; + if (type & BTRFS_BLOCK_GROUP_DUP) + index++; + } else if (avail > max_avail) + max_avail = avail; + if (cur == dev_list) + break; + } + if (index < num_stripes) { + list_splice(&private_devs, dev_list); + if (index >= min_stripes) { + num_stripes = index; + if (type & (BTRFS_BLOCK_GROUP_RAID10)) { + num_stripes /= sub_stripes; + num_stripes *= sub_stripes; + } + looped = 1; + goto again; + } + if (!looped && max_avail > 0) { + looped = 1; + calc_size = max_avail; + goto again; + } + return -ENOSPC; + } + ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, + &offset); + if (ret) + return ret; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = offset; + 
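+ /*
+ * Sizing illustration (hypothetical values): calc_size is the
+ * per-device stripe, rounded to a multiple of stripe_len above, and
+ * chunk_bytes_by_type() scales it to the logical chunk length:
+ * RAID10 with num_stripes == 4, sub_stripes == 2, calc_size == 1GiB
+ * gives a 2GiB chunk; RAID6 with 6 stripes gives 4GiB.
+ */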
+ chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); + if (!chunk) + return -ENOMEM; + + map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + + stripes = &chunk->stripe; + *num_bytes = chunk_bytes_by_type(type, calc_size, + num_stripes, sub_stripes); + index = 0; + while(index < num_stripes) { + struct btrfs_stripe *stripe; + BUG_ON(list_empty(&private_devs)); + cur = private_devs.next; + device = list_entry(cur, struct btrfs_device, dev_list); + + /* loop over this device again if we're doing a dup group */ + if (!(type & BTRFS_BLOCK_GROUP_DUP) || + (index == num_stripes - 1)) + list_move_tail(&device->dev_list, dev_list); + + ret = btrfs_alloc_dev_extent(trans, device, + info->chunk_root->root_key.objectid, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, + calc_size, &dev_offset); + BUG_ON(ret); + + device->bytes_used += calc_size; + ret = btrfs_update_device(trans, device); + BUG_ON(ret); + + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; + stripe = stripes + index; + btrfs_set_stack_stripe_devid(stripe, device->devid); + btrfs_set_stack_stripe_offset(stripe, dev_offset); + memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); + index++; + } + BUG_ON(!list_empty(&private_devs)); + + /* key was set above */ + btrfs_set_stack_chunk_length(chunk, *num_bytes); + btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); + btrfs_set_stack_chunk_type(chunk, type); + btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); + btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; + map->sub_stripes = sub_stripes; + + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, + btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + *start = key.offset;; + + map->ce.start = key.offset; + map->ce.size = *num_bytes; + + ret = insert_cache_extent(&info->mapping_tree.cache_tree, &map->ce); + BUG_ON(ret); + + if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_add_system_chunk(trans, chunk_root, &key, + chunk, btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + } + + kfree(chunk); + return ret; +} + +int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 num_bytes, u64 type) +{ + u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_root *chunk_root = info->chunk_root; + struct btrfs_stripe *stripes; + struct btrfs_device *device = NULL; + struct btrfs_chunk *chunk; + struct list_head *dev_list = &info->fs_devices->devices; + struct list_head *cur; + struct map_lookup *map; + u64 calc_size = 8 * 1024 * 1024; + int num_stripes = 1; + int sub_stripes = 0; + int ret; + int index; + int stripe_len = BTRFS_STRIPE_LEN; + struct btrfs_key key; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, + &key.offset); + if (ret) + return ret; + + chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); + if (!chunk) + return -ENOMEM; + + map = kmalloc(btrfs_map_lookup_size(num_stripes), 
GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + + stripes = &chunk->stripe; + calc_size = num_bytes; + + index = 0; + cur = dev_list->next; + device = list_entry(cur, struct btrfs_device, dev_list); + + while (index < num_stripes) { + struct btrfs_stripe *stripe; + + ret = btrfs_alloc_dev_extent(trans, device, + info->chunk_root->root_key.objectid, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, + calc_size, &dev_offset); + BUG_ON(ret); + + device->bytes_used += calc_size; + ret = btrfs_update_device(trans, device); + BUG_ON(ret); + + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; + stripe = stripes + index; + btrfs_set_stack_stripe_devid(stripe, device->devid); + btrfs_set_stack_stripe_offset(stripe, dev_offset); + memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); + index++; + } + + /* key was set above */ + btrfs_set_stack_chunk_length(chunk, num_bytes); + btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); + btrfs_set_stack_chunk_type(chunk, type); + btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); + btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; + map->sub_stripes = sub_stripes; + + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, + btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + *start = key.offset; + + map->ce.start = key.offset; + map->ce.size = num_bytes; + + ret = insert_cache_extent(&info->mapping_tree.cache_tree, &map->ce); + BUG_ON(ret); + + kfree(chunk); + return ret; +} + +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) +{ + struct cache_extent *ce; + struct map_lookup *map; + int ret; + + ce = search_cache_extent(&map_tree->cache_tree, logical); + if (!ce) { + fprintf(stderr, "No mapping for %llu-%llu\n", + (unsigned long long)logical, + (unsigned long long)logical+len); + return 1; + } + if (ce->start > logical || ce->start + ce->size < logical) { + fprintf(stderr, "Invalid mapping for %llu-%llu, got " + "%llu-%llu\n", (unsigned long long)logical, + (unsigned long long)logical+len, + (unsigned long long)ce->start, + (unsigned long long)ce->start + ce->size); + return 1; + } + map = container_of(ce, struct map_lookup, ce); + + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) + ret = map->num_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID10) + ret = map->sub_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID5) + ret = 2; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + ret = 3; + else + ret = 1; + return ret; +} + +int btrfs_next_bg(struct btrfs_mapping_tree *map_tree, u64 *logical, + u64 *size, u64 type) +{ + struct cache_extent *ce; + struct map_lookup *map; + u64 cur = *logical; + + ce = search_cache_extent(&map_tree->cache_tree, cur); + + while (ce) { + /* + * only jump to the next bg if our cur is not 0; + * as the initial logical for btrfs_next_bg() is 0, jumping + * ahead on the first lookup would skip a valid bg.
+ */ + if (cur) { + ce = next_cache_extent(ce); + if (!ce) + return -ENOENT; + } + + cur = ce->start; + map = container_of(ce, struct map_lookup, ce); + if (map->type & type) { + *logical = ce->start; + *size = ce->size; + return 0; + } + } + + return -ENOENT; +} + +int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, + u64 chunk_start, u64 physical, u64 devid, + u64 **logical, int *naddrs, int *stripe_len) +{ + struct cache_extent *ce; + struct map_lookup *map; + u64 *buf; + u64 bytenr; + u64 length; + u64 stripe_nr; + u64 rmap_len; + int i, j, nr = 0; + + ce = search_cache_extent(&map_tree->cache_tree, chunk_start); + BUG_ON(!ce); + map = container_of(ce, struct map_lookup, ce); + + length = ce->size; + rmap_len = map->stripe_len; + if (map->type & BTRFS_BLOCK_GROUP_RAID10) + length = ce->size / (map->num_stripes / map->sub_stripes); + else if (map->type & BTRFS_BLOCK_GROUP_RAID0) + length = ce->size / map->num_stripes; + else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + length = ce->size / nr_data_stripes(map); + rmap_len = map->stripe_len * nr_data_stripes(map); + } + + buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); + + for (i = 0; i < map->num_stripes; i++) { + if (devid && map->stripes[i].dev->devid != devid) + continue; + if (map->stripes[i].physical > physical || + map->stripes[i].physical + length <= physical) + continue; + + stripe_nr = (physical - map->stripes[i].physical) / + map->stripe_len; + + if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + stripe_nr = (stripe_nr * map->num_stripes + i) / + map->sub_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + stripe_nr = stripe_nr * map->num_stripes + i; + } /* else if RAID[56], multiply by nr_data_stripes(). + * Alternatively, just use rmap_len below instead of + * map->stripe_len */ + + bytenr = ce->start + stripe_nr * rmap_len; + for (j = 0; j < nr; j++) { + if (buf[j] == bytenr) + break; + } + if (j == nr) + buf[nr++] = bytenr; + } + + *logical = buf; + *naddrs = nr; + *stripe_len = rmap_len; + + return 0; +} + +static inline int parity_smaller(u64 a, u64 b) +{ + return a > b; +} + +/* Bubble-sort the stripe set to put the parity/syndrome stripes last */ +static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map) +{ + struct btrfs_bio_stripe s; + int i; + u64 l; + int again = 1; + + while (again) { + again = 0; + for (i = 0; i < bbio->num_stripes - 1; i++) { + if (parity_smaller(raid_map[i], raid_map[i+1])) { + s = bbio->stripes[i]; + l = raid_map[i]; + bbio->stripes[i] = bbio->stripes[i+1]; + raid_map[i] = raid_map[i+1]; + bbio->stripes[i+1] = s; + raid_map[i+1] = l; + again = 1; + } + } + } +} + +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret) +{ + return __btrfs_map_block(map_tree, rw, logical, length, NULL, + multi_ret, mirror_num, raid_map_ret); +} + +int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, u64 *type, + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret) +{ + struct cache_extent *ce; + struct map_lookup *map; + u64 offset; + u64 stripe_offset; + u64 stripe_nr; + u64 *raid_map = NULL; + int stripes_allocated = 8; + int stripes_required = 1; + int stripe_index; + int i; + struct btrfs_multi_bio *multi = NULL; + + if (multi_ret && rw == READ) { + stripes_allocated = 1; + } +again: + ce = search_cache_extent(&map_tree->cache_tree, logical); + if (!ce) { + 
kfree(multi); + *length = (u64)-1; + return -ENOENT; + } + if (ce->start > logical) { + kfree(multi); + *length = ce->start - logical; + return -ENOENT; + } + + if (multi_ret) { + multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), + GFP_NOFS); + if (!multi) + return -ENOMEM; + } + map = container_of(ce, struct map_lookup, ce); + offset = logical - ce->start; + + if (rw == WRITE) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + stripes_required = map->num_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + stripes_required = map->sub_stripes; + } + } + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) + && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) { + /* RAID[56] write or recovery. Return all stripes */ + stripes_required = map->num_stripes; + + /* Only allocate the map if we've already got a large enough multi_ret */ + if (stripes_allocated >= stripes_required) { + raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); + if (!raid_map) { + kfree(multi); + return -ENOMEM; + } + } + } + + /* if our multi bio struct is too small, back off and try again */ + if (multi_ret && stripes_allocated < stripes_required) { + stripes_allocated = stripes_required; + kfree(multi); + multi = NULL; + goto again; + } + stripe_nr = offset; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + stripe_nr = stripe_nr / map->stripe_len; + + stripe_offset = stripe_nr * map->stripe_len; + BUG_ON(offset < stripe_offset); + + /* stripe_offset is the offset of this block in its stripe*/ + stripe_offset = offset - stripe_offset; + + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, ce->size - offset, + map->stripe_len - stripe_offset); + } else { + *length = ce->size - offset; + } + + if (!multi_ret) + goto out; + + multi->num_stripes = 1; + stripe_index = 0; + if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + if (rw == WRITE) + multi->num_stripes = map->num_stripes; + else if (mirror_num) + stripe_index = mirror_num - 1; + else + stripe_index = stripe_nr % map->num_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + int factor = map->num_stripes / map->sub_stripes; + + stripe_index = stripe_nr % factor; + stripe_index *= map->sub_stripes; + + if (rw == WRITE) + multi->num_stripes = map->sub_stripes; + else if (mirror_num) + stripe_index += mirror_num - 1; + + stripe_nr = stripe_nr / factor; + } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { + if (rw == WRITE) + multi->num_stripes = map->num_stripes; + else if (mirror_num) + stripe_index = mirror_num - 1; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + + if (raid_map) { + int rot; + u64 tmp; + u64 raid56_full_stripe_start; + u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len; + + /* + * align the start of our data stripe in the logical + * address space + */ + raid56_full_stripe_start = offset / full_stripe_len; + raid56_full_stripe_start *= full_stripe_len; + + /* get the data stripe number */ + stripe_nr = raid56_full_stripe_start / map->stripe_len; + stripe_nr = stripe_nr / nr_data_stripes(map); + + /* Work out the disk rotation on this stripe-set */ + rot = stripe_nr % map->num_stripes; + + /* Fill in the logical address of each stripe */ + tmp = stripe_nr * 
nr_data_stripes(map); + + for (i = 0; i < nr_data_stripes(map); i++) + raid_map[(i+rot) % map->num_stripes] = + ce->start + (tmp + i) * map->stripe_len; + + raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE; + if (map->type & BTRFS_BLOCK_GROUP_RAID6) + raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE; + + *length = map->stripe_len; + stripe_index = 0; + stripe_offset = 0; + multi->num_stripes = map->num_stripes; + } else { + stripe_index = stripe_nr % nr_data_stripes(map); + stripe_nr = stripe_nr / nr_data_stripes(map); + + /* + * Mirror #0 or #1 means the original data block. + * Mirror #2 is RAID5 parity block. + * Mirror #3 is RAID6 Q block. + */ + if (mirror_num > 1) + stripe_index = nr_data_stripes(map) + mirror_num - 2; + + /* We distribute the parity blocks across stripes */ + stripe_index = (stripe_nr + stripe_index) % map->num_stripes; + } + } else { + /* + * after this division, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = stripe_nr % map->num_stripes; + stripe_nr = stripe_nr / map->num_stripes; + } + BUG_ON(stripe_index >= map->num_stripes); + + for (i = 0; i < multi->num_stripes; i++) { + multi->stripes[i].physical = + map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + multi->stripes[i].dev = map->stripes[stripe_index].dev; + stripe_index++; + } + *multi_ret = multi; + + if (type) + *type = map->type; + + if (raid_map) { + sort_parity_stripes(multi, raid_map); + *raid_map_ret = raid_map; + } +out: + return 0; +} + +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, + u8 *uuid, u8 *fsid) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *cur_devices; + + cur_devices = root->fs_info->fs_devices; + while (cur_devices) { + if (!fsid || + (!memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE) || + root->fs_info->ignore_fsid_mismatch)) { + device = __find_device(&cur_devices->devices, + devid, uuid); + if (device) + return device; + } + cur_devices = cur_devices->seed; + } + return NULL; +} + +struct btrfs_device * +btrfs_find_device_by_devid(struct btrfs_fs_devices *fs_devices, + u64 devid, int instance) +{ + struct list_head *head = &fs_devices->devices; + struct btrfs_device *dev; + int num_found = 0; + + list_for_each_entry(dev, head, dev_list) { + if (dev->devid == devid && num_found++ == instance) + return dev; + } + return NULL; +} + +int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) +{ + struct cache_extent *ce; + struct map_lookup *map; + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + int readonly = 0; + int i; + + /* + * During chunk recovery we may fail to find a block group's + * corresponding chunk; it will be rebuilt later + */ + ce = search_cache_extent(&map_tree->cache_tree, chunk_offset); + if (!root->fs_info->is_chunk_recover) + BUG_ON(!ce); + else + return 0; + + map = container_of(ce, struct map_lookup, ce); + for (i = 0; i < map->num_stripes; i++) { + if (!map->stripes[i].dev->writeable) { + readonly = 1; + break; + } + } + + return readonly; +} + +static struct btrfs_device *fill_missing_device(u64 devid) +{ + struct btrfs_device *device; + + device = kzalloc(sizeof(*device), GFP_NOFS); + device->devid = devid; + device->fd = -1; + return device; +} + +/* + * Slot is used to verify that the chunk item is valid + * + * For sys chunk in superblock, pass -1 to indicate sys chunk.
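+ * + * For a chunk item read from the chunk tree, pass its leaf slot instead, + * so the stripe count can be checked against the on-disk item size.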
+ */ +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, int slot) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct cache_extent *ce; + u64 logical; + u64 length; + u64 stripe_len; + u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; + int num_stripes; + int ret; + int i; + + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + /* Validation check */ + if (!num_stripes) { + error("invalid chunk num_stripes: %u", num_stripes); + return -EIO; + } + if (!IS_ALIGNED(logical, root->sectorsize)) { + error("invalid chunk logical %llu", logical); + return -EIO; + } + if (!length || !IS_ALIGNED(length, root->sectorsize)) { + error("invalid chunk length %llu", length); + return -EIO; + } + if (!is_power_of_2(stripe_len)) { + error("invalid chunk stripe length: %llu", stripe_len); + return -EIO; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)) { + error("unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } + + ce = search_cache_extent(&map_tree->cache_tree, logical); + + /* already mapped? */ + if (ce && ce->start <= logical && ce->start + ce->size > logical) { + return 0; + } + + map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS); + if (!map) + return -ENOMEM; + + map->ce.start = logical; + map->ce.size = length; + map->num_stripes = num_stripes; + map->io_width = btrfs_chunk_io_width(leaf, chunk); + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->sector_size = btrfs_chunk_sector_size(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = btrfs_chunk_type(leaf, chunk); + map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + + /* Check on chunk item type */ + if (map->type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK)) { + fprintf(stderr, "Unknown chunk type bits: %llu\n", + map->type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK)); + ret = -EIO; + goto out; + } + + /* + * Btrfs_chunk contains at least one stripe, and for sys_chunk + * it can't exceed the system chunk array size + * For normal chunk, it should match its chunk item size. 
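+ * (Each struct btrfs_stripe is 32 bytes and BTRFS_SYSTEM_CHUNK_ARRAY_SIZE + * is 2048 bytes; the checks below enforce exactly that.)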
+ */ + if (num_stripes < 1 || + (slot == -1 && sizeof(struct btrfs_stripe) * num_stripes > + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) || + (slot >= 0 && sizeof(struct btrfs_stripe) * (num_stripes - 1) > + btrfs_item_size_nr(leaf, slot))) { + fprintf(stderr, "Invalid num_stripes: %u\n", + num_stripes); + ret = -EIO; + goto out; + } + + /* + * Device number check against profile + */ + if ((map->type & BTRFS_BLOCK_GROUP_RAID10 && map->sub_stripes == 0) || + (map->type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (map->type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (map->type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (map->type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + ((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + fprintf(stderr, + "Invalid num_stripes:sub_stripes %u:%u for profile %llu\n", + num_stripes, map->sub_stripes, + map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + ret = -EIO; + goto out; + } + + for (i = 0; i < num_stripes; i++) { + map->stripes[i].physical = + btrfs_stripe_offset_nr(leaf, chunk, i); + devid = btrfs_stripe_devid_nr(leaf, chunk, i); + read_extent_buffer(leaf, uuid, (unsigned long) + btrfs_stripe_dev_uuid_nr(chunk, i), + BTRFS_UUID_SIZE); + map->stripes[i].dev = btrfs_find_device(root, devid, uuid, + NULL); + if (!map->stripes[i].dev) { + map->stripes[i].dev = fill_missing_device(devid); + printf("warning, device %llu is missing\n", + (unsigned long long)devid); + } + + } + ret = insert_cache_extent(&map_tree->cache_tree, &map->ce); + BUG_ON(ret); + + return 0; +out: + free(map); + return ret; +} + +static int fill_device_from_item(struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item, + struct btrfs_device *device) +{ + unsigned long ptr; + + device->devid = btrfs_device_id(leaf, dev_item); + device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->type = btrfs_device_type(leaf, dev_item); + device->io_align = btrfs_device_io_align(leaf, dev_item); + device->io_width = btrfs_device_io_width(leaf, dev_item); + device->sector_size = btrfs_device_sector_size(leaf, dev_item); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); + + return 0; +} + +static int open_seed_devices(struct btrfs_root *root, u8 *fsid) +{ + struct btrfs_fs_devices *fs_devices; + int ret; + + fs_devices = root->fs_info->fs_devices->seed; + while (fs_devices) { + if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) { + ret = 0; + goto out; + } + fs_devices = fs_devices->seed; + } + + fs_devices = find_fsid(fsid); + if (!fs_devices) { + /* missing all seed devices */ + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, fsid, BTRFS_FSID_SIZE); + } + + ret = btrfs_open_devices(fs_devices, O_RDONLY); + if (ret) + goto out; + + fs_devices->seed = root->fs_info->fs_devices->seed; + root->fs_info->fs_devices->seed = fs_devices; +out: + return ret; +} + +static int read_one_dev(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item) +{ + struct btrfs_device *device; + u64 devid; + int ret = 0; + u8 fs_uuid[BTRFS_UUID_SIZE]; + u8 dev_uuid[BTRFS_UUID_SIZE]; + + devid = btrfs_device_id(leaf, dev_item); + read_extent_buffer(leaf, dev_uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + 
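/* + * A device item whose fsid does not match the mounted filesystem's + * belongs to a seed filesystem; the open_seed_devices() call below + * brings those devices in read-only. + */ +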
read_extent_buffer(leaf, fs_uuid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_UUID_SIZE); + + if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { + ret = open_seed_devices(root, fs_uuid); + if (ret) + return ret; + } + + device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + device->fd = -1; + list_add(&device->dev_list, + &root->fs_info->fs_devices->devices); + } + + fill_device_from_item(leaf, dev_item, device); + device->dev_root = root->fs_info->dev_root; + return ret; +} + +int btrfs_read_sys_array(struct btrfs_root *root) +{ + struct btrfs_super_block *super_copy = root->fs_info->super_copy; + struct extent_buffer *sb; + struct btrfs_disk_key *disk_key; + struct btrfs_chunk *chunk; + u8 *array_ptr; + unsigned long sb_array_offset; + int ret = 0; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u32 cur_offset; + struct btrfs_key key; + + sb = btrfs_find_create_tree_block(root->fs_info, + BTRFS_SUPER_INFO_OFFSET, + BTRFS_SUPER_INFO_SIZE); + if (!sb) + return -ENOMEM; + btrfs_set_buffer_uptodate(sb); + write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy)); + array_size = btrfs_super_sys_array_size(super_copy); + + array_ptr = super_copy->sys_chunk_array; + sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array); + cur_offset = 0; + + while (cur_offset < array_size) { + disk_key = (struct btrfs_disk_key *)array_ptr; + len = sizeof(*disk_key); + if (cur_offset + len > array_size) + goto out_short_read; + + btrfs_disk_key_to_cpu(&key, disk_key); + + array_ptr += len; + sb_array_offset += len; + cur_offset += len; + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + chunk = (struct btrfs_chunk *)sb_array_offset; + /* + * At least one btrfs_chunk with one stripe must be + * present, exact stripe count check comes afterwards + */ + len = btrfs_chunk_item_size(1); + if (cur_offset + len > array_size) + goto out_short_read; + + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk( + "ERROR: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + + len = btrfs_chunk_item_size(num_stripes); + if (cur_offset + len > array_size) + goto out_short_read; + + ret = read_one_chunk(root, &key, sb, chunk, -1); + if (ret) + break; + } else { + printk( + "ERROR: unexpected item type %u in sys_array at offset %u\n", + (u32)key.type, cur_offset); + ret = -EIO; + break; + } + array_ptr += len; + sb_array_offset += len; + cur_offset += len; + } + free_extent_buffer(sb); + return ret; + +out_short_read: + printk("ERROR: sys_array too short to read %u bytes at offset %u\n", + len, cur_offset); + free_extent_buffer(sb); + return -EIO; +} + +int btrfs_read_chunk_tree(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + int ret; + int slot; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). 
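+ * (BTRFS_DEV_ITEMS_OBJECTID is 1 and BTRFS_FIRST_CHUNK_TREE_OBJECTID is + * 256, so a forward search from objectid 1 walks every device item + * before the first chunk item.)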
+ */ + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, + struct btrfs_dev_item); + ret = read_one_dev(root, leaf, dev_item); + BUG_ON(ret); + } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { + struct btrfs_chunk *chunk; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = read_one_chunk(root, &found_key, leaf, chunk, + slot); + BUG_ON(ret); + } + path->slots[0]++; + } + + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +struct list_head *btrfs_scanned_uuids(void) +{ + return &fs_uuids; +} + +static int rmw_eb(struct btrfs_fs_info *info, + struct extent_buffer *eb, struct extent_buffer *orig_eb) +{ + int ret; + unsigned long orig_off = 0; + unsigned long dest_off = 0; + unsigned long copy_len = eb->len; + + ret = read_whole_eb(info, eb, 0); + if (ret) + return ret; + + if (eb->start + eb->len <= orig_eb->start || + eb->start >= orig_eb->start + orig_eb->len) + return 0; + /* + * | ----- orig_eb ------- | + * | ----- stripe ------- | + * | ----- orig_eb ------- | + * | ----- orig_eb ------- | + */ + if (eb->start > orig_eb->start) + orig_off = eb->start - orig_eb->start; + if (orig_eb->start > eb->start) + dest_off = orig_eb->start - eb->start; + + if (copy_len > orig_eb->len - orig_off) + copy_len = orig_eb->len - orig_off; + if (copy_len > eb->len - dest_off) + copy_len = eb->len - dest_off; + + memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); + return 0; +} + +static void split_eb_for_raid56(struct btrfs_fs_info *info, + struct extent_buffer *orig_eb, + struct extent_buffer **ebs, + u64 stripe_len, u64 *raid_map, + int num_stripes) +{ + struct extent_buffer *eb; + u64 start = orig_eb->start; + u64 this_eb_start; + int i; + int ret; + + for (i = 0; i < num_stripes; i++) { + if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) + break; + + eb = calloc(1, sizeof(struct extent_buffer) + stripe_len); + if (!eb) + BUG(); + + eb->start = raid_map[i]; + eb->len = stripe_len; + eb->refs = 1; + eb->flags = 0; + eb->fd = -1; + eb->dev_bytenr = (u64)-1; + + this_eb_start = raid_map[i]; + + if (start > this_eb_start || + start + orig_eb->len < this_eb_start + stripe_len) { + ret = rmw_eb(info, eb, orig_eb); + BUG_ON(ret); + } else { + memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); + } + ebs[i] = eb; + } +} + +int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 stripe_len, u64 *raid_map) +{ + struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL; + int i; + int j; + int ret; + int alloc_size = eb->len; + + ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS); + BUG_ON(!ebs); + + if (stripe_len > alloc_size) + alloc_size = stripe_len; + + split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, + multi->num_stripes); + + for (i = 0; i < multi->num_stripes; i++) { + struct extent_buffer *new_eb; + if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { + ebs[i]->dev_bytenr = multi->stripes[i].physical; + ebs[i]->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + 
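/* + * Entries whose raid_map value is below BTRFS_RAID5_P_STRIPE are data + * stripes and reuse the buffers filled by split_eb_for_raid56(); fresh + * buffers for the P (and, on RAID6, Q) stripes are allocated below. + */ +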
BUG_ON(ebs[i]->start != raid_map[i]); + continue; + } + new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); + BUG_ON(!new_eb); + new_eb->dev_bytenr = multi->stripes[i].physical; + new_eb->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + new_eb->len = stripe_len; + + if (raid_map[i] == BTRFS_RAID5_P_STRIPE) + p_eb = new_eb; + else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) + q_eb = new_eb; + } + if (q_eb) { + void **pointers; + + pointers = kmalloc(sizeof(*pointers) * multi->num_stripes, + GFP_NOFS); + BUG_ON(!pointers); + + ebs[multi->num_stripes - 2] = p_eb; + ebs[multi->num_stripes - 1] = q_eb; + + for (i = 0; i < multi->num_stripes; i++) + pointers[i] = ebs[i]->data; + + raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); + kfree(pointers); + } else { + ebs[multi->num_stripes - 1] = p_eb; + memcpy(p_eb->data, ebs[0]->data, stripe_len); + for (j = 1; j < multi->num_stripes - 1; j++) { + for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { + *(unsigned long *)(p_eb->data + i) ^= + *(unsigned long *)(ebs[j]->data + i); + } + } + } + + for (i = 0; i < multi->num_stripes; i++) { + ret = write_extent_to_disk(ebs[i]); + BUG_ON(ret); + if (ebs[i] != eb) + kfree(ebs[i]); + } + + kfree(ebs); + + return 0; +} diff --git a/tools/libfsimage/btrfs/volumes.h b/tools/libfsimage/btrfs/volumes.h new file mode 100644 index 0000000..c0007ad --- /dev/null +++ b/tools/libfsimage/btrfs/volumes.h @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __BTRFS_VOLUMES_H__ +#define __BTRFS_VOLUMES_H__ + +#include "kerncompat.h" +#include "ctree.h" + +#define BTRFS_STRIPE_LEN (64 * 1024) + +struct btrfs_device { + struct list_head dev_list; + struct btrfs_root *dev_root; + struct btrfs_fs_devices *fs_devices; + + u64 total_ios; + + int fd; + + int writeable; + + char *name; + + /* these are read off the super block, only in the progs */ + char *label; + u64 total_devs; + u64 super_bytes_used; + + u64 generation; + + /* the internal btrfs device id */ + u64 devid; + + /* size of the device */ + u64 total_bytes; + + /* bytes used */ + u64 bytes_used; + + /* optimal io alignment for this device */ + u32 io_align; + + /* optimal io width for this device */ + u32 io_width; + + /* minimal io size for this device */ + u32 sector_size; + + /* type and info about this device */ + u64 type; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_UUID_SIZE]; +}; + +struct btrfs_fs_devices { + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + + /* the device with this id has the most recent copy of the super */ + u64 latest_devid; + u64 latest_trans; + u64 lowest_devid; + int latest_bdev; + int lowest_bdev; + struct list_head devices; + struct list_head list; + + int seeding; + struct btrfs_fs_devices *seed; +}; + +struct btrfs_bio_stripe { + struct btrfs_device *dev; + u64 physical; +}; + +struct btrfs_multi_bio { + int error; + int num_stripes; + struct btrfs_bio_stripe stripes[]; +}; + +struct map_lookup { + struct cache_extent ce; + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + int sub_stripes; + struct btrfs_bio_stripe stripes[]; +}; + +#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ + (sizeof(struct btrfs_bio_stripe) * (n))) +#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \ + (sizeof(struct btrfs_bio_stripe) * (n))) + +/* + * Restriper's general type filter + */ +#define BTRFS_BALANCE_DATA (1ULL << 0) +#define BTRFS_BALANCE_SYSTEM (1ULL << 1) +#define BTRFS_BALANCE_METADATA (1ULL << 2) + +#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \ + BTRFS_BALANCE_SYSTEM | \ + BTRFS_BALANCE_METADATA) + +#define BTRFS_BALANCE_FORCE (1ULL << 3) +#define BTRFS_BALANCE_RESUME (1ULL << 4) + +/* + * Balance filters + */ +#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0) +#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1) +#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) +#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) +#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) +#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) +#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) + +/* + * Profile changing flags. When SOFT is set we won't relocate chunk if + * it already has the target profile (even though it may be + * half-filled). + */ +#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8) +#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9) + +#define BTRFS_RAID5_P_STRIPE ((u64)-2) +#define BTRFS_RAID6_Q_STRIPE ((u64)-1) + +/* + * Check if the given range crosses stripes.
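+ * (E.g. with the 64 KiB BTRFS_STRIPE_LEN, a range starting at 60 KiB with + * length 8 KiB crosses a stripe boundary, while one starting at 0 with + * length 64 KiB does not.)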
+ * The check is used to make sure kernel scrub won't hit bugs with + * METADATA in mixed block groups. + */ +static inline int check_crossing_stripes(u64 start, u64 len) +{ + return (start / BTRFS_STRIPE_LEN) != + ((start + len - 1) / BTRFS_STRIPE_LEN); +} + +int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, u64 *type, + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret); +int btrfs_next_bg(struct btrfs_mapping_tree *map_tree, u64 *logical, + u64 *size, u64 type); +static inline int btrfs_next_bg_metadata(struct btrfs_mapping_tree *map_tree, + u64 *logical, u64 *size) +{ + return btrfs_next_bg(map_tree, logical, size, + BTRFS_BLOCK_GROUP_METADATA); +} +static inline int btrfs_next_bg_system(struct btrfs_mapping_tree *map_tree, + u64 *logical, u64 *size) +{ + return btrfs_next_bg(map_tree, logical, size, + BTRFS_BLOCK_GROUP_SYSTEM); +} +int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, + u64 chunk_start, u64 physical, u64 devid, + u64 **logical, int *naddrs, int *stripe_len); +int btrfs_read_sys_array(struct btrfs_root *root); +int btrfs_read_chunk_tree(struct btrfs_root *root); +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u64 type); +int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 num_bytes, u64 type); +int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags); +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +void btrfs_close_all_devices(void); +int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device); +int btrfs_scan_one_device(int fd, const char *path, + struct btrfs_fs_devices **fs_devices_ret, + u64 *total_devs, u64 super_offset, int super_recover); +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); +struct list_head *btrfs_scanned_uuids(void); +int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_chunk *chunk, int item_size); +int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); +struct btrfs_device * +btrfs_find_device_by_devid(struct btrfs_fs_devices *fs_devices, + u64 devid, int instance); +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, + u8 *uuid, u8 *fsid); +int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 stripe_len, u64 *raid_map); +#endif -- 2.4.3 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel