[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 4 of 8 RESEND] blktap3/vhd: Introduce core VHD library
This patch copies the core of the VHD functionality from blktap2, with most changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@xxxxxxxxxx> diff --git a/tools/blktap2/vhd/lib/libvhd.c b/tools/blktap3/vhd/lib/libvhd.c copy from tools/blktap2/vhd/lib/libvhd.c copy to tools/blktap3/vhd/lib/libvhd.c --- a/tools/blktap2/vhd/lib/libvhd.c +++ b/tools/blktap3/vhd/lib/libvhd.c @@ -1,4 +1,5 @@ /* Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2010, Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,12 +36,16 @@ #include <string.h> #include <libgen.h> #include <iconv.h> +#include <limits.h> #include <sys/mman.h> #include <sys/stat.h> +#include <sys/types.h> #include "libvhd.h" #include "relative-path.h" +#define VHD_HEADER_MAX_RETRIES 10 + static int libvhd_dbg = 0; void @@ -57,7 +62,13 @@ libvhd_set_log_level(int level) __func__, ##_a); \ } while (0) -#define BIT_MASK 0x80 +#define ASSERT(_p) \ + if (!(_p)) { \ + libvhd_set_log_level(1); \ + VHDLOG("%s:%d: FAILED ASSERTION: '%s'\n", \ + __FILE__, __LINE__, #_p); \ + *(int*)0 = 0; \ + } #ifdef ENABLE_FAILURE_TESTING const char* ENV_VAR_FAIL[NUM_FAIL_TESTS] = { @@ -69,26 +80,15 @@ const char* ENV_VAR_FAIL[NUM_FAIL_TESTS] "VHD_UTIL_TEST_FAIL_RESIZE_METADATA_MOVED", "VHD_UTIL_TEST_FAIL_RESIZE_END" }; + int TEST_FAIL[NUM_FAIL_TESTS]; #endif // ENABLE_FAILURE_TESTING -static inline int -test_bit (volatile char *addr, int nr) -{ - return ((addr[nr >> 3] << (nr & 7)) & BIT_MASK) != 0; -} - -static inline void -set_bit (volatile char *addr, int nr) -{ - addr[nr >> 3] |= (BIT_MASK >> (nr & 7)); -} - -static inline void -clear_bit (volatile char *addr, int nr) -{ - addr[nr >> 3] &= ~(BIT_MASK >> (nr & 7)); -} +static void vhd_cache_init(vhd_context_t *); +static int vhd_cache_enabled(vhd_context_t *); +static int vhd_cache_load(vhd_context_t *); +static int vhd_cache_unload(vhd_context_t *); +static vhd_context_t 
*vhd_cache_get_parent(vhd_context_t *); static inline int old_test_bit(volatile char *addr, int nr) @@ -251,8 +251,8 @@ vhd_validate_footer(vhd_footer_t *footer if (memcmp(footer->cookie, HD_COOKIE, csize) != 0 && memcmp(footer->cookie, VHD_POISON_COOKIE, csize) != 0) { char buf[9]; - strncpy(buf, footer->cookie, sizeof(buf)); - buf[sizeof(buf)-1]= '\0'; + memcpy(buf, footer->cookie, 8); + buf[8] = '\0'; VHDLOG("invalid footer cookie: %s\n", buf); return -EINVAL; } @@ -312,8 +312,8 @@ vhd_validate_header(vhd_header_t *header if (memcmp(header->cookie, DD_COOKIE, 8) != 0) { char buf[9]; - strncpy(buf, header->cookie, sizeof(buf)); - buf[sizeof(buf)-1]= '\0'; + memcpy(buf, header->cookie, 8); + buf[8] = '\0'; VHDLOG("invalid header cookie: %s\n", buf); return -EINVAL; } @@ -323,8 +323,8 @@ vhd_validate_header(vhd_header_t *header return -EINVAL; } - if (header->data_offset != 0xFFFFFFFFFFFFFFFF) { - VHDLOG("invalid header data_offset 0x%016"PRIx64"\n", + if (header->data_offset != 0xFFFFFFFFFFFFFFFFULL) { + VHDLOG("invalid header data_offset 0x%016" PRIx64 "\n", header->data_offset); return -EINVAL; } @@ -355,18 +355,22 @@ vhd_validate_bat(vhd_bat_t *bat) } uint32_t -vhd_checksum_batmap(vhd_batmap_t *batmap) +vhd_checksum_batmap(vhd_context_t *ctx, vhd_batmap_t *batmap) { - int i, n; + int i; char *blob; uint32_t checksum; + size_t map_size; blob = batmap->map; checksum = 0; - n = vhd_sectors_to_bytes(batmap->header.batmap_size); - - for (i = 0; i < n; i++) { + map_size = + vhd_sectors_to_bytes(secs_round_up_no_zero + (ctx->footer. 
+ curr_size >> (VHD_BLOCK_SHIFT + 3))); + + for (i = 0; i < map_size; i++) { if (batmap->header.batmap_version == VHD_BATMAP_VERSION(1, 1)) checksum += (uint32_t)blob[i]; else @@ -389,14 +393,14 @@ vhd_validate_batmap_header(vhd_batmap_t } int -vhd_validate_batmap(vhd_batmap_t *batmap) +vhd_validate_batmap(vhd_context_t *ctx, vhd_batmap_t *batmap) { uint32_t checksum; if (!batmap->map) return -EINVAL; - checksum = vhd_checksum_batmap(batmap); + checksum = vhd_checksum_batmap(ctx, batmap); if (checksum != batmap->header.checksum) return -EINVAL; @@ -404,9 +408,9 @@ vhd_validate_batmap(vhd_batmap_t *batmap } int -vhd_batmap_header_offset(vhd_context_t *ctx, off_t *_off) +vhd_batmap_header_offset(vhd_context_t *ctx, off64_t *_off) { - off_t off; + off64_t off; size_t bat; *_off = 0; @@ -593,11 +597,11 @@ vhd_bitmap_clear(vhd_context_t *ctx, cha * byte of the file which is not vhd metadata */ int -vhd_end_of_headers(vhd_context_t *ctx, off_t *end) +vhd_end_of_headers(vhd_context_t *ctx, off64_t *end) { int err, i, n; uint32_t bat_bytes; - off_t eom, bat_end; + off64_t eom, bat_end; vhd_parent_locator_t *loc; *end = 0; @@ -613,7 +617,7 @@ vhd_end_of_headers(vhd_context_t *ctx, o eom = MAX(eom, bat_end); if (vhd_has_batmap(ctx)) { - off_t hdr_end, hdr_secs, map_end, map_secs; + off64_t hdr_end, hdr_secs, map_end, map_secs; err = vhd_get_batmap(ctx); if (err) @@ -637,7 +641,7 @@ vhd_end_of_headers(vhd_context_t *ctx, o n = sizeof(ctx->header.loc) / sizeof(vhd_parent_locator_t); for (i = 0; i < n; i++) { - off_t loc_end; + off64_t loc_end; loc = &ctx->header.loc[i]; if (loc->code == PLAT_CODE_NONE) @@ -652,10 +656,10 @@ vhd_end_of_headers(vhd_context_t *ctx, o } int -vhd_end_of_data(vhd_context_t *ctx, off_t *end) +vhd_end_of_data(vhd_context_t *ctx, off64_t *end) { int i, err; - off_t max; + off64_t max; uint64_t blk; if (!vhd_type_dynamic(ctx)) { @@ -664,7 +668,7 @@ vhd_end_of_data(vhd_context_t *ctx, off_ return err; max = vhd_position(ctx); - if (max == (off_t)-1) + if 
(max == (off64_t) - 1) return -errno; *end = max - sizeof(vhd_footer_t); @@ -822,7 +826,7 @@ vhd_get_batmap(vhd_context_t *ctx) if (!vhd_has_batmap(ctx)) return -EINVAL; - if (!vhd_validate_batmap(&ctx->batmap)) + if (!vhd_validate_batmap(ctx, &ctx->batmap)) return 0; vhd_put_batmap(ctx); @@ -871,8 +875,8 @@ int vhd_read_short_footer(vhd_context_t *ctx, vhd_footer_t *footer) { int err; - char *buf; - off_t eof; + void *buf; + off64_t eof; buf = NULL; @@ -881,7 +885,7 @@ vhd_read_short_footer(vhd_context_t *ctx goto out; eof = vhd_position(ctx); - if (eof == (off_t)-1) { + if (eof == (off64_t) - 1) { err = -errno; goto out; } @@ -890,7 +894,7 @@ vhd_read_short_footer(vhd_context_t *ctx if (err) goto out; - err = posix_memalign((void **)&buf, + err = posix_memalign(&buf, VHD_SECTOR_SIZE, sizeof(vhd_footer_t)); if (err) { buf = NULL; @@ -919,10 +923,10 @@ out: } int -vhd_read_footer_at(vhd_context_t *ctx, vhd_footer_t *footer, off_t off) +vhd_read_footer_at(vhd_context_t *ctx, vhd_footer_t *footer, off64_t off) { + void *buf; int err; - char *buf; buf = NULL; @@ -930,7 +934,7 @@ vhd_read_footer_at(vhd_context_t *ctx, v if (err) goto out; - err = posix_memalign((void **)&buf, + err = posix_memalign(&buf, VHD_SECTOR_SIZE, sizeof(vhd_footer_t)); if (err) { buf = NULL; @@ -959,14 +963,14 @@ int vhd_read_footer(vhd_context_t *ctx, vhd_footer_t *footer) { int err; - off_t off; + off64_t off; err = vhd_seek(ctx, 0, SEEK_END); if (err) return err; off = vhd_position(ctx); - if (off == (off_t)-1) + if (off == (off64_t) - 1) return -errno; err = vhd_read_footer_at(ctx, footer, off - 512); @@ -977,17 +981,22 @@ vhd_read_footer(vhd_context_t *ctx, vhd_ if (err != -EINVAL) return err; - if (ctx->oflags & VHD_OPEN_STRICT) - return -EINVAL; + /* + * Disable the enforcement of VHD_OPEN_STRICT until we figure out how + * to recover from crashes. Note that we never enforced it before + * anyways due to a bug (CA-28285) and everything was ok. 
+ */ + /* if (ctx->oflags & VHD_OPEN_STRICT) + return -EINVAL; */ return vhd_read_footer_at(ctx, footer, 0); } int -vhd_read_header_at(vhd_context_t *ctx, vhd_header_t *header, off_t off) +vhd_read_header_at(vhd_context_t *ctx, vhd_header_t *header, off64_t off) { + void *buf; int err; - char *buf; buf = NULL; @@ -1000,7 +1009,7 @@ vhd_read_header_at(vhd_context_t *ctx, v if (err) goto out; - err = posix_memalign((void **)&buf, + err = posix_memalign(&buf, VHD_SECTOR_SIZE, sizeof(vhd_header_t)); if (err) { buf = NULL; @@ -1028,8 +1037,7 @@ out: int vhd_read_header(vhd_context_t *ctx, vhd_header_t *header) { - int err; - off_t off; + off64_t off; if (!vhd_type_dynamic(ctx)) { VHDLOG("%s is not dynamic!\n", ctx->file); @@ -1044,8 +1052,9 @@ int vhd_read_bat(vhd_context_t *ctx, vhd_bat_t *bat) { int err; - char *buf; - off_t off; + void *buf; + off64_t off; + uint32_t vhd_blks; size_t size; buf = NULL; @@ -1056,9 +1065,14 @@ vhd_read_bat(vhd_context_t *ctx, vhd_bat } off = ctx->header.table_offset; - size = vhd_bytes_padded(ctx->header.max_bat_size * sizeof(uint32_t)); - - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + /* The BAT size is stored in ctx->header.max_bat_size. 
However, we + * sometimes preallocate BAT + batmap for max VHD size, so only read in + * the BAT entries that are in use for curr_size */ + vhd_blks = ctx->footer.curr_size >> VHD_BLOCK_SHIFT; + ASSERT(ctx->header.max_bat_size >= vhd_blks); + size = vhd_bytes_padded(vhd_blks * sizeof(uint32_t)); + + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) { buf = NULL; err = -err; @@ -1074,7 +1088,7 @@ vhd_read_bat(vhd_context_t *ctx, vhd_bat goto fail; bat->spb = ctx->header.block_size >> VHD_SECTOR_SHIFT; - bat->entries = ctx->header.max_bat_size; + bat->entries = vhd_blks; bat->bat = (uint32_t *)buf; vhd_bat_in(bat); @@ -1092,8 +1106,8 @@ static int vhd_read_batmap_header(vhd_context_t *ctx, vhd_batmap_t *batmap) { int err; - char *buf; - off_t off; + void *buf; + off64_t off; size_t size; buf = NULL; @@ -1107,7 +1121,7 @@ vhd_read_batmap_header(vhd_context_t *ct goto fail; size = vhd_bytes_padded(sizeof(vhd_batmap_header_t)); - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) { buf = NULL; err = -err; @@ -1137,13 +1151,16 @@ static int vhd_read_batmap_map(vhd_context_t *ctx, vhd_batmap_t *batmap) { int err; - char *buf; - off_t off; + void *buf; + off64_t off; size_t map_size; - map_size = vhd_sectors_to_bytes(batmap->header.batmap_size); - - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, map_size); + map_size = vhd_sectors_to_bytes(secs_round_up_no_zero + (ctx->footer. 
+ curr_size >> (VHD_BLOCK_SHIFT + 3))); + ASSERT(vhd_sectors_to_bytes(batmap->header.batmap_size) >= map_size); + + err = posix_memalign(&buf, VHD_SECTOR_SIZE, map_size); if (err) { buf = NULL; err = -err; @@ -1191,7 +1208,7 @@ vhd_read_batmap(vhd_context_t *ctx, vhd_ if (err) return err; - err = vhd_validate_batmap(batmap); + err = vhd_validate_batmap(ctx, batmap); if (err) goto fail; @@ -1252,8 +1269,10 @@ vhd_test_file_fixed(const char *file, in int vhd_find_parent(vhd_context_t *ctx, const char *parent, char **_location) { + char *location, __location[PATH_MAX]; + char *cpath, __cpath[PATH_MAX]; + char *cdir, *path; int err; - char *location, *cpath, *cdir, *path; err = 0; path = NULL; @@ -1266,16 +1285,15 @@ vhd_find_parent(vhd_context_t *ctx, cons if (parent[0] == '/') { if (!access(parent, R_OK)) { - path = strdup(parent); - if (!path) - return -ENOMEM; - *_location = path; + *_location = strdup(parent); + if (!*_location) + return -errno; return 0; } } /* check parent path relative to child's directory */ - cpath = realpath(ctx->file, NULL); + cpath = realpath(ctx->file, __cpath); if (!cpath) { err = -errno; goto out; @@ -1289,28 +1307,27 @@ vhd_find_parent(vhd_context_t *ctx, cons } if (!access(location, R_OK)) { - path = realpath(location, NULL); + path = realpath(location, __location); if (path) { - *_location = path; - return 0; + *_location = strdup(path); + if (*_location) + goto out; } } err = -errno; out: free(location); - free(cpath); return err; } -static int +int vhd_macx_encode_location(char *name, char **out, int *outlen) { iconv_t cd; int len, err; size_t ibl, obl; - char *uri, *uri_utf8, *uri_utf8p, *ret; - const char *urip; + char *uri, *urip, *uri_utf8, *uri_utf8p, *ret; err = 0; ret = NULL; @@ -1321,7 +1338,7 @@ vhd_macx_encode_location(char *name, cha ibl = len; obl = len; - urip = uri = malloc(ibl + 1); + uri = urip = malloc(ibl + 1); uri_utf8 = uri_utf8p = malloc(obl); if (!uri || !uri_utf8) @@ -1333,14 +1350,10 @@ 
vhd_macx_encode_location(char *name, cha goto out; } - snprintf(uri, ibl+1, "file://%s", name); - - if (iconv(cd, -#ifdef __linux__ - (char **) -#endif - &urip, &ibl, &uri_utf8p, &obl) == (size_t)-1 || - ibl || obl) { + sprintf(uri, "file://%s", name); + + if (iconv(cd, &urip, &ibl, &uri_utf8p, &obl) == (size_t) - 1 || + ibl || obl) { err = (errno ? -errno : -EIO); goto out; } @@ -1364,14 +1377,13 @@ vhd_macx_encode_location(char *name, cha return err; } -static int +int vhd_w2u_encode_location(char *name, char **out, int *outlen) { iconv_t cd; int len, err; size_t ibl, obl; - char *uri, *uri_utf16, *uri_utf16p, *tmp, *ret; - const char *urip; + char *uri, *urip, *uri_utf16, *uri_utf16p, *tmp, *ret; err = 0; ret = NULL; @@ -1425,12 +1437,8 @@ vhd_w2u_encode_location(char *name, char goto out; } - if (iconv(cd, -#ifdef __linux__ - (char **) -#endif - &urip, &ibl, &uri_utf16p, &obl) == (size_t)-1 || - ibl || obl) { + if (iconv(cd, &urip, &ibl, &uri_utf16p, &obl) == (size_t) - 1 || + ibl || obl) { err = (errno ? 
-errno : -EIO); goto out; } @@ -1457,7 +1465,7 @@ vhd_w2u_encode_location(char *name, char } static char * -vhd_macx_decode_location(const char *in, char *out, int len) +vhd_macx_decode_location(char *in, char *out, int len) { iconv_t cd; char *name; @@ -1470,11 +1478,7 @@ vhd_macx_decode_location(const char *in, if (cd == (iconv_t)-1) return NULL; - if (iconv(cd, -#ifdef __linux__ - (char **) -#endif - &in, &ibl, &out, &obl) == (size_t)-1 || ibl) + if (iconv(cd, &in, &ibl, &out, &obl) == (size_t) - 1 || ibl) return NULL; iconv_close(cd); @@ -1489,7 +1493,7 @@ vhd_macx_decode_location(const char *in, } static char * -vhd_w2u_decode_location(const char *in, char *out, int len, char *utf_type) +vhd_w2u_decode_location(char *in, char *out, int len, char *utf_type) { iconv_t cd; char *name, *tmp; @@ -1502,11 +1506,7 @@ vhd_w2u_decode_location(const char *in, if (cd == (iconv_t)-1) return NULL; - if (iconv(cd, -#ifdef __linux__ - (char **) -#endif - &in, &ibl, &out, &obl) == (size_t)-1 || ibl) + if (iconv(cd, &in, &ibl, &out, &obl) == (size_t) - 1 || ibl) return NULL; iconv_close(cd); @@ -1545,7 +1545,7 @@ vhd_parent_locator_read(vhd_context_t *c vhd_parent_locator_t *loc, char **parent) { int err, size; - char *raw, *out, *name; + void *raw, *out, *name; raw = NULL; out = NULL; @@ -1577,7 +1577,7 @@ vhd_parent_locator_read(vhd_context_t *c goto out; } - err = posix_memalign((void **)&raw, VHD_SECTOR_SIZE, size); + err = posix_memalign(&raw, VHD_SECTOR_SIZE, size); if (err) { raw = NULL; err = -err; @@ -1635,7 +1635,7 @@ vhd_parent_locator_get(vhd_context_t *ct char *name, *location; vhd_parent_locator_t *loc; - err = 0; + err = -EINVAL; *parent = NULL; if (ctx->footer.type != HD_TYPE_DIFF) @@ -1643,9 +1643,11 @@ vhd_parent_locator_get(vhd_context_t *ct n = vhd_parent_locator_count(ctx); for (i = 0; i < n; i++) { + int _err; + loc = ctx->header.loc + i; - err = vhd_parent_locator_read(ctx, loc, &name); - if (err) + _err = vhd_parent_locator_read(ctx, loc, &name); + if 
(_err) continue; err = vhd_find_parent(ctx, name, &location); @@ -1665,12 +1667,14 @@ vhd_parent_locator_get(vhd_context_t *ct int vhd_parent_locator_write_at(vhd_context_t *ctx, - const char *parent, off_t off, uint32_t code, + const char *parent, off64_t off, uint32_t code, size_t max_bytes, vhd_parent_locator_t *loc) { struct stat stats; int err, len, size; - char *absolute_path, *relative_path, *encoded, *block; + char *absolute_path, *relative_path, *encoded; + char __parent[PATH_MAX]; + void *block; memset(loc, 0, sizeof(vhd_parent_locator_t)); @@ -1693,7 +1697,7 @@ vhd_parent_locator_write_at(vhd_context_ return -EINVAL; } - absolute_path = realpath(parent, NULL); + absolute_path = realpath(parent, __parent); if (!absolute_path) { err = -errno; goto out; @@ -1742,7 +1746,7 @@ vhd_parent_locator_write_at(vhd_context_ goto out; } - err = posix_memalign((void **)&block, VHD_SECTOR_SIZE, size); + err = posix_memalign(&block, VHD_SECTOR_SIZE, size); if (err) { block = NULL; err = -err; @@ -1759,7 +1763,6 @@ vhd_parent_locator_write_at(vhd_context_ err = 0; out: - free(absolute_path); free(relative_path); free(encoded); free(block); @@ -1781,7 +1784,7 @@ out: } static int -vhd_footer_offset_at_eof(vhd_context_t *ctx, off_t *off) +vhd_footer_offset_at_eof(vhd_context_t *ctx, off64_t *off) { int err; if ((err = vhd_seek(ctx, 0, SEEK_END))) @@ -1794,9 +1797,9 @@ int vhd_read_bitmap(vhd_context_t *ctx, uint32_t block, char **bufp) { int err; - char *buf; + void *buf; size_t size; - off_t off; + off64_t off; uint64_t blk; buf = NULL; @@ -1823,7 +1826,7 @@ vhd_read_bitmap(vhd_context_t *ctx, uint if (err) return err; - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) return -err; @@ -1843,10 +1846,10 @@ int vhd_read_block(vhd_context_t *ctx, uint32_t block, char **bufp) { int err; - char *buf; + void *buf; size_t size; uint64_t blk; - off_t end, off; + off64_t end, off; buf = NULL; *bufp = NULL; @@ 
-1872,7 +1875,7 @@ vhd_read_block(vhd_context_t *ctx, uint3 if (err) return err; - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) { err = -err; goto fail; @@ -1900,20 +1903,21 @@ fail: } int -vhd_write_footer_at(vhd_context_t *ctx, vhd_footer_t *footer, off_t off) +vhd_write_footer_at(vhd_context_t *ctx, vhd_footer_t *footer, off64_t off) { int err; + void *buf; vhd_footer_t *f; f = NULL; - err = posix_memalign((void **)&f, + err = posix_memalign(&buf, VHD_SECTOR_SIZE, sizeof(vhd_footer_t)); if (err) { - f = NULL; err = -err; goto out; } + f = buf; memcpy(f, footer, sizeof(vhd_footer_t)); f->checksum = vhd_checksum_footer(f); @@ -1942,7 +1946,7 @@ int vhd_write_footer(vhd_context_t *ctx, vhd_footer_t *footer) { int err; - off_t off; + off64_t off; if (ctx->is_block) err = vhd_footer_offset_at_eof(ctx, &off); @@ -1955,6 +1959,12 @@ vhd_write_footer(vhd_context_t *ctx, vhd if (err) return err; + if (!ctx->is_block) { + err = ftruncate(ctx->fd, off + sizeof(vhd_footer_t)); + if (err) + return -errno; + } + if (!vhd_type_dynamic(ctx)) return 0; @@ -1962,10 +1972,11 @@ vhd_write_footer(vhd_context_t *ctx, vhd } int -vhd_write_header_at(vhd_context_t *ctx, vhd_header_t *header, off_t off) +vhd_write_header_at(vhd_context_t *ctx, vhd_header_t *header, off64_t off) { int err; vhd_header_t *h; + void *buf; h = NULL; @@ -1974,13 +1985,13 @@ vhd_write_header_at(vhd_context_t *ctx, goto out; } - err = posix_memalign((void **)&h, + err = posix_memalign(&buf, VHD_SECTOR_SIZE, sizeof(vhd_header_t)); if (err) { - h = NULL; err = -err; goto out; } + h = buf; memcpy(h, header, sizeof(vhd_header_t)); @@ -2008,8 +2019,7 @@ out: int vhd_write_header(vhd_context_t *ctx, vhd_header_t *header) { - int err; - off_t off; + off64_t off; if (!vhd_type_dynamic(ctx)) return -EINVAL; @@ -2022,8 +2032,9 @@ int vhd_write_bat(vhd_context_t *ctx, vhd_bat_t *bat) { int err; - off_t off; + off64_t off; vhd_bat_t b; + void *buf; 
size_t size; if (!vhd_type_dynamic(ctx)) @@ -2046,9 +2057,10 @@ vhd_write_bat(vhd_context_t *ctx, vhd_ba if (err) return err; - err = posix_memalign((void **)&b.bat, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) return -err; + b.bat = buf; memcpy(b.bat, bat->bat, size); b.spb = bat->spb; @@ -2061,13 +2073,50 @@ vhd_write_bat(vhd_context_t *ctx, vhd_ba return err; } +static int +vhd_write_batmap_header(vhd_context_t *ctx, vhd_batmap_t *batmap) +{ + int err; + size_t size; + off64_t off; + void *buf = NULL; + + err = vhd_batmap_header_offset(ctx, &off); + if (err) + goto out; + + size = vhd_bytes_padded(sizeof(*batmap)); + + err = vhd_seek(ctx, off, SEEK_SET); + if (err) + goto out; + + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); + if (err) { + err = -err; + goto out; + } + + vhd_batmap_header_out(batmap); + memset(buf, 0, size); + memcpy(buf, &batmap->header, sizeof(batmap->header)); + + err = vhd_write(ctx, buf, size); + + out: + if (err) + VHDLOG("%s: failed writing batmap: %d\n", ctx->file, err); + free(buf); + return err; +} + int vhd_write_batmap(vhd_context_t *ctx, vhd_batmap_t *batmap) { int err; - off_t off; + off64_t off; vhd_batmap_t b; - char *buf, *map; + void *buf, *map; size_t size, map_size; buf = NULL; @@ -2081,19 +2130,22 @@ vhd_write_batmap(vhd_context_t *ctx, vhd b.header = batmap->header; b.map = batmap->map; - b.header.checksum = vhd_checksum_batmap(&b); - err = vhd_validate_batmap(&b); + b.header.checksum = vhd_checksum_batmap(ctx, &b); + err = vhd_validate_batmap(ctx, &b); if (err) goto out; off = b.header.batmap_offset; - map_size = vhd_sectors_to_bytes(b.header.batmap_size); + map_size = vhd_sectors_to_bytes(secs_round_up_no_zero + (ctx->footer. 
+ curr_size >> (VHD_BLOCK_SHIFT + 3))); + ASSERT(vhd_sectors_to_bytes(b.header.batmap_size) >= map_size); err = vhd_seek(ctx, off, SEEK_SET); if (err) goto out; - err = posix_memalign((void **)&map, VHD_SECTOR_SIZE, map_size); + err = posix_memalign(&map, VHD_SECTOR_SIZE, map_size); if (err) { map = NULL; err = -err; @@ -2116,7 +2168,7 @@ vhd_write_batmap(vhd_context_t *ctx, vhd if (err) goto out; - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) { err = -err; buf = NULL; @@ -2141,9 +2193,9 @@ int vhd_write_bitmap(vhd_context_t *ctx, uint32_t block, char *bitmap) { int err; - off_t off; + off64_t off; uint64_t blk; - size_t secs, size; + size_t size; if (!vhd_type_dynamic(ctx)) return -EINVAL; @@ -2180,7 +2232,7 @@ int vhd_write_block(vhd_context_t *ctx, uint32_t block, char *data) { int err; - off_t off; + off64_t off; size_t size; uint64_t blk; @@ -2230,13 +2282,79 @@ namedup(char **dup, const char *name) return 0; } +#define vwrite (ssize_t (*)(int, void *, size_t))write +#define vpwrite (ssize_t (*)(int, void *, size_t, off_t))pwrite + +static ssize_t +vhd_atomic_pio(ssize_t(*f) (int, void *, size_t, off_t), + int fd, void *_s, size_t n, off_t off) +{ + char *s = _s; + size_t pos = 0; + ssize_t res; + struct stat st; + + memset(&st, 0, sizeof(st)); + + for (;;) { + res = (f) (fd, s + pos, n - pos, off + pos); + switch (res) { + case -1: + if (errno == EINTR || errno == EAGAIN) + continue; + else + return 0; + break; + case 0: + errno = EPIPE; + return pos; + } + + if (pos + res == n) + return n; + + if (!st.st_size) + if (fstat(fd, &st) == -1) + return -1; + + if (off + pos + res == st.st_size) + return pos + res; + + pos += (res & ~(VHD_SECTOR_SIZE - 1)); + } + + return -1; +} + +static ssize_t +vhd_atomic_io(ssize_t(*f) (int, void *, size_t), int fd, void *_s, + size_t n) +{ + off64_t off; + ssize_t res; + ssize_t(*pf) (int, void *, size_t, off_t); + + off = lseek64(fd, 0, SEEK_CUR); + 
if (off == (off_t) - 1) + return -1; + + pf = (f == read ? pread : vpwrite); + res = vhd_atomic_pio(pf, fd, _s, n, off); + + if (res > 0) + if (lseek64(fd, off + res, SEEK_SET) == (off64_t) - 1) + return -1; + + return res; +} + int -vhd_seek(vhd_context_t *ctx, off_t offset, int whence) +vhd_seek(vhd_context_t *ctx, off64_t offset, int whence) { - off_t off; - - off = lseek(ctx->fd, offset, whence); - if (off == (off_t)-1) { + off64_t off; + + off = lseek64(ctx->fd, offset, whence); + if (off == (off64_t)-1) { VHDLOG("%s: seek(0x%08"PRIx64", %d) failed: %d\n", ctx->file, offset, whence, -errno); return -errno; @@ -2245,10 +2363,10 @@ vhd_seek(vhd_context_t *ctx, off_t offse return 0; } -off_t +off64_t vhd_position(vhd_context_t *ctx) { - return lseek(ctx->fd, 0, SEEK_CUR); + return lseek64(ctx->fd, 0, SEEK_CUR); } int @@ -2258,7 +2376,7 @@ vhd_read(vhd_context_t *ctx, void *buf, errno = 0; - ret = read(ctx->fd, buf, size); + ret = vhd_atomic_io(read, ctx->fd, buf, size); if (ret == size) return 0; @@ -2275,7 +2393,7 @@ vhd_write(vhd_context_t *ctx, void *buf, errno = 0; - ret = write(ctx->fd, buf, size); + ret = vhd_atomic_io(vwrite, ctx->fd, buf, size); if (ret == size) return 0; @@ -2285,6 +2403,40 @@ vhd_write(vhd_context_t *ctx, void *buf, return (errno ? -errno : -EIO); } +static int +vhd_pread(vhd_context_t * ctx, void *buf, size_t size, off64_t offset) +{ + ssize_t ret; + + errno = 0; + + ret = vhd_atomic_pio(pread, ctx->fd, buf, size, offset); + if (ret == size) + return 0; + + VHDLOG("%s: pread of %zu returned %zd, errno: %d\n", + ctx->file, size, ret, -errno); + + return (errno ? -errno : -EIO); +} + +static int +vhd_pwrite(vhd_context_t * ctx, void *buf, size_t size, off64_t offset) +{ + ssize_t ret; + + errno = 0; + + ret = vhd_atomic_pio(vpwrite, ctx->fd, buf, size, offset); + if (ret == size) + return 0; + + VHDLOG("%s: pwrite of %zu returned %zd, errno: %d\n", + ctx->file, size, ret, -errno); + + return (errno ? 
-errno : -EIO); +} + int vhd_offset(vhd_context_t *ctx, uint32_t sector, uint32_t *offset) { @@ -2312,11 +2464,11 @@ int vhd_open_fast(vhd_context_t *ctx) { int err; - char *buf; + void *buf; size_t size; size = sizeof(vhd_footer_t) + sizeof(vhd_header_t); - err = posix_memalign((void **)&buf, VHD_SECTOR_SIZE, size); + err = posix_memalign(&buf, VHD_SECTOR_SIZE, size); if (err) { VHDLOG("failed allocating %s: %d\n", ctx->file, -err); return -err; @@ -2360,12 +2512,14 @@ out: int vhd_open(vhd_context_t *ctx, const char *file, int flags) { - int err, oflags; + int i, err, oflags; if (flags & VHD_OPEN_STRICT) vhd_flag_clear(flags, VHD_OPEN_FAST); memset(ctx, 0, sizeof(vhd_context_t)); + vhd_cache_init(ctx); + ctx->fd = -1; ctx->oflags = flags; @@ -2373,7 +2527,9 @@ vhd_open(vhd_context_t *ctx, const char if (err) return err; - oflags = O_DIRECT | O_LARGEFILE; + oflags = O_LARGEFILE; + if (!(flags & VHD_OPEN_CACHED)) + oflags |= O_DIRECT; if (flags & VHD_OPEN_RDONLY) oflags |= O_RDONLY; if (flags & VHD_OPEN_RDWR) @@ -2408,7 +2564,13 @@ vhd_open(vhd_context_t *ctx, const char } if (vhd_type_dynamic(ctx)) { - err = vhd_read_header(ctx, &ctx->header); + for (i = 0; i < VHD_HEADER_MAX_RETRIES; i++) { + err = vhd_read_header(ctx, &ctx->header); + if (!err) + break; + VHDLOG("Error reading header, retry %d\n", i); + sleep(1); + } if (err) goto fail; @@ -2416,6 +2578,12 @@ vhd_open(vhd_context_t *ctx, const char ctx->bm_secs = secs_round_up_no_zero(ctx->spb >> 3); } + err = vhd_cache_load(ctx); + if (err) { + VHDLOG("failed to load cache: %d\n", err); + goto fail; + } + return 0; fail: @@ -2429,8 +2597,14 @@ fail: void vhd_close(vhd_context_t *ctx) { + vhd_cache_unload(ctx); + if (ctx->file) + { + fsync(ctx->fd); close(ctx->fd); + } + free(ctx->file); free(ctx->bat.bat); free(ctx->batmap.map); @@ -2452,19 +2626,18 @@ vhd_initialize_footer(vhd_context_t *ctx ctx->footer.geometry = vhd_chs(size); ctx->footer.type = type; ctx->footer.saved = 0; - ctx->footer.data_offset = 
0xFFFFFFFFFFFFFFFF; + ctx->footer.data_offset = 0xFFFFFFFFFFFFFFFFULL; strcpy(ctx->footer.crtr_app, "tap"); - vhd_uuid_generate(&ctx->footer.uuid); + uuid_generate(ctx->footer.uuid); } -static int +int vhd_initialize_header_parent_name(vhd_context_t *ctx, const char *parent_path) { int err; iconv_t cd; size_t ibl, obl; - char *ppath, *dst; - const char *pname; + char *pname, *ppath, *dst; err = 0; pname = NULL; @@ -2498,11 +2671,7 @@ vhd_initialize_header_parent_name(vhd_co memset(dst, 0, obl); - if (iconv(cd, -#ifdef __linux__ - (char **) -#endif - &pname, &ibl, &dst, &obl) == (size_t)-1 || ibl) + if (iconv(cd, &pname, &ibl, &dst, &obl) == (size_t) - 1 || ibl) err = (errno ? -errno : -EINVAL); out: @@ -2511,25 +2680,25 @@ out: return err; } -static off_t +static off64_t get_file_size(const char *name) { int fd; - off_t end; + off64_t end; fd = open(name, O_LARGEFILE | O_RDONLY); if (fd == -1) { VHDLOG("unable to open '%s': %d\n", name, errno); return -errno; } - end = lseek(fd, 0, SEEK_END); + end = lseek64(fd, 0, SEEK_END); close(fd); return end; } static int vhd_initialize_header(vhd_context_t *ctx, const char *parent_path, - uint64_t size, int raw) + uint64_t size, int raw, uint64_t *psize) { int err; struct stat stats; @@ -2560,20 +2729,26 @@ vhd_initialize_header(vhd_context_t *ctx if (raw) { ctx->header.prt_ts = vhd_time(stats.st_mtime); + *psize = get_file_size(parent_path); if (!size) - size = get_file_size(parent_path); - } - else { + size = *psize; + } else { err = vhd_open(&parent, parent_path, VHD_OPEN_RDONLY); if (err) return err; ctx->header.prt_ts = vhd_time(stats.st_mtime); - vhd_uuid_copy(&ctx->header.prt_uuid, &parent.footer.uuid); + uuid_copy(ctx->header.prt_uuid, parent.footer.uuid); + *psize = parent.footer.curr_size; if (!size) - size = parent.footer.curr_size; + size = *psize; vhd_close(&parent); } + if (size < *psize) { + VHDLOG("snapshot size (%" PRIu64 ") < parent size (%" PRIu64 ")\n", + size, *psize); + return -EINVAL; + } 
ctx->footer.orig_size = size; ctx->footer.curr_size = size; ctx->footer.geometry = vhd_chs(size); @@ -2583,11 +2758,11 @@ vhd_initialize_header(vhd_context_t *ctx return vhd_initialize_header_parent_name(ctx, parent_path); } -static int +int vhd_write_parent_locators(vhd_context_t *ctx, const char *parent) { int i, err; - off_t off; + off64_t off; uint32_t code; code = PLAT_CODE_NONE; @@ -2631,8 +2806,9 @@ vhd_change_parent(vhd_context_t *child, char *ppath; struct stat stats; vhd_context_t parent; - - ppath = realpath(parent_path, NULL); + char __parent_path[PATH_MAX]; + + ppath = realpath(parent_path, __parent_path); if (!ppath) { VHDLOG("error resolving parent path %s for %s: %d\n", parent_path, child->file, errno); @@ -2651,7 +2827,7 @@ vhd_change_parent(vhd_context_t *child, } if (raw) { - vhd_uuid_clear(&child->header.prt_uuid); + uuid_clear(child->header.prt_uuid); } else { err = vhd_open(&parent, ppath, VHD_OPEN_RDONLY); if (err) { @@ -2659,7 +2835,7 @@ vhd_change_parent(vhd_context_t *child, ppath, child->file, err); goto out; } - vhd_uuid_copy(&child->header.prt_uuid, &parent.footer.uuid); + uuid_copy(child->header.prt_uuid, parent.footer.uuid); vhd_close(&parent); } @@ -2700,16 +2876,16 @@ vhd_change_parent(vhd_context_t *child, err = 0; out: - free(ppath); return err; } static int vhd_create_batmap(vhd_context_t *ctx) { - off_t off; + off64_t off; int err, map_bytes; vhd_batmap_header_t *header; + void *map; if (!vhd_type_dynamic(ctx)) return -EINVAL; @@ -2731,14 +2907,13 @@ vhd_create_batmap(vhd_context_t *ctx) map_bytes = vhd_sectors_to_bytes(header->batmap_size); - err = posix_memalign((void **)&ctx->batmap.map, + err = posix_memalign(&map, VHD_SECTOR_SIZE, map_bytes); - if (err) { - ctx->batmap.map = NULL; + if (err) return -err; - } - - memset(ctx->batmap.map, 0, map_bytes); + + memset(map, 0, map_bytes); + ctx->batmap.map = map; return vhd_write_batmap(ctx, &ctx->batmap); } @@ -2748,16 +2923,17 @@ vhd_create_bat(vhd_context_t *ctx) { int i, err; 
size_t size; + void *bat; if (!vhd_type_dynamic(ctx)) return -EINVAL; size = vhd_bytes_padded(ctx->header.max_bat_size * sizeof(uint32_t)); - err = posix_memalign((void **)&ctx->bat.bat, VHD_SECTOR_SIZE, size); - if (err) { - ctx->bat.bat = NULL; + err = posix_memalign(&bat, VHD_SECTOR_SIZE, size); + if (err) return err; - } + + ctx->bat.bat = bat; memset(ctx->bat.bat, 0, size); for (i = 0; i < ctx->header.max_bat_size; i++) @@ -2787,7 +2963,7 @@ vhd_initialize_fixed_disk(vhd_context_t return err; buf = mmap(0, VHD_BLOCK_SIZE, PROT_READ, - MAP_SHARED | MAP_ANON, -1, 0); + MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) return -errno; @@ -2805,7 +2981,7 @@ out: } int -vhd_get_phys_size(vhd_context_t *ctx, off_t *size) +vhd_get_phys_size(vhd_context_t *ctx, off64_t *size) { int err; @@ -2816,9 +2992,9 @@ vhd_get_phys_size(vhd_context_t *ctx, of } int -vhd_set_phys_size(vhd_context_t *ctx, off_t size) +vhd_set_phys_size(vhd_context_t *ctx, off64_t size) { - off_t phys_size; + off64_t phys_size; int err; err = vhd_get_phys_size(ctx, &phys_size); @@ -2835,15 +3011,40 @@ vhd_set_phys_size(vhd_context_t *ctx, of } static int -__vhd_create(const char *name, const char *parent, uint64_t bytes, int type, - vhd_flag_creat_t flags) +vhd_set_virt_size_no_write(vhd_context_t *ctx, uint64_t size) +{ + if ((size >> VHD_BLOCK_SHIFT) > ctx->header.max_bat_size) { + VHDLOG("not enough metadata space reserved for fast " + "resize (BAT size %u, need %" PRIu64 ")\n", + ctx->header.max_bat_size, size >> VHD_BLOCK_SHIFT); + return -EINVAL; + } + + /* update footer */ + ctx->footer.curr_size = size; + ctx->footer.geometry = vhd_chs(ctx->footer.curr_size); + ctx->footer.checksum = vhd_checksum_footer(&ctx->footer); + return 0; +} + +int vhd_set_virt_size(vhd_context_t * ctx, uint64_t size) { int err; - off_t off; + + err = vhd_set_virt_size_no_write(ctx, size); + if (err) + return err; + return vhd_write_footer(ctx, &ctx->footer); +} + +static int +__vhd_create(const char *name, 
const char *parent, uint64_t bytes, int type, + uint64_t mbytes, vhd_flag_creat_t flags) +{ + int err; + off64_t off; vhd_context_t ctx; - vhd_footer_t *footer; - vhd_header_t *header; - uint64_t size, blks; + uint64_t size, psize, blks; switch (type) { case HD_TYPE_DIFF: @@ -2859,10 +3060,19 @@ static int if (strnlen(name, VHD_MAX_NAME_LEN - 1) == VHD_MAX_NAME_LEN - 1) return -ENAMETOOLONG; + if (bytes && mbytes && mbytes < bytes) + return -EINVAL; + memset(&ctx, 0, sizeof(vhd_context_t)); - footer = &ctx.footer; - header = &ctx.header; + psize = 0; blks = (bytes + VHD_BLOCK_SIZE - 1) >> VHD_BLOCK_SHIFT; + /* If mbytes is provided (virtual-size-for-metadata-preallocation), + * create the VHD of size mbytes, which will create the BAT & the + * batmap of the appropriate size. Once the BAT & batmap are + * initialized, reset the virtual size to the requested one. + */ + if (mbytes) + blks = (mbytes + VHD_BLOCK_SIZE - 1) >> VHD_BLOCK_SHIFT; size = blks << VHD_BLOCK_SHIFT; ctx.fd = open(name, O_WRONLY | O_CREAT | @@ -2888,15 +3098,7 @@ static int goto out; } else { int raw = vhd_flag_test(flags, VHD_FLAG_CREAT_PARENT_RAW); - err = vhd_initialize_header(&ctx, parent, size, raw); - if (err) - goto out; - - err = vhd_write_footer_at(&ctx, &ctx.footer, 0); - if (err) - goto out; - - err = vhd_write_header_at(&ctx, &ctx.header, VHD_SECTOR_SIZE); + err = vhd_initialize_header(&ctx, parent, size, raw, &psize); if (err) goto out; @@ -2913,8 +3115,28 @@ static int if (err) goto out; } - - /* write header again since it may have changed */ + } + + if (mbytes) { + /* set the virtual size to the requested size */ + if (bytes) { + blks = (bytes + VHD_BLOCK_SIZE - 1) >> VHD_BLOCK_SHIFT; + size = blks << VHD_BLOCK_SHIFT; + + } else { + size = psize; + } + ctx.footer.orig_size = size; + err = vhd_set_virt_size_no_write(&ctx, size); + if (err) + goto out; + } + + if (type != HD_TYPE_FIXED) { + err = vhd_write_footer_at(&ctx, &ctx.footer, 0); + if (err) + goto out; + err = 
vhd_write_header_at(&ctx, &ctx.header, VHD_SECTOR_SIZE); if (err) goto out; @@ -2925,7 +3147,7 @@ static int goto out; off = vhd_position(&ctx); - if (off == (off_t)-1) { + if (off == (off64_t)-1) { err = -errno; goto out; } @@ -2947,16 +3169,17 @@ out: } int -vhd_create(const char *name, uint64_t bytes, int type, vhd_flag_creat_t flags) +vhd_create(const char *name, uint64_t bytes, int type, uint64_t mbytes, + vhd_flag_creat_t flags) { - return __vhd_create(name, NULL, bytes, type, flags); + return __vhd_create(name, NULL, bytes, type, mbytes, flags); } int vhd_snapshot(const char *name, uint64_t bytes, const char *parent, - vhd_flag_creat_t flags) + uint64_t mbytes, vhd_flag_creat_t flags) { - return __vhd_create(name, parent, bytes, HD_TYPE_DIFF, flags); + return __vhd_create(name, parent, bytes, HD_TYPE_DIFF, mbytes, flags); } static int @@ -3000,7 +3223,7 @@ static int __vhd_io_dynamic_read_link(vhd_context_t *ctx, char *map, char *buf, uint64_t sector, uint32_t secs) { - off_t off; + off64_t off; uint32_t blk, sec; int err, cnt, map_off; char *bitmap, *data, *src; @@ -3056,9 +3279,9 @@ static int char *map, char *buf, uint64_t sec, uint32_t secs) { int fd, err; - off_t off; + off64_t off; uint64_t size; - char *data; + void *data; err = 0; errno = 0; @@ -3068,8 +3291,8 @@ static int return -errno; } - off = lseek(fd, vhd_sectors_to_bytes(sec), SEEK_SET); - if (off == (off_t)-1) { + off = lseek64(fd, vhd_sectors_to_bytes(sec), SEEK_SET); + if (off == (off64_t)-1) { VHDLOG("%s: seek(0x%08"PRIx64") failed: %d\n", filename, vhd_sectors_to_bytes(sec), -errno); err = -errno; @@ -3077,7 +3300,7 @@ static int } size = vhd_sectors_to_bytes(secs); - err = posix_memalign((void **)&data, VHD_SECTOR_SIZE, size); + err = posix_memalign(&data, VHD_SECTOR_SIZE, size); if (err) goto close; @@ -3134,12 +3357,21 @@ static int } if (vhd->footer.type == HD_TYPE_DIFF) { + vhd_context_t *p; + p = vhd_cache_get_parent(vhd); + if (p) { + vhd = p; + err = vhd_get_bat(vhd); + if (err) 
+ goto out; + continue; + } + err = vhd_parent_locator_get(vhd, &next); if (err) goto close; if (vhd_parent_raw(vhd)) { - err = __raw_read_link(next, map, buf, sec, - secs); + err = __raw_read_link(next, map, buf, sec, secs); goto close; } } else { @@ -3164,7 +3396,7 @@ static int } close: - if (vhd != ctx) + if (vhd != ctx && !vhd_flag_test(vhd->oflags, VHD_OPEN_CACHED)) vhd_close(vhd); out: free(map); @@ -3202,8 +3434,8 @@ static int { char *buf; size_t size; - off_t off, max; - int i, err, gap, spp; + off64_t off, max; + int err, gap, spp, secs; spp = getpagesize() >> VHD_SECTOR_SHIFT; @@ -3225,8 +3457,12 @@ static int if (err) return err; - size = vhd_sectors_to_bytes(ctx->spb + ctx->bm_secs + gap); - buf = mmap(0, size, PROT_READ, MAP_SHARED | MAP_ANON, -1, 0); + secs = ctx->bm_secs + gap; + if (!vhd_flag_test(ctx->oflags, VHD_OPEN_IO_WRITE_SPARSE)) + secs += ctx->spb; + + size = vhd_sectors_to_bytes(secs); + buf = mmap(0, size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) return -errno; @@ -3251,7 +3487,7 @@ static int char *buf, uint64_t sector, uint32_t secs) { char *map; - off_t off; + off64_t off; uint32_t blk, sec; int i, err, cnt, ret; @@ -3350,3 +3586,765 @@ vhd_io_write(vhd_context_t *ctx, char *b return __vhd_io_dynamic_write(ctx, buf, sec, secs); } + +static void vhd_cache_init(vhd_context_t * ctx __attribute__((unused))) +{ +} + +static int vhd_cache_enabled(vhd_context_t * ctx) +{ + return vhd_flag_test(ctx->oflags, VHD_OPEN_CACHED); +} + +static int vhd_cache_load(vhd_context_t * ctx) +{ + char *next; + int err, pflags; + vhd_context_t *vhd; + + err = 1; + pflags = ctx->oflags; + vhd = ctx; + next = NULL; + + vhd_flag_set(pflags, VHD_OPEN_RDONLY); + vhd_flag_clear(pflags, VHD_OPEN_CACHED); + + if (!vhd_cache_enabled(vhd)) + goto done; + + while (vhd->footer.type == HD_TYPE_DIFF) { + vhd_context_t *parent; + + parent = NULL; + + if (vhd_parent_raw(vhd)) + goto done; + + err = vhd_parent_locator_get(vhd, &next); + if (err) 
+ goto out; + + parent = calloc(1, sizeof(*parent)); + if (!parent) + goto out; + + err = vhd_open(parent, next, pflags); + if (err) { + free(parent); + parent = NULL; + goto out; + } + + fcntl(parent->fd, F_SETFL, fcntl(parent->fd, F_GETFL) & ~O_DIRECT); + vhd_flag_set(parent->oflags, VHD_OPEN_CACHED); + vhd->parent = parent; + + free(next); + next = NULL; + vhd = parent; + } + + done: + err = 0; + out: + free(next); + if (err) + vhd_cache_unload(vhd); + + return err; +} + +static int vhd_cache_unload(vhd_context_t * ctx) +{ + vhd_context_t *vhd; + + if (!vhd_cache_enabled(ctx)) + goto out; + + /* pop parents one at a time; the next link is saved into ctx before free() so freed memory is never read */ + while ((vhd = ctx->parent)) { + vhd_close(vhd); + ctx->parent = vhd->parent; + free(vhd); + } + + out: + return 0; +} + +static inline vhd_context_t *vhd_cache_get_parent(vhd_context_t * ctx) +{ + if (!vhd_cache_enabled(ctx)) + return NULL; + + return ctx->parent; +} + +typedef struct vhd_block_vector vhd_block_vector_t; +typedef struct vhd_block_vector_entry vhd_block_vector_entry_t; + +struct vhd_block_vector_entry { + uint64_t off; /* byte offset from block */ + uint32_t bytes; /* size in bytes */ + char *buf; /* destination buffer */ +}; + +struct vhd_block_vector { + uint32_t block; /* logical block in vhd */ + int entries; /* number of vector entries */ + vhd_block_vector_entry_t *array; /* vector list */ +}; + +/** + * @vec: block vector describing read + * + * @vec describes a list of byte-spans within a given block + * and a corresponding list of destination buffers. 
+ */ +static int +vhd_block_vector_read(vhd_context_t * ctx, vhd_block_vector_t * vec) +{ + int err, i; + off64_t off; + uint32_t blk; + + err = vhd_get_bat(ctx); + if (err) + goto out; + + if (vec->block >= ctx->bat.entries) { + err = -ERANGE; + goto out; + } + + blk = ctx->bat.bat[vec->block]; + if (blk == DD_BLK_UNUSED) { + err = -EINVAL; + goto out; + } + + off = vhd_sectors_to_bytes(blk + ctx->bm_secs); + + for (i = 0; i < vec->entries; i++) { + vhd_block_vector_entry_t *v = vec->array + i; + err = vhd_pread(ctx, v->buf, v->bytes, off + v->off); + if (err) + goto out; + } + + out: + return err; +} + +/** + * @vec: block vector to initialize + * @block: vhd block number + * @map: optional bitmap of sectors to map (relative to beginning of block) + * @buf: destination buffer + * @blk_start: byte offset relative to beginning of block + * @blk_end: byte offset relative to beginning of block + * + * initializes @vec to describe a read into a contiguous buffer + * of potentially non-contiguous byte ranges in a given vhd block. + * only sectors with corresponding bits set in @map (if it is not NULL) + * will be mapped; bits corresponding to unmapped sectors will be cleared. + * first and last sector maps may be smaller than vhd sector size. 
+ */ +static int +vhd_block_vector_init(vhd_context_t * ctx, + vhd_block_vector_t * vec, uint32_t block, char *map, + char *buf, uint64_t blk_start, uint64_t blk_end) +{ + int err, sec; + char *bitmap; + uint32_t first_sec, last_sec; + + bitmap = NULL; + memset(vec, 0, sizeof(*vec)); + + first_sec = blk_start >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(blk_end); + + err = vhd_read_bitmap(ctx, block, &bitmap); + if (err) + goto out; + + vec->array = calloc(ctx->spb, sizeof(vhd_block_vector_entry_t)); + if (!vec->array) { + err = -ENOMEM; + goto out; + } + + for (sec = first_sec; sec < last_sec; sec++) { + uint32_t cnt; + vhd_block_vector_entry_t *v; + + cnt = VHD_SECTOR_SIZE - (blk_start & (VHD_SECTOR_SIZE - 1)); + if (cnt > blk_end - blk_start) + cnt = blk_end - blk_start; + + if (map && !test_bit(map, sec)) + goto next; + + if (vhd_bitmap_test(ctx, bitmap, sec)) { + if (vec->entries > 0) { + v = vec->array + vec->entries - 1; + if (v->off + v->bytes == blk_start) { + v->bytes += cnt; + goto next; + } + } + + v = vec->array + vec->entries; + v->off = blk_start; + v->bytes = cnt; + v->buf = buf; + + vec->entries++; + + } else if (map) { + clear_bit(map, sec); + } + + next: + blk_start += cnt; + buf += cnt; + } + + vec->block = block; + + out: + free(bitmap); + return err; +} + +#if 0 +/** + * @block: vhd block number + * @buf: buffer to place data in + * @size: number of bytes to read + * @start: byte offset into block from which to start reading + * @end: byte offset in block at which to stop reading + * + * reads data (if it exists) into @buf. partial reads may occur + * for the first and last sectors if @start and @end are not multiples + * of vhd sector size. 
+ */ +static int +vhd_block_vector_read_allocated(vhd_context_t * ctx, uint32_t block, + char *buf, uint64_t start, uint64_t end) +{ + int err; + vhd_block_vector_t vec; + + vec.array = NULL; + + err = vhd_block_vector_init(ctx, &vec, block, NULL, buf, start, end); + if (err) + goto out; + + err = vhd_block_vector_read(ctx, &vec); + + out: + free(vec.array); + return err; +} +#endif + +/** + * @block: vhd block number + * @map: bitmap of sectors in block which should be read + * @buf: buffer to place data in + * @start: byte offset into block from which to start reading + * @end: byte offset in block at which to stop reading + * + * for every bit set in @map (corresponding to sectors in @block), + * reads data (if it exists) into @buf. if data does not exist, + * clears corresponding bit in @map. partial reads may occur + * for the first and last sectors if @start and @end are not multiples + * of vhd sector size. + */ +static int +vhd_block_vector_read_allocated_selective(vhd_context_t * ctx, + uint32_t block, char *map, + char *buf, uint64_t start, + uint64_t end) +{ + int err; + vhd_block_vector_t vec; + + vec.array = NULL; + + err = vhd_block_vector_init(ctx, &vec, block, map, buf, start, end); + if (err) + goto out; + + err = vhd_block_vector_read(ctx, &vec); + + out: + free(vec.array); + return err; +} + +/** + * @map: bitmap of sectors which have already been read + * @buf: destination buffer + * @size: size in bytes to read + * @off: byte offset in virtual disk to read + * + * reads @size bytes into @buf, starting at @off, skipping sectors + * which have corresponding bits set in @map + */ +static int +__vhd_io_dynamic_read_link_bytes(vhd_context_t * ctx, char *map, + char *buf, size_t size, uint64_t off) +{ + char *blkmap; + int i, err, map_off; + off64_t blk_off, blk_size; + uint32_t blk, bytes, first_sec, last_sec; + + blkmap = malloc((ctx->spb + 7) >> 3); + if (!blkmap) { + err = -ENOMEM; + goto out; + } + + map_off = 0; + blk_size = 
vhd_sectors_to_bytes(ctx->spb); + + do { + blk = off / blk_size; + blk_off = off % blk_size; + bytes = MIN(blk_size - blk_off, size); + + first_sec = blk_off >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(blk_off + bytes); + + if (ctx->bat.bat[blk] == DD_BLK_UNUSED) + goto next; + + memset(blkmap, 0, (ctx->spb + 7) >> 3); + + for (i = 0; i < (last_sec - first_sec); i++) + if (!test_bit(map, map_off + i)) + set_bit(blkmap, first_sec + i); + + err = vhd_block_vector_read_allocated_selective(ctx, blk, + blkmap, buf, + blk_off, + blk_off + bytes); + if (err) + goto out; + + for (i = 0; i < (last_sec - first_sec); i++) + if (test_bit(blkmap, first_sec + i)) + set_bit(map, map_off + i); + + next: + size -= bytes; + off += bytes; + map_off += (last_sec - first_sec); + buf += bytes; + + } while (size); + + err = 0; + out: + free(blkmap); + return err; +} + +static int +__raw_read_link_bytes(const char *filename, + char *map, char *buf, size_t size, uint64_t off) +{ + int fd, err; + uint32_t i, first_sec, last_sec; + + fd = open(filename, O_RDONLY | O_LARGEFILE); + if (fd == -1) { + VHDLOG("%s: failed to open: %d\n", filename, -errno); + return -errno; + } + + first_sec = off >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(off + size); + + for (i = first_sec; i < last_sec; i++) { + if (!test_bit(map, i - first_sec)) { + uint32_t secs = 0; + uint64_t coff, csize; + + while (i + secs < last_sec && + !test_bit(map, i + secs - first_sec)) + secs++; + + coff = vhd_sectors_to_bytes(i); + csize = vhd_sectors_to_bytes(i + secs); /* end offset of the run, for now */ + + if (i == first_sec) + coff = off; /* request may start mid-sector */ + if (i + secs == last_sec) + csize = off + size; /* run reaches the end of the request */ + csize -= coff; /* turn the end offset into a byte count */ + if (pread(fd, buf + coff - off, csize, coff) != csize) { + err = (errno ? 
-errno : -EIO); + goto close; + } + + i += secs - 1; + } + } + + err = 0; + + close: + close(fd); + return err; +} + +static int +__vhd_io_dynamic_read_bytes(vhd_context_t * ctx, + char *buf, size_t size, uint64_t off) +{ + int err; + char *next, *map; + vhd_context_t parent, *vhd; + uint32_t i, done, first_sec, last_sec; + + err = vhd_get_bat(ctx); + if (err) + return err; + + first_sec = off >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(off + size); + + vhd = ctx; + next = NULL; + map = calloc(1, ((last_sec - first_sec) + 7) >> 3); + if (!map) { + err = -ENOMEM; + goto out; + } + + for (;;) { + err = __vhd_io_dynamic_read_link_bytes(vhd, map, buf, size, off); + if (err) + goto close; + + for (done = 0, i = 0; i < (last_sec - first_sec); i++) + if (test_bit(map, i)) + done++; + + if (done == last_sec - first_sec) { + err = 0; + goto close; + } + + if (vhd->footer.type == HD_TYPE_DIFF) { + vhd_context_t *p; + p = vhd_cache_get_parent(vhd); + if (p) { + vhd = p; + err = vhd_get_bat(vhd); + if (err) + goto out; + continue; + } + + err = vhd_parent_locator_get(vhd, &next); + if (err) + goto close; + + if (vhd_parent_raw(vhd)) { + err = __raw_read_link_bytes(next, map, buf, size, off); + goto close; + } + } else { + err = 0; + goto close; + } + + if (vhd != ctx) + vhd_close(vhd); + vhd = &parent; + + err = vhd_open(vhd, next, VHD_OPEN_RDONLY); + if (err) + goto out; + + err = vhd_get_bat(vhd); + if (err) + goto close; + + free(next); + next = NULL; + } + + close: + if (!err) { + /* + * clear any regions not present on disk + */ + for (i = first_sec; i < last_sec; i++) { + if (!test_bit(map, i - first_sec)) { + uint64_t coff = vhd_sectors_to_bytes(i); + uint32_t csize = VHD_SECTOR_SIZE; + + if (i == first_sec) + coff = off; + if (i == last_sec - 1) + csize = (off + size) - coff; + + memset(buf + coff - off, 0, csize); + } + } + } + + if (vhd != ctx && !vhd_flag_test(vhd->oflags, VHD_OPEN_CACHED)) + vhd_close(vhd); + out: + free(map); + free(next); + return 
err; +} + +int +vhd_io_read_bytes(vhd_context_t * ctx, void *buf, size_t size, + uint64_t off) +{ + if (off + size > ctx->footer.curr_size) + return -ERANGE; + + if (!vhd_type_dynamic(ctx)) + return vhd_pread(ctx, buf, size, off); + + return __vhd_io_dynamic_read_bytes(ctx, buf, size, off); +} + +static int +__vhd_io_dynamic_write_bytes_aligned(vhd_context_t * ctx, + char *buf, size_t size, uint64_t off) +{ + char *map; + int i, err, ret; + uint64_t blk_off, blk_size, blk_start; + uint32_t blk, bytes, first_sec, last_sec; + + if (off & (VHD_SECTOR_SIZE - 1) || size & (VHD_SECTOR_SIZE - 1)) + return -EINVAL; + + err = vhd_get_bat(ctx); + if (err) + return err; + + if (vhd_has_batmap(ctx)) { + err = vhd_get_batmap(ctx); + if (err) + return err; + } + + map = NULL; + blk_size = vhd_sectors_to_bytes(ctx->spb); + + do { + blk = off / blk_size; + blk_off = off % blk_size; + bytes = MIN(blk_size - blk_off, size); + + first_sec = blk_off >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(blk_off + bytes); + + blk_start = ctx->bat.bat[blk]; + if (blk_start == DD_BLK_UNUSED) { + err = __vhd_io_allocate_block(ctx, blk); + if (err) + goto fail; + + blk_start = ctx->bat.bat[blk]; + } + + blk_start = vhd_sectors_to_bytes(blk_start + ctx->bm_secs); + + err = vhd_pwrite(ctx, buf, bytes, blk_start + blk_off); + if (err) + goto fail; + + if (vhd_has_batmap(ctx) && vhd_batmap_test(ctx, &ctx->batmap, blk)) + goto next; + + err = vhd_read_bitmap(ctx, blk, &map); + if (err) { + map = NULL; + goto fail; + } + + for (i = first_sec; i < last_sec; i++) + vhd_bitmap_set(ctx, map, i); + + err = vhd_write_bitmap(ctx, blk, map); + if (err) + goto fail; + + if (vhd_has_batmap(ctx)) { + for (i = 0; i < ctx->spb; i++) + if (!vhd_bitmap_test(ctx, map, i)) { + free(map); + map = NULL; + goto next; + } + + vhd_batmap_set(ctx, &ctx->batmap, blk); + err = vhd_write_batmap(ctx, &ctx->batmap); + if (err) + goto fail; + } + + free(map); + map = NULL; + + next: + size -= bytes; + off += bytes; + buf 
+= bytes; + + } while (size); + + err = 0; + + out: + ret = vhd_write_footer(ctx, &ctx->footer); + return (err ? err : ret); + + fail: + free(map); + goto out; +} + +static int +__vhd_io_dynamic_write_bytes(vhd_context_t * ctx, + char *buf, size_t size, uint64_t off) +{ + int err; + char *tmp; + uint32_t first_sec, last_sec, first_sec_off, last_sec_off; + + err = 0; + tmp = NULL; + + first_sec = off >> VHD_SECTOR_SHIFT; + last_sec = secs_round_up_no_zero(off + size); + + first_sec_off = off & (VHD_SECTOR_SIZE - 1); + last_sec_off = (off + size) & (VHD_SECTOR_SIZE - 1); + + if (first_sec_off || last_sec_off) { + tmp = malloc(VHD_SECTOR_SIZE); + if (!tmp) { + err = -ENOMEM; + goto out; + } + + if (first_sec_off) { + uint32_t new = VHD_SECTOR_SIZE - first_sec_off; + if (new > size) + new = size; + + err = vhd_io_read_bytes(ctx, tmp, VHD_SECTOR_SIZE, + vhd_sectors_to_bytes(first_sec)); + if (err) + goto out; + + memcpy(tmp + first_sec_off, buf, new); + + err = + __vhd_io_dynamic_write_bytes_aligned(ctx, tmp, + VHD_SECTOR_SIZE, + vhd_sectors_to_bytes + (first_sec)); + if (err) + goto out; + + buf += new; + off += new; + size -= new; + } + + if (last_sec_off && (last_sec - first_sec > 1 || !first_sec_off)) { + uint32_t new = last_sec_off; + + err = vhd_io_read_bytes(ctx, tmp, VHD_SECTOR_SIZE, + vhd_sectors_to_bytes(last_sec - 1)); + if (err) + goto out; + + memcpy(tmp, buf + size - new, new); + + err = + __vhd_io_dynamic_write_bytes_aligned(ctx, tmp, + VHD_SECTOR_SIZE, + vhd_sectors_to_bytes + (last_sec - 1)); + if (err) + goto out; + + size -= new; + } + } + + if (size) + err = __vhd_io_dynamic_write_bytes_aligned(ctx, buf, size, off); + + out: + free(tmp); + return err; +} + +int +vhd_io_write_bytes(vhd_context_t * ctx, void *buf, size_t size, + uint64_t off) +{ + if (off + size > ctx->footer.curr_size) + return -ERANGE; + + if (!vhd_type_dynamic(ctx)) + return vhd_pwrite(ctx, buf, size, off); + + return __vhd_io_dynamic_write_bytes(ctx, buf, size, off); +} + +int 
vhd_marker(vhd_context_t * ctx, char *marker) +{ + int err; + vhd_batmap_t batmap; + + *marker = 0; + + if (!vhd_has_batmap(ctx)) + return -ENOSYS; + + err = vhd_read_batmap_header(ctx, &batmap); + if (err) + return err; + + *marker = batmap.header.marker; + return 0; +} + +int vhd_set_marker(vhd_context_t * ctx, char marker) +{ + int err; + vhd_batmap_t batmap; + + if (!vhd_has_batmap(ctx)) + return -ENOSYS; + + err = vhd_read_batmap_header(ctx, &batmap); + if (err) + return err; + + batmap.header.marker = marker; + return vhd_write_batmap_header(ctx, &batmap); +} _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |