[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Added blktap support. Includes kernel driver (enabled as CONFIG_XEN_BLKDEV_TAP=y) and userspace tools. The userspace daemon (blktapctrl) is enabled by default when xend is activated. For further information on using and configuring blktap see tools/blktap/README.
# HG changeset patch # User jchesterfield@xxxxxxxxxxxxxxxxxxxxxxx # Node ID 2937703f0ed05099f829dea41ec7fdb67a1d2eaa # Parent af9809f51f81a3c43f276f00c81a52ef558afda4 Added blktap support. Includes kernel driver (enabled as CONFIG_XEN_BLKDEV_TAP=y) and userspace tools. The userspace deamon (blktapctrl) is enabled by default when xend is activated. For further information on using and configuring blktap see tools/blktap/README. --- buildconfigs/linux-defconfig_xen0_x86_32 | 1 buildconfigs/linux-defconfig_xen0_x86_64 | 1 buildconfigs/linux-defconfig_xen_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_64 | 1 linux-2.6-xen-sparse/drivers/xen/Kconfig | 12 linux-2.6-xen-sparse/drivers/xen/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/blktap/Makefile | 3 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 1439 ++++++++++++++++++++ linux-2.6-xen-sparse/drivers/xen/blktap/common.h | 120 + linux-2.6-xen-sparse/drivers/xen/blktap/interface.c | 165 ++ linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 354 ++++ patches/linux-2.6.16.13/blktap-aio-16_03_06.patch | 164 ++ tools/Makefile | 2 tools/blktap/Makefile | 28 tools/blktap/README | 122 + tools/blktap/drivers/Makefile | 76 + tools/blktap/drivers/aes.c | 1319 ++++++++++++++++++ tools/blktap/drivers/aes.h | 26 tools/blktap/drivers/blktapctrl.c | 704 +++++++++ tools/blktap/drivers/blktapctrl.h | 55 tools/blktap/drivers/block-aio.c | 327 ++++ tools/blktap/drivers/block-qcow.c | 1369 +++++++++++++++++++ tools/blktap/drivers/block-ram.c | 296 ++++ tools/blktap/drivers/block-sync.c | 242 +++ tools/blktap/drivers/block-vmdk.c | 415 +++++ tools/blktap/drivers/bswap.h | 202 ++ tools/blktap/drivers/img2qcow.c | 289 ++++ tools/blktap/drivers/qcow-create.c | 80 + tools/blktap/drivers/qcow2raw.c | 346 ++++ tools/blktap/drivers/tapdisk.c | 671 +++++++++ tools/blktap/drivers/tapdisk.h | 211 ++ tools/blktap/lib/Makefile | 66 tools/blktap/lib/blkif.c | 185 ++ tools/blktap/lib/blktaplib.h | 223 +++ tools/blktap/lib/list.h | 55 
tools/blktap/lib/xenbus.c | 387 +++++ tools/blktap/lib/xs_api.c | 364 +++++ tools/blktap/lib/xs_api.h | 50 tools/examples/Makefile | 1 tools/examples/blktap | 15 tools/examples/xen-backend.agent | 3 tools/examples/xen-backend.rules | 1 tools/libaio/COPYING | 515 +++++++ tools/libaio/ChangeLog | 43 tools/libaio/INSTALL | 18 tools/libaio/Makefile | 40 tools/libaio/TODO | 4 tools/libaio/harness/Makefile | 37 tools/libaio/harness/README | 19 tools/libaio/harness/attic/0.t | 9 tools/libaio/harness/attic/1.t | 9 tools/libaio/harness/cases/10.t | 53 tools/libaio/harness/cases/11.t | 39 tools/libaio/harness/cases/12.t | 49 tools/libaio/harness/cases/13.t | 66 tools/libaio/harness/cases/14.t | 90 + tools/libaio/harness/cases/2.t | 41 tools/libaio/harness/cases/3.t | 25 tools/libaio/harness/cases/4.t | 72 + tools/libaio/harness/cases/5.t | 47 tools/libaio/harness/cases/6.t | 57 tools/libaio/harness/cases/7.t | 27 tools/libaio/harness/cases/8.t | 49 tools/libaio/harness/cases/aio_setup.h | 98 + tools/libaio/harness/cases/common-7-8.h | 37 tools/libaio/harness/main.c | 39 tools/libaio/harness/runtests.sh | 19 tools/libaio/libaio.spec | 177 ++ tools/libaio/man/aio.3 | 315 ++++ tools/libaio/man/aio_cancel.3 | 137 + tools/libaio/man/aio_cancel64.3 | 50 tools/libaio/man/aio_error.3 | 81 + tools/libaio/man/aio_error64.3 | 64 tools/libaio/man/aio_fsync.3 | 139 + tools/libaio/man/aio_fsync64.3 | 51 tools/libaio/man/aio_init.3 | 96 + tools/libaio/man/aio_read.3 | 146 ++ tools/libaio/man/aio_read64.3 | 60 tools/libaio/man/aio_return.3 | 71 tools/libaio/man/aio_return64.3 | 51 tools/libaio/man/aio_suspend.3 | 123 + tools/libaio/man/aio_suspend64.3 | 51 tools/libaio/man/aio_write.3 | 176 ++ tools/libaio/man/aio_write64.3 | 61 tools/libaio/man/io.3 | 351 ++++ tools/libaio/man/io_cancel.1 | 21 tools/libaio/man/io_cancel.3 | 65 tools/libaio/man/io_destroy.1 | 17 tools/libaio/man/io_fsync.3 | 82 + tools/libaio/man/io_getevents.1 | 29 tools/libaio/man/io_getevents.3 | 79 + 
tools/libaio/man/io_prep_fsync.3 | 89 + tools/libaio/man/io_prep_pread.3 | 79 + tools/libaio/man/io_prep_pwrite.3 | 77 + tools/libaio/man/io_queue_init.3 | 63 tools/libaio/man/io_queue_release.3 | 48 tools/libaio/man/io_queue_run.3 | 50 tools/libaio/man/io_queue_wait.3 | 56 tools/libaio/man/io_set_callback.3 | 44 tools/libaio/man/io_setup.1 | 15 tools/libaio/man/io_submit.1 | 109 + tools/libaio/man/io_submit.3 | 135 + tools/libaio/man/lio_listio.3 | 229 +++ tools/libaio/man/lio_listio64.3 | 39 tools/libaio/src/Makefile | 64 tools/libaio/src/compat-0_1.c | 62 tools/libaio/src/io_cancel.c | 23 tools/libaio/src/io_destroy.c | 23 tools/libaio/src/io_getevents.c | 57 tools/libaio/src/io_queue_init.c | 33 tools/libaio/src/io_queue_release.c | 27 tools/libaio/src/io_queue_run.c | 39 tools/libaio/src/io_queue_wait.c | 31 tools/libaio/src/io_setup.c | 23 tools/libaio/src/io_submit.c | 23 tools/libaio/src/libaio.h | 222 +++ tools/libaio/src/libaio.map | 22 tools/libaio/src/raw_syscall.c | 18 tools/libaio/src/syscall-alpha.h | 209 ++ tools/libaio/src/syscall-i386.h | 72 + tools/libaio/src/syscall-ia64.h | 44 tools/libaio/src/syscall-ppc.h | 94 + tools/libaio/src/syscall-s390.h | 131 + tools/libaio/src/syscall-x86_64.h | 63 tools/libaio/src/syscall.h | 27 tools/libaio/src/vsys_def.h | 24 tools/misc/xend | 7 tools/python/xen/xend/XendDomainInfo.py | 2 tools/python/xen/xend/server/BlktapController.py | 14 tools/python/xen/xm/create.py | 8 tools/python/xen/xm/main.py | 8 tools/xenstore/Makefile | 7 xen/common/grant_table.c | 6 133 files changed, 16795 insertions(+), 8 deletions(-) diff -r af9809f51f81 -r 2937703f0ed0 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jul 13 09:55:14 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jul 13 10:13:26 2006 +0100 @@ -1322,6 +1322,7 @@ CONFIG_XEN_PCIDEV_BACKEND_PASS=y CONFIG_XEN_PCIDEV_BACKEND_PASS=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y 
+CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r af9809f51f81 -r 2937703f0ed0 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Thu Jul 13 09:55:14 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Thu Jul 13 10:13:26 2006 +0100 @@ -1263,6 +1263,7 @@ CONFIG_XEN_PCIDEV_BACKEND_PASS=y CONFIG_XEN_PCIDEV_BACKEND_PASS=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y +CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r af9809f51f81 -r 2937703f0ed0 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Jul 13 09:55:14 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Thu Jul 13 10:13:26 2006 +0100 @@ -3023,6 +3023,7 @@ CONFIG_XEN_PCIDEV_BACKEND_VPCI=y # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y +CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r af9809f51f81 -r 2937703f0ed0 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Thu Jul 13 09:55:14 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Thu Jul 13 10:13:26 2006 +0100 @@ -2855,6 +2855,7 @@ CONFIG_XEN_PCIDEV_BACKEND_PASS=y CONFIG_XEN_PCIDEV_BACKEND_PASS=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y +CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Thu Jul 13 09:55:14 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Thu Jul 13 10:13:26 2006 +0100 @@ -94,6 +94,18 @@ config XEN_XENBUS_DEV depends 
on PROC_FS default y +config XEN_BLKDEV_TAP + tristate "Blockk device tap backend" + depends on XEN_BACKEND + default XEN_PRIVILEGED_GUEST + help + The block tap driver is an alternative to the block back driver + and allows VM block requests to be redirected to userspace through + a device interface. The tap allows user-space development of + high-performance block backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. This + driver can safely coexist with the existing blockback driver. + config XEN_NETDEV_BACKEND tristate "Network-device backend driver" depends on XEN_BACKEND && NET diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Jul 13 09:55:14 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -8,6 +8,7 @@ obj-$(CONFIG_XEN_BALLOON) += balloon/ obj-$(CONFIG_XEN_BALLOON) += balloon/ obj-$(CONFIG_XEN_DEVMEM) += char/ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ +obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ diff -r af9809f51f81 -r 2937703f0ed0 tools/Makefile --- a/tools/Makefile Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -16,6 +16,8 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm_manager SUBDIRS-$(VTPM_TOOLS) += vtpm_manager SUBDIRS-$(VTPM_TOOLS) += vtpm SUBDIRS-y += xenstat +SUBDIRS-y += libaio +SUBDIRS-y += blktap # These don't cross-compile ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) diff -r af9809f51f81 -r 2937703f0ed0 tools/examples/Makefile --- a/tools/examples/Makefile Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/examples/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -26,6 +26,7 @@ XEN_SCRIPTS += network-nat vif-nat XEN_SCRIPTS += network-nat vif-nat XEN_SCRIPTS += block XEN_SCRIPTS += block-enbd block-nbd +XEN_SCRIPTS += blktap 
XEN_SCRIPTS += vtpm vtpm-delete XEN_SCRIPTS += xen-hotplug-cleanup XEN_SCRIPTS += external-device-migrate diff -r af9809f51f81 -r 2937703f0ed0 tools/examples/xen-backend.agent --- a/tools/examples/xen-backend.agent Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/examples/xen-backend.agent Thu Jul 13 10:13:26 2006 +0100 @@ -7,6 +7,9 @@ claim_lock xenbus_hotplug_global claim_lock xenbus_hotplug_global case "$XENBUS_TYPE" in + tap) + /etc/xen/scripts/blktap "$ACTION" + ;; vbd) /etc/xen/scripts/block "$ACTION" ;; diff -r af9809f51f81 -r 2937703f0ed0 tools/examples/xen-backend.rules --- a/tools/examples/xen-backend.rules Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/examples/xen-backend.rules Thu Jul 13 10:13:26 2006 +0100 @@ -1,3 +1,4 @@ SUBSYSTEM=="xen-backend", KERNEL=="vbd*" +SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}" SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}" SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}" SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online" diff -r af9809f51f81 -r 2937703f0ed0 tools/misc/xend --- a/tools/misc/xend Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/misc/xend Thu Jul 13 10:13:26 2006 +0100 @@ -92,6 +92,10 @@ def start_consoled(): def start_consoled(): if os.fork() == 0: os.execvp('xenconsoled', ['xenconsoled']) + +def start_blktapctrl(): + if os.fork() == 0: + os.execvp('blktapctrl', ['blktapctrl']) def main(): try: @@ -106,16 +110,19 @@ def main(): elif sys.argv[1] == 'start': start_xenstored() start_consoled() + start_blktapctrl() return daemon.start() elif sys.argv[1] == 'trace_start': start_xenstored() start_consoled() + start_blktapctrl() return daemon.start(trace=1) elif sys.argv[1] == 'stop': return daemon.stop() elif sys.argv[1] == 'restart': start_xenstored() start_consoled() + start_blktapctrl() return daemon.stop() or daemon.start() elif sys.argv[1] == 'status': return daemon.status() 
diff -r af9809f51f81 -r 2937703f0ed0 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Jul 13 10:13:26 2006 +0100 @@ -1701,6 +1701,7 @@ def addControllerClass(device_class, cls from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, usbif +from xen.xend.server.BlktapController import BlktapController addControllerClass('vbd', blkif.BlkifController) addControllerClass('vif', netif.NetifController) addControllerClass('vtpm', tpmif.TPMifController) @@ -1708,3 +1709,4 @@ addControllerClass('ioports', iopif.IOPo addControllerClass('ioports', iopif.IOPortsController) addControllerClass('irq', irqif.IRQController) addControllerClass('usb', usbif.UsbifController) +addControllerClass('tap', BlktapController) diff -r af9809f51f81 -r 2937703f0ed0 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/python/xen/xm/create.py Thu Jul 13 10:13:26 2006 +0100 @@ -479,7 +479,13 @@ def configure_disks(config_devs, vals): """Create the config for disks (virtual block devices). 
""" for (uname, dev, mode, backend) in vals.disk: - config_vbd = ['vbd', + + if uname.startswith('tap:'): + cls = 'tap' + else: + cls = 'vbd' + + config_vbd = [cls, ['uname', uname], ['dev', dev ], ['mode', mode ] ] diff -r af9809f51f81 -r 2937703f0ed0 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/python/xen/xm/main.py Thu Jul 13 10:13:26 2006 +0100 @@ -994,7 +994,13 @@ def xm_block_attach(args): arg_check(args, 'block-attach', 4, 5) dom = args[0] - vbd = ['vbd', + + if args[1].startswith('tap:'): + cls = 'tap' + else: + cls = 'vbd' + + vbd = [cls, ['uname', args[1]], ['dev', args[2]], ['mode', args[3]]] diff -r af9809f51f81 -r 2937703f0ed0 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Thu Jul 13 09:55:14 2006 +0100 +++ b/tools/xenstore/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -35,7 +35,7 @@ XENSTORED_OBJS += $(XENSTORED_$(OS)) XENSTORED_OBJS += $(XENSTORED_$(OS)) .PHONY: all -all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls +all: libxenstore.so libxenstore.a xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls test_interleaved_transactions: test_interleaved_transactions.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -L. 
-lxenstore -o $@ @@ -89,6 +89,9 @@ talloc_test.o: talloc.c libxenstore.so: xs.opic xs_lib.opic $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^ -lpthread + +libxenstore.a: libxenstore.so + ar rcs libxenstore.a $^ .PHONY: clean clean: testsuite-clean @@ -172,7 +175,7 @@ install: all $(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin $(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) - $(INSTALL_LIBS) libxenstore.so $(DESTDIR)/usr/$(LIBDIR) + $(INSTALL_DATA) libxenstore.* $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_DATA) xs.h $(DESTDIR)/usr/include $(INSTALL_DATA) xs_lib.h $(DESTDIR)/usr/include diff -r af9809f51f81 -r 2937703f0ed0 xen/common/grant_table.c --- a/xen/common/grant_table.c Thu Jul 13 09:55:14 2006 +0100 +++ b/xen/common/grant_table.c Thu Jul 13 10:13:26 2006 +0100 @@ -110,8 +110,7 @@ __gnttab_map_grant_ref( return; } - if ( unlikely((rd = find_domain_by_id(op->dom)) == NULL) || - unlikely(ld == rd) ) + if ( unlikely((rd = find_domain_by_id(op->dom)) == NULL) ) { if ( rd != NULL ) put_domain(rd); @@ -350,8 +349,7 @@ __gnttab_unmap_grant_ref( ref = map->ref; flags = map->flags; - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ) { if ( rd != NULL ) put_domain(rd); diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/blktap/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,3 @@ +LINUXINCLUDE += -I../xen/include/public/io +obj-y := xenbus.o interface.o blktap.o + diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,1439 @@ +/****************************************************************************** + * 
drivers/xen/blktap/blktap.c + * + * Back-end driver for user level virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. Requests + * are remapped to a user-space memory region. + * + * Based on the blkback driver code. + * + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/spinlock.h> +#include <linux/kthread.h> +#include <linux/list.h> +#include <asm/hypervisor.h> +#include "common.h" +#include <xen/balloon.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/errno.h> +#include <linux/major.h> +#include <linux/gfp.h> +#include <linux/poll.h> +#include <asm/tlbflush.h> +#include <linux/devfs_fs_kernel.h> + +#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */ +#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */ + +/* + * The maximum number of requests that can be outstanding at any time + * is determined by + * + * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] + * + * where mmap_alloc < MAX_DYNAMIC_MEM. + * + * TODO: + * mmap_alloc is initialised to 2 and should be adjustable on the fly via + * sysfs. + */ +#define MAX_DYNAMIC_MEM 64 +#define MAX_PENDING_REQS 64 +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define MMAP_VADDR(_start, _req,_seg) \ + (_start + \ + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) +static int blkif_reqs = MAX_PENDING_REQS; +static int mmap_pages = MMAP_PAGES; + +#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we + * have a bunch of pages reserved for shared + * memory rings. + */ + +/*Data struct associated with each of the tapdisk devices*/ +typedef struct tap_blkif { + struct vm_area_struct *vma; /*Shared memory area */ + unsigned long rings_vstart; /*Kernel memory mapping */ + unsigned long user_vstart; /*User memory mapping */ + unsigned long dev_inuse; /*One process opens device at a time. */ + unsigned long dev_pending; /*In process of being opened */ + unsigned long ring_ok; /*make this ring->state */ + blkif_front_ring_t ufe_ring; /*Rings up to user space. 
*/ + wait_queue_head_t wait; /*for poll */ + unsigned long mode; /*current switching mode */ + int minor; /*Minor number for tapdisk device */ + pid_t pid; /*tapdisk process id */ + enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace + shutdown */ + unsigned long *idx_map; /*Record the user ring id to kern + [req id, idx] tuple */ + blkif_t *blkif; /*Associate blkif with tapdev */ +} tap_blkif_t; + +/*Private data struct associated with the inode*/ +typedef struct private_info { + int idx; +} private_info_t; + +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ +typedef struct domid_translate { + unsigned short domid; + unsigned short busid; +} domid_translate_t ; + + +domid_translate_t translate_domid[MAX_TAP_DEV]; +tap_blkif_t *tapfds[MAX_TAP_DEV]; + +static int __init set_blkif_reqs(char *str) +{ + get_option(&str, &blkif_reqs); + return 1; +} +__setup("blkif_reqs=", set_blkif_reqs); + +/* Run-time switchable: /sys/module/blktap/parameters/ */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. 
+ */ +typedef struct { + blkif_t *blkif; + unsigned long id; + unsigned short mem_idx; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; + int inuse; +} pending_req_t; + +static pending_req_t *pending_reqs[MAX_PENDING_REQS]; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq); +static int alloc_pending_reqs; + +typedef unsigned int PEND_RING_IDX; + +static inline int MASK_PEND_IDX(int i) { + return (i & (MAX_PENDING_REQS-1)); +} + +static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) { + return (req - pending_reqs[idx]); +} + +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +#define BLKBACK_INVALID_HANDLE (~0) + +typedef struct mmap_page { + unsigned long start; + struct page *mpage; +} mmap_page_t; + +static mmap_page_t mmap_start[MAX_DYNAMIC_MEM]; +static unsigned short mmap_alloc = 0; +static unsigned short mmap_lock = 0; +static unsigned short mmap_inuse = 0; +static unsigned long *pending_addrs[MAX_DYNAMIC_MEM]; + +/****************************************************************** + * GRANT HANDLES + */ + +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. 
+ */ +struct grant_handle_pair +{ + grant_handle_t kernel; + grant_handle_t user; +}; + +static struct grant_handle_pair + pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; +#define pending_handle(_id, _idx, _i) \ + (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \ + + (_i)]) + + +static int blktap_read_ufe_ring(int idx); /*local prototypes*/ + +#define BLKTAP_MINOR 0 /*/dev/xen/blktap resides at device number + major=254, minor numbers begin at 0 */ +#define BLKTAP_DEV_MAJOR 254 /* TODO: Make major number dynamic * + * and create devices in the kernel * + */ +#define BLKTAP_DEV_DIR "/dev/xen" + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */ +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_SENDPID 4 +#define BLKTAP_IOCTL_NEWINTF 5 +#define BLKTAP_IOCTL_MINOR 6 +#define BLKTAP_IOCTL_MAJOR 7 +#define BLKTAP_QUERY_ALLOC_REQS 8 +#define BLKTAP_IOCTL_FREEINTF 9 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ((arg == BLKTAP_MODE_PASSTHROUGH ) || + (arg == BLKTAP_MODE_INTERCEPT_FE) || + (arg == BLKTAP_MODE_INTERPOSE )); +} + +/* Requests passing through the tap to userspace are re-assigned an ID. + * We must record a mapping between the BE [IDX,ID] tuple and the userspace + * ring ID. 
+ */ + +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) +{ + return ((fe_dom << 16) | MASK_PEND_IDX(idx)); +} + +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) +{ + return (PEND_RING_IDX)(id & 0x0000ffff); +} + +extern inline int ID_TO_MIDX(unsigned long id) +{ + return (int)(id >> 16); +} + +#define INVALID_REQ 0xdead0000 + +/*TODO: Convert to a free list*/ +static inline int GET_NEXT_REQ(unsigned long *idx_map) +{ + int i; + for (i = 0; i < MAX_PENDING_REQS; i++) + if (idx_map[i] == INVALID_REQ) return i; + + return INVALID_REQ; +} + + +#define BLKTAP_INVALID_HANDLE(_g) \ + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) + +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ + } while(0) + + +/****************************************************************** + * BLKTAP VM OPS + */ + +static struct page *blktap_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + /* + * if the page has not been mapped in by the driver then return + * NOPAGE_SIGBUS to the domain. 
+ */ + + return NOPAGE_SIGBUS; +} + +struct vm_operations_struct blktap_vm_ops = { + nopage: blktap_nopage, +}; + +/****************************************************************** + * BLKTAP FILE OPS + */ + +/*Function Declarations*/ +static int get_next_free_dev(void); +static int blktap_open(struct inode *inode, struct file *filp); +static int blktap_release(struct inode *inode, struct file *filp); +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +static unsigned int blktap_poll(struct file *file, poll_table *wait); + +struct miscdevice *set_misc(int minor, char *name, int dev); + +static struct file_operations blktap_fops = { + .owner = THIS_MODULE, + .poll = blktap_poll, + .ioctl = blktap_ioctl, + .open = blktap_open, + .release = blktap_release, + .mmap = blktap_mmap, +}; + + +static int get_next_free_dev(void) +{ + tap_blkif_t *info; + int i = 0, ret = -1; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + + while (i < MAX_TAP_DEV) { + info = tapfds[i]; + if ( (tapfds[i] != NULL) && (info->dev_inuse == 0) + && (info->dev_pending == 0) ) { + info->dev_pending = 1; + ret = i; + goto done; + } + i++; + } + +done: + spin_unlock_irqrestore(&pending_free_lock, flags); + return ret; +} + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +{ + int i; + + for (i = 0; i < MAX_TAP_DEV; i++) + if ( (translate_domid[i].domid == domid) + && (translate_domid[i].busid == xenbus_id) ) { + tapfds[i]->blkif = blkif; + tapfds[i]->status = RUNNING; + return i; + } + return -1; +} + +void signal_tapdisk(int idx) +{ + tap_blkif_t *info; + struct task_struct *ptask; + + info = tapfds[idx]; + if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) { + ptask = find_task_by_pid(info->pid); + if (ptask) { + info->status = CLEANSHUTDOWN; + } + } + info->blkif = NULL; + return; +} + +static int blktap_open(struct inode 
*inode, struct file *filp) +{ + blkif_sring_t *sring; + int idx = iminor(inode) - BLKTAP_MINOR; + tap_blkif_t *info; + private_info_t *prv; + int i; + + if (tapfds[idx] == NULL) { + WPRINTK("Unable to open device /dev/xen/blktap%d\n", + idx); + return -ENOMEM; + } + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); + + info = tapfds[idx]; + + /*Only one process can access device at a time*/ + if (test_and_set_bit(0, &info->dev_inuse)) + return -EBUSY; + + info->dev_pending = 0; + + /* Allocate the fe ring. */ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); + + prv = kzalloc(sizeof(private_info_t),GFP_KERNEL); + prv->idx = idx; + filp->private_data = prv; + info->vma = NULL; + + info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, + GFP_KERNEL); + + if (idx > 0) { + init_waitqueue_head(&info->wait); + for (i = 0; i < MAX_PENDING_REQS; i++) + info->idx_map[i] = INVALID_REQ; + } + + DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx); + return 0; + + fail_nomem: + return -ENOMEM; +} + +static int blktap_release(struct inode *inode, struct file *filp) +{ + int idx = iminor(inode) - BLKTAP_MINOR; + tap_blkif_t *info; + + if (tapfds[idx] == NULL) { + WPRINTK("Trying to free device that doesn't exist " + "[/dev/xen/blktap%d]\n",idx); + return -1; + } + info = tapfds[idx]; + info->dev_inuse = 0; + DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx); + + /* Free the ring page. 
*/ + ClearPageReserved(virt_to_page(info->ufe_ring.sring)); + free_page((unsigned long) info->ufe_ring.sring); + + /* Clear any active mappings and free foreign map table */ + if (info->vma) { + zap_page_range( + info->vma, info->vma->vm_start, + info->vma->vm_end - info->vma->vm_start, NULL); + info->vma = NULL; + } + + if (filp->private_data) kfree(filp->private_data); + + if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) { + kthread_stop(info->blkif->xenblkd); + info->blkif->xenblkd = NULL; + info->status = CLEANSHUTDOWN; + } + return 0; +} + + +/* Note on mmap: + * We need to map pages to user space in a way that will allow the block + * subsystem set up direct IO to them. This couldn't be done before, because + * there isn't really a sane way to translate a user virtual address down to a + * physical address when the page belongs to another domain. + * + * My first approach was to map the page in to kernel memory, add an entry + * for it in the physical frame list (using alloc_lomem_region as in blkback) + * and then attempt to map that page up to user space. This is disallowed + * by xen though, which realizes that we don't really own the machine frame + * underlying the physical page. + * + * The new approach is to provide explicit support for this in xen linux. + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages + * mapped from other vms. vma->vm_private_data is set up as a mapping + * from pages to actual page structs. There is a new clause in get_user_pages + * that does the right thing for this sort of mapping. 
+ */ +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size; + struct page **map; + int i; + private_info_t *prv; + tap_blkif_t *info; + + /*Retrieve the dev info*/ + prv = (private_info_t *)filp->private_data; + if (prv == NULL) { + WPRINTK("blktap: mmap, retrieving idx failed\n"); + return -ENOMEM; + } + info = tapfds[prv->idx]; + + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &blktap_vm_ops; + + size = vma->vm_end - vma->vm_start; + if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) { + WPRINTK("you _must_ map exactly %d pages!\n", + mmap_pages + RING_PAGES); + return -EAGAIN; + } + + size >>= PAGE_SHIFT; + info->rings_vstart = vma->vm_start; + info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, + __pa(info->ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("Mapping user ring failed!\n"); + goto fail; + } + + /* Mark this VM as containing foreign pages, and set up mappings. */ + map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + * sizeof(struct page_struct*), + GFP_KERNEL); + if (map == NULL) { + WPRINTK("Couldn't alloc VM_FOREIGN map.\n"); + goto fail; + } + + for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) + map[i] = NULL; + + vma->vm_private_data = map; + vma->vm_flags |= VM_FOREIGN; + + info->vma = vma; + info->ring_ok = 1; + return 0; + fail: + /* Clear any active mappings. */ + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); + + return -ENOMEM; +} + + +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int idx = iminor(inode) - BLKTAP_MINOR; + switch(cmd) { + case BLKTAP_IOCTL_KICK_FE: + { + /* There are fe messages to process. 
*/ + return blktap_read_ufe_ring(idx); + } + case BLKTAP_IOCTL_SETMODE: + { + tap_blkif_t *info = tapfds[idx]; + + if ( (idx > 0) && (idx < MAX_TAP_DEV) + && (tapfds[idx] != NULL) ) + { + if (BLKTAP_MODE_VALID(arg)) { + info->mode = arg; + /* XXX: may need to flush rings here. */ + DPRINTK("blktap: set mode to %lx\n", + arg); + return 0; + } + } + return 0; + } + case BLKTAP_IOCTL_PRINT_IDXS: + { + tap_blkif_t *info = tapfds[idx]; + + if ( (idx > 0) && (idx < MAX_TAP_DEV) + && (tapfds[idx] != NULL) ) + { + printk("User Rings: \n-----------\n"); + printk("UF: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + info->ufe_ring.rsp_cons, + info->ufe_ring.req_prod_pvt, + info->ufe_ring.sring->req_prod, + info->ufe_ring.sring->rsp_prod); + } + return 0; + } + case BLKTAP_IOCTL_SENDPID: + { + tap_blkif_t *info = tapfds[idx]; + + if ( (idx > 0) && (idx < MAX_TAP_DEV) + && (tapfds[idx] != NULL) ) + { + info->pid = (pid_t)arg; + DPRINTK("blktap: pid received %d\n", + info->pid); + } + return 0; + } + case BLKTAP_IOCTL_NEWINTF: + { + uint64_t val = (uint64_t)arg; + domid_translate_t *tr = (domid_translate_t *)&val; + int newdev; + + DPRINTK("NEWINTF Req for domid %d and bus id %d\n", + tr->domid, tr->busid); + newdev = get_next_free_dev(); + if (newdev < 1) { + WPRINTK("Error initialising /dev/xen/blktap - " + "No more devices\n"); + return -1; + } + translate_domid[newdev].domid = tr->domid; + translate_domid[newdev].busid = tr->busid; + return newdev; + } + case BLKTAP_IOCTL_FREEINTF: + { + unsigned long dev = arg; + tap_blkif_t *info = NULL; + + if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev]; + + if ( (info != NULL) && (info->dev_pending) ) + info->dev_pending = 0; + return 0; + } + case BLKTAP_IOCTL_MINOR: + { + unsigned long dev = arg; + tap_blkif_t *info = NULL; + + if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev]; + + if (info != NULL) return info->minor; + else return -1; + } + case BLKTAP_IOCTL_MAJOR: + return 
BLKTAP_DEV_MAJOR; + + case BLKTAP_QUERY_ALLOC_REQS: + { + WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n", + alloc_pending_reqs, blkif_reqs); + return (alloc_pending_reqs/blkif_reqs) * 100; + } + } + return -ENOIOCTLCMD; +} + +static unsigned int blktap_poll(struct file *file, poll_table *wait) +{ + private_info_t *prv; + tap_blkif_t *info; + + /*Retrieve the dev info*/ + prv = (private_info_t *)file->private_data; + if (prv == NULL) { + WPRINTK(" poll, retrieving idx failed\n"); + return 0; + } + + if (prv->idx == 0) return 0; + + info = tapfds[prv->idx]; + + poll_wait(file, &info->wait, wait); + if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) { + flush_tlb_all(); + RING_PUSH_REQUESTS(&info->ufe_ring); + return POLLIN | POLLRDNORM; + } + return 0; +} + +void blktap_kick_user(int idx) +{ + tap_blkif_t *info; + + if (idx == 0) return; + + info = tapfds[idx]; + + if (info != NULL) wake_up_interruptible(&info->wait); + return; +} + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, int st); + +/****************************************************************** + * misc small helpers + */ +/* FIXME: Return ENOMEM properly on failure to allocate additional reqs. 
*/ +static void req_increase(void) +{ + int i, j; + struct page *page; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + + if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) + goto done; + + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * + blkif_reqs, GFP_KERNEL); + pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * + mmap_pages, GFP_KERNEL); + + if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { + kfree(pending_reqs[mmap_alloc]); + kfree(pending_addrs[mmap_alloc]); + WPRINTK("%s: out of memory\n", __FUNCTION__); + goto done; + } + +#ifdef __ia64__ + extern unsigned long alloc_empty_foreign_map_page_range( + unsigned long pages); + mmap_start[mmap_alloc].start = (unsigned long) + alloc_empty_foreign_map_page_range(mmap_pages); +#else /* ! ia64 */ + page = balloon_alloc_empty_page_range(mmap_pages); + BUG_ON(page == NULL); + + /* Pin all of the pages. */ + for (i=0; i<mmap_pages; i++) + get_page(&page[i]); + + mmap_start[mmap_alloc].start = + (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + mmap_start[mmap_alloc].mpage = page; + +#endif + DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", + __FUNCTION__, blkif_reqs, mmap_pages, + mmap_start[mmap_alloc].start); + + BUG_ON(mmap_start[mmap_alloc].start == 0); + + for (i = 0; i < mmap_pages; i++) + pending_addrs[mmap_alloc][i] = + mmap_start[mmap_alloc].start + (i << PAGE_SHIFT); + + for (i = 0; i < MAX_PENDING_REQS ; i++) { + list_add_tail(&pending_reqs[mmap_alloc][i].free_list, + &pending_free); + pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc; + for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) + BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, + i, j)); + } + + mmap_alloc++; + DPRINTK("# MMAPs increased to %d\n",mmap_alloc); + done: + spin_unlock_irqrestore(&pending_free_lock, flags); + +} + +static void mmap_req_del(int mmap) +{ + int i; + struct page *page; + + /*Spinlock already acquired*/ + kfree(pending_reqs[mmap]); + 
kfree(pending_addrs[mmap]); + +#ifdef __ia64__ + /*Not sure what goes here yet!*/ +#else + + /* Unpin all of the pages. */ + page = mmap_start[mmap].mpage; + for (i=0; i<mmap_pages; i++) + put_page(&page[i]); + + balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages); +#endif + + mmap_lock = 0; + DPRINTK("# MMAPs decreased to %d\n",mmap_alloc); + mmap_alloc--; +} + +/*N.B. Currently unused - will be accessed via sysfs*/ +static void req_decrease(void) +{ + pending_req_t *req; + int i; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + + DPRINTK("Req decrease called.\n"); + if (mmap_lock || mmap_alloc == 1) + goto done; + + mmap_lock = 1; + mmap_inuse = MAX_PENDING_REQS; + + /*Go through reqs and remove any that aren't in use*/ + for (i = 0; i < MAX_PENDING_REQS ; i++) { + req = &pending_reqs[mmap_alloc-1][i]; + if (req->inuse == 0) { + list_del(&req->free_list); + mmap_inuse--; + } + } + if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1); + done: + spin_unlock_irqrestore(&pending_free_lock, flags); + return; +} + +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + + if (req) { + req->inuse = 1; + alloc_pending_reqs++; + } + spin_unlock_irqrestore(&pending_free_lock, flags); + + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + + alloc_pending_reqs--; + req->inuse = 0; + if (mmap_lock && (req->mem_idx == mmap_alloc-1)) { + mmap_inuse--; + if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1); + spin_unlock_irqrestore(&pending_free_lock, flags); + return; + } + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + + spin_unlock_irqrestore(&pending_free_lock, flags); + + if 
(was_empty) + wake_up(&pending_free_wq); +} + +/* + * Tear down the grant mappings of one pending request: queue an unmap for + * the kernel-side mapping of every segment and, while the tap device still + * has a user VMA, for the user-side PTE as well; issue them in a single + * hypercall and then zap the request's user address range. + * + * req: request whose nr_pages segments are flushed + * k_idx: slot of the request within its pending_reqs[] array + * u_idx: slot of the request on the user ring + * tapidx: index of the tap device in tapfds[] + */ +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int + tapidx) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int i, invcount = 0; + struct grant_handle_pair *khandle; + uint64_t ptep; + int ret, mmap_idx; + unsigned long kvaddr, uvaddr; + + tap_blkif_t *info = tapfds[tapidx]; + + if (info == NULL) { + WPRINTK("fast_flush: Couldn't get info!\n"); + return; + } + mmap_idx = req->mem_idx; + + for (i = 0; i < req->nr_pages; i++) { + kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i); + uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); + + khandle = &pending_handle(mmap_idx, k_idx, i); + if (BLKTAP_INVALID_HANDLE(khandle)) { + WPRINTK("BLKTAP_INVALID_HANDLE\n"); + continue; + } + gnttab_set_unmap_op(&unmap[invcount], + kvaddr, + GNTMAP_host_map, khandle->kernel); + invcount++; + + /* + * The device release path sets info->vma to NULL, so it may + * be NULL here; without a VMA there is no mm to look the + * user PTE up in. + * NOTE(review): presumably the user-side mapping is already + * gone once the VMA has been zapped -- confirm. + */ + if (info->vma != NULL) { + if (create_lookup_pte_addr( + info->vma->vm_mm, + uvaddr, + &ptep) !=0) { + WPRINTK("Couldn't get a pte addr!\n"); + return; + } + + gnttab_set_unmap_op(&unmap[invcount], + ptep, GNTMAP_host_map, + khandle->user); + invcount++; + } + + BLKTAP_INVALIDATE_HANDLE(khandle); + } + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + + if (info->vma != NULL) + zap_page_range(info->vma, + MMAP_VADDR(info->user_vstart, u_idx, 0), + req->nr_pages << PAGE_SHIFT, NULL); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} + +int tap_blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) +
printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called by user level ioctl() + */ + +static int blktap_read_ufe_ring(int idx) +{ + /* This is called to read responses from the UFE ring. */ + RING_IDX i, j, rp; + blkif_response_t *resp; + blkif_t *blkif=NULL; + int pending_idx, usr_idx, mmap_idx; + pending_req_t *pending_req; + tap_blkif_t *info; + + info = tapfds[idx]; + if (info == NULL) { + return 0; + } + + /* We currently only forward packets in INTERCEPT_FE mode. 
*/ + if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE)) + return 0; + + /* for each outstanding message on the UFEring */ + rp = info->ufe_ring.sring->rsp_prod; + rmb(); + + for (i = info->ufe_ring.rsp_cons; i != rp; i++) { + resp = RING_GET_RESPONSE(&info->ufe_ring, i); + ++info->ufe_ring.rsp_cons; + + /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ + usr_idx = (int)resp->id; + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); + + /* + * resp->id is written by the userspace side of the ring. A + * map entry that does not name an allocated request must not + * be used to index pending_reqs[][]: drop the response. + * NOTE(review): usr_idx itself is not range-checked before + * indexing idx_map; its size is not visible here -- confirm. + */ + if ( (mmap_idx >= mmap_alloc) || + (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) { + WPRINTK("Incorrect req map" + "[%d], internal map [%d,%d (%d)]\n", + usr_idx, mmap_idx, + ID_TO_IDX(info->idx_map[usr_idx]), + MASK_PEND_IDX( + ID_TO_IDX(info->idx_map[usr_idx]))); + continue; + } + + pending_req = &pending_reqs[mmap_idx][pending_idx]; + blkif = pending_req->blkif; + + for (j = 0; j < pending_req->nr_pages; j++) { + + unsigned long kvaddr, uvaddr; + struct page **map = info->vma->vm_private_data; + struct page *pg; + int offset; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); + kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, + pending_idx, j); + + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ClearPageReserved(pg); + offset = (uvaddr - info->vma->vm_start) + >> PAGE_SHIFT; + map[offset] = NULL; + } + fast_flush_area(pending_req, pending_idx, usr_idx, idx); + make_response(blkif, pending_req->id, resp->operation, + resp->status); + info->idx_map[usr_idx] = INVALID_REQ; + blkif_put(pending_req->blkif); + free_req(pending_req); + } + + return 0; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS.
+ */ + +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ +static int print_dbug = 1; +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_ring_t *blk_ring = &blkif->blk_ring; + blkif_request_t *req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + tap_blkif_t *info; + + rc = blk_ring->req_cons; + rp = blk_ring->sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. */ + + /*Check blkif has corresponding UE ring*/ + if (blkif->dev_num == -1) { + /*oops*/ + if (print_dbug) { + WPRINTK("Corresponding UE " + "ring does not exist!\n"); + print_dbug = 0; /*We only print this message once*/ + } + return 1; + } + + info = tapfds[blkif->dev_num]; + if (info == NULL || !info->dev_inuse) { + if (print_dbug) { + WPRINTK("Can't get UE info!\n"); + print_dbug = 0; + } + return 1; + } + + while (rc != rp) { + + if (RING_FULL(&info->ufe_ring)) { + WPRINTK("RING_FULL! More to do\n"); + more_to_do = 1; + break; + } + + if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) { + WPRINTK("RING_REQUEST_CONS_OVERFLOW!" 
+ " More to do\n"); + more_to_do = 1; + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + req = RING_GET_REQUEST(blk_ring, rc); + blk_ring->req_cons = ++rc; /* before make_response() */ + + switch (req->operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + + default: + WPRINTK("unknown operation [%d]\n", + req->operation); + make_response(blkif, req->id, req->operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + } + + blktap_kick_user(blkif->dev_num); + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int nseg; + int ret, i; + tap_blkif_t *info = tapfds[blkif->dev_num]; + uint64_t sector; + + blkif_request_t *target; + int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); + int usr_idx = GET_NEXT_REQ(info->idx_map); + uint16_t mmap_idx = pending_req->mem_idx; + + /*Check we have space on user ring - should never fail*/ + if(usr_idx == INVALID_REQ) goto fail_flush; + + /* Check that number of segments is sane. */ + nseg = req->nr_segments; + if ( unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) { + WPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + /* Make sure userspace is ready. */ + if (!info->ring_ok) { + WPRINTK("blktap: ring not ready for requests!\n"); + goto fail_response; + } + + if (RING_FULL(&info->ufe_ring)) { + WPRINTK("blktap: fe_ring is full, can't add " + "IO Request will be dropped. 
%d %d\n", + RING_SIZE(&info->ufe_ring), + RING_SIZE(&blkif->blk_ring)); + goto fail_response; + } + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + op = 0; + for (i = 0; i < nseg; i++) { + unsigned long uvaddr; + unsigned long kvaddr; + uint64_t ptep; + struct page *page; + uint32_t flags; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); + kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, + pending_idx, i); + page = virt_to_page(kvaddr); + + sector = req->sector_number + (8*i); + if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) { + WPRINTK("BLKTAP: Sector request greater" + "than size\n"); + WPRINTK("BLKTAP: %s request sector" + "[%llu,%llu], Total [%llu]\n", + (req->operation == + BLKIF_OP_WRITE ? "WRITE" : "READ"), + (long long unsigned) sector, + (long long unsigned) sector>>9, + blkif->sectors); + } + + flags = GNTMAP_host_map; + if (operation == WRITE) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], kvaddr, flags, + req->seg[i].gref, blkif->domid); + op++; + + /* Now map it to user. 
*/ + ret = create_lookup_pte_addr(info->vma->vm_mm, + uvaddr, &ptep); + if (ret) { + WPRINTK("Couldn't get a pte addr!\n"); + fast_flush_area(pending_req, pending_idx, usr_idx, + blkif->dev_num); + goto fail_flush; + } + + flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + if (operation == WRITE) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, + req->seg[i].gref, blkif->domid); + op++; + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); + BUG_ON(ret); + + for (i = 0; i < (nseg*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + struct page *pg; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2); + kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, + pending_idx, i/2); + + if (unlikely(map[i].status != 0)) { + WPRINTK("invalid kernel buffer -- " + "could not remap it\n"); + goto fail_flush; + } + + if (unlikely(map[i+1].status != 0)) { + WPRINTK("invalid user buffer -- " + "could not remap it\n"); + goto fail_flush; + } + + pending_handle(mmap_idx, pending_idx, i/2).kernel + = map[i].handle; + pending_handle(mmap_idx, pending_idx, i/2).user + = map[i+1].handle; +#ifdef CONFIG_XEN_IA64_DOM0_NON_VP + pending_addrs[mmap_idx][vaddr_pagenr(pending_req, i)] = + (unsigned long)gnttab_map_vaddr(map[i]); +#else + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); +#endif + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ((struct page **)info->vma->vm_private_data)[offset] = + pg; + } + /* Mark mapped pages as reserved: */ + for (i = 0; i < req->nr_segments; i++) { + unsigned long kvaddr; + struct page *pg; + + kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, + pending_idx, i); + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + SetPageReserved(pg); + } + + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ + info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); 
+ + blkif_get(blkif); + /* Finally, write the request message to the user ring. */ + target = RING_GET_REQUEST(&info->ufe_ring, + info->ufe_ring.req_prod_pvt); + memcpy(target, req, sizeof(*req)); + target->id = usr_idx; + info->ufe_ring.req_prod_pvt++; + return; + + fail_flush: + WPRINTK("Reached Fail_flush\n"); + fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, int st) +{ + blkif_response_t *resp; + unsigned long flags; + blkif_back_ring_t *blk_ring = &blkif->blk_ring; + int more_to_do = 0; + int notify; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); + resp->id = id; + resp->operation = op; + resp->status = st; + blk_ring->rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); + + if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do); + } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) { + more_to_do = 1; + + } + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i,ret,blktap_dir; + tap_blkif_t *info; + + if (!is_running_on_xen()) + return -ENODEV; + + INIT_LIST_HEAD(&pending_free); + for(i = 0; i < 2; i++) req_increase(); + + tap_blkif_interface_init(); + + alloc_pending_reqs = 0; + + tap_blkif_xenbus_init(); + + /*Create the blktap devices, but do not map memory or waitqueue*/ + for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF; + + ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops); + blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL); + + if ( (ret < 0)||(blktap_dir < 0) ) { + WPRINTK("Couldn't register /dev/xen/blktap\n"); + return -ENOMEM; + } + + for(i = 0; i < MAX_TAP_DEV; i++ ) { + info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); + if(tapfds[i] == NULL) return -ENOMEM; + info->minor = i; + info->pid = 0; + info->blkif = NULL; + + ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); + + if(ret != 0) return -ENOMEM; + info->dev_pending = info->dev_inuse = 0; + + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + } + + DPRINTK("Blktap device successfully created\n"); + + return 0; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/blktap/common.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,120 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately 
from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> +#include <asm/io.h> +#include <asm/setup.h> +#include <asm/pgalloc.h> +#include <xen/evtchn.h> +#include <asm/hypervisor.h> +#include <xen/interface/io/blkif.h> +#include <xen/interface/io/ring.h> +#include <xen/gnttab.h> +#include <xen/driver_util.h> + +#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) + +struct backend_info; + +typedef struct blkif_st { + /* Unique identifier for this interface. 
*/ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int evtchn; + unsigned int irq; + /* Comms information. */ + blkif_back_ring_t blk_ring; + struct vm_struct *blk_ring_area; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + request_queue_t *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; + + int dev_num; + uint64_t sectors; +} blkif_t; + +blkif_t *tap_alloc_blkif(domid_t domid); +void tap_blkif_free(blkif_t *blkif); +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, + unsigned int evtchn); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +void tap_blkif_interface_init(void); + +void tap_blkif_xenbus_init(void); + +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +int tap_blkif_schedule(void *arg); + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); +void signal_tapdisk(int idx); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,165 @@ +/****************************************************************************** + * drivers/xen/blktap/interface.c + * + * Block-device interface management. 
+ * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ + */ + +#include "common.h" +#include <xen/evtchn.h> + +static kmem_cache_t *blkif_cachep; + +blkif_t *tap_alloc_blkif(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + int ret; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + lock_vm_area(blkif->blk_ring_area); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + +#ifdef CONFIG_XEN_IA64_DOM0_NON_VP + /* on some arch's, map_grant_ref behaves like mmap, in that the + * passed address is a hint and a different address may be returned */ + blkif->blk_ring_area->addr = gnttab_map_vaddr(op); +#endif + + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + int ret; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + lock_vm_area(blkif->blk_ring_area); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); +} + +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, + unsigned int evtchn) +{ + blkif_sring_t *sring; + int err; + struct evtchn_bind_interdomain bind_interdomain; + + /* Already connected through? 
*/ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + bind_interdomain.remote_dom = blkif->domid; + bind_interdomain.remote_port = evtchn; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (err) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + return err; + } + + blkif->evtchn = bind_interdomain.local_port; + + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); + + blkif->irq = bind_evtchn_to_irqhandler( + blkif->evtchn, tap_blkif_be_int, 0, "blkif-backend", blkif); + + return 0; +} + +void tap_blkif_free(blkif_t *blkif) +{ + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + + /* Already disconnected? */ + if (blkif->irq) + unbind_from_irqhandler(blkif->irq, blkif); + + if (blkif->blk_ring.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + } + + kmem_cache_free(blkif_cachep, blkif); +} + +void __init tap_blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); +} diff -r af9809f51f81 -r 2937703f0ed0 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,354 @@ +/* drivers/xen/blktap/xenbus.c + * + * Xenbus code for blktap + * + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Based on the blkback xenbus code: + * + * Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx> + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as 
published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <xen/xenbus.h>
+#include "common.h"
+
+
+/*
+ * Per-device backend state.  blktap_probe() allocates one of these and
+ * stores it in dev->dev.driver_data; blktap_remove() frees it.
+ */
+struct backend_info
+{
+	/* xenbus device this state belongs to */
+	struct xenbus_device *dev;
+	/* block interface; blkif->be points back at this structure */
+	blkif_t *blkif;
+	/* watch on the "info" key under the backend's nodename */
+	struct xenbus_watch backend_watch;
+	/* value parsed out of the nodename by get_id() */
+	int xenbus_id;
+};
+
+
+static void connect(struct backend_info *);
+static int connect_ring(struct backend_info *);
+static int blktap_remove(struct xenbus_device *dev);
+static int blktap_probe(struct xenbus_device *dev,
+			const struct xenbus_device_id *id);
+static void tap_backend_changed(struct xenbus_watch *, const char **,
+				unsigned int);
+static void tap_frontend_changed(struct xenbus_device *dev,
+				 enum xenbus_state frontend_state);
+
+/*
+ * Locate a separator in a string.
+ *
+ * Returns the index of the (len + 1)-th occurrence of c in str.  If str
+ * contains exactly len occurrences the length of str is returned
+ * instead, and if it contains fewer the result is -ERANGE.
+ */
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; str[i]; i++)
+		if (str[i] == c) {
+			if (len == 0)
+				return i;
+			len--;
+		}
+	return (len == 0) ? i : -ERANGE;
+}
+
+/*
+ * Parse the decimal number that follows the third '/' of str.
+ *
+ * Returns the parsed value, or -1 when str does not contain a third
+ * '/'.  The digits are converted in place: copying them into a small
+ * on-stack buffer first, as this function used to, overflowed that
+ * buffer for long components and wrote before its start when the
+ * separator was missing.
+ */
+static long get_id(const char *str)
+{
+	const char *ptr;
+	int len;
+
+	len = strsep_len(str, '/', 2);
+
+	/*
+	 * strsep_len() hands back the string length when str holds
+	 * exactly two separators, so landing on the terminator means
+	 * there is no third '/' and nothing after it to parse.
+	 */
+	if (len < 0 || str[len] == '\0')
+		return -1;
+
+	ptr = str + len + 1;
+	DPRINTK("Get_id called for %s (%s)\n",str,ptr);
+
+	return simple_strtol(ptr, NULL, 10);
+}
+
+/*
+ * Switch the backend to Connected once both halves of the setup are in
+ * place, then start the kernel thread that runs tap_blkif_schedule().
+ * Returns silently while the interface has no irq or no sector count,
+ * and when the device is already Connected.
+ */
+static void tap_update_blkif_status(blkif_t *blkif)
+{
+	int err;
+
+	/* Not ready to connect? */
+	if(!blkif->irq || !blkif->sectors) {
+		return;
+	}
+
+	/* Already connected? */
+	if (blkif->be->dev->state == XenbusStateConnected)
+		return;
+
+	/* Attempt to connect: exit if we fail to. */
+	connect(blkif->be);
+	if (blkif->be->dev->state != XenbusStateConnected)
+		return;
+
+	blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif,
+				     "xvd %d",
+				     blkif->domid);
+
+	if (IS_ERR(blkif->xenblkd)) {
+		err = PTR_ERR(blkif->xenblkd);
+		/* Clear the field so blktap_remove() does not stop a thread
+		 * that never started. */
+		blkif->xenblkd = NULL;
+		xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
+		WPRINTK("Error starting thread\n");
+	}
+}
+
+/*
+ * Tear down everything blktap_probe() and the later callbacks set up:
+ * the xenstore watch, the worker thread, the block interface and the
+ * backend_info itself.  Also used as the failure path of blktap_probe().
+ * Always returns 0.
+ */
+static int blktap_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	/* A non-NULL node means the watch was registered. */
+	if (be->backend_watch.node) {
+		unregister_xenbus_watch(&be->backend_watch);
+		kfree(be->backend_watch.node);
+		be->backend_watch.node = NULL;
+	}
+	if (be->blkif) {
+		/* Stop the worker before the interface it uses is freed. */
+		if (be->blkif->xenblkd)
+			kthread_stop(be->blkif->xenblkd);
+		signal_tapdisk(be->blkif->dev_num);
+		tap_blkif_free(be->blkif);
+		be->blkif = NULL;
+	}
+	kfree(be);
+	dev->dev.driver_data = NULL;
+	return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate
+ * the basic structures, and watch the store waiting for the
+ * user-space program to tell us the physical device info. Switch to
+ * InitWait.
+ */
+static int blktap_probe(struct xenbus_device *dev,
+			const struct xenbus_device_id *id)
+{
+	struct backend_info *be;
+	blkif_t *blkif;
+	int err;
+
+	be = kzalloc(sizeof(struct backend_info), GFP_KERNEL);
+	if (be == NULL) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Tie device and backend state together before anything else can
+	 * fail: the failure path below reaches "be" through driver_data.
+	 */
+	dev->dev.driver_data = be;
+	be->dev = dev;
+	be->xenbus_id = get_id(dev->nodename);
+
+	blkif = tap_alloc_blkif(dev->otherend_id);
+	if (IS_ERR(blkif)) {
+		/* be->blkif is still NULL from kzalloc(), so the cleanup
+		 * leaves the interface alone. */
+		err = PTR_ERR(blkif);
+		xenbus_dev_fatal(dev, err, "creating block interface");
+		goto fail;
+	}
+	be->blkif = blkif;
+
+	/* Back pointer for the interface; no sector count is known yet. */
+	blkif->be = be;
+	blkif->sectors = 0;
+
+	/* Watch the "info" key so we hear about it when userspace fills in
+	 * the disk details, then advertise InitWait. */
+	err = xenbus_watch_path2(dev, dev->nodename, "info",
+				 &be->backend_watch, tap_backend_changed);
+	if (err == 0)
+		err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err == 0)
+		return 0;
+
+fail:
+	DPRINTK("blktap probe failed");
+	blktap_remove(dev);
+	return err;
+}
+
+
+/**
+ * Callback received when the user space code has placed the device
+ * information in xenstore.
+ */
+static void tap_backend_changed(struct xenbus_watch *watch,
+				const char **vec, unsigned int len)
+{
+	struct backend_info *be;
+	struct xenbus_device *dev;
+	blkif_t *blkif;
+	unsigned long info;
+	int err;
+
+	be = container_of(watch, struct backend_info, backend_watch);
+	dev = be->dev;
+
+	/*
+	 * Has userspace opened the image and written the sector count
+	 * and disk info to xenstore yet?  If "info" cannot be read,
+	 * report it and wait for the next watch event.
+	 */
+	err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info,
+			    NULL);
+	if (err != 0) {
+		xenbus_dev_error(dev, err, "getting info");
+		return;
+	}
+
+	DPRINTK("Userspace update on disk info, %lu\n",info);
+
+	blkif = be->blkif;
+
+	/* NOTE(review): the status of this read is stored but never
+	 * examined; a failed read leaves blkif->sectors as it was. */
+	err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu",
+			    &blkif->sectors, NULL);
+
+	/* Associate tap dev with domid */
+	blkif->dev_num = dom_to_devid(blkif->domid, be->xenbus_id, blkif);
+	DPRINTK("Thread started for domid [%d], connecting disk\n",
+		blkif->dev_num);
+
+	tap_update_blkif_status(blkif);
+}
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void tap_frontend_changed(struct xenbus_device *dev,
+				 enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	DPRINTK("");
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		/* Nothing to do yet. */
+		break;
+
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		/* Both states are handled alike so that we still connect
+		   when two watches fire in close succession and the
+		   intermediate value of frontend_state is missed. */
+		if (dev->state != XenbusStateConnected &&
+		    connect_ring(be) == 0)
+			tap_update_blkif_status(be->blkif);
+		break;
+
+	case XenbusStateClosing:
+		xenbus_switch_state(dev, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		device_unregister(&dev->dev);
+		break;
+
+	case XenbusStateUnknown:
+	case XenbusStateInitWait:
+	default:
+		/* States a frontend is not expected to show us. */
+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+				 frontend_state);
+		break;
+	}
+}
+
+
+/**
+ * Switch to Connected state.
+ */
+static void connect(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	int err;
+
+	/*
+	 * Nothing is returned: callers find out whether the switch worked
+	 * by looking at dev->state afterwards.  The nodename is part of
+	 * the message; it used to be passed without a conversion for it.
+	 */
+	err = xenbus_switch_state(dev, XenbusStateConnected);
+	if (err)
+		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
+				 dev->nodename);
+}
+
+
+/*
+ * Read the ring reference and event channel that the frontend
+ * published under dev->otherend and hand them to tap_blkif_map().
+ *
+ * Returns 0 on success.  On failure the error is reported through
+ * xenbus_dev_fatal() and returned to the caller.
+ */
+static int connect_ring(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	unsigned long ring_ref;
+	unsigned int evtchn;
+	int err;
+
+	DPRINTK("%s", dev->otherend);
+
+	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
+			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	/* Map the shared frame, irq etc. */
+	err = tap_blkif_map(be->blkif, ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+				 ring_ref, evtchn);
+		return err;
+	}
+
+	return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+/* Device types handled by this driver; the empty entry ends the list. */
+static struct xenbus_device_id blktap_ids[] = {
+	{ "tap" },
+	{ "" }
+};
+
+
+/* Callbacks through which xenbus drives the "tap" backend. */
+static struct xenbus_driver blktap = {
+	.name = "tap",
+	.owner = THIS_MODULE,
+	.ids = blktap_ids,
+	.probe = blktap_probe,
+	.remove = blktap_remove,
+	.otherend_changed = tap_frontend_changed
+};
+
+
+/*
+ * Register the blktap backend driver with xenbus.
+ * NOTE(review): the result of xenbus_register_backend() is not checked.
+ */
+void tap_blkif_xenbus_init(void)
+{
+	xenbus_register_backend(&blktap);
+}
diff -r af9809f51f81 -r 2937703f0ed0 patches/linux-2.6.16.13/blktap-aio-16_03_06.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/blktap-aio-16_03_06.patch Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,297 @@
+diff -pruN ../pristine-linux-2.6.16-rc5/fs/aio.c ./fs/aio.c
+--- ../pristine-linux-2.6.16-rc5/fs/aio.c 2006-03-14 14:10:10.827401387 +0000
++++ ./fs/aio.c 2006-03-16 09:57:53.898316582 +0000
+@@ -34,6 +34,11 @@
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+ 
++#ifdef CONFIG_EPOLL
++#include <linux/poll.h>
++#include <linux/eventpoll.h>
++#endif
++
+ #if DEBUG > 1
+ #define dprintk printk
+ #else
+@@ -1016,6 +1021,10 @@ 
put_rq: + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + ++#ifdef CONFIG_EPOLL ++ if (ctx->file && waitqueue_active(&ctx->poll_wait)) ++ wake_up(&ctx->poll_wait); ++#endif + if (ret) + put_ioctx(ctx); + +@@ -1025,6 +1034,8 @@ put_rq: + /* aio_read_evt + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched (0 or 1 ;-) ++ * If ent parameter is 0, just returns the number of events that would ++ * be fetched. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). + */ +@@ -1047,13 +1058,18 @@ static int aio_read_evt(struct kioctx *i + + head = ring->head % info->nr; + if (head != ring->tail) { +- struct io_event *evp = aio_ring_event(info, head, KM_USER1); +- *ent = *evp; +- head = (head + 1) % info->nr; +- smp_mb(); /* finish reading the event before updatng the head */ +- ring->head = head; +- ret = 1; +- put_aio_ring_event(evp, KM_USER1); ++ if (ent) { /* event requested */ ++ struct io_event *evp = ++ aio_ring_event(info, head, KM_USER1); ++ *ent = *evp; ++ head = (head + 1) % info->nr; ++ /* finish reading the event before updatng the head */ ++ smp_mb(); ++ ring->head = head; ++ ret = 1; ++ put_aio_ring_event(evp, KM_USER1); ++ } else /* only need to know availability */ ++ ret = 1; + } + spin_unlock(&info->ring_lock); + +@@ -1236,9 +1252,78 @@ static void io_destroy(struct kioctx *io + + aio_cancel_all(ioctx); + wait_for_all_aios(ioctx); ++#ifdef CONFIG_EPOLL ++ /* forget the poll file, but it's up to the user to close it */ ++ if (ioctx->file) { ++ ioctx->file->private_data = 0; ++ ioctx->file = 0; ++ } ++#endif + put_ioctx(ioctx); /* once for the lookup */ + } + ++#ifdef CONFIG_EPOLL ++ ++static int aio_queue_fd_close(struct inode *inode, struct file *file) ++{ ++ struct kioctx *ioctx = file->private_data; ++ if (ioctx) { ++ file->private_data = 0; ++ spin_lock_irq(&ioctx->ctx_lock); ++ ioctx->file = 0; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ return 0; ++} ++ 
++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) ++{ unsigned int pollflags = 0; ++ struct kioctx *ioctx = file->private_data; ++ ++ if (ioctx) { ++ ++ spin_lock_irq(&ioctx->ctx_lock); ++ /* Insert inside our poll wait queue */ ++ poll_wait(file, &ioctx->poll_wait, wait); ++ ++ /* Check our condition */ ++ if (aio_read_evt(ioctx, 0)) ++ pollflags = POLLIN | POLLRDNORM; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ ++ return pollflags; ++} ++ ++static struct file_operations aioq_fops = { ++ .release = aio_queue_fd_close, ++ .poll = aio_queue_fd_poll ++}; ++ ++/* make_aio_fd: ++ * Create a file descriptor that can be used to poll the event queue. ++ * Based and piggybacked on the excellent epoll code. ++ */ ++ ++static int make_aio_fd(struct kioctx *ioctx) ++{ ++ int error, fd; ++ struct inode *inode; ++ struct file *file; ++ ++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); ++ if (error) ++ return error; ++ ++ /* associate the file with the IO context */ ++ file->private_data = ioctx; ++ ioctx->file = file; ++ init_waitqueue_head(&ioctx->poll_wait); ++ return fd; ++} ++#endif ++ ++ + /* sys_io_setup: + * Create an aio_context capable of receiving at least nr_events. + * ctxp must not point to an aio_context that already exists, and +@@ -1251,18 +1336,30 @@ static void io_destroy(struct kioctx *io + * resources are available. May fail with -EFAULT if an invalid + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. ++ * ++ * To request a selectable fd, the user context has to be initialized ++ * to 1, instead of 0, and the return value is the fd. ++ * This keeps the system call compatible, since a non-zero value ++ * was not allowed so far. 
+ */ + asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; ++ int make_fd = 0; + + ret = get_user(ctx, ctxp); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; ++#ifdef CONFIG_EPOLL ++ if (ctx == 1) { ++ make_fd = 1; ++ ctx = 0; ++ } ++#endif + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); +@@ -1273,8 +1370,12 @@ asmlinkage long sys_io_setup(unsigned nr + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); +- if (!ret) +- return 0; ++#ifdef CONFIG_EPOLL ++ if (make_fd && ret >= 0) ++ ret = make_aio_fd(ioctx); ++#endif ++ if (ret >= 0) ++ return ret; + + get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + io_destroy(ioctx); + +diff -pruN ../pristine-linux-2.6.16-rc5/fs/eventpoll.c ./fs/eventpoll.c +--- ../pristine-linux-2.6.16-rc5/fs/eventpoll.c 2006-01-03 03:21:10.000000000 +0000 ++++ ./fs/eventpoll.c 2006-03-16 10:04:35.469956167 +0000 +@@ -235,8 +235,6 @@ struct ep_pqueue { + + static void ep_poll_safewake_init(struct poll_safewake *psw); + static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep); + static int ep_alloc(struct eventpoll **pep); + static void ep_free(struct eventpoll *ep); + static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); +@@ -266,7 +264,7 @@ static int ep_events_transfer(struct eve + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents, long timeout); + static int eventpollfs_delete_dentry(struct dentry *dentry); +-static struct inode *ep_eventpoll_inode(void); ++static struct inode *ep_eventpoll_inode(struct file_operations *fops); + static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + 
void *data); +@@ -525,7 +523,7 @@ asmlinkage long sys_epoll_create(int siz + * Creates all the items needed to setup an eventpoll file. That is, + * a file structure, and inode and a free file descriptor. + */ +- error = ep_getfd(&fd, &inode, &file, ep); ++ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); + if (error) + goto eexit_2; + +@@ -710,8 +708,8 @@ eexit_1: + /* + * Creates the file descriptor to be used by the epoll interface. + */ +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep) ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, struct file_operations *fops) + { + struct qstr this; + char name[32]; +@@ -727,7 +725,7 @@ static int ep_getfd(int *efd, struct ino + goto eexit_1; + + /* Allocates an inode from the eventpoll file system */ +- inode = ep_eventpoll_inode(); ++ inode = ep_eventpoll_inode(fops); + error = PTR_ERR(inode); + if (IS_ERR(inode)) + goto eexit_2; +@@ -758,7 +756,7 @@ static int ep_getfd(int *efd, struct ino + + file->f_pos = 0; + file->f_flags = O_RDONLY; +- file->f_op = &eventpoll_fops; ++ file->f_op = fops; + file->f_mode = FMODE_READ; + file->f_version = 0; + file->private_data = ep; +@@ -1574,7 +1572,7 @@ static int eventpollfs_delete_dentry(str + } + + +-static struct inode *ep_eventpoll_inode(void) ++static struct inode *ep_eventpoll_inode(struct file_operations *fops) + { + int error = -ENOMEM; + struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); +@@ -1582,7 +1580,7 @@ static struct inode *ep_eventpoll_inode( + if (!inode) + goto eexit_1; + +- inode->i_fop = &eventpoll_fops; ++ inode->i_fop = fops; + + /* + * Mark the inode dirty from the very beginning, + +diff -pruN ../pristine-linux-2.6.16-rc5/include/linux/aio.h ./include/linux/aio.h +--- ../pristine-linux-2.6.16-rc5/include/linux/aio.h 2006-03-14 14:10:21.597916731 +0000 ++++ ./include/linux/aio.h 2006-03-16 10:05:39.848833028 +0000 +@@ -191,6 +191,11 @@ struct kioctx 
{ + struct aio_ring_info ring_info; + + struct work_struct wq; ++#ifdef CONFIG_EPOLL ++ // poll integration ++ wait_queue_head_t poll_wait; ++ struct file *file; ++#endif + }; + + /* prototypes */ + +diff -pruN ../pristine-linux-2.6.16-rc5/include/linux/eventpoll.h ./include/linux/eventpoll.h +--- ../pristine-linux-2.6.16-rc5/include/linux/eventpoll.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/linux/eventpoll.h 2006-03-16 10:08:51.577809317 +0000 +@@ -86,6 +86,12 @@ static inline void eventpoll_release(str + } + + ++/* ++ * called by aio code to create fd that can poll the aio event queueQ ++ */ ++struct eventpoll; ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, struct file_operations *fops); + #else + + static inline void eventpoll_init_file(struct file *file) {} diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,28 @@ +XEN_ROOT = ../.. +include $(XEN_ROOT)/tools/Rules.mk + +SUBDIRS-y := +SUBDIRS-y += lib +SUBDIRS-y += drivers + +.PHONY: all +all: build + +.PHONY: build +build: mk-symlinks + @set -e; for subdir in $(SUBDIRS-y); do \ + $(MAKE) -C $$subdir all; \ + done + +.PHONY: install +install: + @set -e; for subdir in $(SUBDIRS-y); do \ + $(MAKE) -C $$subdir install; \ + done + +.PHONY: clean +clean: + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS + @set -e; for subdir in $(SUBDIRS-y); do \ + $(MAKE) -C $$subdir clean; \ + done diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/README Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,122 @@ +Blktap Userspace Tools + Library +================================ + +Andrew Warfield and Julian Chesterfield +16th June 2006 + +{firstname.lastname}@cl.cam.ac.uk + +The blktap userspace toolkit provides a user-level disk I/O +interface. 
The blktap mechanism involves a kernel driver that acts +similarly to the existing Xen/Linux blkback driver, and a set of +associated user-level libraries. Using these tools, blktap allows +virtual block devices presented to VMs to be implemented in userspace +and to be backed by raw partitions, files, network, etc. + +The key benefit of blktap is that it makes it easy and fast to write +arbitrary block backends, and that these user-level backends actually +perform very well. Specifically: + +- Metadata disk formats such as Copy-on-Write, encrypted disks, sparse + formats and other compression features can be easily implemented. + +- Accessing file-based images from userspace avoids problems related + to flushing dirty pages which are present in the Linux loopback + driver. (Specifically, doing a large number of writes to an + NFS-backed image doesn't result in the OOM killer going berserk.) + +- Per-disk handler processes enable easier userspace policing of block + resources, and process-granularity QoS techniques (disk scheduling + and related tools) may be trivially applied to block devices. + +- It's very easy to take advantage of userspace facilities such as + networking libraries, compression utilities, peer-to-peer + file-sharing systems and so on to build more complex block backends. + +- Crashes are contained -- incremental development/debugging is very + fast. + +How it works (in one paragraph): + +Working in conjunction with the kernel blktap driver, all disk I/O +requests from VMs are passed to the userspace daemon (using a shared +memory interface) through a character device. Each active disk is +mapped to an individual device node, allowing per-disk processes to +implement individual block devices where desired. The userspace +drivers are implemented using asynchronous (Linux libaio), +O_DIRECT-based calls to preserve the unbuffered, batched and +asynchronous request dispatch achieved with the existing blkback +code. 
We provide a simple, asynchronous virtual disk interface that +makes it quite easy to add new disk implementations. + +As of June 2006 the currently supported disk formats are: + + - Raw Images (both on partitions and in image files) + - File-backed Qcow disks + - Standalone sparse Qcow disks + - Fast shareable RAM disk between VMs (requires some form of cluster-based + filesystem support e.g. OCFS2 in the guest kernel) + - Some VMDK images - your mileage may vary + +Raw and QCow images have asynchronous backends and so should perform +fairly well. VMDK is based directly on the qemu vmdk driver, which is +synchronous (a.k.a. slow). + +Build and Installation Instructions +=================================== + +Make sure to configure the blktap backend driver in your dom0 kernel. It +will cooperate fine with the existing backend driver, so you can +experiment with tap disks without breaking existing VM configs. + +To build the tools separately, run "make && make install" in +tools/blktap. + + +Using the Tools +=============== + +Prepare the image for booting. For qcow files use the qcow utilities +installed earlier. e.g. qcow-create generates a blank standalone image +or a file-backed CoW image. img2qcow takes an existing image or +partition and creates a sparse, standalone qcow-based file. + +The userspace disk agent is configured to start automatically via xend +(alternatively you can start it manually => 'blktapctrl') + +Customise the VM config file to use the 'tap' handler, followed by the +driver type. e.g. for a raw image such as a file or partition: + +disk = ['tap:aio:<FILENAME>,sda1,w'] + +e.g. for a qcow image: + +disk = ['tap:qcow:<FILENAME>,sda1,w'] + + +Mounting images in Dom0 using the blktap driver +=============================================== +Tap (and blkback) disks are also mountable in Dom0 without requiring an +active VM to attach. You will need to build a xenlinux Dom0 kernel that +includes the blkfront driver (e.g. 
the default 'make world' or +'make kernels' build). Simply use the xm command-line tool to activate +the backend disks, and blkfront will generate a virtual block device that +can be accessed in the same way as a loop device or partition: + +e.g. for a raw image file <FILENAME> that would normally be mounted using +the loopback driver (such as 'mount -o loop <FILENAME> /mnt/disk'), do the +following: + +xm block-attach 0 tap:aio:<FILENAME> /dev/xvda1 w 0 +mount /dev/xvda1 /mnt/disk <--- don't use loop driver + +In this way, you can use any of the userspace device-type drivers built +with the blktap userspace toolkit to open and mount disks such as qcow +or vmdk images: + +xm block-attach 0 tap:qcow:<FILENAME> /dev/xvda1 w 0 +mount /dev/xvda1 /mnt/disk + + + + diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/Makefile Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,76 @@ +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk + +INCLUDES += -I.. -I../lib + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +IBIN = blktapctrl tapdisk +QCOW_UTIL = img2qcow qcow2raw qcow-create +INSTALL_DIR = /usr/sbin +LIBAIO_DIR = ../../libaio/src + +CFLAGS += -fPIC +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wno-unused +CFLAGS += -g3 +CFLAGS += -fno-strict-aliasing +CFLAGS += -I $(XEN_LIBXC) -I $(LIBAIO_DIR) +CFLAGS += $(INCLUDES) -I. -I../../xenstore +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +CFLAGS += -D_GNU_SOURCE + +# Get gcc to generate the dependencies for us. +CFLAGS += -Wp,-MD,.$(@F).d +DEPS = .*.d + +THREADLIB := -lpthread -lz +LIBS := -L. -L.. 
-L../lib +LIBS += -L$(XEN_LIBXC) +LIBS += -lblktap +LIBS += -lcrypto +LIBS += -lz +LIBS += -L$(XEN_XENSTORE) -lxenstore + +AIOLIBS := -L $(LIBAIO_DIR) +AIOLIBS += -laio +AIOLIBS += -static + +BLK-OBJS := block-aio.o +BLK-OBJS += block-sync.o +BLK-OBJS += block-vmdk.o +BLK-OBJS += block-ram.o +BLK-OBJS += block-qcow.o +BLK-OBJS += aes.o + +all: $(IBIN) qcow-util + +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) + + +blktapctrl: + $(CC) $(CFLAGS) -o blktapctrl $(LIBS) blktapctrl.c + +tapdisk: $(BLK-OBJS) + $(CC) $(CFLAGS) -o tapdisk $(BLK-OBJS) tapdisk.c \ + $(AIOLIBS) $(LIBS) + + +qcow-util: $(BLK-OBJS) + $(CC) $(CFLAGS) -o img2qcow $(BLK-OBJS) img2qcow.c \ + $(AIOLIBS) $(LIBS) + $(CC) $(CFLAGS) -o qcow2raw $(BLK-OBJS) qcow2raw.c \ + $(AIOLIBS) $(LIBS) + $(CC) $(CFLAGS) -o qcow-create $(BLK-OBJS) qcow-create.c \ + $(AIOLIBS) $(LIBS) + +install: all + $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INSTALL_DIR) + +clean: + rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) + +.PHONY: clean install + +-include $(DEPS) diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/aes.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/aes.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,1319 @@ +/** + * + * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project. + */ +/* + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen <vincent.rijmen@xxxxxxxxxxxxxxxxxxx> + * @author Antoon Bosselaers <antoon.bosselaers@xxxxxxxxxxxxxxxxxxx> + * @author Paulo Barreto <paulo.barreto@xxxxxxxxxxxx> + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +//#include "vl.h" +#include <inttypes.h> +#include <string.h> +#include "aes.h" + +//#define NDEBUG +#include <assert.h> + +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +/* This controls loop-unrolling in aes_core.c */ +#undef FULL_UNROLL +# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } + +/* +Te0[x] = S [x].[02, 01, 01, 03]; +Te1[x] = S [x].[03, 02, 01, 01]; +Te2[x] = S [x].[01, 03, 02, 01]; +Te3[x] = S [x].[01, 01, 03, 02]; +Te4[x] = S [x].[01, 01, 01, 01]; + +Td0[x] = Si[x].[0e, 09, 0d, 0b]; +Td1[x] = Si[x].[0b, 0e, 09, 0d]; +Td2[x] = Si[x].[0d, 0b, 0e, 09]; +Td3[x] = Si[x].[09, 0d, 0b, 0e]; +Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +static const u32 Te0[256] = { + 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, + 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, + 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, + 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, + 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, + 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, + 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, + 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, + 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, + 0x6c36365aU, 
0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, + 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, + 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, + 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, + 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, + 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, + 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, + 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, + 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, + 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, + 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, + 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, + 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, + 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, + 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, + 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, + 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, + 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, + 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, + 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, + 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, + 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, + 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, + 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, + 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, + 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, + 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, + 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, + 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, + 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, + 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, + 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, + 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, + 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, + 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, + 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, + 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, + 0xd86c6cb4U, 
0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, + 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, + 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, + 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, + 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, + 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, + 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, + 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, + 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, + 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, + 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, + 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, + 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, + 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, + 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, + 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, + 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, + 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, +}; +static const u32 Te1[256] = { + 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, + 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, + 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, + 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, + 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, + 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, + 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, + 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, + 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, + 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, + 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, + 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, + 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, + 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, + 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, + 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, + 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, + 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, + 0xf6a45252U, 0x4d763b3bU, 
0x61b7d6d6U, 0xce7db3b3U, + 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, + 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, + 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, + 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, + 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, + 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, + 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, + 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, + 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, + 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, + 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, + 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, + 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, + 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, + 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, + 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, + 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, + 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, + 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, + 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, + 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, + 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, + 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, + 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, + 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, + 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, + 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, + 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, + 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, + 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, + 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, + 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, + 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, + 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, + 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, + 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, + 0x91178686U, 0x5899c1c1U, 
0x273a1d1dU, 0xb9279e9eU, + 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, + 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, + 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, + 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, + 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, + 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, + 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, + 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, +}; +static const u32 Te2[256] = { + 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, + 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, + 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, + 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, + 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, + 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, + 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, + 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, + 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, + 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, + 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, + 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, + 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, + 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, + 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, + 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, + 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, + 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, + 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, + 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, + 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, + 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, + 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, + 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, + 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, + 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, + 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, + 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 
0xa8e34ba8U, + 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, + 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, + 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, + 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, + 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, + 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, + 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, + 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, + 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, + 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, + 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, + 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, + 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, + 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, + 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, + 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, + 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, + 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, + 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, + 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, + 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, + 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, + 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, + 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, + 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, + 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, + 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, + 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, + 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, + 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, + 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, + 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, + 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, + 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, + 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, + 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, +}; +static const u32 Te3[256] = { + + 
0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, + 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, + 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, + 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, + 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, + 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, + 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, + 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, + 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, + 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, + 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, + 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, + 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, + 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, + 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, + 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, + 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, + 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, + 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, + 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, + 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, + 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, + 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, + 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, + 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, + 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, + 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, + 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, + 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, + 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, + 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, + 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, + 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, + 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, + 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, + 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, + 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, + 
0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +}; +static const u32 Te4[256] = { + 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, + 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, + 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, + 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, + 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, + 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, + 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, + 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, + 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, + 0x36363636U, 
0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, + 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, + 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, + 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, + 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, + 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, + 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, + 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, + 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, + 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, + 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, + 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, + 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, + 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, + 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, + 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, + 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, + 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, + 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, + 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, + 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, + 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, + 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, + 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, + 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, + 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, + 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, + 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, + 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, + 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, + 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, + 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, + 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, + 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, + 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, + 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, + 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, + 0x6c6c6c6cU, 
0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, + 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, + 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, + 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, + 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, + 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, + 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, + 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, + 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, + 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, + 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, + 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, + 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, + 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, + 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, + 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, + 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, + 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, +}; +static const u32 Td0[256] = { + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 
0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 
0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}; +static const u32 Td1[256] = { + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 
0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}; +static const u32 Td2[256] = { + 
0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 
0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}; +static const u32 Td3[256] = { + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 
0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 
0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +}; +static const u32 Td4[256] = { + 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, + 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, + 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, + 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, + 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, + 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, + 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, + 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, + 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, + 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, + 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, + 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, + 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, + 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, + 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, + 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, + 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, + 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, + 0xd4d4d4d4U, 0xa4a4a4a4U, 
0x5c5c5c5cU, 0xccccccccU, + 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, + 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, + 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, + 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, + 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, + 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, + 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, + 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, + 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, + 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, + 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, + 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, + 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, + 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, + 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, + 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, + 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, + 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, + 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, + 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, + 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, + 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, + 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, + 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, + 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, + 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, + 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, + 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, + 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, + 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, + 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, + 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, + 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, + 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, + 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, + 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, + 0x93939393U, 0xc9c9c9c9U, 
0x9c9c9c9cU, 0xefefefefU, + 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, + 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, + 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, + 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, + 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, + 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, + 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, + 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* round constants XORed into the first word of each key-schedule pass; for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +/** + * Expand the cipher key into the encryption key schedule. + * + * Sets key->rounds to 10, 12 or 14 for bits == 128, 192 or 256 and fills + * key->rd_key, starting with the words read from userKey via GETU32(). + * + * Returns 0 on success, -1 if userKey or key is NULL, and -2 if bits is + * not 128, 192 or 256. + */ +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { + + u32 *rk; + int i = 0; + u32 temp; + + if (!userKey || !key) + return -1; + if (bits != 128 && bits != 192 && bits != 256) + return -2; + + rk = key->rd_key; + + if (bits==128) + key->rounds = 10; + else if (bits==192) + key->rounds = 12; + else + key->rounds = 14; + + rk[0] = GETU32(userKey ); + rk[1] = GETU32(userKey + 4); + rk[2] = GETU32(userKey + 8); + rk[3] = GETU32(userKey + 12); + if (bits == 128) { /* four new words per pass; finished once ten rcon values are used */ + while (1) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 0; + } + rk += 4; + } + } + rk[4] = GETU32(userKey + 16); + rk[5] = GETU32(userKey + 20); + if (bits == 192) { /* six new words per pass; the pass where i reaches 8 stops after four */ + while (1) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] =
rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 0; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(userKey + 24); + rk[7] = GETU32(userKey + 28); + if (bits == 256) { /* eight new words per pass; the pass where i reaches 7 stops after four */ + while (1) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 0; + } + temp = rk[11]; /* second half of the pass: Te4 bytes keep their position and no rcon is added */ + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; /* not reached: bits was validated above and every branch returns from inside its loop */ +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * Builds the encryption schedule in key, reverses the order of its 4-word + * round keys, then transforms every round key except the first and the last. + * + * Returns 0 on success, or the negative status of AES_set_encrypt_key(). + */ +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { + + u32 *rk; + int i, j, status; + u32 temp; + + /* first, start with an encryption schedule (its argument checks apply here too) */ + status = AES_set_encrypt_key(userKey, bits, key); + if (status < 0) + return status; + + rk = key->rd_key; + + /* invert the order of the round keys (swap 4-word groups from both ends): */ + for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last (each byte indexes Te4, and the low byte of that entry indexes Td0..Td3): */ + for (i = 1; i < (key->rounds); i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >>
8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return 0; +} + +#ifndef AES_ASM +/* + * Encrypt a single block + * in and out can overlap + */ +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key) { + + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + assert(in && out && key); + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in ) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 
>> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 
0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; 
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(out , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 
0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +/* + * Decrypt a single block + * in and out can overlap + */ +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key) { + + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + assert(in && out && key); + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in ) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] 
^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) 
& 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ 
Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(out , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 
0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +#endif /* AES_ASM */ + +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + const unsigned long length, const AES_KEY *key, + unsigned char *ivec, const int enc) +{ + + unsigned long n; + unsigned long len = length; + unsigned char tmp[AES_BLOCK_SIZE]; + + assert(in && out && key && ivec); + + if (enc) { + while (len >= AES_BLOCK_SIZE) { + for(n=0; n < AES_BLOCK_SIZE; ++n) + tmp[n] = in[n] ^ ivec[n]; + AES_encrypt(tmp, out, key); + memcpy(ivec, out, AES_BLOCK_SIZE); + len -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + if (len) { + for(n=0; n < len; ++n) + tmp[n] = in[n] ^ ivec[n]; + for(n=len; n < AES_BLOCK_SIZE; ++n) + tmp[n] = ivec[n]; + AES_encrypt(tmp, tmp, key); + memcpy(out, tmp, AES_BLOCK_SIZE); + memcpy(ivec, tmp, AES_BLOCK_SIZE); + } + } else { + while (len >= AES_BLOCK_SIZE) { + memcpy(tmp, in, AES_BLOCK_SIZE); + AES_decrypt(in, out, key); + for(n=0; n < AES_BLOCK_SIZE; ++n) + out[n] ^= ivec[n]; + memcpy(ivec, tmp, AES_BLOCK_SIZE); + len -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + if (len) { + memcpy(tmp, in, AES_BLOCK_SIZE); + AES_decrypt(tmp, tmp, key); + for(n=0; n < len; ++n) + out[n] = tmp[n] ^ ivec[n]; + memcpy(ivec, tmp, AES_BLOCK_SIZE); + } + } +} diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/aes.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/aes.h Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,26 @@ +#ifndef QEMU_AES_H +#define QEMU_AES_H + +#define AES_MAXNR 14 +#define AES_BLOCK_SIZE 16 + +struct aes_key_st { + uint32_t rd_key[4 *(AES_MAXNR + 1)]; + int rounds; +}; +typedef struct aes_key_st AES_KEY; + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + +void AES_encrypt(const unsigned char *in, unsigned char *out, + const 
AES_KEY *key); +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + const unsigned long length, const AES_KEY *key, + unsigned char *ivec, const int enc); + +#endif diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/blktapctrl.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/blktapctrl.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,704 @@ +/* + * blktapctrl.c + * + * userspace controller for the blktap disks. + * As requests for new block devices arrive, + * the controller spawns off a separate process + * per-disk. + * + * + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/user.h> +#include <err.h> +#include <errno.h> +#include <sys/types.h> +#include <linux/types.h> +#include <signal.h> +#include <fcntl.h> +#include <sys/poll.h> +#include <sys/ioctl.h> +#include <string.h> +#include <unistd.h> +#include <xs.h> +#include <printf.h> +#include <sys/time.h> +#include <syslog.h> + +#include "blktaplib.h" +#include "blktapctrl.h" +#include "tapdisk.h" + +#define NUM_POLL_FDS 2 +#define MSG_SIZE 4096 +#define MAX_TIMEOUT 10 +#define MAX_RAND_VAL 0xFFFF + +int run = 1; +int max_timeout = MAX_TIMEOUT; +int ctlfd = 0; + +static int open_ctrl_socket(char *devname); +static int write_msg(int fd, int msgtype, void *ptr, void *ptr2); +static int read_msg(int fd, int msgtype, void *ptr); +static driver_list_entry_t *active_disks[MAX_DISK_TYPES]; + +void sig_handler(int sig) +{ + run = 0; +} + +static void init_driver_list(void) +{ + int i; + + for (i = 0; i < MAX_DISK_TYPES; i++) + active_disks[i] = NULL; + return; +} + +static void init_rng(void) +{ + static uint32_t seed; + struct timeval tv; + + gettimeofday(&tv, NULL); + seed = tv.tv_usec; + srand48(seed); + return; +} + +static void make_blktap_dev(char *devname, int major, int minor) +{ + struct stat st; + + if (lstat(devname, &st) != 0) { + /*Need to create device*/ + if (mkdir(BLKTAP_DEV_DIR, 0755) == 0) + DPRINTF("Created %s directory\n",BLKTAP_DEV_DIR); + if (mknod(devname, S_IFCHR|0600, + makedev(major, minor)) == 0) + DPRINTF("Created %s device\n",devname); + } else DPRINTF("%s device already exists\n",devname); +} + +static int get_new_dev(int *major, int *minor, blkif_t *blkif) +{ + domid_translate_t tr; + int ret; + char 
*devname; + + tr.domid = blkif->domid; + tr.busid = (unsigned short)blkif->be_id; + ret = ioctl(ctlfd, BLKTAP_IOCTL_NEWINTF, tr ); + + if ( (ret <= 0)||(ret > MAX_TAP_DEV) ) { + DPRINTF("Incorrect Dev ID [%d]\n",ret); + return -1; + } + + *minor = ret; + *major = ioctl(ctlfd, BLKTAP_IOCTL_MAJOR, ret ); + if (*major < 0) { + DPRINTF("Incorrect Major ID [%d]\n",*major); + return -1; + } + + asprintf(&devname,"%s/%s%d",BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, *minor); + make_blktap_dev(devname,*major,*minor); + DPRINTF("Received device id %d and major %d, " + "sent domid %d and be_id %d\n", + *minor, *major, tr.domid, tr.busid); + return 0; +} + +static int get_tapdisk_pid(blkif_t *blkif) +{ + int ret; + + if ((ret = write_msg(blkif->fds[WRITE], CTLMSG_PID, blkif, NULL)) + <= 0) { + DPRINTF("Write_msg failed - CTLMSG_PID(%d)\n", ret); + return -EINVAL; + } + + if ((ret = read_msg(blkif->fds[READ], CTLMSG_PID_RSP, blkif)) + <= 0) { + DPRINTF("Read_msg failure - CTLMSG_PID(%d)\n", ret); + return -EINVAL; + } + return 1; +} + +static blkif_t *test_path(char *path, char **dev, int *type) +{ + char *ptr, handle[10]; + int i, size; + + size = sizeof(dtypes)/sizeof(disk_info_t *); + *type = MAX_DISK_TYPES + 1; + + if ( (ptr = strstr(path, ":"))!=NULL) { + memcpy(handle, path, (ptr - path)); + *dev = ptr + 1; + ptr = handle + (ptr - path); + *ptr = '\0'; + DPRINTF("Detected handle: [%s]\n",handle); + + for (i = 0; i < size; i++) { + if (strncmp(handle, dtypes[i]->handle, (ptr - path)) + ==0) { + *type = dtypes[i]->idnum; + + if (dtypes[i]->single_handler == 1) { + /* Check whether tapdisk process + already exists */ + if (active_disks[dtypes[i]->idnum] + == NULL) return NULL; + else + return active_disks[dtypes[i]->idnum]->blkif; + } + } + } + } else *dev = NULL; + + return NULL; +} + +static void add_disktype(blkif_t *blkif, int type) +{ + driver_list_entry_t *entry, *ptr, *last; + + if (type > MAX_DISK_TYPES) return; + + entry = malloc(sizeof(driver_list_entry_t)); + entry->blkif = 
blkif; + entry->next = NULL; + ptr = active_disks[type]; + + if (ptr == NULL) { + active_disks[type] = entry; + entry->prev = NULL; + return; + } + + while (ptr != NULL) { + last = ptr; + ptr = ptr->next; + } + + /*We've found the end of the list*/ + last->next = entry; + entry->prev = last; + + return; +} + +static int del_disktype(blkif_t *blkif) +{ + driver_list_entry_t *ptr, *cur, *last; + int type = blkif->drivertype, count = 0, close = 0; + + if (type > MAX_DISK_TYPES) return 1; + + ptr = active_disks[type]; + last = NULL; + while (ptr != NULL) { + count++; + if (blkif == ptr->blkif) { + cur = ptr; + if (ptr->next != NULL) { + /*There's more later in the chain*/ + if (!last) { + /*We're first in the list*/ + active_disks[type] = ptr->next; + ptr = ptr->next; + ptr->prev = NULL; + } + else { + /*We're sandwiched*/ + last->next = ptr->next; + ptr = ptr->next; + ptr->prev = last; + } + + } else if (last) { + /*There's more earlier in the chain*/ + last->next = NULL; + } else { + /*We're the only entry*/ + active_disks[type] = NULL; + if(dtypes[type]->single_handler == 1) + close = 1; + } + DPRINTF("DEL_DISKTYPE: Freeing entry\n"); + free(cur); + if (dtypes[type]->single_handler == 0) close = 1; + + return close; + } + last = ptr; + ptr = ptr->next; + } + DPRINTF("DEL_DISKTYPE: No match\n"); + return 1; +} + +static int write_msg(int fd, int msgtype, void *ptr, void *ptr2) +{ + blkif_t *blkif; + blkif_info_t *blk; + msg_hdr_t *msg; + msg_newdev_t *msg_dev; + char *p, *buf, *path; + int msglen, len, ret; + fd_set writefds; + struct timeval timeout; + image_t *image, *img; + uint32_t seed; + + blkif = (blkif_t *)ptr; + blk = blkif->info; + image = blkif->prv; + len = 0; + + switch (msgtype) + { + case CTLMSG_PARAMS: + path = (char *)ptr2; + DPRINTF("Write_msg called: CTLMSG_PARAMS, sending [%s, %s]\n", + blk->params, path); + + msglen = sizeof(msg_hdr_t) + strlen(path) + 1; + buf = malloc(msglen); + + /*Assign header fields*/ + msg = (msg_hdr_t *)buf; + msg->type = 
CTLMSG_PARAMS; + msg->len = msglen; + msg->drivertype = blkif->drivertype; + + gettimeofday(&timeout, NULL); + msg->cookie = blkif->cookie; + DPRINTF("Generated cookie, %d\n",blkif->cookie); + + /*Copy blk->params to msg*/ + p = buf + sizeof(msg_hdr_t); + memcpy(p, path, strlen(path) + 1); + + break; + + case CTLMSG_NEWDEV: + DPRINTF("Write_msg called: CTLMSG_NEWDEV\n"); + + msglen = sizeof(msg_hdr_t) + sizeof(msg_newdev_t); + buf = malloc(msglen); + + /*Assign header fields*/ + msg = (msg_hdr_t *)buf; + msg->type = CTLMSG_NEWDEV; + msg->len = msglen; + msg->drivertype = blkif->drivertype; + msg->cookie = blkif->cookie; + + msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); + msg_dev->devnum = blkif->minor; + msg_dev->domid = blkif->domid; + + break; + + case CTLMSG_CLOSE: + DPRINTF("Write_msg called: CTLMSG_CLOSE\n"); + + msglen = sizeof(msg_hdr_t); + buf = malloc(msglen); + + /*Assign header fields*/ + msg = (msg_hdr_t *)buf; + msg->type = CTLMSG_CLOSE; + msg->len = msglen; + msg->drivertype = blkif->drivertype; + msg->cookie = blkif->cookie; + + break; + + case CTLMSG_PID: + DPRINTF("Write_msg called: CTLMSG_PID\n"); + + msglen = sizeof(msg_hdr_t); + buf = malloc(msglen); + + /*Assign header fields*/ + msg = (msg_hdr_t *)buf; + msg->type = CTLMSG_PID; + msg->len = msglen; + msg->drivertype = blkif->drivertype; + msg->cookie = blkif->cookie; + + break; + + default: + return -1; + } + + /*Now send the message*/ + ret = 0; + FD_ZERO(&writefds); + FD_SET(fd,&writefds); + timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/ + timeout.tv_usec = 0; + if (select(fd+1, (fd_set *) 0, &writefds, + (fd_set *) 0, &timeout) > 0) { + len = write(fd, buf, msglen); + if (len == -1) DPRINTF("Write failed: (%d)\n",errno); + } + free(buf); + + return len; +} + +static int read_msg(int fd, int msgtype, void *ptr) +{ + blkif_t *blkif; + blkif_info_t *blk; + msg_hdr_t *msg; + msg_pid_t *msg_pid; + char *p, *buf; + int msglen = MSG_SIZE, len, ret; + fd_set readfds; + 
struct timeval timeout; + image_t *image, *img; + + + blkif = (blkif_t *)ptr; + blk = blkif->info; + image = blkif->prv; + + buf = malloc(MSG_SIZE); + + ret = 0; + FD_ZERO(&readfds); + FD_SET(fd,&readfds); + timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/ + timeout.tv_usec = 0; + if (select(fd+1, &readfds, (fd_set *) 0, + (fd_set *) 0, &timeout) > 0) { + ret = read(fd, buf, msglen); + + } + if (ret > 0) { + msg = (msg_hdr_t *)buf; + switch (msg->type) + { + case CTLMSG_IMG: + img = (image_t *)(buf + sizeof(msg_hdr_t)); + image->size = img->size; + image->secsize = img->secsize; + image->info = img->info; + + DPRINTF("Received CTLMSG_IMG: %lu, %lu, %lu\n", + image->size, image->secsize, image->info); + if(msgtype != CTLMSG_IMG) ret = 0; + break; + + case CTLMSG_IMG_FAIL: + DPRINTF("Received CTLMSG_IMG_FAIL, " + "unable to open image\n"); + ret = 0; + break; + + case CTLMSG_NEWDEV_RSP: + DPRINTF("Received CTLMSG_NEWDEV_RSP\n"); + if(msgtype != CTLMSG_NEWDEV_RSP) ret = 0; + break; + + case CTLMSG_NEWDEV_FAIL: + DPRINTF("Received CTLMSG_NEWDEV_FAIL\n"); + ret = 0; + break; + + case CTLMSG_CLOSE_RSP: + DPRINTF("Received CTLMSG_CLOSE_RSP\n"); + if (msgtype != CTLMSG_CLOSE_RSP) ret = 0; + break; + + case CTLMSG_PID_RSP: + DPRINTF("Received CTLMSG_PID_RSP\n"); + if (msgtype != CTLMSG_PID_RSP) ret = 0; + else { + msg_pid = (msg_pid_t *) + (buf + sizeof(msg_hdr_t)); + blkif->tappid = msg_pid->pid; + DPRINTF("\tPID: [%d]\n",blkif->tappid); + } + break; + default: + DPRINTF("UNKNOWN MESSAGE TYPE RECEIVED\n"); + ret = 0; + break; + } + } + + free(buf); + + return ret; + +} + +int blktapctrl_new_blkif(blkif_t *blkif) +{ + blkif_info_t *blk; + int major, minor, fd_read, fd_write, type, new; + char *rdctldev, *wrctldev, *cmd, *ptr; + image_t *image; + blkif_t *exist = NULL; + + DPRINTF("Received a poll for a new vbd\n"); + if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) { + if (get_new_dev(&major, &minor, blkif)<0) + return -1; + + exist = 
test_path(blk->params, &ptr, &type); + blkif->drivertype = type; + blkif->cookie = lrand48() % MAX_RAND_VAL; + + if (!exist) { + DPRINTF("Process does not exist:\n"); + asprintf(&rdctldev, "/dev/xen/tapctrlread%d", minor); + blkif->fds[READ] = open_ctrl_socket(rdctldev); + + + asprintf(&wrctldev, "/dev/xen/tapctrlwrite%d", minor); + blkif->fds[WRITE] = open_ctrl_socket(wrctldev); + + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) + goto fail; + + /*launch the new process*/ + asprintf(&cmd, "tapdisk %s %s", wrctldev, rdctldev); + DPRINTF("Launching process, CMDLINE [%s]\n",cmd); + if (system(cmd) == -1) { + DPRINTF("Unable to fork, cmdline: [%s]\n",cmd); + return -1; + } + + free(rdctldev); + free(wrctldev); + free(cmd); + } else { + DPRINTF("Process exists!\n"); + blkif->fds[READ] = exist->fds[READ]; + blkif->fds[WRITE] = exist->fds[WRITE]; + } + + add_disktype(blkif, type); + blkif->major = major; + blkif->minor = minor; + + image = (image_t *)malloc(sizeof(image_t)); + blkif->prv = (void *)image; + blkif->ops = &tapdisk_ops; + + /*Retrieve the PID of the new process*/ + if (get_tapdisk_pid(blkif) <= 0) { + DPRINTF("Unable to contact disk process\n"); + goto fail; + } + + /* Both of the following read and write calls will block up to + * max_timeout val*/ + if (write_msg(blkif->fds[WRITE], CTLMSG_PARAMS, blkif, ptr) + <= 0) { + DPRINTF("Write_msg failed - CTLMSG_PARAMS\n"); + goto fail; + } + + if (read_msg(blkif->fds[READ], CTLMSG_IMG, blkif) <= 0) { + DPRINTF("Read_msg failure - CTLMSG_IMG\n"); + goto fail; + } + + } else return -1; + + return 0; +fail: + ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor); + return -EINVAL; +} + +int map_new_blktapctrl(blkif_t *blkif) +{ + DPRINTF("Received a poll for a new devmap\n"); + if (write_msg(blkif->fds[WRITE], CTLMSG_NEWDEV, blkif, NULL) <= 0) { + DPRINTF("Write_msg failed - CTLMSG_NEWDEV\n"); + return -EINVAL; + } + + if (read_msg(blkif->fds[READ], CTLMSG_NEWDEV_RSP, blkif) <= 0) { + DPRINTF("Read_msg failed - 
/*
 * open_ctrl_socket - create (if necessary) and open a control FIFO.
 *
 * devname: filesystem path of the FIFO
 *
 * The FIFO is created with mkfifo(); an already-existing path (EEXIST)
 * is accepted.  It is then opened O_RDWR|O_NONBLOCK.
 *
 * Returns the open file descriptor, or -1 if the FIFO could not be
 * created or opened.  Creation failure used to terminate the whole
 * process with exit(0); it is now reported like an open failure so the
 * caller's existing -1 handling can clean up.
 */
int open_ctrl_socket(char *devname)
{
	int ret;
	int ipc_fd;

	ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
	if ( (ret != 0) && (errno != EEXIST) ) {
		DPRINTF("ERROR: pipe failed (%d)\n", errno);
		return -1;
	}

	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);

	if (ipc_fd < 0) {
		DPRINTF("FD open failed\n");
		return -1;
	}

	return ipc_fd;
}
*/ + h = xs_daemon_open(); + if (h == NULL) { + DPRINTF("xs_daemon_open failed -- " + "is xenstore running?\n"); + goto open_failed; + } + + ret = add_blockdevice_probe_watch(h, "Domain-0"); + if (ret != 0) { + DPRINTF("adding device probewatch\n"); + goto open_failed; + } + + ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); + + process = getpid(); + ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process ); + + /*Static pollhooks*/ + pfd_count = 0; + tap_pfd = pfd_count++; + pfd[tap_pfd].fd = ctlfd; + pfd[tap_pfd].events = POLLIN; + + store_pfd = pfd_count++; + pfd[store_pfd].fd = xs_fileno(h); + pfd[store_pfd].events = POLLIN; + + while (run) { + timeout = 1000; /*Milliseconds*/ + ret = poll(pfd, pfd_count, timeout); + + if (ret > 0) { + if (pfd[store_pfd].revents) { + ret = xs_fire_next_watch(h); + } + } + } + + ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH ); + close(ctlfd); + closelog(); + + return 0; + + open_failed: + DPRINTF("Unable to start blktapctrl\n"); + closelog(); + return -1; +} diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/blktapctrl.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/blktapctrl.h Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,55 @@ +/* blktapctrl.h + * + * controller image utils. 
+ * + * (c) 2004-6 Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +static inline long int tapdisk_get_size(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->size; +} + +static inline long int tapdisk_get_secsize(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->secsize; +} + +static inline unsigned tapdisk_get_info(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->info; +} + +struct blkif_ops tapdisk_ops = { + .get_size = tapdisk_get_size, + .get_secsize = tapdisk_get_secsize, + .get_info = tapdisk_get_info, +}; diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/block-aio.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/block-aio.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,327 @@ +/* block-aio.c + * + * libaio-based raw disk implementation. + * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + * NB: This code is not thread-safe. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include <errno.h> +#include <libaio.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include "tapdisk.h" + + +/** + * We used a kernel patch to return an fd associated with the AIO context + * so that we can concurrently poll on synchronous and async descriptors. + * This is signalled by passing 1 as the io context to io_setup. + */ +#define REQUEST_ASYNC_FD 1 + +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8) + +struct pending_aio { + td_callback_t cb; + int id; + void *private; +}; + +struct tdaio_state { + int fd; + + /* libaio state */ + io_context_t aio_ctx; + struct iocb iocb_list [MAX_AIO_REQS]; + struct iocb *iocb_free [MAX_AIO_REQS]; + struct pending_aio pending_aio[MAX_AIO_REQS]; + int iocb_free_count; + struct iocb *iocb_queue[MAX_AIO_REQS]; + int iocb_queued; + int poll_fd; /* NB: we require aio_poll support */ + struct io_event aio_events[MAX_AIO_REQS]; +}; + +#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list) + +/*Get Image size, secsize*/ +static int get_image_info(struct td_state *s, int fd) +{ + int ret; + long size; + unsigned long total_size; + struct statvfs statBuf; + struct stat stat; + + ret = fstat(fd, &stat); + if (ret != 0) { + DPRINTF("ERROR: fstat failed, Couldn't stat image"); + return -EINVAL; + } + + if (S_ISBLK(stat.st_mode)) { + /*Accessing block device directly*/ + s->size = 0; + if 
(ioctl(fd,BLKGETSIZE,&s->size)!=0) { + DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image"); + return -EINVAL; + } + + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + + /*Get the sector size*/ +#if defined(BLKSSZGET) + { + int arg; + s->sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &s->sector_size); + + if (s->sector_size != DEFAULT_SECTOR_SIZE) + DPRINTF("Note: sector size is %ld (not %d)\n", + s->sector_size, DEFAULT_SECTOR_SIZE); + } +#else + s->sector_size = DEFAULT_SECTOR_SIZE; +#endif + + } else { + /*Local file? try fstat instead*/ + s->size = (stat.st_size >> SECTOR_SHIFT); + s->sector_size = DEFAULT_SECTOR_SIZE; + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + } + + if (s->size == 0) { + s->size =((uint64_t) 16836057); + s->sector_size = DEFAULT_SECTOR_SIZE; + } + s->info = 0; + + return 0; +} + +/* Open the disk file and initialize aio state. */ +int tdaio_open (struct td_state *s, const char *name) +{ + int i, fd, ret = 0; + struct tdaio_state *prv = (struct tdaio_state *)s->private; + s->private = prv; + + DPRINTF("XXX: block-aio open('%s')", name); + /* Initialize AIO */ + prv->iocb_free_count = MAX_AIO_REQS; + prv->iocb_queued = 0; + + prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; + prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx); + + if (prv->poll_fd < 0) { + ret = prv->poll_fd; + DPRINTF("Couldn't get fd for AIO poll support. This is " + "probably because your kernel does not have the " + "aio-poll patch applied.\n"); + goto done; + } + + for (i=0;i<MAX_AIO_REQS;i++) + prv->iocb_free[i] = &prv->iocb_list[i]; + + /* Open the file */ + fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); + + if ( (fd == -1) && (errno == EINVAL) ) { + + /* Maybe O_DIRECT isn't supported. 
*/ + fd = open(name, O_RDWR | O_LARGEFILE); + if (fd != -1) DPRINTF("WARNING: Accessing image without" + "O_DIRECT! (%s)\n", name); + + } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name); + + if (fd == -1) { + DPRINTF("Unable to open [%s] (%d)!\n", name, 0 - errno); + ret = 0 - errno; + goto done; + } + + prv->fd = fd; + + ret = get_image_info(s, fd); +done: + return ret; +} + +int tdaio_queue_read(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct iocb *io; + struct pending_aio *pio; + struct tdaio_state *prv = (struct tdaio_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + long ioidx; + + if (prv->iocb_free_count == 0) + return -ENOMEM; + io = prv->iocb_free[--prv->iocb_free_count]; + + ioidx = IOCB_IDX(prv, io); + pio = &prv->pending_aio[ioidx]; + pio->cb = cb; + pio->id = id; + pio->private = private; + + io_prep_pread(io, prv->fd, buf, size, offset); + io->data = (void *)ioidx; + + prv->iocb_queue[prv->iocb_queued++] = io; + + return 0; +} + +int tdaio_queue_write(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct iocb *io; + struct pending_aio *pio; + struct tdaio_state *prv = (struct tdaio_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + long ioidx; + + if (prv->iocb_free_count == 0) + return -ENOMEM; + io = prv->iocb_free[--prv->iocb_free_count]; + + ioidx = IOCB_IDX(prv, io); + pio = &prv->pending_aio[ioidx]; + pio->cb = cb; + pio->id = id; + pio->private = private; + + io_prep_pwrite(io, prv->fd, buf, size, offset); + io->data = (void *)ioidx; + + prv->iocb_queue[prv->iocb_queued++] = io; + + return 0; +} + +int tdaio_submit(struct td_state *s) +{ + int ret; + struct tdaio_state *prv = (struct tdaio_state *)s->private; + + ret = io_submit(prv->aio_ctx, prv->iocb_queued, 
prv->iocb_queue); + + /* XXX: TODO: Handle error conditions here. */ + + /* Success case: */ + prv->iocb_queued = 0; + + return ret; +} + +int *tdaio_get_fd(struct td_state *s) +{ + struct tdaio_state *prv = (struct tdaio_state *)s->private; + int *fds, i; + + fds = malloc(sizeof(int) * MAX_IOFD); + /*initialise the FD array*/ + for(i=0;i<MAX_IOFD;i++) fds[i] = 0; + + fds[0] = prv->poll_fd; + + return fds; +} + +int tdaio_close(struct td_state *s) +{ + struct tdaio_state *prv = (struct tdaio_state *)s->private; + + io_destroy(prv->aio_ctx); + close(prv->fd); + + return 0; +} + +int tdaio_do_callbacks(struct td_state *s, int sid) +{ + int ret, i, rsp = 0; + struct io_event *ep; + struct tdaio_state *prv = (struct tdaio_state *)s->private; + + /* Non-blocking test for completed io. */ + ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, + NULL); + + for (ep=prv->aio_events,i=ret; i-->0; ep++) { + struct iocb *io = ep->obj; + struct pending_aio *pio; + + pio = &prv->pending_aio[(long)io->data]; + + if (ep->res != io->u.c.nbytes) { + /* TODO: handle this case better. */ + DPRINTF("AIO did less than I asked it to. \n"); + } + rsp += pio->cb(s, ep->res2, pio->id, pio->private); + + prv->iocb_free[prv->iocb_free_count++] = io; + } + return rsp; +} + +struct tap_disk tapdisk_aio = { + "tapdisk_aio", + sizeof(struct tdaio_state), + tdaio_open, + tdaio_queue_read, + tdaio_queue_write, + tdaio_submit, + tdaio_get_fd, + tdaio_close, + tdaio_do_callbacks, +}; diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/block-qcow.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/block-qcow.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,1369 @@ +/* block-qcow.c + * + * Asynchronous Qemu copy-on-write disk implementation. 
+ * Code based on the Qemu implementation + * (see copyright notice below) + * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + */ + +/* + * Block driver for the QCOW format + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files(the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include <zlib.h> +#include <inttypes.h> +#include <libaio.h> +#include <openssl/md5.h> +#include "bswap.h" +#include "aes.h" +#include "tapdisk.h" + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + + +/******AIO DEFINES******/ +#define REQUEST_ASYNC_FD 1 +#define MAX_QCOW_IDS 0xFFFF +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8) + +struct pending_aio { + td_callback_t cb; + int id; + void *private; + int nb_sectors; + char *buf; + uint64_t sector; + int qcow_idx; +}; + +#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list) + +#define ZERO_TEST(_b) (_b | 0x00) + +/**************************************************************/ +/* QEMU COW block driver with compression and encryption support */ + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) +#define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb) +#define QCOW_VERSION 1 + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 + +#define 
QCOW_OFLAG_COMPRESSED (1LL << 63) + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t mtime; + uint64_t size; /* in bytes */ + uint8_t cluster_bits; + uint8_t l2_bits; + uint32_t crypt_method; + uint64_t l1_table_offset; +} QCowHeader; + +/*Extended header for Xen enhancements*/ +typedef struct QCowHeader_ext { + uint32_t xmagic; + uint32_t cksum; + uint32_t min_cluster_alloc; +} QCowHeader_ext; + +#define L2_CACHE_SIZE 16 /*Fixed allocation in Qemu*/ + +struct tdqcow_state { + int fd; /*Main Qcow file descriptor */ + uint64_t fd_end; /*Store a local record of file length */ + int bfd; /*Backing file descriptor*/ + char *name; /*Record of the filename*/ + int poll_pipe[2]; /*dummy fd for polling on */ + int encrypted; /*File contents are encrypted or plain*/ + int cluster_bits; /*Determines length of cluster as + *indicated by file hdr*/ + int cluster_size; /*Length of cluster*/ + int cluster_sectors; /*Number of sectors per cluster*/ + int cluster_alloc; /*Blktap fix for allocating full + *extents*/ + int min_cluster_alloc; /*Blktap historical extent alloc*/ + int l2_bits; /*Size of L2 table entry*/ + int l2_size; /*Full table size*/ + int l1_size; /*L1 table size*/ + uint64_t cluster_offset_mask; + uint64_t l1_table_offset; /*L1 table offset from beginning of + *file*/ + uint64_t *l1_table; /*L1 table entries*/ + uint64_t *l2_cache; /*We maintain a cache of size + *L2_CACHE_SIZE of most read entries*/ + uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /*L2 cache entries*/ + uint32_t l2_cache_counts[L2_CACHE_SIZE]; /*Cache access record*/ + uint8_t *cluster_cache; + uint8_t *cluster_data; + uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/ + uint64_t cluster_cache_offset; /**/ + uint32_t crypt_method; /*current crypt method, 0 if no + *key yet */ + uint32_t crypt_method_header; /**/ + AES_KEY aes_encrypt_key; /*AES key*/ + 
AES_KEY aes_decrypt_key; /*AES key*/ + /* libaio state */ + io_context_t aio_ctx; + int nr_reqs [MAX_QCOW_IDS]; + struct iocb iocb_list [MAX_AIO_REQS]; + struct iocb *iocb_free [MAX_AIO_REQS]; + struct pending_aio pending_aio[MAX_AIO_REQS]; + int iocb_free_count; + struct iocb *iocb_queue[MAX_AIO_REQS]; + int iocb_queued; + int poll_fd; /* NB: we require aio_poll support */ + struct io_event aio_events[MAX_AIO_REQS]; +}; + +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset); + +static int init_aio_state(struct td_state *bs) +{ + int i; + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + long ioidx; + + /*Initialize Locking bitmap*/ + s->sector_lock = calloc(1, bs->size); + + if (!s->sector_lock) { + DPRINTF("Failed to allocate sector lock\n"); + goto fail; + } + + /* Initialize AIO */ + s->iocb_free_count = MAX_AIO_REQS; + s->iocb_queued = 0; + + /*Signal kernel to create Poll FD for Asyc completion events*/ + s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; + s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx); + + if (s->poll_fd < 0) { + DPRINTF("Retrieving Async poll fd failed\n"); + goto fail; + } + + for (i=0;i<MAX_AIO_REQS;i++) + s->iocb_free[i] = &s->iocb_list[i]; + for (i=0;i<MAX_QCOW_IDS;i++) + s->nr_reqs[i] = 0; + DPRINTF("AIO state initialised\n"); + + return 0; + + fail: + return -1; +} + +/* + *Test if block is zero. 
+ * Return: + * 1 for TRUE + * 0 for FALSE + */ +static inline int IS_ZERO(char *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) { + /*if not zero, return false*/ + if (ZERO_TEST(*(buf + i))) return 0; + } + return 1; +} + +static uint32_t gen_cksum(char *ptr, int len) +{ + unsigned char *md; + uint32_t ret; + + md = malloc(MD5_DIGEST_LENGTH); + + if(!md) return 0; + + if (MD5((unsigned char *)ptr, len, md) != md) return 0; + + memcpy(&ret, md, sizeof(uint32_t)); + free(md); + return ret; +} + +static int qcow_set_key(struct td_state *bs, const char *key) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + uint8_t keybuf[16]; + int len, i; + + memset(keybuf, 0, 16); + len = strlen(key); + if (len > 16) + len = 16; + /* XXX: we could compress the chars to 7 bits to increase + entropy */ + for (i = 0; i < len; i++) { + keybuf[i] = key[i]; + } + s->crypt_method = s->crypt_method_header; + + if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) + return -1; + if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) + return -1; +#if 0 + /* test */ + { + uint8_t in[16]; + uint8_t out[16]; + uint8_t tmp[16]; + for (i=0; i<16; i++) + in[i] = i; + AES_encrypt(in, tmp, &s->aes_encrypt_key); + AES_decrypt(tmp, out, &s->aes_decrypt_key); + for (i = 0; i < 16; i++) + DPRINTF(" %02x", tmp[i]); + DPRINTF("\n"); + for (i = 0; i < 16; i++) + DPRINTF(" %02x", out[i]); + DPRINTF("\n"); + } +#endif + return 0; +} + +static int async_read(struct tdqcow_state *s, int fd, int size, + uint64_t offset, + char *buf, td_callback_t cb, + int id, uint64_t sector, int qcow_idx, void *private) +{ + struct iocb *io; + struct pending_aio *pio; + long ioidx; + + io = s->iocb_free[--s->iocb_free_count]; + + ioidx = IOCB_IDX(s, io); + pio = &s->pending_aio[ioidx]; + pio->cb = cb; + pio->id = id; + pio->private = private; + pio->nb_sectors = size/512; + pio->buf = buf; + pio->sector = sector; + pio->qcow_idx = qcow_idx; + + io_prep_pread(io, fd, buf, size, 
offset); + io->data = (void *)ioidx; + + s->iocb_queue[s->iocb_queued++] = io; + + return 1; +} + +static int async_write(struct tdqcow_state *s, int fd, int size, + uint64_t offset, + char *buf, td_callback_t cb, + int id, uint64_t sector, int qcow_idx, void *private) +{ + struct iocb *io; + struct pending_aio *pio; + long ioidx; + + io = s->iocb_free[--s->iocb_free_count]; + + ioidx = IOCB_IDX(s, io); + pio = &s->pending_aio[ioidx]; + pio->cb = cb; + pio->id = id; + pio->private = private; + pio->nb_sectors = size/512; + pio->buf = buf; + pio->sector = sector; + pio->qcow_idx = qcow_idx; + + io_prep_pwrite(io, fd, buf, size, offset); + io->data = (void *)ioidx; + + s->iocb_queue[s->iocb_queued++] = io; + + return 1; +} + +/*TODO: Fix sector span!*/ +static int aio_can_lock(struct tdqcow_state *s, uint64_t sector) +{ + return (s->sector_lock[sector] ? 0 : 1); +} + +static int aio_lock(struct tdqcow_state *s, uint64_t sector) +{ + return ++s->sector_lock[sector]; +} + +static void aio_unlock(struct tdqcow_state *s, uint64_t sector) +{ + if (!s->sector_lock[sector]) return; + + --s->sector_lock[sector]; + return; +} + +/*TODO - Use a freelist*/ +static int get_free_idx(struct tdqcow_state *s) +{ + int i; + + for(i = 0; i < MAX_QCOW_IDS; i++) { + if(s->nr_reqs[i] == 0) return i; + } + return -1; +} + +/* + * The crypt function is compatible with the linux cryptoloop + * algorithm for < 4 GB images. NOTE: out_buf == in_buf is + * supported . + */ +static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num, + uint8_t *out_buf, const uint8_t *in_buf, + int nb_sectors, int enc, + const AES_KEY *key) +{ + union { + uint64_t ll[2]; + uint8_t b[16]; + } ivec; + int i; + + for (i = 0; i < nb_sectors; i++) { + ivec.ll[0] = cpu_to_le64(sector_num); + ivec.ll[1] = 0; + AES_cbc_encrypt(in_buf, out_buf, 512, key, + ivec.b, enc); + sector_num++; + in_buf += 512; + out_buf += 512; + } +} + + +/* 'allocate' is: + * + * 0 to not allocate. 
+ * + * 1 to allocate a normal cluster (for sector indexes 'n_start' to + * 'n_end') + * + * 2 to allocate a compressed cluster of size + * 'compressed_size'. 'compressed_size' must be > 0 and < + * cluster_size + * + * return 0 if not allocated. + */ +static uint64_t get_cluster_offset(struct td_state *bs, + uint64_t offset, int allocate, + int compressed_size, + int n_start, int n_end) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector; + char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr; + uint64_t l2_offset, *l2_table, cluster_offset, tmp; + uint32_t min_count; + int new_l2_table; + + /*Check L1 table for the extent offset*/ + l1_index = offset >> (s->l2_bits + s->cluster_bits); + l2_offset = s->l1_table[l1_index]; + new_l2_table = 0; + if (!l2_offset) { + if (!allocate) + return 0; + /* + * allocating a new l2 entry + extent + * at the end of the file, we must also + * update the L1 entry safely. + */ + l2_offset = s->fd_end; + + /* round to cluster size */ + l2_offset = (l2_offset + s->cluster_size - 1) + & ~(s->cluster_size - 1); + + /* update the L1 entry */ + s->l1_table[l1_index] = l2_offset; + tmp = cpu_to_be64(l2_offset); + + /*Truncate file for L2 table + *(initialised to zero in case we crash)*/ + ftruncate(s->fd, l2_offset + (s->l2_size * sizeof(uint64_t))); + s->fd_end += (s->l2_size * sizeof(uint64_t)); + + /*Update the L1 table entry on disk + * (for O_DIRECT we write 4KByte blocks)*/ + l1_sector = (l1_index * sizeof(uint64_t)) >> 12; + l1_ptr = (char *)s->l1_table + (l1_sector << 12); + + if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) { + DPRINTF("ERROR allocating memory for L1 table\n"); + } + memcpy(tmp_ptr, l1_ptr, 4096); + + /* + * Issue non-asynchronous L1 write. + * For safety, we must ensure that + * entry is written before blocks. 
+ */ + lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET); + if (write(s->fd, tmp_ptr, 4096) != 4096) + return 0; + free(tmp_ptr); + + new_l2_table = 1; + goto cache_miss; + } else if (s->min_cluster_alloc == s->l2_size) { + /*Fast-track the request*/ + cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t)); + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + return cluster_offset + (l2_index * s->cluster_size); + } + + /*Check to see if L2 entry is already cached*/ + for (i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == s->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++s->l2_cache_counts[i] == 0xffffffff) { + for (j = 0; j < L2_CACHE_SIZE; j++) { + s->l2_cache_counts[j] >>= 1; + } + } + l2_table = s->l2_cache + (i << s->l2_bits); + goto found; + } + } + +cache_miss: + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for (i = 0; i < L2_CACHE_SIZE; i++) { + if (s->l2_cache_counts[i] < min_count) { + min_count = s->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = s->l2_cache + (min_index << s->l2_bits); + + /*If extent pre-allocated, read table from disk, + *otherwise write new table to disk*/ + if (new_l2_table) { + /*Should we allocate the whole extent? 
Adjustable parameter.*/ + if (s->cluster_alloc == s->l2_size) { + cluster_offset = l2_offset + + (s->l2_size * sizeof(uint64_t)); + cluster_offset = (cluster_offset + s->cluster_size - 1) + & ~(s->cluster_size - 1); + ftruncate(s->fd, cluster_offset + + (s->cluster_size * s->l2_size)); + s->fd_end = cluster_offset + + (s->cluster_size * s->l2_size); + for (i = 0; i < s->l2_size; i++) { + l2_table[i] = cpu_to_be64(cluster_offset + + (i*s->cluster_size)); + } + } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); + + lseek(s->fd, l2_offset, SEEK_SET); + if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return 0; + } else { + lseek(s->fd, l2_offset, SEEK_SET); + if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return 0; + } + + /*Update the cache entries*/ + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + +found: + /*The extent is split into 's->l2_size' blocks of + *size 's->cluster_size'*/ + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + if (!cluster_offset || + ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) { + if (!allocate) + return 0; + + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && + (n_end - n_start) < s->cluster_sectors) { + /* cluster is already allocated but compressed, we must + decompress it in the case it is not completely + overwritten */ + if (decompress_cluster(s, cluster_offset) < 0) + return 0; + cluster_offset = lseek(s->fd, 0, SEEK_END); + cluster_offset = (cluster_offset + s->cluster_size - 1) + & ~(s->cluster_size - 1); + /* write the cluster content - not asynchronous */ + lseek(s->fd, cluster_offset, SEEK_SET); + if (write(s->fd, s->cluster_cache, s->cluster_size) != + s->cluster_size) + return -1; + } else { + /* allocate a new cluster */ + cluster_offset = lseek(s->fd, 0, SEEK_END); + if (allocate == 1) { + /* round to cluster 
size */ + cluster_offset = + (cluster_offset + s->cluster_size - 1) + & ~(s->cluster_size - 1); + ftruncate(s->fd, cluster_offset + + s->cluster_size); + /* if encrypted, we must initialize the cluster + content which won't be written */ + if (s->crypt_method && + (n_end - n_start) < s->cluster_sectors) { + uint64_t start_sect; + start_sect = (offset & + ~(s->cluster_size - 1)) + >> 9; + memset(s->cluster_data + 512, + 0xaa, 512); + for (i = 0; i < s->cluster_sectors;i++) + { + if (i < n_start || i >= n_end) + { + encrypt_sectors(s, start_sect + i, + s->cluster_data, + s->cluster_data + 512, 1, 1, + &s->aes_encrypt_key); + lseek(s->fd, cluster_offset + i * 512, SEEK_SET); + if (write(s->fd, s->cluster_data, 512) != 512) + return -1; + } + } + } + } else { + cluster_offset |= QCOW_OFLAG_COMPRESSED | + (uint64_t)compressed_size + << (63 - s->cluster_bits); + } + } + /* update L2 table */ + tmp = cpu_to_be64(cluster_offset); + l2_table[l2_index] = tmp; + + /*For IO_DIRECT we write 4KByte blocks*/ + l2_sector = (l2_index * sizeof(uint64_t)) >> 12; + l2_ptr = (char *)l2_table + (l2_sector << 12); + + if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) { + DPRINTF("ERROR allocating memory for L1 table\n"); + } + memcpy(tmp_ptr2, l2_ptr, 4096); + aio_lock(s, offset >> 9); + async_write(s, s->fd, 4096, l2_offset + (l2_sector << 12), + tmp_ptr2, 0, -2, offset >> 9, 0, NULL); + } + return cluster_offset; +} + +static void init_cluster_cache(struct td_state *bs) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + uint32_t count = 0; + int i, cluster_entries; + + cluster_entries = s->cluster_size / 512; + DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n", + cluster_entries, s->cluster_size); + + for (i = 0; i < bs->size; i += cluster_entries) { + if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++; + if (count >= L2_CACHE_SIZE) return; + } + DPRINTF("Finished cluster initialisation, added %d entries\n", count); + return; 
+} + +static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + + int index_in_cluster, n; + uint64_t cluster_offset; + + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + *pnum = n; + return (cluster_offset != 0); +} + +static int decompress_buffer(uint8_t *out_buf, int out_buf_size, + const uint8_t *buf, int buf_size) +{ + z_stream strm1, *strm = &strm1; + int ret, out_len; + + memset(strm, 0, sizeof(*strm)); + + strm->next_in = (uint8_t *)buf; + strm->avail_in = buf_size; + strm->next_out = out_buf; + strm->avail_out = out_buf_size; + + ret = inflateInit2(strm, -12); + if (ret != Z_OK) + return -1; + ret = inflate(strm, Z_FINISH); + out_len = strm->next_out - out_buf; + if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) || + (out_len != out_buf_size) ) { + inflateEnd(strm); + return -1; + } + inflateEnd(strm); + return 0; +} + +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset) +{ + int ret, csize; + uint64_t coffset; + + coffset = cluster_offset & s->cluster_offset_mask; + if (s->cluster_cache_offset != coffset) { + csize = cluster_offset >> (63 - s->cluster_bits); + csize &= (s->cluster_size - 1); + lseek(s->fd, coffset, SEEK_SET); + ret = read(s->fd, s->cluster_data, csize); + if (ret != csize) + return -1; + if (decompress_buffer(s->cluster_cache, s->cluster_size, + s->cluster_data, csize) < 0) { + return -1; + } + s->cluster_cache_offset = coffset; + } + return 0; +} + +/* Open the disk file and initialize qcow state. 
*/ +int tdqcow_open (struct td_state *bs, const char *name) +{ + int fd, len, i, shift, ret, size, l1_table_size; + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + char *buf; + QCowHeader *header; + QCowHeader_ext *exthdr; + uint32_t cksum; + + DPRINTF("QCOW: Opening %s\n",name); + /* set up a pipe so that we can hand back a poll fd that won't fire.*/ + ret = pipe(s->poll_pipe); + if (ret != 0) + return (0 - errno); + + fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); + if (fd < 0) { + DPRINTF("Unable to open %s (%d)\n",name,0 - errno); + return -1; + } + + s->fd = fd; + asprintf(&s->name,"%s", name); + + ASSERT(sizeof(header) < 512); + + ret = posix_memalign((void **)&buf, 512, 512); + if (ret != 0) goto fail; + + if (read(fd, buf, 512) != 512) + goto fail; + + header = (QCowHeader *)buf; + be32_to_cpus(&header->magic); + be32_to_cpus(&header->version); + be64_to_cpus(&header->backing_file_offset); + be32_to_cpus(&header->backing_file_size); + be32_to_cpus(&header->mtime); + be64_to_cpus(&header->size); + be32_to_cpus(&header->crypt_method); + be64_to_cpus(&header->l1_table_offset); + + if (header->magic != QCOW_MAGIC || header->version > QCOW_VERSION) + goto fail; + if (header->size <= 1 || header->cluster_bits < 9) + goto fail; + if (header->crypt_method > QCOW_CRYPT_AES) + goto fail; + s->crypt_method_header = header->crypt_method; + if (s->crypt_method_header) + s->encrypted = 1; + s->cluster_bits = header->cluster_bits; + s->cluster_size = 1 << s->cluster_bits; + s->cluster_sectors = 1 << (s->cluster_bits - 9); + s->l2_bits = header->l2_bits; + s->l2_size = 1 << s->l2_bits; + s->cluster_alloc = s->l2_size; + bs->size = header->size / 512; + s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; + + /* read the level 1 table */ + shift = s->cluster_bits + s->l2_bits; + s->l1_size = (header->size + (1LL << shift) - 1) >> shift; + + s->l1_table_offset = header->l1_table_offset; + + /*allocate a 4Kbyte multiple of memory*/ + l1_table_size = 
s->l1_size * sizeof(uint64_t); + if (l1_table_size % 4096 > 0) { + l1_table_size = ((l1_table_size >> 12) + 1) << 12; + } + ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size); + if (ret != 0) goto fail; + memset(s->l1_table, 0x00, l1_table_size); + + DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n", + (long long)s->l1_table_offset, + (int) (s->l1_size * sizeof(uint64_t)), + l1_table_size); + + lseek(fd, s->l1_table_offset, SEEK_SET); + if (read(fd, s->l1_table, l1_table_size) != l1_table_size) + goto fail; +/* for(i = 0;i < s->l1_size; i++) { + //be64_to_cpus(&s->l1_table[i]); + DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]); + }*/ + + /* alloc L2 cache */ + size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t); + ret = posix_memalign((void **)&s->l2_cache, 4096, size); + if(ret != 0) goto fail; + + size = s->cluster_size; + ret = posix_memalign((void **)&s->cluster_cache, 4096, size); + if(ret != 0) goto fail; + + ret = posix_memalign((void **)&s->cluster_data, 4096, size); + if(ret != 0) goto fail; + s->cluster_cache_offset = -1; + + /* read the backing file name */ + s->bfd = -1; + if (header->backing_file_offset != 0) { + DPRINTF("Reading backing file data\n"); + len = header->backing_file_size; + if (len > 1023) + len = 1023; + + /*TODO - Fix read size for O_DIRECT and use original fd!*/ + fd = open(name, O_RDONLY | O_LARGEFILE); + + lseek(fd, header->backing_file_offset, SEEK_SET); + if (read(fd, bs->backing_file, len) != len) + goto fail; + bs->backing_file[len] = '\0'; + close(fd); + /***********************************/ + + /*Open backing file*/ + fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE); + if (fd < 0) { + DPRINTF("Unable to open backing file: %s\n", + bs->backing_file); + goto fail; + } + s->bfd = fd; + s->cluster_alloc = 1; /*Cannot use pre-alloc*/ + } + + bs->sector_size = 512; + bs->info = 0; + + /*Detect min_cluster_alloc*/ + s->min_cluster_alloc = 1; /*Default*/ + if (s->bfd == -1 && (s->l1_table_offset % 
4096 == 0) ) { + /*We test to see if the xen magic # exists*/ + exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader)); + be32_to_cpus(&exthdr->xmagic); + if(exthdr->xmagic != XEN_MAGIC) + goto end_xenhdr; + + /*Finally check the L1 table cksum*/ + be32_to_cpus(&exthdr->cksum); + cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t)); + if(exthdr->cksum != cksum) + goto end_xenhdr; + + be32_to_cpus(&exthdr->min_cluster_alloc); + s->min_cluster_alloc = exthdr->min_cluster_alloc; + } + + end_xenhdr: + if (init_aio_state(bs)!=0) { + DPRINTF("Unable to initialise AIO state\n"); + goto fail; + } + s->fd_end = lseek(s->fd, 0, SEEK_END); + + return 0; + +fail: + DPRINTF("QCOW Open failed\n"); + free(s->l1_table); + free(s->l2_cache); + free(s->cluster_cache); + free(s->cluster_data); + close(fd); + return -1; +} + + int tdqcow_queue_read(struct td_state *bs, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; + uint64_t cluster_offset; + + /*Check we can get a lock*/ + for (i = 0; i < nb_sectors; i++) + if (!aio_can_lock(s, sector + i)) { + DPRINTF("AIO_CAN_LOCK failed [%llu]\n", + (long long) sector + i); + return -EBUSY; + } + + /*We store a local record of the request*/ + qcow_idx = get_free_idx(s); + while (nb_sectors > 0) { + cluster_offset = + get_cluster_offset(bs, sector << 9, 0, 0, 0, 0); + index_in_cluster = sector & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + + if (s->iocb_free_count == 0 || !aio_lock(s, sector)) { + DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" + "[%llu]\n", s->iocb_free_count, + (long long) sector); + return -ENOMEM; + } + + if (!cluster_offset && (s->bfd > 0)) { + s->nr_reqs[qcow_idx]++; + asubmit += async_read(s, s->bfd, n * 512, sector << 9, + buf, cb, id, sector, + qcow_idx, private); + } 
else if(!cluster_offset) { + memset(buf, 0, 512 * n); + aio_unlock(s, sector); + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + if (decompress_cluster(s, cluster_offset) < 0) { + ret = -1; + goto done; + } + memcpy(buf, s->cluster_cache + index_in_cluster * 512, + 512 * n); + } else { + s->nr_reqs[qcow_idx]++; + asubmit += async_read(s, s->fd, n * 512, + (cluster_offset + + index_in_cluster * 512), + buf, cb, id, sector, + qcow_idx, private); + } + nb_sectors -= n; + sector += n; + buf += n * 512; + } +done: + /*Callback if no async requests outstanding*/ + if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private); + + return 0; +} + + int tdqcow_queue_write(struct td_state *bs, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; + uint64_t cluster_offset; + + /*Check we can get a lock*/ + for (i = 0; i < nb_sectors; i++) + if (!aio_can_lock(s, sector + i)) { + DPRINTF("AIO_CAN_LOCK failed [%llu]\n", + (long long) (sector + i)); + return -EBUSY; + } + + /*We store a local record of the request*/ + qcow_idx = get_free_idx(s); + while (nb_sectors > 0) { + index_in_cluster = sector & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + + if (s->iocb_free_count == 0 || !aio_lock(s, sector)){ + DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" + "[%llu]\n", s->iocb_free_count, + (long long) sector); + return -ENOMEM; + } + + if (!IS_ZERO(buf,n * 512)) { + + cluster_offset = get_cluster_offset(bs, sector << 9, + 1, 0, + index_in_cluster, + index_in_cluster+n + ); + if (!cluster_offset) { + DPRINTF("Ooops, no write cluster offset!\n"); + ret = -1; + goto done; + } + + if (s->crypt_method) { + encrypt_sectors(s, sector, s->cluster_data, + (unsigned char *)buf, n, 1, + &s->aes_encrypt_key); + s->nr_reqs[qcow_idx]++; + asubmit += 
async_write(s, s->fd, n * 512, + (cluster_offset + + index_in_cluster*512), + (char *)s->cluster_data, + cb, id, sector, + qcow_idx, private); + } else { + s->nr_reqs[qcow_idx]++; + asubmit += async_write(s, s->fd, n * 512, + (cluster_offset + + index_in_cluster*512), + buf, cb, id, sector, + qcow_idx, private); + } + } else { + /*Write data contains zeros, but we must check to see + if cluster already allocated*/ + cluster_offset = get_cluster_offset(bs, sector << 9, + 0, 0, + index_in_cluster, + index_in_cluster+n + ); + if(cluster_offset) { + if (s->crypt_method) { + encrypt_sectors(s, sector, + s->cluster_data, + (unsigned char *)buf, + n, 1, + &s->aes_encrypt_key); + s->nr_reqs[qcow_idx]++; + asubmit += async_write(s, s->fd, + n * 512, + (cluster_offset+ + index_in_cluster * 512), + (char *)s->cluster_data, cb, id, sector, + qcow_idx, private); + } else { + s->nr_reqs[qcow_idx]++; + asubmit += async_write(s, s->fd, n*512, + cluster_offset + index_in_cluster * 512, + buf, cb, id, sector, + qcow_idx, private); + } + } + else aio_unlock(s, sector); + } + nb_sectors -= n; + sector += n; + buf += n * 512; + } + s->cluster_cache_offset = -1; /* disable compressed cache */ + +done: + /*Callback if no async requests outstanding*/ + if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private); + + return 0; +} + +int tdqcow_submit(struct td_state *bs) +{ + int ret; + struct tdqcow_state *prv = (struct tdqcow_state *)bs->private; + + ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); + + /* XXX: TODO: Handle error conditions here. 
*/ + + /* Success case: */ + prv->iocb_queued = 0; + + return ret; +} + + +int *tdqcow_get_fd(struct td_state *bs) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + int *fds, i; + + fds = malloc(sizeof(int) * MAX_IOFD); + /*initialise the FD array*/ + for(i=0;i<MAX_IOFD;i++) fds[i] = 0; + + fds[0] = s->poll_fd; + return fds; +} + +int tdqcow_close(struct td_state *bs) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + uint32_t cksum, out; + int fd, offset; + + /*Update the hdr cksum*/ + if(s->min_cluster_alloc == s->l2_size) { + cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t)); + printf("Writing cksum: %d",cksum); + fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/ + offset = sizeof(QCowHeader) + sizeof(uint32_t); + lseek(fd, offset, SEEK_SET); + out = cpu_to_be32(cksum); + write(fd, &out, sizeof(uint32_t)); + close(fd); + } + + free(s->name); + free(s->l1_table); + free(s->l2_cache); + free(s->cluster_cache); + free(s->cluster_data); + close(s->fd); + return 0; +} + +int tdqcow_do_callbacks(struct td_state *s, int sid) +{ + int ret, i, rsp = 0,*ptr; + struct io_event *ep; + struct tdqcow_state *prv = (struct tdqcow_state *)s->private; + + if (sid > MAX_IOFD) return 1; + + /* Non-blocking test for completed io. */ + ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, + NULL); + + for (ep=prv->aio_events, i = ret; i-->0; ep++) { + struct iocb *io = ep->obj; + struct pending_aio *pio; + + pio = &prv->pending_aio[(long)io->data]; + + if (ep->res != io->u.c.nbytes) { + /* TODO: handle this case better. 
*/ + ptr = (int *)&ep->res; + DPRINTF("AIO did less than I asked it to " + "[%lu,%lu,%d]\n", + ep->res, io->u.c.nbytes, *ptr); + } + aio_unlock(prv, pio->sector); + if (pio->id >= 0) { + if (prv->crypt_method) + encrypt_sectors(prv, pio->sector, + (unsigned char *)pio->buf, + (unsigned char *)pio->buf, + pio->nb_sectors, 0, + &prv->aes_decrypt_key); + prv->nr_reqs[pio->qcow_idx]--; + if (prv->nr_reqs[pio->qcow_idx] == 0) + rsp += pio->cb(s, ep->res2, pio->id, + pio->private); + } else if (pio->id == -2) free(pio->buf); + + prv->iocb_free[prv->iocb_free_count++] = io; + } + return rsp; +} + +int qcow_create(const char *filename, uint64_t total_size, + const char *backing_file, int flags) +{ + int fd, header_size, backing_filename_len, l1_size, i; + int shift, length, adjust, ret = 0; + QCowHeader header; + QCowHeader_ext exthdr; + char backing_filename[1024], *ptr; + uint64_t tmp, size; + struct stat st; + + DPRINTF("Qcow_create: size %llu\n",(long long unsigned)total_size); + + fd = open(filename, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, + 0644); + if (fd < 0) + return -1; + + memset(&header, 0, sizeof(header)); + header.magic = cpu_to_be32(QCOW_MAGIC); + header.version = cpu_to_be32(QCOW_VERSION); + + /*Create extended header fields*/ + exthdr.xmagic = cpu_to_be32(XEN_MAGIC); + + header_size = sizeof(header) + sizeof(QCowHeader_ext); + backing_filename_len = 0; + size = (total_size >> SECTOR_SHIFT); + if (backing_file) { + if (strcmp(backing_file, "fat:")) { + const char *p; + /* XXX: this is a hack: we do not attempt to + *check for URL like syntax */ + p = strchr(backing_file, ':'); + if (p && (p - backing_file) >= 2) { + /* URL like but exclude "c:" like filenames */ + strncpy(backing_filename, backing_file, + sizeof(backing_filename)); + } else { + realpath(backing_file, backing_filename); + if (stat(backing_filename, &st) != 0) { + return -1; + } + } + header.backing_file_offset = cpu_to_be64(header_size); + backing_filename_len = 
strlen(backing_filename); + header.backing_file_size = cpu_to_be32( + backing_filename_len); + header_size += backing_filename_len; + + /*Set to the backing file size*/ + size = (st.st_size >> SECTOR_SHIFT); + DPRINTF("Backing file size detected: %lld sectors" + "(total %lld [%lld MB])\n", + (long long)total_size, + (long long)(total_size << SECTOR_SHIFT), + (long long)(total_size >> 11)); + } else { + backing_file = NULL; + DPRINTF("Setting file size: %lld (total %lld)\n", + (long long) total_size, + (long long) (total_size << SECTOR_SHIFT)); + } + header.mtime = cpu_to_be32(st.st_mtime); + header.cluster_bits = 9; /* 512 byte cluster to avoid copying + unmodifyed sectors */ + header.l2_bits = 12; /* 32 KB L2 tables */ + exthdr.min_cluster_alloc = cpu_to_be32(1); + } else { + DPRINTF("Setting file size: %lld sectors" + "(total %lld [%lld MB])\n", + (long long) size, + (long long) (size << SECTOR_SHIFT), + (long long) (size >> 11)); + header.cluster_bits = 12; /* 4 KB clusters */ + header.l2_bits = 9; /* 4 KB L2 tables */ + exthdr.min_cluster_alloc = cpu_to_be32(1 << 9); + } + /*Set the header size value*/ + header.size = cpu_to_be64(size * 512); + + header_size = (header_size + 7) & ~7; + if (header_size % 4096 > 0) { + header_size = ((header_size >> 12) + 1) << 12; + } + + shift = header.cluster_bits + header.l2_bits; + l1_size = ((size * 512) + (1LL << shift) - 1) >> shift; + + header.l1_table_offset = cpu_to_be64(header_size); + DPRINTF("L1 Table offset: %d, size %d\n", + header_size, + (int)(l1_size * sizeof(uint64_t))); + if (flags) { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + } else { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + } + + ptr = calloc(1, l1_size * sizeof(uint64_t)); + exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t))); + printf("Created cksum: %d\n",exthdr.cksum); + free(ptr); + + /* write all the data */ + ret += write(fd, &header, sizeof(header)); + ret += write(fd, &exthdr, sizeof(exthdr)); + if 
(backing_file) { + ret += write(fd, backing_filename, backing_filename_len); + } + lseek(fd, header_size, SEEK_SET); + tmp = 0; + for (i = 0;i < l1_size; i++) { + ret += write(fd, &tmp, sizeof(tmp)); + } + + /*adjust file length to 4 KByte boundary*/ + length = header_size + l1_size * sizeof(uint64_t); + if (length % 4096 > 0) { + length = ((length >> 12) + 1) << 12; + ftruncate(fd, length); + DPRINTF("Adjusted filelength to %d for 4 " + "Kbyte alignment\n",length); + } + + close(fd); + + return 0; +} + +int qcow_make_empty(struct td_state *bs) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + uint32_t l1_length = s->l1_size * sizeof(uint64_t); + + memset(s->l1_table, 0, l1_length); + lseek(s->fd, s->l1_table_offset, SEEK_SET); + if (write(s->fd, s->l1_table, l1_length) < 0) + return -1; + ftruncate(s->fd, s->l1_table_offset + l1_length); + + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); + + return 0; +} + +int qcow_get_cluster_size(struct td_state *bs) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + + return s->cluster_size; +} + +/* XXX: put compressed sectors first, then all the cluster aligned + tables to avoid losing bytes in alignment */ +int qcow_compress_cluster(struct td_state *bs, int64_t sector_num, + const uint8_t *buf) +{ + struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + z_stream strm; + int ret, out_len; + uint8_t *out_buf; + uint64_t cluster_offset; + + out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128); + if (!out_buf) + return -1; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, -12, + 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + free(out_buf); + return -1; + } + + strm.avail_in = s->cluster_size; + strm.next_in = 
(uint8_t *)buf; + strm.avail_out = s->cluster_size; + strm.next_out = out_buf; + + ret = deflate(&strm, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) { + free(out_buf); + deflateEnd(&strm); + return -1; + } + out_len = strm.next_out - out_buf; + + deflateEnd(&strm); + + if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + /* could not compress: write normal cluster */ + //tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors); + } else { + cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, + out_len, 0, 0); + cluster_offset &= s->cluster_offset_mask; + lseek(s->fd, cluster_offset, SEEK_SET); + if (write(s->fd, out_buf, out_len) != out_len) { + free(out_buf); + return -1; + } + } + + free(out_buf); + return 0; +} + +struct tap_disk tapdisk_qcow = { + "tapdisk_qcow", + sizeof(struct tdqcow_state), + tdqcow_open, + tdqcow_queue_read, + tdqcow_queue_write, + tdqcow_submit, + tdqcow_get_fd, + tdqcow_close, + tdqcow_do_callbacks, +}; + diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/block-ram.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/block-ram.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,296 @@ +/* block-ram.c + * + * Fast Ramdisk implementation. 
+ * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include "tapdisk.h" + +#define MAX_DISK_SIZE 1024000 /*500MB disk limit*/ + +char *img; +long int disksector_size; +long int disksize; +long int diskinfo; +static int connections = 0; + +struct tdram_state { + int fd; + int poll_pipe[2]; /* dummy fd for polling on */ +}; + +/*Get Image size, secsize*/ +static int get_image_info(struct td_state *s, int fd) +{ + int ret; + long size; + unsigned long total_size; + struct statvfs statBuf; + struct stat stat; + + ret = fstat(fd, &stat); + if (ret != 0) { + DPRINTF("ERROR: fstat failed, Couldn't stat image"); + return -EINVAL; + } + + if (S_ISBLK(stat.st_mode)) { + /*Accessing block device directly*/ + s->size = 0; + if (ioctl(fd,BLKGETSIZE,&s->size)!=0) { + DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image"); + return -EINVAL; + } + + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + + /*Get the sector size*/ +#if defined(BLKSSZGET) + { + int arg; + s->sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &s->sector_size); + + if (s->sector_size != DEFAULT_SECTOR_SIZE) + DPRINTF("Note: sector size is %ld (not %d)\n", + s->sector_size, DEFAULT_SECTOR_SIZE); + } +#else + s->sector_size = DEFAULT_SECTOR_SIZE; +#endif + + } else { + /*Local file? 
try fstat instead*/ + s->size = (stat.st_size >> SECTOR_SHIFT); + s->sector_size = DEFAULT_SECTOR_SIZE; + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + } + + if (s->size == 0) { + s->size =((uint64_t) MAX_DISK_SIZE); + s->sector_size = DEFAULT_SECTOR_SIZE; + } + s->info = 0; + + /*Store variables locally*/ + disksector_size = s->sector_size; + disksize = s->size; + diskinfo = s->info; + DPRINTF("Image sector_size: \n\t[%lu]\n", + s->sector_size); + + return 0; +} + +/* Open the disk file and initialize ram state. */ +int tdram_open (struct td_state *s, const char *name) +{ + int i, fd, ret = 0, count = 0; + struct tdram_state *prv = (struct tdram_state *)s->private; + uint64_t size; + char *p; + s->private = prv; + + connections++; + + /* set up a pipe so that we can hand back a poll fd that won't fire.*/ + ret = pipe(prv->poll_pipe); + if (ret != 0) + return (0 - errno); + + if (connections > 1) { + s->sector_size = disksector_size; + s->size = disksize; + s->info = diskinfo; + DPRINTF("Image already open, returning parameters:\n"); + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + DPRINTF("Image sector_size: \n\t[%lu]\n", + s->sector_size); + + prv->fd = -1; + goto done; + } + + /* Open the file */ + fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); + + if ((fd == -1) && (errno == EINVAL)) { + + /* Maybe O_DIRECT isn't supported. */ + fd = open(name, O_RDWR | O_LARGEFILE); + if (fd != -1) DPRINTF("WARNING: Accessing image without" + "O_DIRECT! 
(%s)\n", name); + + } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name); + + if (fd == -1) { + DPRINTF("Unable to open [%s]!\n",name); + ret = 0 - errno; + goto done; + } + + prv->fd = fd; + + ret = get_image_info(s, fd); + size = MAX_DISK_SIZE; + + if (s->size > size) { + DPRINTF("Disk exceeds limit, must be less than [%d]MB", + (MAX_DISK_SIZE<<SECTOR_SHIFT)>>20); + return -ENOMEM; + } + + /*Read the image into memory*/ + p = img = malloc(s->size << SECTOR_SHIFT); + if (img == NULL) { + DPRINTF("Mem malloc failed\n"); + return -1; + } + DPRINTF("Reading %llu bytes.......",(long long unsigned)s->size << SECTOR_SHIFT); + + for (i = 0; i < s->size; i++) { + ret = read(prv->fd, p, s->sector_size); + if (ret != s->sector_size) { + ret = 0 - errno; + break; + } else { + count += ret; + p = img + count; + } + } + DPRINTF("[%d]\n",count); + if (count != s->size << SECTOR_SHIFT) { + ret = -1; + } else { + ret = 0; + } + +done: + return ret; +} + + int tdram_queue_read(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdram_state *prv = (struct tdram_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + int ret; + + memcpy(buf, img + offset, size); + ret = size; + + cb(s, (ret < 0) ? ret: 0, id, private); + + return ret; +} + + int tdram_queue_write(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdram_state *prv = (struct tdram_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + int ret; + + /*We assume that write access is controlled at a higher level for multiple disks*/ + memcpy(img + offset, buf, size); + ret = size; + + cb(s, (ret < 0) ? 
ret : 0, id, private); + + return ret; +} + +int tdram_submit(struct td_state *s) +{ + return 0; +} + + +int *tdram_get_fd(struct td_state *s) +{ + struct tdram_state *prv = (struct tdram_state *)s->private; + int *fds, i; + + fds = malloc(sizeof(int) * MAX_IOFD); + /*initialise the FD array*/ + for(i=0;i<MAX_IOFD;i++) fds[i] = 0; + + fds[0] = prv->poll_pipe[0]; + return fds; +} + +int tdram_close(struct td_state *s) +{ + struct tdram_state *prv = (struct tdram_state *)s->private; + + connections--; + + return 0; +} + +int tdram_do_callbacks(struct td_state *s, int sid) +{ + /* always ask for a kick */ + return 1; +} + +struct tap_disk tapdisk_ram = { + "tapdisk_ram", + sizeof(struct tdram_state), + tdram_open, + tdram_queue_read, + tdram_queue_write, + tdram_submit, + tdram_get_fd, + tdram_close, + tdram_do_callbacks, +}; + diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/block-sync.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/block-sync.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,242 @@ +/* block-sync.c + * + * simple slow synchronous raw disk implementation. 
+ * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include "tapdisk.h" + +struct tdsync_state { + int fd; + int poll_pipe[2]; /* dummy fd for polling on */ +}; + +/*Get Image size, secsize*/ +static int get_image_info(struct td_state *s, int fd) +{ + int ret; + long size; + unsigned long total_size; + struct statvfs statBuf; + struct stat stat; + + ret = fstat(fd, &stat); + if (ret != 0) { + DPRINTF("ERROR: fstat failed, Couldn't stat image"); + return -EINVAL; + } + + if (S_ISBLK(stat.st_mode)) { + /*Accessing block device directly*/ + s->size = 0; + if (ioctl(fd,BLKGETSIZE,&s->size)!=0) { + DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image"); + return -EINVAL; + } + + DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + + /*Get the sector size*/ +#if defined(BLKSSZGET) + { + int arg; + s->sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &s->sector_size); + + if (s->sector_size != DEFAULT_SECTOR_SIZE) + DPRINTF("Note: sector size is %ld (not %d)\n", + s->sector_size, DEFAULT_SECTOR_SIZE); + } +#else + s->sector_size = DEFAULT_SECTOR_SIZE; +#endif + + } else { + /*Local file? try fstat instead*/ + s->size = (stat.st_size >> SECTOR_SHIFT); + s->sector_size = DEFAULT_SECTOR_SIZE; + DPRINTF("Image size: \n\tpre sector_shift [%lluu]\n\tpost " + "sector_shift [%lluu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + } + + if (s->size == 0) + return -EINVAL; + + s->info = 0; + + return 0; +} + +/* Open the disk file and initialize aio state. 
*/ +int tdsync_open (struct td_state *s, const char *name) +{ + int i, fd, ret = 0; + struct tdsync_state *prv = (struct tdsync_state *)s->private; + s->private = prv; + + /* set up a pipe so that we can hand back a poll fd that won't fire.*/ + ret = pipe(prv->poll_pipe); + if (ret != 0) + return (0 - errno); + + /* Open the file */ + fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); + + if ( (fd == -1) && (errno == EINVAL) ) { + + /* Maybe O_DIRECT isn't supported. */ + fd = open(name, O_RDWR | O_LARGEFILE); + if (fd != -1) DPRINTF("WARNING: Accessing image without" + "O_DIRECT! (%s)\n", name); + + } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name); + + if (fd == -1) { + DPRINTF("Unable to open [%s]!\n",name); + ret = 0 - errno; + goto done; + } + + prv->fd = fd; + + ret = get_image_info(s, fd); +done: + return ret; +} + + int tdsync_queue_read(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdsync_state *prv = (struct tdsync_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + int ret; + + ret = lseek(prv->fd, offset, SEEK_SET); + if (ret != (off_t)-1) { + ret = read(prv->fd, buf, size); + if (ret != size) { + ret = 0 - errno; + } else { + ret = 1; + } + } else ret = 0 - errno; + + cb(s, (ret < 0) ? ret: 0, id, private); + + return 1; +} + + int tdsync_queue_write(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdsync_state *prv = (struct tdsync_state *)s->private; + int size = nb_sectors * s->sector_size; + uint64_t offset = sector * (uint64_t)s->sector_size; + int ret = 0; + + ret = lseek(prv->fd, offset, SEEK_SET); + if (ret != (off_t)-1) { + ret = write(prv->fd, buf, size); + if (ret != size) { + ret = 0 - errno; + } else { + ret = 1; + } + } else ret = 0 - errno; + + cb(s, (ret < 0) ? 
ret : 0, id, private); + + return 1; +} + +int tdsync_submit(struct td_state *s) +{ + return 0; +} + + +int *tdsync_get_fd(struct td_state *s) +{ + struct tdsync_state *prv = (struct tdsync_state *)s->private; + + int *fds, i; + + fds = malloc(sizeof(int) * MAX_IOFD); + /*initialise the FD array*/ + for(i=0;i<MAX_IOFD;i++) fds[i] = 0; + + fds[0] = prv->poll_pipe[0]; + return fds; +} + +int tdsync_close(struct td_state *s) +{ + struct tdsync_state *prv = (struct tdsync_state *)s->private; + + close(prv->fd); + close(prv->poll_pipe[0]); + close(prv->poll_pipe[1]); + + return 0; +} + +int tdsync_do_callbacks(struct td_state *s, int sid) +{ + /* always ask for a kick */ + return 1; +} + +struct tap_disk tapdisk_sync = { + "tapdisk_sync", + sizeof(struct tdsync_state), + tdsync_open, + tdsync_queue_read, + tdsync_queue_write, + tdsync_submit, + tdsync_get_fd, + tdsync_close, + tdsync_do_callbacks, +}; + diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/block-vmdk.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/block-vmdk.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,415 @@ +/* block-vmdk.c + * + * VMware Disk format implementation. + * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + * This is largely the same as the vmdk driver in Qemu, I've just twisted it + * to match our interfaces. 
The original (BSDish) Copyright message appears + * below: + */ + +/* + * Block driver for the VMDK format + * + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2005 Filip Navara + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include "tapdisk.h" +#include "bswap.h" + +#define safer_free(_x) \ + do { \ + if (NULL != _x) { \ + free(_x); \ + (_x) = NULL; \ + } \ + } while (0) ; + +#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') +#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') + +typedef struct { + uint32_t version; + uint32_t flags; + uint32_t disk_sectors; + uint32_t granularity; + uint32_t l1dir_offset; + uint32_t l1dir_size; + uint32_t file_sectors; + uint32_t cylinders; + uint32_t heads; + uint32_t sectors_per_track; +} VMDK3Header; + +typedef struct { + uint32_t version; + uint32_t flags; + int64_t capacity; + int64_t granularity; + int64_t desc_offset; + int64_t desc_size; + int32_t num_gtes_per_gte; + int64_t rgd_offset; + int64_t gd_offset; + int64_t grain_offset; + char filler[1]; + char check_bytes[4]; +} __attribute__((packed)) VMDK4Header; + +#define L2_CACHE_SIZE 16 + +struct tdvmdk_state { + int fd; + int poll_pipe[2]; /* dummy fd for polling on */ + + unsigned int l1_size; + int64_t l1_table_offset; + int64_t l1_backup_table_offset; + uint32_t l1_entry_sectors; + unsigned int l2_size; + + uint32_t *l1_table; + uint32_t *l1_backup_table; + uint32_t *l2_cache; + uint32_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + + unsigned int cluster_sectors; +}; + + +/* Open the disk file and initialize aio state. 
*/ +static int tdvmdk_open (struct td_state *s, const char *name) +{ + int ret, fd; + int l1_size, i; + uint32_t magic; + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + + /* set up a pipe so that we can hand back a poll fd that won't fire.*/ + ret = pipe(prv->poll_pipe); + if (ret != 0) + return -1; + + /* Open the file */ + fd = open(name, O_RDWR | O_LARGEFILE); + + if ( (fd == -1) && (errno == EINVAL) ) { + + /* Maybe O_DIRECT isn't supported. */ + fd = open(name, O_RDWR | O_LARGEFILE); + if (fd != -1) DPRINTF("WARNING: Accessing image without" + "O_DIRECT! (%s)\n", name); + + } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name); + + if (fd == -1) { + DPRINTF("Unable to open [%s]!\n",name); + ret = 0 - errno; + return -1; + } + + prv->fd = fd; + + /* Grok the vmdk header. */ + if ((ret = read(fd, &magic, sizeof(magic))) != sizeof(magic)) + goto fail; + magic = be32_to_cpu(magic); + if (magic == VMDK3_MAGIC) { + VMDK3Header header; + if (read(fd, &header, sizeof(header)) != + sizeof(header)) + goto fail; + prv->cluster_sectors = le32_to_cpu(header.granularity); + prv->l2_size = 1 << 9; + prv->l1_size = 1 << 6; + s->size = le32_to_cpu(header.disk_sectors); + prv->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9; + prv->l1_backup_table_offset = 0; + prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors; + } else if (magic == VMDK4_MAGIC) { + VMDK4Header header; + + if (read(fd, &header, sizeof(header)) != sizeof(header)) + goto fail; + s->size = le32_to_cpu(header.capacity); + prv->cluster_sectors = le32_to_cpu(header.granularity); + prv->l2_size = le32_to_cpu(header.num_gtes_per_gte); + prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors; + if (prv->l1_entry_sectors <= 0) + goto fail; + prv->l1_size = (s->size + prv->l1_entry_sectors - 1) + / prv->l1_entry_sectors; + prv->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; + prv->l1_backup_table_offset = + le64_to_cpu(header.gd_offset) << 9; + } else { + goto fail; 
+ } + /* read the L1 table */ + l1_size = prv->l1_size * sizeof(uint32_t); + prv->l1_table = malloc(l1_size); + if (!prv->l1_table) + goto fail; + if (lseek(fd, prv->l1_table_offset, SEEK_SET) == -1) + goto fail; + if (read(fd, prv->l1_table, l1_size) != l1_size) + goto fail; + for (i = 0; i < prv->l1_size; i++) { + le32_to_cpus(&prv->l1_table[i]); + } + + if (prv->l1_backup_table_offset) { + prv->l1_backup_table = malloc(l1_size); + if (!prv->l1_backup_table) + goto fail; + if (lseek(fd, prv->l1_backup_table_offset, SEEK_SET) == -1) + goto fail; + if (read(fd, prv->l1_backup_table, l1_size) != l1_size) + goto fail; + for(i = 0; i < prv->l1_size; i++) { + le32_to_cpus(&prv->l1_backup_table[i]); + } + } + + prv->l2_cache = malloc(prv->l2_size * L2_CACHE_SIZE *sizeof(uint32_t)); + if (!prv->l2_cache) + goto fail; + prv->fd = fd; + DPRINTF("VMDK File opened successfully\n"); + return 0; + +fail: + DPRINTF("VMDK File open failed.\n"); + safer_free(prv->l1_backup_table); + free(prv->l1_table); + free(prv->l2_cache); + close(fd); + return -1; +} + +static uint64_t get_cluster_offset(struct td_state *s, + uint64_t offset, int allocate) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + unsigned int l1_index, l2_offset, l2_index; + int min_index, i, j; + uint32_t min_count, *l2_table, tmp; + uint64_t cluster_offset; + + l1_index = (offset >> 9) / prv->l1_entry_sectors; + if (l1_index >= prv->l1_size) + return 0; + l2_offset = prv->l1_table[l1_index]; + if (!l2_offset) + return 0; + for (i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == prv->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++prv->l2_cache_counts[i] == 0xffffffff) { + for(j = 0; j < L2_CACHE_SIZE; j++) { + prv->l2_cache_counts[j] >>= 1; + } + } + l2_table = prv->l2_cache + (i * prv->l2_size); + goto found; + } + } + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for (i = 0; i < L2_CACHE_SIZE; i++) { + if 
(prv->l2_cache_counts[i] < min_count) { + min_count = prv->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = prv->l2_cache + (min_index * prv->l2_size); + lseek(prv->fd, (int64_t)l2_offset * 512, SEEK_SET); + if (read(prv->fd, l2_table, prv->l2_size * sizeof(uint32_t)) != + prv->l2_size * sizeof(uint32_t)) + return 0; + prv->l2_cache_offsets[min_index] = l2_offset; + prv->l2_cache_counts[min_index] = 1; + found: + l2_index = ((offset >> 9) / prv->cluster_sectors) % prv->l2_size; + cluster_offset = le32_to_cpu(l2_table[l2_index]); + if (!cluster_offset) { + if (!allocate) + return 0; + cluster_offset = lseek(prv->fd, 0, SEEK_END); + ftruncate(prv->fd, cluster_offset + + (prv->cluster_sectors << 9)); + cluster_offset >>= 9; + /* update L2 table */ + tmp = cpu_to_le32(cluster_offset); + l2_table[l2_index] = tmp; + lseek(prv->fd, ((int64_t)l2_offset * 512) + + (l2_index * sizeof(tmp)), SEEK_SET); + if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp)) + return 0; + /* update backup L2 table */ + if (prv->l1_backup_table_offset != 0) { + l2_offset = prv->l1_backup_table[l1_index]; + lseek(prv->fd, ((int64_t)l2_offset * 512) + + (l2_index * sizeof(tmp)), SEEK_SET); + if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp)) + return 0; + } + } + cluster_offset <<= 9; + return cluster_offset; +} + +static int tdvmdk_queue_read(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + int index_in_cluster, n; + uint64_t cluster_offset; + int ret = 0; + while (nb_sectors > 0) { + cluster_offset = get_cluster_offset(s, sector << 9, 0); + index_in_cluster = sector % prv->cluster_sectors; + n = prv->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + if (!cluster_offset) { + memset(buf, 0, 512 * n); + } else { + lseek(prv->fd, cluster_offset + index_in_cluster * 512, + SEEK_SET); + ret = read(prv->fd, buf, n * 512); + if (ret != 
n * 512) { + ret = -1; + goto done; + } + } + nb_sectors -= n; + sector += n; + buf += n * 512; + } +done: + cb(s, ret == -1 ? -1 : 0, id, private); + + return 1; +} + +static int tdvmdk_queue_write(struct td_state *s, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + int index_in_cluster, n; + uint64_t cluster_offset; + int ret = 0; + + + while (nb_sectors > 0) { + index_in_cluster = sector & (prv->cluster_sectors - 1); + n = prv->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + cluster_offset = get_cluster_offset(s, sector << 9, 1); + if (!cluster_offset) { + ret = -1; + goto done; + } + lseek(prv->fd, cluster_offset + index_in_cluster * 512, + SEEK_SET); + ret = write(prv->fd, buf, n * 512); + if (ret != n * 512) { + ret = -1; + goto done; + } + nb_sectors -= n; + sector += n; + buf += n * 512; + } +done: + cb(s, ret == -1 ? -1 : 0, id, private); + + return 1; +} + +static int tdvmdk_submit(struct td_state *s) +{ + return 0; +} + + +static int *tdvmdk_get_fd(struct td_state *s) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + int *fds, i; + + fds = malloc(sizeof(int) * MAX_IOFD); + /*initialise the FD array*/ + for (i=0;i<MAX_IOFD;i++) fds[i] = 0; + + fds[0] = prv->poll_pipe[0]; + return fds; +} + +static int tdvmdk_close(struct td_state *s) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + + safer_free(prv->l1_table); + safer_free(prv->l1_backup_table); + safer_free(prv->l2_cache); + close(prv->fd); + close(prv->poll_pipe[0]); + close(prv->poll_pipe[1]); + return 0; +} + +static int tdvmdk_do_callbacks(struct td_state *s, int sid) +{ + /* always ask for a kick */ + return 1; +} + +struct tap_disk tapdisk_vmdk = { + "tapdisk_vmdk", + sizeof(struct tdvmdk_state), + tdvmdk_open, + tdvmdk_queue_read, + tdvmdk_queue_write, + tdvmdk_submit, + tdvmdk_get_fd, + tdvmdk_close, + 
tdvmdk_do_callbacks, +}; + diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/bswap.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/bswap.h Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,202 @@ +#ifndef BSWAP_H +#define BSWAP_H + +//#include "config-host.h" + +#include <inttypes.h> + +#ifdef HAVE_BYTESWAP_H +#include <byteswap.h> +#else + +#define bswap_16(x) \ +({ \ + uint16_t __x = (x); \ + ((uint16_t)( \ + (((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \ + (((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \ +}) + +#define bswap_32(x) \ +({ \ + uint32_t __x = (x); \ + ((uint32_t)( \ + (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \ +}) + +#define bswap_64(x) \ +({ \ + uint64_t __x = (x); \ + ((uint64_t)( \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \ +}) + +#endif /* !HAVE_BYTESWAP_H */ + +static inline uint16_t bswap16(uint16_t x) +{ + return bswap_16(x); +} + +static inline uint32_t bswap32(uint32_t x) +{ + return bswap_32(x); +} + +static inline uint64_t bswap64(uint64_t x) +{ + return bswap_64(x); +} + +static inline void bswap16s(uint16_t *s) +{ + *s = bswap16(*s); +} + +static inline void bswap32s(uint32_t *s) +{ + *s = bswap32(*s); +} + +static inline void bswap64s(uint64_t *s) +{ + *s = bswap64(*s); +} 
+ +#if defined(WORDS_BIGENDIAN) +#define be_bswap(v, size) (v) +#define le_bswap(v, size) bswap ## size(v) +#define be_bswaps(v, size) +#define le_bswaps(p, size) *p = bswap ## size(*p); +#else +#define le_bswap(v, size) (v) +#define be_bswap(v, size) bswap ## size(v) +#define le_bswaps(v, size) +#define be_bswaps(p, size) *p = bswap ## size(*p); +#endif + +#define CPU_CONVERT(endian, size, type)\ +static inline type endian ## size ## _to_cpu(type v)\ +{\ + return endian ## _bswap(v, size);\ +}\ +\ +static inline type cpu_to_ ## endian ## size(type v)\ +{\ + return endian ## _bswap(v, size);\ +}\ +\ +static inline void endian ## size ## _to_cpus(type *p)\ +{\ + endian ## _bswaps(p, size)\ +}\ +\ +static inline void cpu_to_ ## endian ## size ## s(type *p)\ +{\ + endian ## _bswaps(p, size)\ +}\ +\ +static inline type endian ## size ## _to_cpup(const type *p)\ +{\ + return endian ## size ## _to_cpu(*p);\ +}\ +\ +static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\ +{\ + *p = cpu_to_ ## endian ## size(v);\ +} + +CPU_CONVERT(be, 16, uint16_t) +CPU_CONVERT(be, 32, uint32_t) +CPU_CONVERT(be, 64, uint64_t) + +CPU_CONVERT(le, 16, uint16_t) +CPU_CONVERT(le, 32, uint32_t) +CPU_CONVERT(le, 64, uint64_t) + +/* unaligned versions (optimized for frequent unaligned accesses)*/ + +#if defined(__i386__) || defined(__powerpc__) + +#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v) +#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v) +#define le16_to_cpupu(p) le16_to_cpup(p) +#define le32_to_cpupu(p) le32_to_cpup(p) + +#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v) +#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v) + +#else + +static inline void cpu_to_le16wu(uint16_t *p, uint16_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v; + p1[1] = v >> 8; +} + +static inline void cpu_to_le32wu(uint32_t *p, uint32_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v; + p1[1] = v >> 8; + p1[2] = v >> 16; + p1[3] = v >> 24; +} + +static inline uint16_t le16_to_cpupu(const uint16_t *p) +{ + 
const uint8_t *p1 = (const uint8_t *)p; + return p1[0] | (p1[1] << 8); +} + +static inline uint32_t le32_to_cpupu(const uint32_t *p) +{ + const uint8_t *p1 = (const uint8_t *)p; + return p1[0] | (p1[1] << 8) | (p1[2] << 16) | (p1[3] << 24); +} + +static inline void cpu_to_be16wu(uint16_t *p, uint16_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v >> 8; + p1[1] = v; +} + +static inline void cpu_to_be32wu(uint32_t *p, uint32_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v >> 24; + p1[1] = v >> 16; + p1[2] = v >> 8; + p1[3] = v; +} + +#endif + +#ifdef WORDS_BIGENDIAN +#define cpu_to_32wu cpu_to_be32wu +#else +#define cpu_to_32wu cpu_to_le32wu +#endif + +#undef le_bswap +#undef be_bswap +#undef le_bswaps +#undef be_bswaps + +#endif /* BSWAP_H */ diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/img2qcow.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/img2qcow.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,289 @@ +/* img2qcow.c + * + * Generates a qcow format disk and fills it from an existing image. 
+ * + * (c) 2006 Julian Chesterfield and Andrew Warfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include "tapdisk.h" + +#if 1 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a ) +#else +#define DFPRINTF(_f, _a...) 
((void)0) +#endif + +#define TAPDISK 1 +#define BLOCK_PROCESSSZ 4096 + +static int maxfds, *io_fd, running = 1, complete = 0; +static int returned_events = 0, submit_events = 0; +static uint64_t prev = 0; +static char output[25]; + +void print_bytes(void *ptr, int length) { + + int i,k; + unsigned char *p = ptr; + + DFPRINTF("Buf dump, length %d:\n",length); + for (k = 0; k < length; k++) { + DFPRINTF("%x",*p); + *p++; + if(k % 16 == 0) DFPRINTF("\n"); + else if(k % 2 == 0) DFPRINTF(" "); + } + DFPRINTF("\n"); + return; +} + +void debug_output(uint64_t progress, uint64_t size) +{ + uint64_t blocks = size/20; + + /*Output progress every 5% */ + if (progress/blocks > prev) { + memcpy(output+prev+1,"=>",2); + prev++; + DFPRINTF("\r%s %llu%%", output, + (long long)(prev-1)*5); + } + return; +} + +static inline void LOCAL_FD_SET(fd_set *readfds) +{ + FD_SET(io_fd[0], readfds); + maxfds = io_fd[0] + 1; + + return; +} + +static int get_image_info(struct td_state *s, int fd) +{ + int ret; + long size; + unsigned long total_size; + struct statvfs statBuf; + struct stat stat; + + ret = fstat(fd, &stat); + if (ret != 0) { + DFPRINTF("ERROR: fstat failed, Couldn't stat image"); + return -EINVAL; + } + + if (S_ISBLK(stat.st_mode)) { + /*Accessing block device directly*/ + s->size = 0; + if (ioctl(fd,BLKGETSIZE,&s->size)!=0) { + DFPRINTF("ERR: BLKGETSIZE failed, " + "couldn't stat image"); + return -EINVAL; + } + + DFPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " + "sector_shift [%llu]\n", + (long long unsigned)(s->size << SECTOR_SHIFT), + (long long unsigned)s->size); + + /*Get the sector size*/ +#if defined(BLKSSZGET) + { + int arg; + s->sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &s->sector_size); + + if (s->sector_size != DEFAULT_SECTOR_SIZE) + DFPRINTF("Note: sector size is %ld (not %d)\n", + s->sector_size, DEFAULT_SECTOR_SIZE); + } +#else + s->sector_size = DEFAULT_SECTOR_SIZE; +#endif + + } else { + /*Local file? 
try fstat instead*/ + s->size = (stat.st_size >> SECTOR_SHIFT); + s->sector_size = DEFAULT_SECTOR_SIZE; + DFPRINTF("Image size: [%llu]\n", + (long long unsigned)s->size); + } + + return 0; +} + +static int send_responses(struct td_state *s, int res, int idx, void *private) +{ + if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res); + + returned_events++; + + free(private); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct tap_disk *drv; + struct td_state *s; + int ret = -1, fd, len; + fd_set readfds; + struct timeval timeout; + uint64_t i; + char *buf; + + if (argc != 3) { + fprintf(stderr, "Qcow-utils: v1.0.0\n"); + fprintf(stderr, "usage: %s <QCOW FILENAME> <SRC IMAGE>\n", + argv[0]); + exit(-1); + } + + s = malloc(sizeof(struct td_state)); + + /*Open image*/ + fd = open(argv[2], O_RDONLY | O_LARGEFILE); + + if (fd == -1) { + DFPRINTF("Unable to open [%s], (err %d)!\n",argv[2],0 - errno); + exit(-1); + } + + get_image_info(s, fd); + + /*Create qcow file*/ + ret = qcow_create(argv[1],s->size<<SECTOR_SHIFT,NULL,0); + + if (ret < 0) { + DFPRINTF("Unable to create QCOW file\n"); + exit(-1); + } else DFPRINTF("Qcow file created: size %llu sectors\n", + (long long unsigned)s->size); + + drv = &tapdisk_qcow; + s->private = malloc(drv->private_data_size); + + /*Open qcow file*/ + if (drv->td_open(s, argv[1])!=0) { + DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]); + exit(-1); + } + + io_fd = drv->td_get_fd(s); + + /*Initialise the output string*/ + memset(output,0x20,25); + output[0] = '['; + output[22] = ']'; + output[23] = '\0'; + DFPRINTF("%s",output); + + i = 0; + while (running) { + timeout.tv_sec = 0; + + if (!complete) { + /*Read sector from image*/ + if (lseek(fd, i, SEEK_SET) == (off_t)-1) { + DFPRINTF("Unable to access file offset %llu\n", + (long long)i); + exit(-1); + } + + if( (ret = posix_memalign((void **)&buf, + BLOCK_PROCESSSZ, + BLOCK_PROCESSSZ)) != 0) { + DFPRINTF("Unable to read memalign buf (%d)\n",ret); + exit(-1); + } + + /*We 
attempt to read 4k sized blocks*/ + len = read(fd, buf, BLOCK_PROCESSSZ); + if (len < 512) { + DFPRINTF("Unable to read sector %llu\n", + (long long unsigned) (i >> 9)); + complete = 1; + continue; + } + + if (len % 512) { + len = (len >> 9) << 9; + } + + ret = drv->td_queue_write(s, i >> 9, + len >> 9, buf, + send_responses, 0, buf); + + if (!ret) submit_events++; + + if (ret < 0) { + DFPRINTF("UNABLE TO WRITE block [%llu]\n", + (long long unsigned) (i >> 9)); + } else i += len; + + if (i >> 9 == s->size) complete = 1; + + debug_output(i,s->size << 9); + + if ((submit_events % 10 == 0) || complete) + drv->td_submit(s); + timeout.tv_usec = 0; + + } else { + timeout.tv_usec = 1000; + if (!submit_events) running = 0; + } + + + /*Check AIO FD*/ + LOCAL_FD_SET(&readfds); + ret = select(maxfds + 1, &readfds, (fd_set *) 0, + (fd_set *) 0, &timeout); + + if (ret > 0) drv->td_do_callbacks(s, 0); + if (complete && (returned_events == submit_events)) + running = 0; + } + memcpy(output+prev+1,"=",1); + DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output); + drv->td_close(s); + free(s->private); + free(s); + + return 0; +} diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/qcow-create.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/qcow-create.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,80 @@ +/* qcow-create.c + * + * Generates a qcow format disk. 
+ * + * (c) 2006 Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include "tapdisk.h" + +#if 1 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a ) +#else +#define DFPRINTF(_f, _a...) 
((void)0) +#endif + + +int main(int argc, char *argv[]) +{ + int ret = -1; + uint64_t size; + + if ( (argc < 3) || (argc > 4) ) { + fprintf(stderr, "Qcow-utils: v1.0.0\n"); + fprintf(stderr, + "usage: %s <SIZE(MB)> <FILENAME> " + "[<BACKING_FILENAME>]\n", + argv[0]); + exit(-1); + } + + size = atoi(argv[1]); + size = size << 20; + DFPRINTF("Creating file size %llu\n",(long long unsigned)size); + switch(argc) { + case 3: + ret = qcow_create(argv[2],size,NULL,0); + break; + case 4: + ret = qcow_create(argv[2],size,argv[3],0); + break; + } + if (ret < 0) DPRINTF("Unable to create QCOW file\n"); + else DPRINTF("QCOW file successfully created\n"); + + return 0; +} diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/qcow2raw.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blktap/drivers/qcow2raw.c Thu Jul 13 10:13:26 2006 +0100 @@ -0,0 +1,346 @@ +/* qcow2raw.c + * + * Generates raw image data from an existing qcow image + * + * (c) 2006 Julian Chesterfield and Andrew Warfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/statvfs.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#include <string.h> +#include "tapdisk.h" + +#if 1 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a ) +#else +#define DFPRINTF(_f, _a...) ((void)0) +#endif + +#define TAPDISK 1 +#define BLOCK_PROCESSSZ 4096 + +static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0; +static int read_complete = 0, write_complete = 0; +static int returned_read_events = 0, returned_write_events = 0; +static int submit_events = 0; +static uint32_t read_idx = 0, write_idx = 0; +struct tap_disk *drv1, *drv2; +struct td_state *sqcow, *saio; +static uint64_t prev = 0, written = 0; +static char output[25]; + +void print_bytes(void *ptr, int length) { + + int i,k; + unsigned char *p = ptr; + + DFPRINTF("Buf dump, length %d:\n",length); + for (k = 0; k < length; k++) { + DFPRINTF("%x",*p); + *p++; + if (k % 16 == 0) DFPRINTF("\n"); + else if (k % 2 == 0) DFPRINTF(" "); + } + DFPRINTF("\n"); + return; +} + +void debug_output(uint64_t progress, uint64_t size) +{ + /*Output progress every 5% */ + uint64_t blocks = size/20; + + if (progress/blocks > prev) { + memcpy(output+prev+1,"=>",2); + prev++; + DFPRINTF("\r%s %llu%%", + output, (long long)((prev-1)*5)); + } + return; +} + +static inline void LOCAL_FD_SET(fd_set *readfds) +{ + FD_SET(qcowio_fd[0], readfds); + FD_SET(aio_fd[0], readfds); 
+ + maxfds = (qcowio_fd[0] > aio_fd[0] ? qcowio_fd[0] : aio_fd[0]) + 1; + + return; +} + +static int send_write_responses(struct td_state *s, int res, int idx, void *private) +{ + if (res < 0) { + DFPRINTF("AIO FAILURE: res [%d]!\n",res); + return 0; + } + written += BLOCK_PROCESSSZ; + returned_write_events++; + write_idx = idx; + if (complete && (returned_write_events == submit_events)) + write_complete = 1; + + debug_output(written, s->size << 9); + free(private); + return 0; +} + +static int send_read_responses(struct td_state *s, int res, int idx, void *private) +{ + int ret; + + if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res); + + returned_read_events++; + read_idx = idx; + if (complete && (returned_read_events == submit_events)) + read_complete = 1; + + ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private, + send_write_responses, idx, private); + if (ret != 0) { + DFPRINTF("ERROR in submitting queue write!\n"); + return 0; + } + + if ( (complete && returned_read_events == submit_events) || + (returned_read_events % 10 == 0) ) { + drv2->td_submit(saio); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret = -1, fd, len,input; + long int size; + fd_set readfds; + struct timeval timeout; + uint64_t i; + char *buf; + struct stat finfo; + + if (argc != 3) { + fprintf(stderr, "Qcow-utils: v1.0.0\n"); + fprintf(stderr, "usage: %s <Dest File descriptor> " + "<Qcow SRC IMAGE>\n", + argv[0]); + exit(-1); + } + + sqcow = malloc(sizeof(struct td_state)); + saio = malloc(sizeof(struct td_state)); + + /*Open qcow source file*/ + drv1 = &tapdisk_qcow; + sqcow->private = malloc(drv1->private_data_size); + + if (drv1->td_open(sqcow, argv[2])!=0) { + DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]); + exit(-1); + } else DFPRINTF("QCOW file opened, size %llu\n", + (long long unsigned)sqcow->size); + + qcowio_fd = drv1->td_get_fd(sqcow); + + /*Setup aio destination file*/ + ret = stat(argv[1],&finfo); + if (ret == -1) { + /*Check errno*/ + 
switch(errno) { + case ENOENT: + /*File doesn't exist, create*/ + fd = open(argv[1], + O_RDWR | O_LARGEFILE | O_CREAT, 0644); + if (fd < 0) { + DFPRINTF("ERROR creating file [%s] " + "(errno %d)\n", + argv[1], 0 - errno); + exit(-1); + } + if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { + DFPRINTF("Unable to create file " + "[%s] of size %llu (errno %d). " + "Exiting...\n", + argv[1], + (long long unsigned)sqcow->size<<9, + 0 - errno); + close(fd); + exit(-1); + } + close(fd); + break; + case ENXIO: + DFPRINTF("ERROR Device [%s] does not exist\n",argv[1]); + exit(-1); + default: + DFPRINTF("An error occurred opening Device [%s] " + "(errno %d)\n", + argv[1], 0 - errno); + exit(-1); + } + } else { + fprintf(stderr, "WARNING: All existing data in " + "%s will be overwritten.\nDo you wish to continue? " + "(y or n) ", + argv[1]); + if (getchar() != 'y') { + DFPRINTF("Exiting...\n"); + exit(-1); + } + + /*TODO - Test the existing file or device for adequate space*/ + fd = open(argv[1], O_RDWR | O_LARGEFILE); + if (fd < 0) { + DFPRINTF("ERROR: opening file [%s] (errno %d)\n", + argv[1], 0 - errno); + exit(-1); + } + + if (S_ISBLK(finfo.st_mode)) { + if(ioctl(fd,BLKGETSIZE,&size)!=0) { + DFPRINTF("ERROR: BLKGETSIZE failed, " + "couldn't stat image [%s]\n", + argv[1]); + close(fd); + exit(-1); + } + if (size < sqcow->size<<9) { + DFPRINTF("ERROR: Not enough space on device " + "%s (%lu bytes available, %llu bytes required\n", + argv[1], size, + (long long unsigned)sqcow->size<<9); + close(fd); + exit(-1); + } + } else { + if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { + DFPRINTF("Unable to create file " + "[%s] of size %llu (errno %d). 
" + "Exiting...\n", + argv[1], + (long long unsigned)sqcow->size<<9, + 0 - errno); + close(fd); + exit(-1); + } else DFPRINTF("File [%s] truncated to length %llu " + "(%llu)\n", + argv[1], + (long long unsigned)sqcow->size<<9, + (long long unsigned)sqcow->size); + } + close(fd); + } + + /*Open aio destination file*/ + drv2 = &tapdisk_aio; + saio->private = malloc(drv2->private_data_size); + + if (drv2->td_open(saio, argv[1])!=0) { + DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]); + exit(-1); + } + + aio_fd = drv2->td_get_fd(saio); + + /*Initialise the output string*/ + memset(output,0x20,25); + output[0] = '['; + output[22] = ']'; + output[23] = '\0'; + DFPRINTF("%s",output); + + i = 0; + while (running) { + timeout.tv_sec = 0; + + if (!complete) { + /*Read Pages from qcow image*/ + if ( (ret = posix_memalign((void **)&buf, + BLOCK_PROCESSSZ, + BLOCK_PROCESSSZ)) + != 0) { + DFPRINTF("Unable to alloc memory (%d)\n",ret); + exit(-1); + } + + /*Attempt to read 4k sized blocks*/ + ret = drv1->td_queue_read(sqcow, i>>9, + BLOCK_PROCESSSZ>>9, buf, + send_read_responses, i>>9, buf); + + if (ret < 0) { + DFPRINTF("UNABLE TO READ block [%llu]\n", + (long long unsigned)i); + exit(-1); + } else { + i += BLOCK_PROCESSSZ; + submit_events++; + } + + if (i >= sqcow->size<<9) { + complete = 1; + } + + if ((submit_events % 10 == 0) || complete) + drv1->td_submit(sqcow); + timeout.tv_usec = 0; + + } else { + timeout.tv_usec = 1000; + if (!submit_events) running = 0; + } + + + /*Check AIO FD*/ + LOCAL_FD_SET(&readfds); + ret = select(maxfds + 1, &readfds, (fd_set *) 0, + (fd_set *) 0, &timeout); + + if (ret > 0) { + if (FD_ISSET(qcowio_fd[0], &readfds)) + drv1->td_do_callbacks(sqcow, 0); + if (FD_ISSET(aio_fd[0], &readfds)) + drv2->td_do_callbacks(saio, 0); + } + if (complete && (returned_write_events == submit_events)) + running = 0; + } + memcpy(output+prev+1,"=",1); + DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output); + + return 0; +} diff -r af9809f51f81 -r 2937703f0ed0 
tools/blktap/drivers/tapdisk.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapdisk.c Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,774 @@
+/* tapdisk.c
+ *
+ * separate disk process, spawned by blktapctrl. Inherits code from driver
+ * plugins
+ *
+ * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
+ *
+ */
+
+#define MSG_SIZE 4096
+#define TAPDISK
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+#include <time.h>
+#include <err.h>
+#include <poll.h>
+#include <sys/statvfs.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include "blktaplib.h"
+#include "tapdisk.h"
+
+/* Log the failing expression and abort(). The do/while wrapper keeps the
+ * macro a single statement inside unbraced if/else. */
+#if 1
+#define ASSERT(_p) \
+	do { if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
+	__LINE__, __FILE__); abort(); } } while (0)
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define INPUT 0
+#define OUTPUT 1
+
+/* Number of entries in the dtypes[] table declared in tapdisk.h. */
+#define NR_DISK_TYPES ((int)(sizeof(dtypes) / sizeof(dtypes[0])))
+
+static int maxfds, fds[2];
+/* Written from sig_handler(), hence volatile sig_atomic_t. */
+static volatile sig_atomic_t run = 1;
+
+static pid_t process;
+int connected_disks = 0;
+fd_list_entry_t *fd_start = NULL;
+
+/* Print the command-line synopsis and exit with failure. */
+void usage(void)
+{
+	fprintf(stderr, "blktap-utils: v1.0.0\n");
+	fprintf(stderr, "usage: tapdisk <READ fifo> <WRITE fifo>\n");
+	exit(-1);
+}
+
+/* Fork once; the parent exits and the child carries on as the daemon. */
+void daemonize(void)
+{
+	pid_t pid;
+
+	if (getppid()==1) return; /* already a daemon */
+
+	pid = fork();
+	if (pid < 0) {
+		/* a failed fork() used to make the only process exit(0) */
+		perror("fork");
+		exit(-1);
+	}
+	if (pid != 0) exit(0);
+
+#if 0
+	int i;
+
+	/*Set new program session ID and close all descriptors*/
+	setsid();
+	for (i = getdtablesize(); i >= 0; --i) close(i);
+
+	/*Send all I/O to /dev/null */
+	i = open("/dev/null",O_RDWR);
+	dup(i);
+	dup(i);
+#endif
+	return;
+}
+
+/* Free a td_state and the buffers it owns; its fd-list entry is not touched. */
+static void state_free(struct td_state *s)
+{
+	if (s == NULL) return;
+
+	free(s->private);
+	free(s->blkif);
+	free(s->ring_info);
+	free(s);
+}
+
+/*
+ * Close the image, release the ring mapping and device fd, unlink the disk
+ * from the fd list and free @s together with everything it owns.
+ */
+static void unmap_disk(struct td_state *s)
+{
+	tapdev_info_t *info = s->ring_info;
+	struct tap_disk *drv = s->drv;
+	fd_list_entry_t *ptr, *prev, *next;
+
+	drv->td_close(s);
+
+	if (info != NULL) {
+		if (info->mem != NULL && info->mem != MAP_FAILED)
+			munmap(info->mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);
+		/* fd is -1 until map_new_dev() has opened the device */
+		if (info->fd >= 0)
+			close(info->fd);
+	}
+
+	ptr = s->fd_entry;
+	if (ptr != NULL) {
+		prev = ptr->prev;
+		next = ptr->next;
+		if (prev) prev->next = next;
+		else fd_start = next; /*We are the first entry in list*/
+		if (next) next->prev = prev;
+		free(ptr);
+	}
+
+	/* also releases s->private, which used to be leaked here */
+	state_free(s);
+}
+
+/* Stop the main loop, but only once no disks are connected. */
+void sig_handler(int sig)
+{
+	/*Received signal to close. If no disks are active, we close app.*/
+
+	if (connected_disks < 1) run = 0;
+}
+
+/* Add the fds of every mapped disk to @readfds and raise maxfds to match. */
+static inline int LOCAL_FD_SET(fd_set *readfds)
+{
+	fd_list_entry_t *ptr;
+	int i;
+
+	for (ptr = fd_start; ptr != NULL; ptr = ptr->next) {
+		/* tap_fd stays 0 until map_new_dev() has run */
+		if (!ptr->tap_fd) continue;
+
+		FD_SET(ptr->tap_fd, readfds);
+		if (ptr->tap_fd > maxfds) maxfds = ptr->tap_fd;
+
+		for (i = 0; i < MAX_IOFD; i++) {
+			if (ptr->io_fd[i] <= 0) continue;
+			FD_SET(ptr->io_fd[i], readfds);
+			if (ptr->io_fd[i] > maxfds) maxfds = ptr->io_fd[i];
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Append a list entry describing @s to the fd list. Returns the new entry,
+ * or NULL if it could not be allocated.
+ */
+static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+{
+	fd_list_entry_t *last, *entry;
+	int i;
+	DPRINTF("Adding fd_list_entry\n");
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL) return NULL;
+
+	entry->cookie = 0;
+	entry->tap_fd = tap_fd;
+	for (i = 0; i < MAX_IOFD; i++) entry->io_fd[i] = io_fd[i];
+	entry->s = s;
+	entry->prev = NULL;
+	entry->next = NULL;
+	s->fd_entry = entry;
+
+	/*Add to linked list*/
+	if (fd_start == NULL) {
+		/*We are the first entry*/
+		fd_start = entry;
+		return entry;
+	}
+
+	last = fd_start;
+	while (last->next != NULL) last = last->next;
+	last->next = entry;
+	entry->prev = last;
+
+	return entry;
+}
+
+/* Find the disk state registered under @cookie, or NULL. */
+static inline struct td_state *get_state(int cookie)
+{
+	fd_list_entry_t *ptr;
+
+	ptr = fd_start;
+	while (ptr != NULL) {
+		if (ptr->cookie == cookie) return ptr->s;
+		ptr = ptr->next;
+	}
+	return NULL;
+}
+
+/* Return the driver for @drivertype, or NULL if the type is out of range. */
+static struct tap_disk *get_driver(int drivertype)
+{
+	/* blktapctrl has passed us the driver type */
+	if (drivertype < 0 || drivertype >= NR_DISK_TYPES)
+		return NULL;
+
+	return dtypes[drivertype]->drv;
+}
+
+/*
+ * Allocate a zeroed td_state with its blkif and ring_info. Returns NULL if
+ * any allocation fails.
+ */
+static struct td_state *state_init(void)
+{
+	struct td_state *s;
+	tapdev_info_t *info;
+
+	s = calloc(1, sizeof(*s));
+	if (s == NULL) return NULL;
+
+	/* calloc leaves every pending_list[].count at 0 */
+	s->blkif = calloc(1, sizeof(blkif_t));
+	s->ring_info = info = calloc(1, sizeof(*info));
+	if (s->blkif == NULL || info == NULL) {
+		state_free(s);
+		return NULL;
+	}
+	info->fd = -1; /* no device opened yet */
+
+	return s;
+}
+
+/*
+ * Open blktap device @minor, map its ring and record the fd in the list
+ * entry of @s. Returns @minor on success, -1 on failure.
+ */
+static int map_new_dev(struct td_state *s, int minor)
+{
+	int tap_fd;
+	tapdev_info_t *info = s->ring_info;
+	char *devname;
+	fd_list_entry_t *ptr;
+
+	/* devname is undefined when asprintf() fails: do not free it */
+	if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) < 0)
+		return -1;
+
+	tap_fd = open(devname, O_RDWR);
+	if (tap_fd == -1)
+	{
+		DPRINTF("open failed on dev %s!",devname);
+		goto fail;
+	}
+	info->fd = tap_fd;
+
+	/*Map the shared memory*/
+	info->mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
+			  PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
+	if (info->mem == MAP_FAILED)
+	{
+		DPRINTF("mmap failed on dev %s!\n",devname);
+		/* do not leak the device fd or keep a bogus mapping */
+		info->mem = NULL;
+		info->fd = -1;
+		close(tap_fd);
+		goto fail;
+	}
+
+	/* assign the rings to the mapped memory */
+	info->sring = (blkif_sring_t *)((unsigned long)info->mem);
+	BACK_RING_INIT(&info->fe_ring, info->sring, PAGE_SIZE);
+
+	info->vstart =
+		(unsigned long)info->mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
+
+	ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
+	ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+	free(devname);
+
+	/*Update the fd entry*/
+	ptr = fd_start;
+	while (ptr != NULL) {
+		if (s == ptr->s) {
+			ptr->tap_fd = tap_fd;
+			break;
+		}
+		ptr = ptr->next;
+	}
+
+	return minor;
+
+ fail:
+	free(devname);
+	return -1;
+}
+
+/* Write @msglen bytes of @buf to the control fifo; log a failed or short write. */
+static void send_msg(const char *buf, int msglen)
+{
+	int len = write(fds[WRITE], buf, msglen);
+
+	if (len != msglen)
+		DPRINTF("control write failed [%d of %d]\n", len, msglen);
+}
+
+/*
+ * Read one control message from blktapctrl into @buf (MSG_SIZE bytes) and
+ * act on it. Returns 1 if a known message type was handled, 0 otherwise.
+ */
+static int read_msg(char *buf)
+{
+	int length, len, msglen, *io_fd;
+	char *path;
+	image_t *img;
+	msg_hdr_t *msg;
+	msg_newdev_t *msg_dev;
+	msg_pid_t *msg_pid;
+	struct tap_disk *drv;
+	int ret = -1;
+	struct td_state *s = NULL;
+	fd_list_entry_t *entry;
+
+	length = read(fds[READ], buf, MSG_SIZE);
+
+	if (length <= 0 || (size_t)length < sizeof(msg_hdr_t))
+		return 0;
+
+	msg = (msg_hdr_t *)buf;
+	DPRINTF("Tapdisk: Received msg, len %d, type %d, UID %d\n",
+		length,msg->type,msg->cookie);
+
+	switch (msg->type) {
+	case CTLMSG_PARAMS:
+		len = (length - sizeof(msg_hdr_t));
+		/* one extra zeroed byte: the payload may lack a terminator */
+		path = calloc(1, len + 1);
+		if (path == NULL)
+			goto params_done;
+		memcpy(path, buf + sizeof(msg_hdr_t), len);
+		DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
+
+		/*Assign driver*/
+		drv = get_driver(msg->drivertype);
+		if (drv == NULL)
+			goto params_done;
+
+		DPRINTF("Loaded driver: name [%s], type [%d]\n",
+			drv->disk_type, msg->drivertype);
+
+		/* Allocate the disk structs */
+		s = state_init();
+		if (s == NULL)
+			goto params_done;
+
+		s->drv = drv;
+		s->private = malloc(drv->private_data_size);
+		if (s->private == NULL)
+			goto params_done;
+
+		/*Open file*/
+		ret = drv->td_open(s, path);
+		if (ret == 0) {
+			io_fd = drv->td_get_fd(s);
+			entry = add_fd_entry(0, io_fd, s);
+			if (entry == NULL) {
+				drv->td_close(s);
+				ret = -1;
+			} else {
+				entry->cookie = msg->cookie;
+				DPRINTF("Entered cookie %d\n",entry->cookie);
+			}
+		}
+
+		memset(buf, 0x00, MSG_SIZE);
+
+	params_done:
+		if (ret == 0) {
+			msglen = sizeof(msg_hdr_t) + sizeof(image_t);
+			msg->type = CTLMSG_IMG;
+			img = (image_t *)(buf + sizeof(msg_hdr_t));
+			img->size = s->size;
+			img->secsize = s->sector_size;
+			img->info = s->info;
+		} else {
+			/* a disk that failed to open is never registered */
+			state_free(s);
+			msglen = sizeof(msg_hdr_t);
+			msg->type = CTLMSG_IMG_FAIL;
+			msg->len = msglen;
+		}
+		send_msg(buf, msglen);
+		free(path);
+		return 1;
+
+	case CTLMSG_NEWDEV:
+		msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
+
+		s = get_state(msg->cookie);
+		DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
+		if (s != NULL) {
+			ret = ((map_new_dev(s, msg_dev->devnum)
+				== msg_dev->devnum ? 0: -1));
+			/* count the disk only if it really got mapped */
+			if (ret == 0) connected_disks++;
+		}
+
+		memset(buf, 0x00, MSG_SIZE);
+		msglen = sizeof(msg_hdr_t);
+		msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP
+				      : CTLMSG_NEWDEV_FAIL);
+		msg->len = msglen;
+
+		send_msg(buf, msglen);
+		return 1;
+
+	case CTLMSG_CLOSE:
+		s = get_state(msg->cookie);
+		if (s) {
+			/* tap_fd is set only for disks counted by NEWDEV */
+			entry = s->fd_entry;
+			if (entry->tap_fd) connected_disks--;
+			unmap_disk(s);
+		}
+
+		sig_handler(SIGINT);
+
+		return 1;
+
+	case CTLMSG_PID:
+		memset(buf, 0x00, MSG_SIZE);
+		msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
+		msg->type = CTLMSG_PID_RSP;
+		msg->len = msglen;
+
+		msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
+		process = getpid();
+		msg_pid->pid = process;
+
+		send_msg(buf, msglen);
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+
+/* Copy @rsp into the next private response slot of the ring. */
+static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
+{
+	tapdev_info_t *info = s->ring_info;
+	blkif_response_t *rsp_d;
+
+	rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
+	memcpy(rsp_d, rsp, sizeof(blkif_response_t));
+	wmb();
+	info->fe_ring.rsp_prod_pvt++;
+
+	return 0;
+}
+
+/* Publish queued responses and notify the driver, if there are any. */
+static inline void kick_responses(struct td_state *s)
+{
+	tapdev_info_t *info = s->ring_info;
+
+	if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod)
+	{
+		RING_PUSH_RESPONSES(&info->fe_ring);
+		ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);
+	}
+}
+
+/* Run the driver's completion callbacks for @sid and kick any responses. */
+void io_done(struct td_state *s, int sid)
+{
+	struct tap_disk *drv = s->drv;
+
+	if (!run) return; /*We have received signal to close*/
+
+	if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s);
+
+	return;
+}
+
+/*
+ * Completion callback for one segment of pending request @idx; @res != 0
+ * marks the whole request as failed. Returns the number of responses
+ * queued on the ring (1 once the last segment has completed, else 0).
+ */
+int send_responses(struct td_state *s, int res, int idx, void *private)
+{
+	blkif_request_t *req;
+	int responses_queued = 0;
+	blkif_t *blkif = s->blkif;
+
+	/* check idx before it is used to address pending_list[] */
+	if ( (idx < 0) || (idx > MAX_REQUESTS-1) ||
+	     (blkif->pending_list[idx].count == 0) )
+	{
+		DPRINTF("invalid index returned(%u)!\n", idx);
+		return 0;
+	}
+
+	req = &blkif->pending_list[idx].req;
+
+	if (res != 0) {
+		DPRINTF("*** request error %d! \n", res);
+		/*
+		 * Returning here would leave the segment counted forever and
+		 * the request unanswered; record the error and complete it.
+		 */
+		blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+	}
+
+	blkif->pending_list[idx].count--;
+
+	if (blkif->pending_list[idx].count == 0)
+	{
+		blkif_request_t tmp;
+		blkif_response_t *rsp;
+
+		tmp = blkif->pending_list[idx].req;
+		rsp = (blkif_response_t *)req;
+
+		rsp->id = tmp.id;
+		rsp->operation = tmp.operation;
+		rsp->status = blkif->pending_list[idx].status;
+
+		write_rsp_to_ring(s, rsp);
+		responses_queued++;
+	}
+	return responses_queued;
+}
+
+/*
+ * Consume all new requests from the ring of @s, queue each segment on the
+ * driver and submit the batch.
+ */
+static void get_io_request(struct td_state *s)
+{
+	RING_IDX rp, j, i;
+	blkif_request_t rq, *req = &rq;
+	int idx, nsects;
+	int ret; /* signed: the queue hooks return negative errors */
+	uint64_t sector_nr;
+	char *page;
+	int early = 0; /* count early completions */
+	int failed = 0; /* responses queued for rejected segments */
+	struct tap_disk *drv = s->drv;
+	blkif_t *blkif = s->blkif;
+	tapdev_info_t *info = s->ring_info;
+
+	if (!run) return; /*We have received signal to close*/
+
+	rp = info->fe_ring.sring->req_prod;
+	rmb();
+	for (j = info->fe_ring.req_cons; j != rp; j++)
+	{
+		/*
+		 * Work on a private copy: requests and responses share the
+		 * ring slots, so a response queued below may overwrite it.
+		 */
+		memcpy(req, RING_GET_REQUEST(&info->fe_ring, j), sizeof(*req));
+		++info->fe_ring.req_cons;
+
+		/* both values index fixed-size arrays, so bound them first */
+		if (req->id >= MAX_REQUESTS ||
+		    req->nr_segments > MAX_SEGMENTS_PER_REQ) {
+			DPRINTF("Dropping malformed request\n");
+			continue;
+		}
+
+		idx = req->id;
+		ASSERT(blkif->pending_list[idx].count == 0);
+		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
+		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
+		blkif->pending_list[idx].count = req->nr_segments;
+
+		sector_nr = req->sector_number;
+
+		for (i = 0; i < req->nr_segments; i++) {
+			nsects = req->seg[i].last_sect -
+				 req->seg[i].first_sect + 1;
+
+			if ((req->seg[i].last_sect >= PAGE_SIZE >> 9) ||
+			    (nsects <= 0)) {
+				/* fail the segment so the request can finish */
+				failed += send_responses(s, -EINVAL, idx, NULL);
+				continue;
+			}
+
+			page = (char *)MMAP_VADDR(info->vstart,
+						  (unsigned long)req->id, i);
+			page += (req->seg[i].first_sect << SECTOR_SHIFT);
+
+			if (sector_nr >= s->size) {
+				DPRINTF("Sector request failed:\n");
+				DPRINTF("%s request, idx [%d,%d] size [%llu], "
+					"sector [%llu,%llu]\n",
+					(req->operation == BLKIF_OP_WRITE ?
+					 "WRITE" : "READ"),
+					idx,i,
+					(long long unsigned)
+						nsects<<SECTOR_SHIFT,
+					(long long unsigned)
+						sector_nr<<SECTOR_SHIFT,
+					(long long unsigned) sector_nr);
+				failed += send_responses(s, -EINVAL, idx, NULL);
+				continue;
+			}
+
+			switch (req->operation)
+			{
+			case BLKIF_OP_WRITE:
+				ret = drv->td_queue_write(s, sector_nr,
+						nsects, page, send_responses,
+						idx, NULL);
+				if (ret > 0) early += ret;
+				else if (ret == -EBUSY) {
+					/*
+					 * TODO: Sector is locked         *
+					 * Need to put req back on queue  *
+					 */
+				}
+				break;
+			case BLKIF_OP_READ:
+				ret = drv->td_queue_read(s, sector_nr,
+						nsects, page, send_responses,
+						idx, NULL);
+				if (ret > 0) early += ret;
+				else if (ret == -EBUSY) {
+					/*
+					 * TODO: Sector is locked         *
+					 * Need to put req back on queue  *
+					 */
+				}
+				break;
+			default:
+				DPRINTF("Unknown block operation\n");
+				failed += send_responses(s, -EINVAL, idx, NULL);
+				break;
+			}
+			sector_nr += nsects;
+		}
+	}
+
+	/*Batch done*/
+	drv->td_submit(s);
+
+	if (early > 0)
+		io_done(s,10);
+
+	/* error responses bypass the driver callbacks: push them here */
+	if (failed > 0)
+		kick_responses(s);
+
+	return;
+}
+
+/*
+ * tapdisk <READ fifo> <WRITE fifo>: serve control messages from blktapctrl
+ * and block requests from the mapped rings until told to stop.
+ */
+int main(int argc, char *argv[])
+{
+	int ret, i;
+	char *buf;
+	fd_set readfds;
+	struct timeval timeout;
+	fd_list_entry_t *ptr, *next;
+
+	if (argc != 3) usage();
+
+	daemonize();
+
+	openlog("TAPDISK", LOG_CONS|LOG_ODELAY, LOG_DAEMON);
+	/*Setup signal handlers*/
+	signal (SIGBUS, sig_handler);
+	signal (SIGINT, sig_handler);
+
+	/*Open the control channel*/
+	fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
+	fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
+
+	if ( (fds[READ] < 0) || (fds[WRITE] < 0) )
+	{
+		DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
+		exit(-1);
+	}
+
+	buf = calloc(MSG_SIZE, 1);
+
+	if (buf == NULL)
+	{
+		DPRINTF("ERROR: allocating memory.\n");
+		exit(-1);
+	}
+
+	while (run)
+	{
+		ret = 0;
+		FD_ZERO(&readfds);
+		FD_SET(fds[READ], &readfds);
+		maxfds = fds[READ];
+
+		/*Set all tap fds*/
+		LOCAL_FD_SET(&readfds);
+
+		timeout.tv_sec = 0;
+		timeout.tv_usec = 1000;
+
+		/*Wait for incoming messages*/
+		ret = select(maxfds + 1, &readfds, (fd_set *) 0,
+			     (fd_set *) 0, &timeout);
+
+		if (ret > 0)
+		{
+			ptr = fd_start;
+			while (ptr != NULL) {
+				if (ptr->tap_fd && FD_ISSET(ptr->tap_fd, &readfds))
+					get_io_request(ptr->s);
+				for (i = 0; i < MAX_IOFD; i++) {
+					if (ptr->io_fd[i] > 0 &&
+					    FD_ISSET(ptr->io_fd[i], &readfds))
+						io_done(ptr->s, i);
+				}
+
+				ptr = ptr->next;
+			}
+
+			if (FD_ISSET(fds[READ], &readfds))
+				read_msg(buf);
+		}
+	}
+	free(buf);
+	close(fds[READ]);
+	close(fds[WRITE]);
+
+	/*
+	 * unmap_disk() closes the image and frees both the list entry and the
+	 * state, so fetch the successor first and touch neither afterwards.
+	 */
+	ptr = fd_start;
+	while (ptr != NULL) {
+		next = ptr->next;
+		unmap_disk(ptr->s);
+		ptr = next;
+	}
+	closelog();
+
+	return 0;
+}
diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/drivers/tapdisk.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapdisk.h Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,211 @@
+/* tapdisk.h
+ *
+ * Generic disk interface for blktap-based image adapters.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * Some notes on the tap_disk interface:
+ *
+ * tap_disk aims to provide a generic interface to easily implement new
+ * types of image accessors. The structure-of-function-calls is similar
+ * to disk interfaces used in qemu/denali/etc, with the significant
+ * difference being the expectation of asynchronous rather than synchronous
+ * I/O. The asynchronous interface is intended to allow lots of requests to
+ * be pipelined through a disk, without the disk requiring any of its own
+ * threads of control. As such, a batch of requests is delivered to the disk
+ * using:
+ *
+ * td_queue_[read,write]()
+ *
+ * and passing in a completion callback, which the disk is responsible for
+ * tracking. The end of a batch is marked with a call to:
+ *
+ * td_submit()
+ *
+ * The disk implementation must provide a file handle, which is used to
+ * indicate that it needs to do work.
tapdisk will add this file handle
+ * (returned from td_get_fd()) to its poll set, and will call into the disk
+ * using td_do_callbacks() whenever there is data pending.
+ *
+ * Two disk implementations demonstrate how this interface may be used to
+ * implement disks with both asynchronous and synchronous calls. block-aio.c
+ * maps this interface down onto the linux libaio calls, while block-sync uses
+ * normal posix read/write.
+ *
+ * A few things to realize about the sync case, which doesn't need to defer
+ * io completions:
+ *
+ * - td_queue_[read,write]() call read/write directly, and then call the
+ * callback immediately. They MUST then return a value greater than 0
+ * in order to tell tapdisk that requests have finished early, and to
+ * force responses to be kicked to the clients.
+ *
+ * - The fd used for poll is an otherwise unused pipe, which allows poll to
+ * be safely called without ever returning anything.
+ *
+ */
+
+#ifndef TAPDISK_H_
+#define TAPDISK_H_
+
+#include <stdint.h>
+#include <syslog.h>
+#include "blktaplib.h"
+
+/*If enabled, log all debug messages to syslog*/
+#if 1
+#define DPRINTF(_f, _a...) syslog( LOG_DEBUG, _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* Things disks need to know about, these should probably be in a higher-level
+ * header. */
+#define MAX_REQUESTS 64
+#define MAX_SEGMENTS_PER_REQ 11
+#define SECTOR_SHIFT 9
+#define DEFAULT_SECTOR_SIZE 512
+
+/* This structure represents the state of an active virtual disk. */
+struct td_state {
+	void *private; /* driver-private buffer of private_data_size bytes */
+	void *drv; /* the struct tap_disk serving this disk */
+	void *blkif; /* blkif_t holding the pending request list */
+	void *image;
+	void *ring_info; /* tapdev_info_t: device fd and mapped ring */
+	void *fd_entry; /* this disk's fd_list_entry_t */
+	char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
+	long int sector_size;
+	uint64_t size; /* tapdisk.c rejects sector numbers >= size */
+	long int info;
+};
+
+/* Prototype of the callback to activate as requests complete. */
+typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
+
+/* Structure describing the interface to a virtual disk implementation. */
+/* See note at the top of this file describing this interface. */
+struct tap_disk {
+	const char *disk_type; /* driver name, logged when the driver is loaded */
+	int private_data_size; /* bytes allocated for td_state.private */
+	int (*td_open) (struct td_state *s, const char *name);
+	int (*td_queue_read) (struct td_state *s, uint64_t sector,
+			int nb_sectors, char *buf, td_callback_t cb,
+			int id, void *prv);
+	int (*td_queue_write) (struct td_state *s, uint64_t sector,
+			int nb_sectors, char *buf, td_callback_t cb,
+			int id, void *prv);
+	int (*td_submit) (struct td_state *s); /* marks the end of a batch */
+	int *(*td_get_fd) (struct td_state *s); /* array of MAX_IOFD fds to poll */
+	int (*td_close) (struct td_state *s);
+	int (*td_do_callbacks)(struct td_state *s, int sid);
+};
+
+typedef struct disk_info {
+	int idnum;
+	char name[50]; /* e.g. "RAMDISK" */
+	char handle[10]; /* xend handle, e.g. 'ram' */
+	int single_handler; /* is there a single controller for all */
+	/* instances of disk type? */
+#ifdef TAPDISK
+	struct tap_disk *drv;
+#endif
+} disk_info_t;
+
+void debug_fe_ring(struct td_state *s);
+
+extern struct tap_disk tapdisk_aio;
+extern struct tap_disk tapdisk_sync;
+extern struct tap_disk tapdisk_vmdk;
+extern struct tap_disk tapdisk_ram;
+extern struct tap_disk tapdisk_qcow;
+
+#define MAX_DISK_TYPES 20
+#define MAX_IOFD 2
+
+#define DISK_TYPE_AIO 0
+#define DISK_TYPE_SYNC 1
+#define DISK_TYPE_VMDK 2
+#define DISK_TYPE_RAM 3
+#define DISK_TYPE_QCOW 4
+
+
+/*Define Individual Disk Parameters here */
+static disk_info_t aio_disk = {
+	DISK_TYPE_AIO,
+	"raw image (aio)",
+	"aio",
+	0,
+#ifdef TAPDISK
+	&tapdisk_aio,
+#endif
+};
+
+static disk_info_t sync_disk = {
+	DISK_TYPE_SYNC,
+	"raw image (sync)",
+	"sync",
+	0,
+#ifdef TAPDISK
+	&tapdisk_sync,
+#endif
+};
+
+static disk_info_t vmdk_disk = {
+	DISK_TYPE_VMDK,
+	"vmware image (vmdk)",
+	"vmdk",
+	1,
+#ifdef TAPDISK
+	&tapdisk_vmdk,
+#endif
+};
+
+static disk_info_t ram_disk = {
+	DISK_TYPE_RAM,
+	"ramdisk image (ram)",
+	"ram",
+	1,
+#ifdef TAPDISK
+	&tapdisk_ram,
+#endif
+};
+
+static disk_info_t qcow_disk = {
+	DISK_TYPE_QCOW,
+	"qcow disk (qcow)",
+	"qcow",
+	0,
+#ifdef TAPDISK
+	&tapdisk_qcow,
+#endif
+};
+
+/*Main disk info array */
+static disk_info_t *dtypes[] = { /* listed in DISK_TYPE_* order */
+	&aio_disk,
+	&sync_disk,
+	&vmdk_disk,
+	&ram_disk,
+	&qcow_disk,
+};
+
+typedef struct driver_list_entry {
+	void *blkif;
+	void *prev;
+	void *next;
+} driver_list_entry_t;
+
+typedef struct fd_list_entry {
+	int cookie; /* id from the control message header, used for lookup */
+	int tap_fd; /* blktap device fd; 0 until the device is mapped */
+	int io_fd[MAX_IOFD]; /* fds returned by the driver's td_get_fd() */
+	struct td_state *s;
+	void *prev;
+	void *next;
+} fd_list_entry_t;
+
+int qcow_create(const char *filename, uint64_t total_size,
+		const char *backing_file, int flags);
+
+#endif /*TAPDISK_H_*/
diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/lib/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/lib/Makefile Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,66 @@
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+MAJOR = 3.0
+MINOR = 0
+SONAME = libblktap.so.$(MAJOR)
+
+BLKTAP_INSTALL_DIR = /usr/sbin
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+INCLUDES += -I. -I.. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
+
+LIBS := -lz
+
+SRCS :=
+SRCS += xenbus.c blkif.c xs_api.c
+
+CFLAGS += -Werror
+CFLAGS += -Wno-unused
+CFLAGS += -fno-strict-aliasing -fPIC
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# get asprintf():
+CFLAGS += -D _GNU_SOURCE
+
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+CFLAGS += $(INCLUDES)
+DEPS = .*.d
+
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+IBINS :=
+
+LIB = libblktap.a libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
+
+all: build
+
+build:
+	$(MAKE) libblktap
+
+install: all
+	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
+	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
+	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
+
+clean:
+	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
+
+libblktap: $(OBJS)
+	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \
+	-L$(XEN_XENSTORE) -l xenstore \
+	-o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
+	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
+	ln -sf libblktap.so.$(MAJOR) $@.so
+	ar rc libblktap.a $@.so
+
+.PHONY: TAGS all build clean install libblktap
+
+TAGS:
+	etags -t $(SRCS) *.h
+
+-include $(DEPS)
+
diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/lib/blkif.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/lib/blkif.c Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,211 @@
+/*
+ * tools/blktap_user/blkif.c
+ *
+ * The blkif interface for blktap. A blkif describes an in-use virtual disk.
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <err.h>
+#include <unistd.h>
+
+#include "blktaplib.h"
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+/* Remove @blkif from its hash chain; a blkif that is not hashed is ignored. */
+static void blkif_unhash(blkif_t *blkif)
+{
+	blkif_t **pblkif, *curs;
+
+	pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
+	while ( (curs = *pblkif) != NULL )
+	{
+		if ( blkif == curs )
+		{
+			*pblkif = curs->hash_next;
+			return;
+		}
+		pblkif = &curs->hash_next;
+	}
+}
+
+/* Look up the blkif registered for (@domid, @handle); NULL if there is none. */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+	blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+	while ( (blkif != NULL) &&
+		((blkif->domid != domid) || (blkif->handle != handle)) )
+		blkif = blkif->hash_next;
+	return blkif;
+}
+
+/* Allocate a zeroed blkif for @domid with no device number; NULL on failure. */
+blkif_t *alloc_blkif(domid_t domid)
+{
+	blkif_t *blkif;
+	DPRINTF("Alloc_blkif called [%d]\n",domid);
+	blkif = calloc(1, sizeof(*blkif));
+	if (!blkif)
+		return NULL;
+	blkif->domid = domid;
+	blkif->devnum = -1;
+	return blkif;
+}
+
+/*Controller callbacks*/
+static int (*new_devmap_hook)(blkif_t *blkif) = NULL;
+void register_new_devmap_hook(int (*fn)(blkif_t *blkif))
+{
+	new_devmap_hook = fn;
+}
+
+static int (*new_unmap_hook)(blkif_t *blkif) = NULL;
+void register_new_unmap_hook(int (*fn)(blkif_t *blkif))
+{
+	new_unmap_hook = fn;
+}
+
+static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
+void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
+{
+	new_blkif_hook = fn;
+}
+
+/*
+ * Fill in @blkif, run the new_blkif hook, hash it under (domid, handle) and
+ * obtain a device number from the devmap hook. Returns 0 on success,
+ * -EINVAL for a NULL @blkif and -1 otherwise; if the devmap step fails,
+ * @blkif is removed from the hash table again.
+ */
+int blkif_init(blkif_t *blkif, long int handle, long int pdev,
+	       long int readonly)
+{
+	domid_t domid;
+	blkif_t **pblkif;
+	int devnum;
+
+	if (blkif == NULL)
+		return -EINVAL;
+
+	domid = blkif->domid;
+	blkif->handle = handle;
+	blkif->pdev = pdev;
+	blkif->readonly = readonly;
+
+	/*
+	 * Call out to the new_blkif_hook.
+	 * The tap application should define this,
+	 * and it should return having set blkif->ops
+	 *
+	 */
+	if (new_blkif_hook == NULL)
+	{
+		DPRINTF("Probe detected a new blkif, but no new_blkif_hook!");
+		return -1;
+	}
+	if (new_blkif_hook(blkif)!=0) {
+		DPRINTF("BLKIF: Image open failed\n");
+		return -1;
+	}
+
+	/* Now wire it in. */
+	pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+	DPRINTF("Created hash entry: %d [%d,%ld]\n",
+		BLKIF_HASH(domid, handle), domid, handle);
+
+	while ( *pblkif != NULL )
+	{
+		if ( ((*pblkif)->domid == domid) &&
+		     ((*pblkif)->handle == handle) )
+		{
+			DPRINTF("Could not create blkif: already exists\n");
+			return -1;
+		}
+		pblkif = &(*pblkif)->hash_next;
+	}
+	blkif->hash_next = NULL;
+	*pblkif = blkif;
+
+	if (new_devmap_hook == NULL)
+	{
+		DPRINTF("Probe setting up new blkif but no devmap hook!");
+		goto fail_unhash;
+	}
+
+	devnum = new_devmap_hook(blkif);
+	if (devnum == -1)
+		goto fail_unhash;
+	blkif->devnum = devnum;
+
+	return 0;
+
+ fail_unhash:
+	/* a failed blkif must not stay reachable through the hash */
+	blkif_unhash(blkif);
+	return -1;
+}
+
+/*
+ * Unhash @blkif, free its prv and info buffers, run the unmap hook and free
+ * @blkif itself. A NULL @blkif is ignored.
+ */
+void free_blkif(blkif_t *blkif)
+{
+	/* the hash lookup dereferences blkif, so test it first */
+	if (blkif == NULL)
+		return;
+
+	blkif_unhash(blkif);
+
+	/* clear the pointers so the unmap hook never sees freed memory */
+	free(blkif->prv);
+	blkif->prv = NULL;
+	free(blkif->info);
+	blkif->info = NULL;
+
+	if (new_unmap_hook != NULL) new_unmap_hook(blkif);
+	free(blkif);
+}
+
+/* Reset the hash table to empty. */
+void __init_blkif(void)
+{
+	memset(blkif_hash, 0, sizeof(blkif_hash));
+}
diff -r af9809f51f81 -r 2937703f0ed0 tools/blktap/lib/blktaplib.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/lib/blktaplib.h Thu Jul 13 10:13:26 2006 +0100
@@ -0,0 +1,223 @@
+/* blktaplib.h
+ *
+ * Blktap library userspace code.
+ * + * (c) 2005 Andrew Warfield and Julian Chesterfield + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKTAPLIB_H__ +#define __BLKTAPLIB_H__ + +#include <xenctrl.h> +#include <sys/user.h> +#include <xen/xen.h> +#include <xen/io/blkif.h> +#include <xen/io/ring.h> +#include <xs.h> +#include <sys/types.h> +#include <unistd.h> + +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + +/* size of the extra VMA area to map in attached pages. 
*/ +#define BLKTAP_VMA_PAGES BLK_RING_SIZE + +/* blktap IOCTLs: These must correspond with the blktap driver ioctls*/ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_SENDPID 4 +#define BLKTAP_IOCTL_NEWINTF 5 +#define BLKTAP_IOCTL_MINOR 6 +#define BLKTAP_IOCTL_MAJOR 7 +#define BLKTAP_QUERY_ALLOC_REQS 8 +#define BLKTAP_IOCTL_FREEINTF 9 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) ); +} + +#define MAX_REQUESTS 64 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |