Xen project Mailing List

[win-pv-devel] [PATCH 3/5] Update headers to xen.git:f919dbc0583797d1c5c09da815518084ce77eb81

From: Paul Durrant <pdurrant@xxxxxxxxx>

Date: Fri, 20 Mar 2015 12:43:54 +0000

Cc: Paul Durrant <paul.durrant@xxxxxxxxxx>

Delivery-date: Fri, 20 Mar 2015 12:44:12 +0000

List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>

This brings the headers into line with XENBUS. There's no reliance on a post-4.5.0 change in XENVIF but the re-structuring in the headers done after 4.5.0 does mean change is necessary and we may as well take the pain sooner rather than later. Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> --- get_xen_headers.py | 59 +- include/xen.h | 7 +- include/xen/arch-x86/xen-x86_32.h | 171 ------ include/xen/arch-x86/xen-x86_64.h | 202 ------- include/xen/arch-x86/xen.h | 264 --------- include/xen/grant_table.h | 662 ---------------------- include/xen/io/netif.h | 236 -------- include/xen/io/ring.h | 323 ----------- include/xen/io/xenbus.h | 80 --- include/xen/public/arch-x86/xen-x86_32.h | 172 ++++++ include/xen/public/arch-x86/xen-x86_64.h | 202 +++++++ include/xen/public/arch-x86/xen.h | 307 +++++++++++ include/xen/public/errno.h | 93 ++++ include/xen/public/grant_table.h | 682 +++++++++++++++++++++++ include/xen/public/io/netif.h | 422 ++++++++++++++ include/xen/public/io/ring.h | 312 +++++++++++ include/xen/public/io/xenbus.h | 80 +++ include/xen/public/trace.h | 331 +++++++++++ include/xen/public/xen-compat.h | 44 ++ include/xen/public/xen.h | 908 +++++++++++++++++++++++++++++++ include/xen/xen-compat.h | 44 -- include/xen/xen.h | 895 ------------------------------ include/xen/xen/errno.h | 20 + vs2012/xenvif/xenvif.vcxproj | 2 +- vs2013/xenvif/xenvif.vcxproj | 5 +- 25 files changed, 3619 insertions(+), 2904 deletions(-) delete mode 100644 include/xen/arch-x86/xen-x86_32.h delete mode 100644 include/xen/arch-x86/xen-x86_64.h delete mode 100644 include/xen/arch-x86/xen.h delete mode 100644 include/xen/grant_table.h delete mode 100644 include/xen/io/netif.h delete mode 100644 include/xen/io/ring.h delete mode 100644 include/xen/io/xenbus.h create mode 100644 include/xen/public/arch-x86/xen-x86_32.h create mode 100644 include/xen/public/arch-x86/xen-x86_64.h create mode 100644 include/xen/public/arch-x86/xen.h create mode 100644 include/xen/public/errno.h create mode 100644 include/xen/public/grant_table.h create mode 100644 include/xen/public/io/netif.h create mode 100644 include/xen/public/io/ring.h create mode 100644 include/xen/public/io/xenbus.h create mode 100644 include/xen/public/trace.h create mode 100644 include/xen/public/xen-compat.h create mode 100644 include/xen/public/xen.h delete mode 100644 include/xen/xen-compat.h delete mode 100644 include/xen/xen.h create mode 100644 include/xen/xen/errno.h diff --git a/get_xen_headers.py b/get_xen_headers.py index 16807bb..1d2a0c8 100755 --- a/get_xen_headers.py +++ b/get_xen_headers.py @@ -5,34 +5,39 @@ import shutil import subprocess import re -def shell(command): - print(command) +def shell(command, dir = '.'): + print("in '%s' execute '%s'" % (dir, ' '.join(command))) sys.stdout.flush() - pipe = os.popen(' '.join(command), 'r', 1) + sub = subprocess.Popen(' '.join(command), cwd=dir, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) - for line in pipe: - print(line.rstrip()) + for line in sub.stdout: + print(line.decode(sys.getdefaultencoding()).rstrip()) - return pipe.close() + sub.wait() + + return sub.returncode def get_repo(url, working): shell(['git', 'clone', '--no-checkout', url, working]) def get_branch(tag, working): - cwd = os.getcwd() - os.chdir(working) - shell(['git', 'checkout', '-b', tag]) - os.chdir(cwd) + shell(['git', 'checkout', '-b', 'tmp', tag], working) + +def put_branch(working): + shell(['git', 'checkout', 'master'], working) + shell(['git', 'branch', '-d', 'tmp'], working) -def copy_file(working, src_dir, dst_dir, name): +def copy_file(working, dir, name): try: - os.makedirs('include\\xen\\%s' % dst_dir) + os.makedirs('include\\xen\\%s' % dir) except OSError: None - src = open('%s\\xen\\include\\%s\\%s' % (working, src_dir, name), 'r') - dst = open('include\\xen\\%s\\%s' % (dst_dir, name), 'w', newline='\n') + src = open('%s\\xen\\include\\%s\\%s' % (working, dir, name), 'r') + dst = open('include\\xen\\%s\\%s' % (dir, name), 'w', newline='\n') print(name) @@ -53,14 +58,24 @@ if __name__ == '__main__': get_repo('git://xenbits.xen.org/xen.git', working) get_branch(tag, working) - copy_file(working, 'public', '.', 'xen.h') + shell(['git', 'rm', '-r', '-f', 'xen'], 'include') + + copy_file(working, 'public', 'xen.h') + copy_file(working, 'public', 'xen-compat.h') + copy_file(working, 'public', 'trace.h') + copy_file(working, 'public', 'grant_table.h') + copy_file(working, 'public', 'errno.h') + + copy_file(working, 'xen', 'errno.h') + + copy_file(working, 'public\\arch-x86', 'xen.h') + copy_file(working, 'public\\arch-x86', 'xen-x86_32.h') + copy_file(working, 'public\\arch-x86', 'xen-x86_64.h') - copy_file(working, 'public', '.', 'xen-compat.h') - copy_file(working, 'public', '.', 'trace.h') - copy_file(working, 'public', '.', 'grant_table.h') + copy_file(working, 'public\\io', 'ring.h') + copy_file(working, 'public\\io', 'netif.h') + copy_file(working, 'public\\io', 'xenbus.h') - copy_file(working, 'public\\arch-x86', 'arch-x86', 'xen.h') - copy_file(working, 'public\\arch-x86', 'arch-x86', 'xen-x86_32.h') - copy_file(working, 'public\\arch-x86', 'arch-x86', 'xen-x86_64.h') + put_branch(working) - copy_file(working, 'public\\io', 'io', 'netif.h') + shell(['git', 'add', 'xen'], 'include') diff --git a/include/xen.h b/include/xen.h index 9575285..9dccd0c 100644 --- a/include/xen.h +++ b/include/xen.h @@ -37,8 +37,9 @@ #include <xen-version.h> #include <xen-types.h> #include <xen-warnings.h> -#include <xen/io/ring.h> -#include <xen/io/netif.h> -#include <xen/io/xenbus.h> + +#include <public/io/ring.h> +#include <public/io/netif.h> +#include <public/io/xenbus.h> #endif // _XEN_H diff --git a/include/xen/arch-x86/xen-x86_32.h b/include/xen/arch-x86/xen-x86_32.h deleted file mode 100644 index c1edf17..0000000 --- a/include/xen/arch-x86/xen-x86_32.h +++ /dev/null @@ -1,171 +0,0 @@ -/****************************************************************************** - * xen-x86_32.h - * - * Guest OS interface to x86 32-bit Xen. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2004-2007, K A Fraser - */ - -#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ -#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ - -/* - * Hypercall interface: - * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) - * Output: %eax - * Access is via hypercall page (set up by guest loader or via a Xen MSR): - * call hypercall_page + hypercall-number * 32 - * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) - */ - -/* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ -#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ -#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ -#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ -#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ -#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ - -#define FLAT_KERNEL_CS FLAT_RING1_CS -#define FLAT_KERNEL_DS FLAT_RING1_DS -#define FLAT_KERNEL_SS FLAT_RING1_SS -#define FLAT_USER_CS FLAT_RING3_CS -#define FLAT_USER_DS FLAT_RING3_DS -#define FLAT_USER_SS FLAT_RING3_SS - -#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 -#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 -#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 -#define HYPERVISOR_VIRT_START_PAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) -#define MACH2PHYS_VIRT_START_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) -#define MACH2PHYS_VIRT_END_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) - -/* Non-PAE bounds are obsolete. */ -#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 -#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 -#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 -#define HYPERVISOR_VIRT_START_NONPAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) -#define MACH2PHYS_VIRT_START_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) -#define MACH2PHYS_VIRT_END_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) - -#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE -#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE -#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((ULONG_PTR *)MACH2PHYS_VIRT_START) -#endif - -/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ -#if defined(__XEN__) || defined(__XEN_TOOLS__) -#undef ___DEFINE_XEN_GUEST_HANDLE -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } \ - __guest_handle_ ## name; \ - typedef struct { union { type *p; uint64_aligned_t q; }; } \ - __guest_handle_64_ ## name -#undef set_xen_guest_handle_raw -#define set_xen_guest_handle_raw(hnd, val) \ - do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ - (hnd).p = val; \ - } while ( 0 ) -#define uint64_aligned_t uint64_t __attribute__((aligned(8))) -#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name -#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) -#endif - -#ifndef __ASSEMBLY__ - -struct cpu_user_regs { - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t esi; - uint32_t edi; - uint32_t ebp; - uint32_t eax; - uint16_t error_code; /* private */ - uint16_t entry_vector; /* private */ - uint32_t eip; - uint16_t cs; - uint8_t saved_upcall_mask; - uint8_t _pad0; - uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ - uint32_t esp; - uint16_t ss, _pad1; - uint16_t es, _pad2; - uint16_t ds, _pad3; - uint16_t fs, _pad4; - uint16_t gs, _pad5; -}; -typedef struct cpu_user_regs cpu_user_regs_t; -DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); - -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - -struct arch_vcpu_info { - ULONG_PTR cr2; - ULONG_PTR pad[5]; /* sizeof(vcpu_info_t) == 64 */ -}; -typedef struct arch_vcpu_info arch_vcpu_info_t; - -struct xen_callback { - ULONG_PTR cs; - ULONG_PTR eip; -}; -typedef struct xen_callback xen_callback_t; - -#endif /* !__ASSEMBLY__ */ - -#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/arch-x86/xen-x86_64.h b/include/xen/arch-x86/xen-x86_64.h deleted file mode 100644 index 10a433a..0000000 --- a/include/xen/arch-x86/xen-x86_64.h +++ /dev/null @@ -1,202 +0,0 @@ -/****************************************************************************** - * xen-x86_64.h - * - * Guest OS interface to x86 64-bit Xen. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2004-2006, K A Fraser - */ - -#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ -#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ - -/* - * Hypercall interface: - * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) - * Output: %rax - * Access is via hypercall page (set up by guest loader or via a Xen MSR): - * call hypercall_page + hypercall-number * 32 - * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) - */ - -/* - * 64-bit segment selectors - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ - -#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ -#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ -#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ -#define FLAT_RING3_DS64 0x0000 /* NULL selector */ -#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ -#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ - -#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 -#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 -#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 -#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 -#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 -#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 -#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 -#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 -#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 - -#define FLAT_USER_DS64 FLAT_RING3_DS64 -#define FLAT_USER_DS32 FLAT_RING3_DS32 -#define FLAT_USER_DS FLAT_USER_DS64 -#define FLAT_USER_CS64 FLAT_RING3_CS64 -#define FLAT_USER_CS32 FLAT_RING3_CS32 -#define FLAT_USER_CS FLAT_USER_CS64 -#define FLAT_USER_SS64 FLAT_RING3_SS64 -#define FLAT_USER_SS32 FLAT_RING3_SS32 -#define FLAT_USER_SS FLAT_USER_SS64 - -#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 -#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 -#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 -#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((ULONG_PTR *)HYPERVISOR_VIRT_START) -#endif - -/* - * int HYPERVISOR_set_segment_base(unsigned int which, ULONG_PTR base) - * @which == SEGBASE_* ; @base == 64-bit base address - * Returns 0 on success. - */ -#define SEGBASE_FS 0 -#define SEGBASE_GS_USER 1 -#define SEGBASE_GS_KERNEL 2 -#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ - -/* - * int HYPERVISOR_iret(void) - * All arguments are on the kernel stack, in the following format. - * Never returns if successful. Current kernel context is lost. - * The saved CS is mapped as follows: - * RING0 -> RING3 kernel mode. - * RING1 -> RING3 kernel mode. - * RING2 -> RING3 kernel mode. - * RING3 -> RING3 user mode. - * However RING0 indicates that the guest kernel should return to iteself - * directly with - * orb $3,1*8(%rsp) - * iretq - * If flags contains VGCF_in_syscall: - * Restore RAX, RIP, RFLAGS, RSP. - * Discard R11, RCX, CS, SS. - * Otherwise: - * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. - * All other registers are saved on hypercall entry and restored to user. - */ -/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ -#define _VGCF_in_syscall 8 -#define VGCF_in_syscall (1<<_VGCF_in_syscall) -#define VGCF_IN_SYSCALL VGCF_in_syscall - -#ifndef __ASSEMBLY__ - -struct iret_context { - /* Top of stack (%rsp at point of hypercall). */ - uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; - /* Bottom of iret stack frame. */ -}; - -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ -#define __DECL_REG(name) union { \ - uint64_t r ## name, e ## name; \ - uint32_t _e ## name; \ -} -#else -/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ -#define __DECL_REG(name) uint64_t r ## name -#endif - -struct cpu_user_regs { - uint64_t r15; - uint64_t r14; - uint64_t r13; - uint64_t r12; - __DECL_REG(bp); - __DECL_REG(bx); - uint64_t r11; - uint64_t r10; - uint64_t r9; - uint64_t r8; - __DECL_REG(ax); - __DECL_REG(cx); - __DECL_REG(dx); - __DECL_REG(si); - __DECL_REG(di); - uint32_t error_code; /* private */ - uint32_t entry_vector; /* private */ - __DECL_REG(ip); - uint16_t cs, _pad0[1]; - uint8_t saved_upcall_mask; - uint8_t _pad1[3]; - __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ - __DECL_REG(sp); - uint16_t ss, _pad2[3]; - uint16_t es, _pad3[3]; - uint16_t ds, _pad4[3]; - uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ - uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ -}; -typedef struct cpu_user_regs cpu_user_regs_t; -DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); - -#undef __DECL_REG - -#define xen_pfn_to_cr3(pfn) ((ULONG_PTR)(pfn) << 12) -#define xen_cr3_to_pfn(cr3) ((ULONG_PTR)(cr3) >> 12) - -struct arch_vcpu_info { - ULONG_PTR cr2; - ULONG_PTR pad; /* sizeof(vcpu_info_t) == 64 */ -}; -typedef struct arch_vcpu_info arch_vcpu_info_t; - -typedef ULONG_PTR xen_callback_t; - -#endif /* !__ASSEMBLY__ */ - -#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/arch-x86/xen.h b/include/xen/arch-x86/xen.h deleted file mode 100644 index 5cc22fb..0000000 --- a/include/xen/arch-x86/xen.h +++ /dev/null @@ -1,264 +0,0 @@ -/****************************************************************************** - * arch-x86/xen.h - * - * Guest OS interface to x86 Xen. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2004-2006, K A Fraser - */ - -#include "../xen.h" - -#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ -#define __XEN_PUBLIC_ARCH_X86_XEN_H__ - -/* Structural guest handles introduced in 0x00030201. */ -#if __XEN_INTERFACE_VERSION__ >= 0x00030201 -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } __guest_handle_ ## name -#else -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef type * __guest_handle_ ## name -#endif - -/* - * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field - * in a struct in memory. - * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an - * hypercall argument. - * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but - * they might not be on other architectures. - */ -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - ___DEFINE_XEN_GUEST_HANDLE(name, type); \ - ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) -#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) -#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) -#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif -#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) - -#if defined(__i386__) -#include "xen-x86_32.h" -#elif defined(__x86_64__) -#include "xen-x86_64.h" -#endif - -#ifndef __ASSEMBLY__ -typedef ULONG_PTR xen_pfn_t; -#define PRI_xen_pfn "lx" -#endif - -#define XEN_HAVE_PV_GUEST_ENTRY 1 - -#define XEN_HAVE_PV_UPCALL_MASK 1 - -/* - * `incontents 200 segdesc Segment Descriptor Tables - */ -/* - * ` enum neg_errnoval - * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); - * ` - */ -/* - * A number of GDT entries are reserved by Xen. These are not situated at the - * start of the GDT because some stupid OSes export hard-coded selector values - * in their ABI. These hard-coded values are always near the start of the GDT, - * so Xen places itself out of the way, at the far end of the GDT. - * - * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op - */ -#define FIRST_RESERVED_GDT_PAGE 14 -#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) -#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) - - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc); - * ` - * ` @pa The machine physical address of the descriptor to - * ` update. Must be either a descriptor page or writable. - * ` @desc The descriptor value to update, in the same format as a - * ` native descriptor table entry. - */ - -/* Maximum number of virtual CPUs in legacy multi-processor guests. */ -#define XEN_LEGACY_MAX_VCPUS 32 - -#ifndef __ASSEMBLY__ - -typedef ULONG_PTR xen_ulong_t; -#define PRI_xen_ulong "lx" - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_stack_switch(ULONG_PTR ss, ULONG_PTR esp); - * ` - * Sets the stack segment and pointer for the current vcpu. - */ - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); - * ` - */ -/* - * Send an array of these to HYPERVISOR_set_trap_table(). - * Terminate the array with a sentinel entry, with traps[].address==0. - * The privilege level specifies which modes may enter a trap via a software - * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate - * privilege levels as follows: - * Level == 0: Noone may enter - * Level == 1: Kernel may enter - * Level == 2: Kernel may enter - * Level == 3: Everyone may enter - */ -#define TI_GET_DPL(_ti) ((_ti)->flags & 3) -#define TI_GET_IF(_ti) ((_ti)->flags & 4) -#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) -#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -struct trap_info { - uint8_t vector; /* exception vector */ - uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ - uint16_t cs; /* code selector */ - ULONG_PTR address; /* code offset */ -}; -typedef struct trap_info trap_info_t; -DEFINE_XEN_GUEST_HANDLE(trap_info_t); - -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ - -/* - * The following is all CPU context. Note that the fpu_ctxt block is filled - * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. - */ -struct vcpu_guest_context { - /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ - struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_IN_KERNEL (1<<2) -#define _VGCF_i387_valid 0 -#define VGCF_i387_valid (1<<_VGCF_i387_valid) -#define _VGCF_in_kernel 2 -#define VGCF_in_kernel (1<<_VGCF_in_kernel) -#define _VGCF_failsafe_disables_events 3 -#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) -#define _VGCF_syscall_disables_events 4 -#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) -#define _VGCF_online 5 -#define VGCF_online (1<<_VGCF_online) - ULONG_PTR flags; /* VGCF_* flags */ - struct cpu_user_regs user_regs; /* User-level CPU registers */ - struct trap_info trap_ctxt[256]; /* Virtual IDT */ - ULONG_PTR ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - ULONG_PTR gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ - ULONG_PTR kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ - /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ - ULONG_PTR ctrlreg[8]; /* CR0-CR7 (control registers) */ - ULONG_PTR debugreg[8]; /* DB0-DB7 (debug registers) */ -#ifdef __i386__ - ULONG_PTR event_callback_cs; /* CS:EIP of event callback */ - ULONG_PTR event_callback_eip; - ULONG_PTR failsafe_callback_cs; /* CS:EIP of failsafe callback */ - ULONG_PTR failsafe_callback_eip; -#else - ULONG_PTR event_callback_eip; - ULONG_PTR failsafe_callback_eip; -#ifdef __XEN__ - union { - ULONG_PTR syscall_callback_eip; - struct { - unsigned int event_callback_cs; /* compat CS of event cb */ - unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ - }; - }; -#else - ULONG_PTR syscall_callback_eip; -#endif -#endif - ULONG_PTR vm_assist; /* VMASST_TYPE_* bitmap */ -#ifdef __x86_64__ - /* Segment base addresses. */ - uint64_t fs_base; - uint64_t gs_base_kernel; - uint64_t gs_base_user; -#endif -}; -typedef struct vcpu_guest_context vcpu_guest_context_t; -DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); - -struct arch_shared_info { - ULONG_PTR max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ - xen_pfn_t pfn_to_mfn_frame_list_list; - ULONG_PTR nmi_reason; - uint64_t pad[32]; -}; -typedef struct arch_shared_info arch_shared_info_t; - -#endif /* !__ASSEMBLY__ */ - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_fpu_taskswitch(int set); - * ` - * Sets (if set!=0) or clears (if set==0) CR0.TS. - */ - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_set_debugreg(int regno, ULONG_PTR value); - * - * ` ULONG_PTR - * ` HYPERVISOR_get_debugreg(int regno); - * For 0<=reg<=7, returns the debug register value. - * For other values of reg, returns ((ULONG_PTR)-EINVAL). - * (Unfortunately, this interface is defective.) - */ - -/* - * Prefix forces emulation of some non-trapping instructions. - * Currently only CPUID. - */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif - -#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h deleted file mode 100644 index 6bff379..0000000 --- a/include/xen/grant_table.h +++ /dev/null @@ -1,662 +0,0 @@ -/****************************************************************************** - * grant_table.h - * - * Interface for granting foreign access to page frames, and receiving - * page-ownership transfers. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ -#define __XEN_PUBLIC_GRANT_TABLE_H__ - -#include "xen.h" - -/* - * `incontents 150 gnttab Grant Tables - * - * Xen's grant tables provide a generic mechanism to memory sharing - * between domains. This shared memory interface underpins the split - * device drivers for block and network IO. - * - * Each domain has its own grant table. This is a data structure that - * is shared with Xen; it allows the domain to tell Xen what kind of - * permissions other domains have on its pages. Entries in the grant - * table are identified by grant references. A grant reference is an - * integer, which indexes into the grant table. It acts as a - * capability which the grantee can use to perform operations on the - * granterâs memory. - * - * This capability-based system allows shared-memory communications - * between unprivileged domains. A grant reference also encapsulates - * the details of a shared page, removing the need for a domain to - * know the real machine address of a page it is sharing. This makes - * it possible to share memory correctly with domains running in - * fully virtualised memory. - */ - -/*********************************** - * GRANT TABLE REPRESENTATION - */ - -/* Some rough guidelines on accessing and updating grant-table entries - * in a concurrency-safe manner. For more information, Linux contains a - * reference implementation for guest OSes (drivers/xen/grant_table.c, see - * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD - * - * NB. WMB is a no-op on current-generation x86 processors. However, a - * compiler barrier will still be required. - * - * Introducing a valid entry into the grant table: - * 1. Write ent->domid. - * 2. Write ent->frame: - * GTF_permit_access: Frame to which access is permitted. - * GTF_accept_transfer: Pseudo-phys frame slot being filled by new - * frame, or zero if none. - * 3. Write memory barrier (WMB). - * 4. Write ent->flags, inc. valid type. - * - * Invalidating an unused GTF_permit_access entry: - * 1. flags = ent->flags. - * 2. Observe that !(flags & (GTF_reading|GTF_writing)). - * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). - * NB. No need for WMB as reuse of entry is control-dependent on success of - * step 3, and all architectures guarantee ordering of ctrl-dep writes. - * - * Invalidating an in-use GTF_permit_access entry: - * This cannot be done directly. Request assistance from the domain controller - * which can set a timeout on the use of a grant entry and take necessary - * action. (NB. This is not yet implemented!). - * - * Invalidating an unused GTF_accept_transfer entry: - * 1. flags = ent->flags. - * 2. Observe that !(flags & GTF_transfer_committed). [*] - * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). - * NB. No need for WMB as reuse of entry is control-dependent on success of - * step 3, and all architectures guarantee ordering of ctrl-dep writes. - * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. - * The guest must /not/ modify the grant entry until the address of the - * transferred frame is written. It is safe for the guest to spin waiting - * for this to occur (detect by observing GTF_transfer_completed in - * ent->flags). - * - * Invalidating a committed GTF_accept_transfer entry: - * 1. Wait for (ent->flags & GTF_transfer_completed). - * - * Changing a GTF_permit_access from writable to read-only: - * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. - * - * Changing a GTF_permit_access from read-only to writable: - * Use SMP-safe bit-setting instruction. - */ - -/* - * Reference to a grant entry in a specified domain's grant table. - */ -typedef uint32_t grant_ref_t; - -/* - * A grant table comprises a packed array of grant entries in one or more - * page frames shared between Xen and a guest. - * [XEN]: This field is written by Xen and read by the sharing guest. - * [GST]: This field is written by the guest and read by Xen. - */ - -/* - * Version 1 of the grant table entry structure is maintained purely - * for backwards compatibility. New guests should use version 2. - */ -#if __XEN_INTERFACE_VERSION__ < 0x0003020a -#define grant_entry_v1 grant_entry -#define grant_entry_v1_t grant_entry_t -#endif -struct grant_entry_v1 { - /* GTF_xxx: various type and flag information. [XEN,GST] */ - uint16_t flags; - /* The domain being granted foreign privileges. [GST] */ - domid_t domid; - /* - * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] - * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] - */ - uint32_t frame; -}; -typedef struct grant_entry_v1 grant_entry_v1_t; - -/* The first few grant table entries will be preserved across grant table - * version changes and may be pre-populated at domain creation by tools. - */ -#define GNTTAB_NR_RESERVED_ENTRIES 8 -#define GNTTAB_RESERVED_CONSOLE 0 -#define GNTTAB_RESERVED_XENSTORE 1 - -/* - * Type of grant entry. - * GTF_invalid: This grant entry grants no privileges. - * GTF_permit_access: Allow @domid to map/access @frame. - * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame - * to this guest. Xen writes the page number to @frame. - * GTF_transitive: Allow @domid to transitively access a subrange of - * @trans_grant in @trans_domid. No mappings are allowed. - */ -#define GTF_invalid (0U<<0) -#define GTF_permit_access (1U<<0) -#define GTF_accept_transfer (2U<<0) -#define GTF_transitive (3U<<0) -#define GTF_type_mask (3U<<0) - -/* - * Subflags for GTF_permit_access. - * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] - * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] - * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] - * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] - * GTF_sub_page: Grant access to only a subrange of the page. @domid - * will only be allowed to copy from the grant, and not - * map it. [GST] - */ -#define _GTF_readonly (2) -#define GTF_readonly (1U<<_GTF_readonly) -#define _GTF_reading (3) -#define GTF_reading (1U<<_GTF_reading) -#define _GTF_writing (4) -#define GTF_writing (1U<<_GTF_writing) -#define _GTF_PWT (5) -#define GTF_PWT (1U<<_GTF_PWT) -#define _GTF_PCD (6) -#define GTF_PCD (1U<<_GTF_PCD) -#define _GTF_PAT (7) -#define GTF_PAT (1U<<_GTF_PAT) -#define _GTF_sub_page (8) -#define GTF_sub_page (1U<<_GTF_sub_page) - -/* - * Subflags for GTF_accept_transfer: - * GTF_transfer_committed: Xen sets this flag to indicate that it is committed - * to transferring ownership of a page frame. When a guest sees this flag - * it must /not/ modify the grant entry until GTF_transfer_completed is - * set by Xen. - * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag - * after reading GTF_transfer_committed. Xen will always write the frame - * address, followed by ORing this flag, in a timely manner. - */ -#define _GTF_transfer_committed (2) -#define GTF_transfer_committed (1U<<_GTF_transfer_committed) -#define _GTF_transfer_completed (3) -#define GTF_transfer_completed (1U<<_GTF_transfer_completed) - -/* - * Version 2 grant table entries. These fulfil the same role as - * version 1 entries, but can represent more complicated operations. - * Any given domain will have either a version 1 or a version 2 table, - * and every entry in the table will be the same version. - * - * The interface by which domains use grant references does not depend - * on the grant table version in use by the other domain. - */ -#if __XEN_INTERFACE_VERSION__ >= 0x0003020a -/* - * Version 1 and version 2 grant entries share a common prefix. The - * fields of the prefix are documented as part of struct - * grant_entry_v1. - */ -struct grant_entry_header { - uint16_t flags; - domid_t domid; -}; -typedef struct grant_entry_header grant_entry_header_t; - -/* - * Version 2 of the grant entry structure. - */ -union grant_entry_v2 { - grant_entry_header_t hdr; - - /* - * This member is used for V1-style full page grants, where either: - * - * -- hdr.type is GTF_accept_transfer, or - * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. - * - * In that case, the frame field has the same semantics as the - * field of the same name in the V1 entry structure. - */ - struct { - grant_entry_header_t hdr; - uint32_t pad0; - uint64_t frame; - } full_page; - - /* - * If the grant type is GTF_grant_access and GTF_sub_page is set, - * @domid is allowed to access bytes [@page_off,@page_off+@length) - * in frame @frame. - */ - struct { - grant_entry_header_t hdr; - uint16_t page_off; - uint16_t length; - uint64_t frame; - } sub_page; - - /* - * If the grant is GTF_transitive, @domid is allowed to use the - * grant @gref in domain @trans_domid, as if it was the local - * domain. Obviously, the transitive access must be compatible - * with the original grant. - * - * The current version of Xen does not allow transitive grants - * to be mapped. - */ - struct { - grant_entry_header_t hdr; - domid_t trans_domid; - uint16_t pad0; - grant_ref_t gref; - } transitive; - - uint32_t __spacer[4]; /* Pad to a power of two */ -}; -typedef union grant_entry_v2 grant_entry_v2_t; - -typedef uint16_t grant_status_t; - -#endif /* __XEN_INTERFACE_VERSION__ */ - -/*********************************** - * GRANT TABLE QUERIES AND USES - */ - -/* ` enum neg_errnoval - * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, - * ` void *args, - * ` unsigned int count) - * ` - * - * @args points to an array of a per-command data structure. The array - * has @count members - */ - -/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ -#define GNTTABOP_map_grant_ref 0 -#define GNTTABOP_unmap_grant_ref 1 -#define GNTTABOP_setup_table 2 -#define GNTTABOP_dump_table 3 -#define GNTTABOP_transfer 4 -#define GNTTABOP_copy 5 -#define GNTTABOP_query_size 6 -#define GNTTABOP_unmap_and_replace 7 -#if __XEN_INTERFACE_VERSION__ >= 0x0003020a -#define GNTTABOP_set_version 8 -#define GNTTABOP_get_status_frames 9 -#define GNTTABOP_get_version 10 -#define GNTTABOP_swap_grant_ref 11 -#endif /* __XEN_INTERFACE_VERSION__ */ -/* ` } */ - -/* - * Handle to track a mapping created via a grant reference. - */ -typedef uint32_t grant_handle_t; - -/* - * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access - * by devices and/or host CPUs. If successful, <handle> is a tracking number - * that must be presented later to destroy the mapping(s). On error, <handle> - * is a negative status code. - * NOTES: - * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address - * via which I/O devices may access the granted frame. - * 2. If GNTMAP_host_map is specified then a mapping will be added at - * either a host virtual address in the current address space, or at - * a PTE at the specified machine address. The type of mapping to - * perform is selected through the GNTMAP_contains_pte flag, and the - * address is specified in <host_addr>. - * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a - * host mapping is destroyed by other means then it is *NOT* guaranteed - * to be accounted to the correct grant reference! - */ -struct gnttab_map_grant_ref { - /* IN parameters. */ - uint64_t host_addr; - uint32_t flags; /* GNTMAP_* */ - grant_ref_t ref; - domid_t dom; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ - grant_handle_t handle; - uint64_t dev_bus_addr; -}; -typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); - -/* - * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings - * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that - * field is ignored. If non-zero, they must refer to a device/host mapping - * that is tracked by <handle> - * NOTES: - * 1. The call may fail in an undefined manner if either mapping is not - * tracked by <handle>. - * 3. After executing a batch of unmaps, it is guaranteed that no stale - * mappings will remain in the device or host TLBs. - */ -struct gnttab_unmap_grant_ref { - /* IN parameters. */ - uint64_t host_addr; - uint64_t dev_bus_addr; - grant_handle_t handle; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ -}; -typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); - -/* - * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least - * <nr_frames> pages. The frame addresses are written to the <frame_list>. - * Only <nr_frames> addresses are written, even if the table is larger. - * NOTES: - * 1. <dom> may be specified as DOMID_SELF. - * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. - * 3. Xen may not support more than a single grant-table page per domain. - */ -struct gnttab_setup_table { - /* IN parameters. */ - domid_t dom; - uint32_t nr_frames; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ -#if __XEN_INTERFACE_VERSION__ < 0x00040300 - XEN_GUEST_HANDLE(ulong) frame_list; -#else - XEN_GUEST_HANDLE(xen_pfn_t) frame_list; -#endif -}; -typedef struct gnttab_setup_table gnttab_setup_table_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); - -/* - * GNTTABOP_dump_table: Dump the contents of the grant table to the - * xen console. Debugging use only. - */ -struct gnttab_dump_table { - /* IN parameters. */ - domid_t dom; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ -}; -typedef struct gnttab_dump_table gnttab_dump_table_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); - -/* - * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The - * foreign domain has previously registered its interest in the transfer via - * <domid, ref>. - * - * Note that, even if the transfer fails, the specified page no LONG_PTRer belongs - * to the calling domain *unless* the error is GNTST_bad_page. - */ -struct gnttab_transfer { - /* IN parameters. */ - xen_pfn_t mfn; - domid_t domid; - grant_ref_t ref; - /* OUT parameters. */ - int16_t status; -}; -typedef struct gnttab_transfer gnttab_transfer_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); - - -/* - * GNTTABOP_copy: Hypervisor based copy - * source and destinations can be eithers MFNs or, for foreign domains, - * grant references. the foreign domain has to grant read/write access - * in its grant table. - * - * The flags specify what type source and destinations are (either MFN - * or grant reference). - * - * Note that this can also be used to copy data between two domains - * via a third party if the source and destination domains had previously - * grant appropriate access to their pages to the third party. - * - * source_offset specifies an offset in the source frame, dest_offset - * the offset in the target frame and len specifies the number of - * bytes to be copied. - */ - -#define _GNTCOPY_source_gref (0) -#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) -#define _GNTCOPY_dest_gref (1) -#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) - -struct gnttab_copy { - /* IN parameters. */ - struct { - union { - grant_ref_t ref; - xen_pfn_t gmfn; - } u; - domid_t domid; - uint16_t offset; - } source, dest; - uint16_t len; - uint16_t flags; /* GNTCOPY_* */ - /* OUT parameters. */ - int16_t status; -}; -typedef struct gnttab_copy gnttab_copy_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); - -/* - * GNTTABOP_query_size: Query the current and maximum sizes of the shared - * grant table. - * NOTES: - * 1. <dom> may be specified as DOMID_SELF. - * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. - */ -struct gnttab_query_size { - /* IN parameters. */ - domid_t dom; - /* OUT parameters. */ - uint32_t nr_frames; - uint32_t max_nr_frames; - int16_t status; /* => enum grant_status */ -}; -typedef struct gnttab_query_size gnttab_query_size_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); - -/* - * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings - * tracked by <handle> but atomically replace the page table entry with one - * pointing to the machine address under <new_addr>. <new_addr> will be - * redirected to the null entry. - * NOTES: - * 1. The call may fail in an undefined manner if either mapping is not - * tracked by <handle>. - * 2. After executing a batch of unmaps, it is guaranteed that no stale - * mappings will remain in the device or host TLBs. - */ -struct gnttab_unmap_and_replace { - /* IN parameters. */ - uint64_t host_addr; - uint64_t new_addr; - grant_handle_t handle; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ -}; -typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); - -#if __XEN_INTERFACE_VERSION__ >= 0x0003020a -/* - * GNTTABOP_set_version: Request a particular version of the grant - * table shared table structure. This operation can only be performed - * once in any given domain. It must be performed before any grants - * are activated; otherwise, the domain will be stuck with version 1. - * The only defined versions are 1 and 2. - */ -struct gnttab_set_version { - /* IN/OUT parameters */ - uint32_t version; -}; -typedef struct gnttab_set_version gnttab_set_version_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); - - -/* - * GNTTABOP_get_status_frames: Get the list of frames used to store grant - * status for <dom>. In grant format version 2, the status is separated - * from the other shared grant fields to allow more efficient synchronization - * using barriers instead of atomic cmpexch operations. - * <nr_frames> specify the size of vector <frame_list>. - * The frame addresses are returned in the <frame_list>. - * Only <nr_frames> addresses are returned, even if the table is larger. - * NOTES: - * 1. <dom> may be specified as DOMID_SELF. - * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. - */ -struct gnttab_get_status_frames { - /* IN parameters. */ - uint32_t nr_frames; - domid_t dom; - /* OUT parameters. */ - int16_t status; /* => enum grant_status */ - XEN_GUEST_HANDLE(uint64_t) frame_list; -}; -typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); - -/* - * GNTTABOP_get_version: Get the grant table version which is in - * effect for domain <dom>. - */ -struct gnttab_get_version { - /* IN parameters */ - domid_t dom; - uint16_t pad; - /* OUT parameters */ - uint32_t version; -}; -typedef struct gnttab_get_version gnttab_get_version_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); - -/* - * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. - */ -struct gnttab_swap_grant_ref { - /* IN parameters */ - grant_ref_t ref_a; - grant_ref_t ref_b; - /* OUT parameters */ - int16_t status; /* => enum grant_status */ -}; -typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; -DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); - -#endif /* __XEN_INTERFACE_VERSION__ */ - -/* - * Bitfield values for gnttab_map_grant_ref.flags. - */ - /* Map the grant entry for access by I/O devices. */ -#define _GNTMAP_device_map (0) -#define GNTMAP_device_map (1<<_GNTMAP_device_map) - /* Map the grant entry for access by host CPUs. */ -#define _GNTMAP_host_map (1) -#define GNTMAP_host_map (1<<_GNTMAP_host_map) - /* Accesses to the granted frame will be restricted to read-only access. */ -#define _GNTMAP_readonly (2) -#define GNTMAP_readonly (1<<_GNTMAP_readonly) - /* - * GNTMAP_host_map subflag: - * 0 => The host mapping is usable only by the guest OS. - * 1 => The host mapping is usable by guest OS + current application. - */ -#define _GNTMAP_application_map (3) -#define GNTMAP_application_map (1<<_GNTMAP_application_map) - - /* - * GNTMAP_contains_pte subflag: - * 0 => This map request contains a host virtual address. - * 1 => This map request contains the machine addess of the PTE to update. - */ -#define _GNTMAP_contains_pte (4) -#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) - -#define _GNTMAP_can_fail (5) -#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) - -/* - * Bits to be placed in guest kernel available PTE bits (architecture - * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). - */ -#define _GNTMAP_guest_avail0 (16) -#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) - -/* - * Values for error status returns. All errors are -ve. - */ -/* ` enum grant_status { */ -#define GNTST_okay (0) /* Normal return. */ -#define GNTST_general_error (-1) /* General undefined error. */ -#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ -#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ -#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ -#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ -#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ -#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ -#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ -#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ -#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ -#define GNTST_address_too_big (-11) /* transfer page address too large. */ -#define GNTST_eagain (-12) /* Operation not done; try again. */ -/* ` } */ - -#define GNTTABOP_error_msgs { \ - "okay", \ - "undefined error", \ - "unrecognised domain id", \ - "invalid grant reference", \ - "invalid mapping handle", \ - "invalid virtual address", \ - "invalid device address", \ - "no spare translation slot in the I/O MMU", \ - "permission denied", \ - "bad page", \ - "copy arguments cross page boundary", \ - "page address size too large", \ - "operation not done; try again" \ -} - -#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/io/netif.h b/include/xen/io/netif.h deleted file mode 100644 index 3aa2dec..0000000 --- a/include/xen/io/netif.h +++ /dev/null @@ -1,236 +0,0 @@ -/****************************************************************************** - * netif.h - * - * Unified network-device I/O interface for Xen guest OSes. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2003-2004, Keir Fraser - */ - -#ifndef __XEN_PUBLIC_IO_NETIF_H__ -#define __XEN_PUBLIC_IO_NETIF_H__ - -#include "ring.h" -#include "../grant_table.h" - -/* - * Older implementation of Xen network frontend / backend has an - * implicit dependency on the MAX_SKB_FRAGS as the maximum number of - * ring slots a skb can use. Netfront / netback may not work as - * expected when frontend and backend have different MAX_SKB_FRAGS. - * - * A better approach is to add mechanism for netfront / netback to - * negotiate this value. However we cannot fix all possible - * frontends, so we need to define a value which states the minimum - * slots backend must support. - * - * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS - * (18), which is proved to work with most frontends. Any new backend - * which doesn't negotiate with frontend should expect frontend to - * send a valid packet using slots up to this value. - */ -#define XEN_NETIF_NR_SLOTS_MIN 18 - -/* - * Notifications after enqueuing any type of message should be conditional on - * the appropriate req_event or rsp_event field in the shared ring. - * If the client sends notification for rx requests then it should specify - * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume - * that it cannot safely queue packets (as it may not be kicked to send them). - */ - -/* - * "feature-split-event-channels" is introduced to separate guest TX - * and RX notification. Backend either doesn't support this feature or - * advertises it via xenstore as 0 (disabled) or 1 (enabled). - * - * To make use of this feature, frontend should allocate two event - * channels for TX and RX, advertise them to backend as - * "event-channel-tx" and "event-channel-rx" respectively. If frontend - * doesn't want to use this feature, it just writes "event-channel" - * node as before. - */ - -/* - * This is the 'wire' format for packets: - * Request 1: netif_tx_request -- NETTXF_* (any flags) - * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) - * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) - * Request 4: netif_tx_request -- NETTXF_more_data - * Request 5: netif_tx_request -- NETTXF_more_data - * ... - * Request N: netif_tx_request -- 0 - */ - -/* Protocol checksum field is blank in the packet (hardware offload)? */ -#define _NETTXF_csum_blank (0) -#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) - -/* Packet data has been validated against protocol checksum. */ -#define _NETTXF_data_validated (1) -#define NETTXF_data_validated (1U<<_NETTXF_data_validated) - -/* Packet continues in the next request descriptor. */ -#define _NETTXF_more_data (2) -#define NETTXF_more_data (1U<<_NETTXF_more_data) - -/* Packet to be followed by extra descriptor(s). */ -#define _NETTXF_extra_info (3) -#define NETTXF_extra_info (1U<<_NETTXF_extra_info) - -#define XEN_NETIF_MAX_TX_SIZE 0xFFFF -struct netif_tx_request { - grant_ref_t gref; /* Reference to buffer page */ - uint16_t offset; /* Offset within buffer page */ - uint16_t flags; /* NETTXF_* */ - uint16_t id; /* Echoed in response message. */ - uint16_t size; /* Packet size in bytes. */ -}; -typedef struct netif_tx_request netif_tx_request_t; - -/* Types of netif_extra_info descriptors. */ -#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ -#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ -#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ -#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ -#define XEN_NETIF_EXTRA_TYPE_MAX (4) - -/* netif_extra_info flags. */ -#define _XEN_NETIF_EXTRA_FLAG_MORE (0) -#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) - -/* GSO types - only TCPv4 currently supported. */ -#define XEN_NETIF_GSO_TYPE_TCPV4 (1) - -/* - * This structure needs to fit within both netif_tx_request and - * netif_rx_response for compatibility. - */ -struct netif_extra_info { - uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ - uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ - - union { - /* - * XEN_NETIF_EXTRA_TYPE_GSO: - */ - struct { - /* - * Maximum payload size of each segment. For example, for TCP this - * is just the path MSS. - */ - uint16_t size; - - /* - * GSO type. This determines the protocol of the packet and any - * extra features required to segment the packet properly. - */ - uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ - - /* Future expansion. */ - uint8_t pad; - - /* - * GSO features. This specifies any extra GSO features required - * to process this packet, such as ECN support for TCPv4. - */ - uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ - } gso; - - /* - * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: - * Backend advertises availability via 'feature-multicast-control' - * xenbus node containing value '1'. - * Frontend requests this feature by advertising - * 'request-multicast-control' xenbus node containing value '1'. - * If multicast control is requested then multicast flooding is - * disabled and the frontend must explicitly register its interest - * in multicast groups using dummy transmit requests containing - * MCAST_{ADD,DEL} extra-info fragments. - */ - struct { - uint8_t addr[6]; /* Address to add/remove. */ - } mcast; - - uint16_t pad[3]; - } u; -}; -typedef struct netif_extra_info netif_extra_info_t; - -struct netif_tx_response { - uint16_t id; - int16_t status; /* NETIF_RSP_* */ -}; -typedef struct netif_tx_response netif_tx_response_t; - -struct netif_rx_request { - uint16_t id; /* Echoed in response message. */ - grant_ref_t gref; /* Reference to incoming granted frame */ -}; -typedef struct netif_rx_request netif_rx_request_t; - -/* Packet data has been validated against protocol checksum. */ -#define _NETRXF_data_validated (0) -#define NETRXF_data_validated (1U<<_NETRXF_data_validated) - -/* Protocol checksum field is blank in the packet (hardware offload)? */ -#define _NETRXF_csum_blank (1) -#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) - -/* Packet continues in the next request descriptor. */ -#define _NETRXF_more_data (2) -#define NETRXF_more_data (1U<<_NETRXF_more_data) - -/* Packet to be followed by extra descriptor(s). */ -#define _NETRXF_extra_info (3) -#define NETRXF_extra_info (1U<<_NETRXF_extra_info) - -struct netif_rx_response { - uint16_t id; - uint16_t offset; /* Offset in page of start of received packet */ - uint16_t flags; /* NETRXF_* */ - int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ -}; -typedef struct netif_rx_response netif_rx_response_t; - -/* - * Generate netif ring structures and types. - */ - -DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); -DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); - -#define NETIF_RSP_DROPPED -2 -#define NETIF_RSP_ERROR -1 -#define NETIF_RSP_OKAY 0 -/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ -#define NETIF_RSP_NULL 1 - -#endif - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/io/ring.h b/include/xen/io/ring.h deleted file mode 100644 index 938693f..0000000 --- a/include/xen/io/ring.h +++ /dev/null @@ -1,323 +0,0 @@ -/****************************************************************************** - * ring.h - * - * Shared producer-consumer ring macros. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Tim Deegan and Andrew Warfield November 2004. - */ - -#ifndef __XEN_PUBLIC_IO_RING_H__ -#define __XEN_PUBLIC_IO_RING_H__ - -#include "../xen-compat.h" - -#if __XEN_INTERFACE_VERSION__ < 0x00030208 -#define xen_mb() mb() -#define xen_rmb() rmb() -#define xen_wmb() wmb() -#endif - -typedef unsigned int RING_IDX; - -/* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) -#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) -#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) -#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) -#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) - -/* - * Calculate size of a shared ring, given the total available space for the - * ring and indexes (_sz), and the name tag of the request/response structure. - * A ring contains as many entries as will fit, rounded down to the nearest - * power of two (so we can mask with (size-1) to loop around). - */ -#define __CONST_RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) -/* - * The same for passing in an actual pointer instead of a name tag. - */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (LONG_PTR)(_s)->ring + (LONG_PTR)(_s)) / sizeof((_s)->ring[0]))) - -/* - * Macros to make the correct C datatypes for a new kind of ring. - * - * To make a new ring datatype, you need to have two message structures, - * let's say request_t, and response_t already defined. - * - * In a header where you want the ring datatype declared, you then do: - * - * DEFINE_RING_TYPES(mytag, request_t, response_t); - * - * These expand out to give you a set of types, as you can see below. - * The most important of these are: - * - * mytag_sring_t - The shared ring. - * mytag_front_ring_t - The 'front' half of the ring. - * mytag_back_ring_t - The 'back' half of the ring. - * - * To initialize a ring in your code you need to know the location and size - * of the shared memory area (PAGE_SIZE, for instance). To initialise - * the front half: - * - * mytag_front_ring_t front_ring; - * SHARED_RING_INIT((mytag_sring_t *)shared_page); - * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); - * - * Initializing the back follows similarly (note that only the front - * initializes the shared ring): - * - * mytag_back_ring_t back_ring; - * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); - */ - -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - union { \ - struct { \ - uint8_t smartpoll_active; \ - } netif; \ - struct { \ - uint8_t msg; \ - } tapif_user; \ - uint8_t pvt_pad[4]; \ - } private; \ - uint8_t __pad[44]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* Syntactic sugar */ \ -typedef struct __name##_sring __name##_sring_t; \ -typedef struct __name##_front_ring __name##_front_ring_t; \ -typedef struct __name##_back_ring __name##_back_ring_t - -/* - * Macros for manipulating rings. - * - * FRONT_RING_whatever works on the "front end" of a ring: here - * requests are pushed on to the ring and responses taken off it. - * - * BACK_RING_whatever works on the "back end" of a ring: here - * requests are taken off the ring and responses put on. - * - * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. - * This is OK in 1-for-1 request-response situations where the - * requestor (front end) never has more than RING_SIZE()-1 - * outstanding requests. - */ - -/* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ - (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ -} while(0) - -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ -} while (0) - -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ -} while (0) - -/* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) - -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) - -/* How big is this ring? */ -#define RING_SIZE(_r) \ - ((_r)->nr_ents) - -/* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ - (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) - -/* Test if there is an empty slot available on the front ring. - * (This is only meaningful from the front. ) - */ -#define RING_FULL(_r) \ - (RING_FREE_REQUESTS(_r) == 0) - -/* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ - ((_r)->sring->rsp_prod - (_r)->rsp_cons) - -#ifdef __GNUC__ -#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ -}) -#else -/* Same as above, but without the nice GCC ({ ... }) syntax. */ -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ((((_r)->sring->req_prod - (_r)->req_cons) < \ - (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ - ((_r)->sring->req_prod - (_r)->req_cons) : \ - (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) -#endif - -/* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) - -#define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) - -/* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ - (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) - -#define RING_PUSH_REQUESTS(_r) do { \ - xen_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ -} while (0) - -#define RING_PUSH_RESPONSES(_r) do { \ - xen_wmb(); /* front sees resps /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ -} while (0) - -/* - * Notification hold-off (req_event and rsp_event): - * - * When queueing requests or responses on a shared ring, it may not always be - * necessary to notify the remote end. For example, if requests are in flight - * in a backend, the front may be able to queue further requests without - * notifying the back (if the back checks for new requests when it queues - * responses). - * - * When enqueuing requests or responses: - * - * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument - * is a boolean return value. True indicates that the receiver requires an - * asynchronous notification. - * - * After dequeuing requests or responses (before sleeping the connection): - * - * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). - * The second argument is a boolean return value. True indicates that there - * are pending messages on the ring (i.e., the connection should not be put - * to sleep). - * - * These macros will set the req_event/rsp_event field to trigger a - * notification on the very next message that is enqueued. If you want to - * create batches of work (i.e., only receive a notification after several - * messages have been enqueued) then you will need to create a customised - * version of the FINAL_CHECK macro in your own code, which sets the event - * field appropriately. - */ - -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - xen_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - xen_mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ -} while (0) - -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - xen_wmb(); /* front sees resps /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ -} while (0) - -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - xen_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ -} while (0) - -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - xen_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ -} while (0) - -#endif /* __XEN_PUBLIC_IO_RING_H__ */ - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/io/xenbus.h b/include/xen/io/xenbus.h deleted file mode 100644 index 4a053df..0000000 --- a/include/xen/io/xenbus.h +++ /dev/null @@ -1,80 +0,0 @@ -/***************************************************************************** - * xenbus.h - * - * Xenbus protocol details. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (C) 2005 XenSource Ltd. - */ - -#ifndef _XEN_PUBLIC_IO_XENBUS_H -#define _XEN_PUBLIC_IO_XENBUS_H - -/* - * The state of either end of the Xenbus, i.e. the current communication - * status of initialisation across the bus. States here imply nothing about - * the state of the connection between the driver and the kernel's device - * layers. - */ -enum xenbus_state { - XenbusStateUnknown = 0, - - XenbusStateInitialising = 1, - - /* - * InitWait: Finished early initialisation but waiting for information - * from the peer or hotplug scripts. - */ - XenbusStateInitWait = 2, - - /* - * Initialised: Waiting for a connection from the peer. - */ - XenbusStateInitialised = 3, - - XenbusStateConnected = 4, - - /* - * Closing: The device is being closed due to an error or an unplug event. - */ - XenbusStateClosing = 5, - - XenbusStateClosed = 6, - - /* - * Reconfiguring: The device is being reconfigured. - */ - XenbusStateReconfiguring = 7, - - XenbusStateReconfigured = 8 -}; -typedef enum xenbus_state XenbusState; - -#endif /* _XEN_PUBLIC_IO_XENBUS_H */ - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/public/arch-x86/xen-x86_32.h b/include/xen/public/arch-x86/xen-x86_32.h new file mode 100644 index 0000000..4f75f5b --- /dev/null +++ b/include/xen/public/arch-x86/xen-x86_32.h @@ -0,0 +1,172 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +/* Non-PAE bounds are obsolete. */ +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((ULONG_PTR *)MACH2PHYS_VIRT_START) +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef ___DEFINE_XEN_GUEST_HANDLE +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define int64_aligned_t int64_t __attribute__((aligned(8))) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + ULONG_PTR cr2; + ULONG_PTR pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + ULONG_PTR cs; + ULONG_PTR eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/arch-x86/xen-x86_64.h b/include/xen/public/arch-x86/xen-x86_64.h new file mode 100644 index 0000000..10a433a --- /dev/null +++ b/include/xen/public/arch-x86/xen-x86_64.h @@ -0,0 +1,202 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((ULONG_PTR *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, ULONG_PTR base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((ULONG_PTR)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((ULONG_PTR)(cr3) >> 12) + +struct arch_vcpu_info { + ULONG_PTR cr2; + ULONG_PTR pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef ULONG_PTR xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/arch-x86/xen.h b/include/xen/public/arch-x86/xen.h new file mode 100644 index 0000000..dea64ff --- /dev/null +++ b/include/xen/public/arch-x86/xen.h @@ -0,0 +1,307 @@ +/****************************************************************************** + * arch-x86/xen.h + * + * Guest OS interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "../xen.h" + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_H__ + +/* Structural guest handles introduced in 0x00030201. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. + * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but + * they might not be on other architectures. + */ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) + +#if defined(__i386__) +#include "xen-x86_32.h" +#elif defined(__x86_64__) +#include "xen-x86_64.h" +#endif + +#ifndef __ASSEMBLY__ +typedef ULONG_PTR xen_pfn_t; +#define PRI_xen_pfn "lx" +#endif + +#define XEN_HAVE_PV_GUEST_ENTRY 1 + +#define XEN_HAVE_PV_UPCALL_MASK 1 + +/* + * `incontents 200 segdesc Segment Descriptor Tables + */ +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); + * ` + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + * + * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc); + * ` + * ` @pa The machine physical address of the descriptor to + * ` update. Must be either a descriptor page or writable. + * ` @desc The descriptor value to update, in the same format as a + * ` native descriptor table entry. + */ + +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +#define XEN_LEGACY_MAX_VCPUS 32 + +#ifndef __ASSEMBLY__ + +typedef ULONG_PTR xen_ulong_t; +#define PRI_xen_ulong "lx" + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_stack_switch(ULONG_PTR ss, ULONG_PTR esp); + * ` + * Sets the stack segment and pointer for the current vcpu. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); + * ` + */ +/* + * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ + uint16_t cs; /* code selector */ + ULONG_PTR address; /* code offset */ +}; +typedef struct trap_info trap_info_t; +DEFINE_XEN_GUEST_HANDLE(trap_info_t); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + * + * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise + * for HVM and PVH guests, not all information in this structure is updated: + * + * - For HVM guests, the structures read include: fpu_ctxt (if + * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] + * + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other fields not used should be set to 0. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) + ULONG_PTR flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + ULONG_PTR ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + ULONG_PTR gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + ULONG_PTR kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + ULONG_PTR ctrlreg[8]; /* CR0-CR7 (control registers) */ + ULONG_PTR debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + ULONG_PTR event_callback_cs; /* CS:EIP of event callback */ + ULONG_PTR event_callback_eip; + ULONG_PTR failsafe_callback_cs; /* CS:EIP of failsafe callback */ + ULONG_PTR failsafe_callback_eip; +#else + ULONG_PTR event_callback_eip; + ULONG_PTR failsafe_callback_eip; +#ifdef __XEN__ + union { + ULONG_PTR syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + }; +#else + ULONG_PTR syscall_callback_eip; +#endif +#endif + ULONG_PTR vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + ULONG_PTR max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it has + * been set to invalid e.g. due to the p2m being too large for the 3-level + * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr + * is to be used. + */ + xen_pfn_t pfn_to_mfn_frame_list_list; + ULONG_PTR nmi_reason; + /* + * Following three fields are valid if p2m_cr3 contains a value different + * from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register state + * and holds the folded machine frame number (via xen_pfn_to_cr3) of a + * L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All entries + * in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 and + * a value with the least significant bit set indicates that a mapping + * update is in progress. This allows guest external software (e.g. in Dom0) + * to verify that read mappings are consistent and whether they have changed + * since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an atomic + * write only. + */ + ULONG_PTR p2m_cr3; /* cr3 value of the p2m address space */ + ULONG_PTR p2m_vaddr; /* virtual address of the p2m list */ + ULONG_PTR p2m_generation; /* generation count of p2m mapping */ +#ifdef __i386__ + /* There's no room for this field in the generic structure. */ + uint32_t wc_sec_hi; +#endif +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, ULONG_PTR value); + * + * ` ULONG_PTR + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((ULONG_PTR)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/errno.h b/include/xen/public/errno.h new file mode 100644 index 0000000..c644633 --- /dev/null +++ b/include/xen/public/errno.h @@ -0,0 +1,93 @@ +#ifndef __XEN_PUBLIC_ERRNO_H__ + +#ifndef __ASSEMBLY__ + +#define XEN_ERRNO(name, value) XEN_##name = value, +enum xen_errno { + +#else /* !__ASSEMBLY__ */ + +#define XEN_ERRNO(name, value) .equ XEN_##name, value + +#endif /* __ASSEMBLY__ */ + +/* ` enum neg_errnoval { [ -Efoo for each Efoo in the list below ] } */ +/* ` enum errnoval { */ + +#endif /* __XEN_PUBLIC_ERRNO_H__ */ + +#ifdef XEN_ERRNO + +/* + * Values originating from x86 Linux. Please consider using respective + * values when adding new definitions here. + */ + +XEN_ERRNO(EPERM, 1) /* Operation not permitted */ +XEN_ERRNO(ENOENT, 2) /* No such file or directory */ +XEN_ERRNO(ESRCH, 3) /* No such process */ +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. */ +XEN_ERRNO(EINTR, 4) /* Interrupted system call */ +#endif +XEN_ERRNO(EIO, 5) /* I/O error */ +XEN_ERRNO(ENXIO, 6) /* No such device or address */ +XEN_ERRNO(E2BIG, 7) /* Arg list too LONG_PTR */ +XEN_ERRNO(ENOEXEC, 8) /* Exec format error */ +XEN_ERRNO(EBADF, 9) /* Bad file number */ +XEN_ERRNO(ECHILD, 10) /* No child processes */ +XEN_ERRNO(EAGAIN, 11) /* Try again */ +XEN_ERRNO(ENOMEM, 12) /* Out of memory */ +XEN_ERRNO(EACCES, 13) /* Permission denied */ +XEN_ERRNO(EFAULT, 14) /* Bad address */ +XEN_ERRNO(EBUSY, 16) /* Device or resource busy */ +XEN_ERRNO(EEXIST, 17) /* File exists */ +XEN_ERRNO(EXDEV, 18) /* Cross-device link */ +XEN_ERRNO(ENODEV, 19) /* No such device */ +XEN_ERRNO(EINVAL, 22) /* Invalid argument */ +XEN_ERRNO(ENFILE, 23) /* File table overflow */ +XEN_ERRNO(EMFILE, 24) /* Too many open files */ +XEN_ERRNO(ENOSPC, 28) /* No space left on device */ +XEN_ERRNO(EMLINK, 31) /* Too many links */ +XEN_ERRNO(EDOM, 33) /* Math argument out of domain of func */ +XEN_ERRNO(ERANGE, 34) /* Math result not representable */ +XEN_ERRNO(EDEADLK, 35) /* Resource deadlock would occur */ +XEN_ERRNO(ENAMETOOLONG, 36) /* File name too LONG_PTR */ +XEN_ERRNO(ENOLCK, 37) /* No record locks available */ +XEN_ERRNO(ENOSYS, 38) /* Function not implemented */ +XEN_ERRNO(EBADRQC, 56) /* Invalid request code */ +XEN_ERRNO(EBADSLT, 57) /* Invalid slot */ +XEN_ERRNO(ENODATA, 61) /* No data available */ +XEN_ERRNO(ETIME, 62) /* Timer expired */ +XEN_ERRNO(EBADMSG, 74) /* Not a data message */ +XEN_ERRNO(EOVERFLOW, 75) /* Value too large for defined data type */ +XEN_ERRNO(EILSEQ, 84) /* Illegal byte sequence */ +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. */ +XEN_ERRNO(ERESTART, 85) /* Interrupted system call should be restarted */ +#endif +XEN_ERRNO(EUSERS, 87) /* Too many users */ +XEN_ERRNO(EOPNOTSUPP, 95) /* Operation not supported on transport endpoint */ +XEN_ERRNO(EADDRINUSE, 98) /* Address already in use */ +XEN_ERRNO(EADDRNOTAVAIL, 99) /* Cannot assign requested address */ +XEN_ERRNO(ENOBUFS, 105) /* No buffer space available */ +XEN_ERRNO(EISCONN, 106) /* Transport endpoint is already connected */ +XEN_ERRNO(ENOTCONN, 107) /* Transport endpoint is not connected */ +XEN_ERRNO(ESHUTDOWN, 108) /* Cannot send after transport endpoint shutdown */ +XEN_ERRNO(ETOOMANYREFS, 109) /* Too many references: cannot splice */ +XEN_ERRNO(ETIMEDOUT, 110) /* Connection timed out */ + +#undef XEN_ERRNO +#endif /* XEN_ERRNO */ + +#ifndef __XEN_PUBLIC_ERRNO_H__ +#define __XEN_PUBLIC_ERRNO_H__ + +/* ` } */ + +#ifndef __ASSEMBLY__ +}; +#endif + +#define XEN_EWOULDBLOCK XEN_EAGAIN /* Operation would block */ +#define XEN_EDEADLOCK XEN_EDEADLK /* Resource deadlock would occur */ + +#endif /* __XEN_PUBLIC_ERRNO_H__ */ diff --git a/include/xen/public/grant_table.h b/include/xen/public/grant_table.h new file mode 100644 index 0000000..f1b7917 --- /dev/null +++ b/include/xen/public/grant_table.h @@ -0,0 +1,682 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include "xen.h" + +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism to memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granterâs memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* ` enum neg_errnoval + * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, + * ` void *args, + * ` unsigned int count) + * ` + * + * @args points to an array of a per-command data structure. The array + * has @count members + */ + +/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ +#define GNTTABOP_map_grant_ref 0 +#define GNTTABOP_unmap_grant_ref 1 +#define GNTTABOP_setup_table 2 +#define GNTTABOP_dump_table 3 +#define GNTTABOP_transfer 4 +#define GNTTABOP_copy 5 +#define GNTTABOP_query_size 6 +#define GNTTABOP_unmap_and_replace 7 +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +#define GNTTABOP_set_version 8 +#define GNTTABOP_get_status_frames 9 +#define GNTTABOP_get_version 10 +#define GNTTABOP_swap_grant_ref 11 +#define GNTTABOP_cache_flush 12 +#endif /* __XEN_INTERFACE_VERSION__ */ +/* ` } */ + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle> + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + XEN_GUEST_HANDLE(ulong) frame_list; +#else + XEN_GUEST_HANDLE(xen_pfn_t) frame_list; +#endif +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); + +/* + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <domid, ref>. + * + * Note that, even if the transfer fails, the specified page no LONG_PTRer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be eithers MFNs or, for foreign domains, + * grant references. the foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * grant appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +struct gnttab_copy { + /* IN parameters. */ + struct gnttab_copy_ptr { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_copy gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by <handle> but atomically replace the page table entry with one + * pointing to the machine address under <new_addr>. <new_addr> will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * <nr_frames> specify the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain <dom>. + */ +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +/* + * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. + */ +struct gnttab_swap_grant_ref { + /* IN parameters */ + grant_ref_t ref_a; + grant_ref_t ref_b; + /* OUT parameters */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); + +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint16_t offset; /* offset from start of grant */ + uint16_t length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1<<0) +#define GNTTAB_CACHE_INVAL (1<<1) +#define GNTTAB_CACHE_SOURCE_GREF (1<<31) + uint32_t op; +}; +typedef struct gnttab_cache_flush gnttab_cache_flush_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine addess of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + +/* + * Values for error status returns. All errors are -ve. + */ +/* ` enum grant_status { */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. */ +/* ` } */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/io/netif.h b/include/xen/public/io/netif.h new file mode 100644 index 0000000..353eab7 --- /dev/null +++ b/include/xen/public/io/netif.h @@ -0,0 +1,422 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1" + * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>" + * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>" + * + * If there is any inconsistency in the XenStore data, the backend may + * choose not to connect any queues, instead treating the request as an + * error. This includes scenarios where more (or fewer) queues were + * requested than the frontend provided details for. + * + * Mapping of packets to queues is considered to be a function of the + * transmitting system (backend or frontend) and is not negotiated + * between the two. Guests are free to transmit packets on any queue + * they choose, provided it has been set up correctly. Guests must be + * prepared to receive packets on any queue they have requested be set up. + */ + +/* + * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum + * offload off or on. If it is missing then the feature is assumed to be on. + * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum + * offload on or off. If it is missing then the feature is assumed to be off. + */ + +/* + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither + * frontends nor backends are assumed to be capable unless the flags are + * present. + */ + +/* + * This is the 'wire' format for packets: + * Request 1: netif_tx_request_t -- NETTXF_* (any flags) + * [Request 2: netif_extra_info_t] (only if request 1 has NETTXF_extra_info) + * [Request 3: netif_extra_info_t] (only if request 2 has XEN_NETIF_EXTRA_MORE) + * Request 4: netif_tx_request_t -- NETTXF_more_data + * Request 5: netif_tx_request_t -- NETTXF_more_data + * ... + * Request N: netif_tx_request_t -- 0 + */ + +/* + * Guest transmit + * ============== + * + * Ring slot size is 12 octets, however not all request/response + * structs use the full size. + * + * tx request data (netif_tx_request_t) + * ------------------------------------ + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | offset | flags | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | size | + * +-----+-----+-----+-----+ + * + * grant ref: Reference to buffer page. + * offset: Offset within buffer page. + * flags: NETTXF_*. + * id: request identifier, echoed in response. + * size: packet size in bytes. + * + * tx response (netif_tx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | status | unused | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | unused | + * +-----+-----+-----+-----+ + * + * id: reflects id in transmit request + * status: NETIF_RSP_* + * + * Guest receive + * ============= + * + * Ring slot size is 8 octets. + * + * rx request (netif_rx_request_t) + * ------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | pad | gref | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: request identifier, echoed in response. + * gref: reference to incoming granted frame. + * + * rx response (netif_rx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | offset | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: reflects id in receive request + * offset: offset in page of start of received packet + * flags: NETRXF_* + * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. + * + * Extra Info + * ========== + * + * Can be present if initial request has NET{T,R}XF_extra_info, or + * previous extra request has XEN_NETIF_EXTRA_MORE. + * + * The struct therefore needs to fit into either a tx or rx slot and + * is therefore limited to 8 octets. + * + * extra info (netif_extra_info_t) + * ------------------------------- + * + * General format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| type specfic data | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | padding for tx | + * +-----+-----+-----+-----+ + * + * type: XEN_NETIF_EXTRA_TYPE_* + * flags: XEN_NETIF_EXTRA_FLAG_* + * padding for tx: present only in the tx case due to 8 octet limit + * from rx case. Not shown in type specific entries below. + * + * XEN_NETIF_EXTRA_TYPE_GSO: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| size |type | pad | features | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_GSO + * flags: XEN_NETIF_EXTRA_FLAG_* + * size: Maximum payload size of each segment. + * type: XEN_NETIF_GSO_TYPE_* + * features: EN_NETIF_GSO_FEAT_* + * + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| addr | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} + * flags: XEN_NETIF_EXTRA_FLAG_* + * addr: address to add/remove + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF +struct netif_tx_request { + grant_ref_t gref; /* Reference to buffer page */ + uint16_t offset; /* Offset within buffer page */ + uint16_t flags; /* NETTXF_* */ + uint16_t id; /* Echoed in response message. */ + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MAX (4) + +/* netif_extra_info_t flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types */ +#define XEN_NETIF_GSO_TYPE_NONE (0) +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) +#define XEN_NETIF_GSO_TYPE_TCPV6 (2) + +/* + * This structure needs to fit within both netif_tx_request_t and + * netif_rx_response_t for compatibility. + */ +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + /* + * XEN_NETIF_EXTRA_TYPE_GSO: + */ + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + /* + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * Backend advertises availability via 'feature-multicast-control' + * xenbus node containing value '1'. + * Frontend requests this feature by advertising + * 'request-multicast-control' xenbus node containing value '1'. + * If multicast control is requested then multicast flooding is + * disabled and the frontend must explicitly register its interest + * in multicast groups using dummy transmit requests containing + * MCAST_{ADD,DEL} extra-info fragments. + */ + struct { + uint8_t addr[6]; /* Address to add/remove. */ + } mcast; + + uint16_t pad[3]; + } u; +}; +typedef struct netif_extra_info netif_extra_info_t; + +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + uint16_t pad; + grant_ref_t gref; /* Reference to incoming granted frame */ +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ + uint16_t flags; /* NETRXF_* */ + int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */ +#define NETIF_RSP_NULL 1 + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/io/ring.h b/include/xen/public/io/ring.h new file mode 100644 index 0000000..427a429 --- /dev/null +++ b/include/xen/public/io/ring.h @@ -0,0 +1,312 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (LONG_PTR)(_s)->ring + (LONG_PTR)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/io/xenbus.h b/include/xen/public/io/xenbus.h new file mode 100644 index 0000000..927f9db --- /dev/null +++ b/include/xen/public/io/xenbus.h @@ -0,0 +1,80 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/trace.h b/include/xen/public/trace.h new file mode 100644 index 0000000..8de630d --- /dev/null +++ b/include/xen/public/trace.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +#define TRACE_EXTRA_MAX 7 +#define TRACE_EXTRA_SHIFT 28 + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_PV 0x0020f000 /* Xen PV traces */ +#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ +#define TRC_ALL 0x0ffff000 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) +#define TRC_HD_CYCLE_FLAG (1UL<<31) +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) +#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ +#define TRC_HVM_EMUL 0x00084000 /* emulated devices */ + +#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ +#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ + +/* + * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are + * reserved for encoding what scheduler produced the information. The + * actual event is encoded in the last 9 bits. + * + * This means we have 8 scheduling IDs available (which means at most 8 + * schedulers generating events) and, in each scheduler, up to 512 + * different events. + */ +#define TRC_SCHED_ID_BITS 3 +#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS) +#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT) +#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK)) + +/* Per-scheduler IDs, to identify scheduler specific events */ +#define TRC_SCHED_CSCHED 0 +#define TRC_SCHED_CSCHED2 1 +#define TRC_SCHED_SEDF 2 +#define TRC_SCHED_ARINC653 3 +#define TRC_SCHED_RTDS 4 + +/* Per-scheduler tracing */ +#define TRC_SCHED_CLASS_EVT(_c, _e) \ + ( ( TRC_SCHED_CLASS | \ + ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \ + (_e & TRC_SCHED_EVT_MASK) ) + +/* Trace classes for Hardware */ +#define TRC_HW_PM 0x00801000 /* Power management traces */ +#define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ + +/* Trace events per class */ +#define TRC_LOST_RECORDS (TRC_GEN + 1) +#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) +#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) + +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) +#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) +#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) +#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) +#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4) +#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5) +#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6) +#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7) +#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8) +#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9) +#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10) +#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11) +#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12) +#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) +#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) + +#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) +#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) +#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) +#define TRC_MEM_SET_P2M_ENTRY (TRC_MEM + 4) +#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5) +#define TRC_MEM_POD_POPULATE (TRC_MEM + 16) +#define TRC_MEM_POD_ZERO_RECLAIM (TRC_MEM + 17) +#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18) + +#define TRC_PV_ENTRY 0x00201000 /* Hypervisor entry points for PV guests. */ +#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */ + +#define TRC_PV_HYPERCALL (TRC_PV_ENTRY + 1) +#define TRC_PV_TRAP (TRC_PV_ENTRY + 3) +#define TRC_PV_PAGE_FAULT (TRC_PV_ENTRY + 4) +#define TRC_PV_FORCED_INVALID_OP (TRC_PV_ENTRY + 5) +#define TRC_PV_EMULATE_PRIVOP (TRC_PV_ENTRY + 6) +#define TRC_PV_EMULATE_4GB (TRC_PV_ENTRY + 7) +#define TRC_PV_MATH_STATE_RESTORE (TRC_PV_ENTRY + 8) +#define TRC_PV_PAGING_FIXUP (TRC_PV_ENTRY + 9) +#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10) +#define TRC_PV_PTWR_EMULATION (TRC_PV_ENTRY + 11) +#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV_ENTRY + 12) +#define TRC_PV_HYPERCALL_V2 (TRC_PV_ENTRY + 13) +#define TRC_PV_HYPERCALL_SUBCALL (TRC_PV_SUBCALL + 14) + +/* + * TRC_PV_HYPERCALL_V2 format + * + * Only some of the hypercall argument are recorded. Bit fields A0 to + * A5 in the first extra word are set if the argument is present and + * the arguments themselves are packed sequentially in the following + * words. + * + * The TRC_64_FLAG bit is not set for these events (even if there are + * 64-bit arguments in the record). + * + * Word + * 0 bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0 + * A5 |A4 |A3 |A2 |A1 |A0 |Hypercall op + * 1 First 32 bit (or low word of first 64 bit) arg in record + * 2 Second 32 bit (or high word of first 64 bit) arg in record + * ... + * + * A0-A5 bitfield values: + * + * 00b Argument not present + * 01b 32-bit argument present + * 10b 64-bit argument present + * 11b Reserved + */ +#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1 << (20 + 2*(i))) +#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2 << (20 + 2*(i))) +#define TRC_PV_HYPERCALL_V2_ARG_MASK (0xfff00000) + +#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1) +#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2) +#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3) +#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4) +#define TRC_SHADOW_MMIO (TRC_SHADOW + 5) +#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6) +#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7) +#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8) +#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9) +#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10) +#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11) +#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12) +#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13) +#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14) +#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) + +/* trace events per subclass */ +#define TRC_HVM_NESTEDFLAG (0x400) +#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) +#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) +#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) +#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01) +#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01) +#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02) +#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02) +#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03) +#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04) +#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05) +#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06) +#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07) +#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08) +#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08) +#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09) +#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09) +#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A) +#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B) +#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C) +#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D) +#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E) +#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F) +#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10) +#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11) +#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) +#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13) +#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) +#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) +#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) +#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17) +#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) +#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) +#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) +#define TRC_HVM_RDTSC (TRC_HVM_HANDLER + 0x1a) +#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20) +#define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21) +#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22) +#define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) +#define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) +#define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) + +#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) +#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) + +/* Trace events for emulated devices */ +#define TRC_HVM_EMUL_HPET_START_TIMER (TRC_HVM_EMUL + 0x1) +#define TRC_HVM_EMUL_PIT_START_TIMER (TRC_HVM_EMUL + 0x2) +#define TRC_HVM_EMUL_RTC_START_TIMER (TRC_HVM_EMUL + 0x3) +#define TRC_HVM_EMUL_LAPIC_START_TIMER (TRC_HVM_EMUL + 0x4) +#define TRC_HVM_EMUL_HPET_STOP_TIMER (TRC_HVM_EMUL + 0x5) +#define TRC_HVM_EMUL_PIT_STOP_TIMER (TRC_HVM_EMUL + 0x6) +#define TRC_HVM_EMUL_RTC_STOP_TIMER (TRC_HVM_EMUL + 0x7) +#define TRC_HVM_EMUL_LAPIC_STOP_TIMER (TRC_HVM_EMUL + 0x8) +#define TRC_HVM_EMUL_PIT_TIMER_CB (TRC_HVM_EMUL + 0x9) +#define TRC_HVM_EMUL_LAPIC_TIMER_CB (TRC_HVM_EMUL + 0xA) +#define TRC_HVM_EMUL_PIC_INT_OUTPUT (TRC_HVM_EMUL + 0xB) +#define TRC_HVM_EMUL_PIC_KICK (TRC_HVM_EMUL + 0xC) +#define TRC_HVM_EMUL_PIC_INTACK (TRC_HVM_EMUL + 0xD) +#define TRC_HVM_EMUL_PIC_POSEDGE (TRC_HVM_EMUL + 0xE) +#define TRC_HVM_EMUL_PIC_NEGEDGE (TRC_HVM_EMUL + 0xF) +#define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10) +#define TRC_HVM_EMUL_LAPIC_PIC_INTR (TRC_HVM_EMUL + 0x11) + +/* trace events for per class */ +#define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01) +#define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02) +#define TRC_PM_IDLE_EXIT (TRC_HW_PM + 0x03) + +/* Trace events for IRQs */ +#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1) +#define TRC_HW_IRQ_MOVE_CLEANUP (TRC_HW_IRQ + 0x2) +#define TRC_HW_IRQ_BIND_VECTOR (TRC_HW_IRQ + 0x3) +#define TRC_HW_IRQ_CLEAR_VECTOR (TRC_HW_IRQ + 0x4) +#define TRC_HW_IRQ_MOVE_FINISH (TRC_HW_IRQ + 0x5) +#define TRC_HW_IRQ_ASSIGN_VECTOR (TRC_HW_IRQ + 0x6) +#define TRC_HW_IRQ_UNMAPPED_VECTOR (TRC_HW_IRQ + 0x7) +#define TRC_HW_IRQ_HANDLED (TRC_HW_IRQ + 0x8) + +/* + * Event Flags + * + * Some events (e.g, TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple + * record formats. These event flags distinguish between the + * different formats. + */ +#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */ + +/* This structure represents a single trace buffer record. */ +struct t_rec { + uint32_t event:28; + uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */ + uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */ + union { + struct { + uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */ + uint32_t extra_u32[7]; /* event data items */ + } cycles; + struct { + uint32_t extra_u32[7]; /* event data items */ + } nocycles; + } u; +}; + +/* + * This structure contains the metadata for a single trace buffer. The head + * field, indexes into an array of struct t_rec's. + */ +struct t_buf { + /* Assume the data buffer size is X. X is generally not a power of 2. + * CONS and PROD are incremented modulo (2*X): + * 0 <= cons < 2*X + * 0 <= prod < 2*X + * This is done because addition modulo X breaks at 2^32 when X is not a + * power of 2: + * (((2^32 - 1) % X) + 1) % X != (2^32) % X + */ + uint32_t cons; /* Offset of next item to be consumed by control tools. */ + uint32_t prod; /* Offset of next item to be produced by Xen. */ + /* Records follow immediately after the meta-data header. */ +}; + +/* Structure used to pass MFNs to the trace buffers back to trace consumers. + * Offset is an offset into the mapped structure where the mfn list will be held. + * MFNs will be at ((ULONG_PTR *)(t_info))+(t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/public/xen-compat.h b/include/xen/public/xen-compat.h new file mode 100644 index 0000000..1e62dc1 --- /dev/null +++ b/include/xen/public/xen-compat.h @@ -0,0 +1,44 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff --git a/include/xen/public/xen.h b/include/xen/public/xen.h new file mode 100644 index 0000000..728cc2f --- /dev/null +++ b/include/xen/public/xen.h @@ -0,0 +1,908 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include "xen-compat.h" + +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__arm__) || defined (__aarch64__) +#include "arch-arm.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +#if __XEN_INTERFACE_VERSION__ < 0x00040300 +DEFINE_XEN_GUEST_HANDLE(LONG_PTR); +__DEFINE_XEN_GUEST_HANDLE(ulong, ULONG_PTR); +#endif +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); +#endif + +/* + * HYPERCALLS + */ + +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* ` } */ + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +/* ` enum virq { */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 +/* ` } */ + +#define NR_VIRQS 24 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 + * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * There also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very dilligient in following these rules: + * 1). Start with top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there is less pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. Also if so desired the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. Meaning that + * if this is a X86_64 machine and four page table layout is used, the layout + * of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bits that does not fit with the default layout is the PAGE_PSE + * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. + * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 turned on, + * then we are using PAT1, if bit 3 and bit 4, then PAT2.. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means + * that if a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is + * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the + * caching as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ + +/* + * MMU EXTENDED OPERATIONS + * + * ` enum neg_errnoval + * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * ` unsigned int count, + * ` unsigned int *pdone, + * ` unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. + */ +/* ` enum mmuext_cmd { */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 +/* ` } */ + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; /* => enum mmuext_cmd */ + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + ULONG_PTR linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping(ULONG_PTR va, u64 val, + * ` enum uvm_flags flags) + * ` + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping_otherdomain(ULONG_PTR va, u64 val, + * ` enum uvm_flags flags, + * ` domid_t domid) + * ` + * ` @va: The virtual address whose mapping we want to change + * ` @val: The new page table entry, must contain a machine address + * ` @flags: Control TLB flushes + */ +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +/* ` enum uvm_flags { */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +/* ` } */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_multicall(multicall_entry_t call_list[], + * ` uint32_t nr_calls); + * + * NB. The fields are logically the natural register size for this + * architecture. In cases where xen_ulong_t is larger than this then + * any unused bits in the upper portion must be zero. + */ +struct multicall_entry { + xen_ulong_t op, result; + xen_ulong_t args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +/* + * Event channel endpoints per domain (when using the 2-level ABI): + * 1024 if a LONG_PTR is 32 bits; 4096 if a LONG_PTR is 64 bits. + */ +#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS +#endif + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; +#ifdef XEN_HAVE_PV_UPCALL_MASK + uint8_t evtchn_upcall_mask; +#else /* XEN_HAVE_PV_UPCALL_MASK */ + uint8_t pad0; +#endif /* XEN_HAVE_PV_UPCALL_MASK */ + xen_ulong_t evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif + +/* + * `incontents 200 startofday_shared Start-of-day shared data structure + * Xen/kernel shared data -- pointer provided in start_info. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. + */ +struct shared_info { + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C LONG_PTR' in the PENDING bitfield array. + */ + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ +#if !defined(__i386__) + uint32_t wc_sec_hi; +# define xen_wc_sec_hi wc_sec_hi +#elif !defined(__XEN__) && !defined(__XEN_TOOLS__) +# define xen_wc_sec_hi arch.wc_sec_hi +#endif + + struct arch_shared_info arch; + +}; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif + +/* + * `incontents 200 startofday Start-of-day memory layout + * + * 1. The domain is started within contiguous virtual-memory region. + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base and CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 8. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + * + * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page + * table layout") a bug caused the pt_base (3.e above) and cr3 to not point + * to the start of the guest page tables (it was offset by two pages). + * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU + * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got + * allocated in the order: 'first L1','first L2', 'first L3', so the offset + * to the page table base is by two pages back. The initial domain if it is + * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the + * pages preceding pt_base and mark them as reserved/unused. + */ +#ifdef XEN_HAVE_PV_GUEST_ENTRY +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen-<version>-<platform>". */ + ULONG_PTR nr_pages; /* Total pages allocated to this domain. */ + ULONG_PTR shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + ULONG_PTR pt_base; /* VIRTUAL address of page directory. */ + ULONG_PTR nr_pt_frames; /* Number of bootstrap p.t. frames. */ + ULONG_PTR mfn_list; /* VIRTUAL address of page-frame list. */ + ULONG_PTR mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ + ULONG_PTR mod_len; /* Size (bytes) of pre-loaded module. */ +#define MAX_GUEST_CMDLINE 1024 + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + ULONG_PTR first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + ULONG_PTR nr_p2m_frames;/* # of pfns forming initial P->M table. */ +}; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif +#endif /* XEN_HAVE_PV_GUEST_ENTRY */ + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; +/* + * `incontents 200 startofday_dom0_console Dom0_console + * + * The console structure in start_info.console.dom0 + * + * This structure includes a variety of information required to + * have a working VGA/VESA console. + */ +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C ULONG_PTR constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef int64_aligned_t +#define int64_aligned_t int64_t +#endif +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif + +#ifndef __ASSEMBLY__ +struct xenctl_bitmap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_bits; +}; +#endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/xen-compat.h b/include/xen/xen-compat.h deleted file mode 100644 index 69141c4..0000000 --- a/include/xen/xen-compat.h +++ /dev/null @@ -1,44 +0,0 @@ -/****************************************************************************** - * xen-compat.h - * - * Guest OS interface to Xen. Compatibility layer. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2006, Christian Limpach - */ - -#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_PUBLIC_XEN_COMPAT_H__ - -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040300 - -#if defined(__XEN__) || defined(__XEN_TOOLS__) -/* Xen is built with matching headers and implements the latest interface. */ -#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ -#elif !defined(__XEN_INTERFACE_VERSION__) -/* Guests which do not specify a version get the legacy interface. */ -#define __XEN_INTERFACE_VERSION__ 0x00000000 -#endif - -#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ -#error "These header files do not support the requested interface version." -#endif - -#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff --git a/include/xen/xen.h b/include/xen/xen.h deleted file mode 100644 index c14f9ae..0000000 --- a/include/xen/xen.h +++ /dev/null @@ -1,895 +0,0 @@ -/****************************************************************************** - * xen.h - * - * Guest OS interface to Xen. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __XEN_PUBLIC_XEN_H__ -#define __XEN_PUBLIC_XEN_H__ - -#include "xen-compat.h" - -#if defined(__i386__) || defined(__x86_64__) -#include "arch-x86/xen.h" -#elif defined(__arm__) || defined (__aarch64__) -#include "arch-arm.h" -#else -#error "Unsupported architecture" -#endif - -#ifndef __ASSEMBLY__ -/* Guest handles for primitive C types. */ -DEFINE_XEN_GUEST_HANDLE(char); -__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); -DEFINE_XEN_GUEST_HANDLE(int); -__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); -#if __XEN_INTERFACE_VERSION__ < 0x00040300 -DEFINE_XEN_GUEST_HANDLE(LONG_PTR); -__DEFINE_XEN_GUEST_HANDLE(ulong, ULONG_PTR); -#endif -DEFINE_XEN_GUEST_HANDLE(void); - -DEFINE_XEN_GUEST_HANDLE(uint64_t); -DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); -DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); -#endif - -/* - * HYPERCALLS - */ - -/* `incontents 100 hcalls List of hypercalls - * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() - */ - -#define __HYPERVISOR_set_trap_table 0 -#define __HYPERVISOR_mmu_update 1 -#define __HYPERVISOR_set_gdt 2 -#define __HYPERVISOR_stack_switch 3 -#define __HYPERVISOR_set_callbacks 4 -#define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ -#define __HYPERVISOR_platform_op 7 -#define __HYPERVISOR_set_debugreg 8 -#define __HYPERVISOR_get_debugreg 9 -#define __HYPERVISOR_update_descriptor 10 -#define __HYPERVISOR_memory_op 12 -#define __HYPERVISOR_multicall 13 -#define __HYPERVISOR_update_va_mapping 14 -#define __HYPERVISOR_set_timer_op 15 -#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ -#define __HYPERVISOR_xen_version 17 -#define __HYPERVISOR_console_io 18 -#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ -#define __HYPERVISOR_grant_table_op 20 -#define __HYPERVISOR_vm_assist 21 -#define __HYPERVISOR_update_va_mapping_otherdomain 22 -#define __HYPERVISOR_iret 23 /* x86 only */ -#define __HYPERVISOR_vcpu_op 24 -#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ -#define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_xsm_op 27 -#define __HYPERVISOR_nmi_op 28 -#define __HYPERVISOR_sched_op 29 -#define __HYPERVISOR_callback_op 30 -#define __HYPERVISOR_xenoprof_op 31 -#define __HYPERVISOR_event_channel_op 32 -#define __HYPERVISOR_physdev_op 33 -#define __HYPERVISOR_hvm_op 34 -#define __HYPERVISOR_sysctl 35 -#define __HYPERVISOR_domctl 36 -#define __HYPERVISOR_kexec_op 37 -#define __HYPERVISOR_tmem_op 38 -#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ - -/* Architecture-specific hypercall definitions. */ -#define __HYPERVISOR_arch_0 48 -#define __HYPERVISOR_arch_1 49 -#define __HYPERVISOR_arch_2 50 -#define __HYPERVISOR_arch_3 51 -#define __HYPERVISOR_arch_4 52 -#define __HYPERVISOR_arch_5 53 -#define __HYPERVISOR_arch_6 54 -#define __HYPERVISOR_arch_7 55 - -/* ` } */ - -/* - * HYPERCALL COMPATIBILITY. - */ - -/* New sched_op hypercall introduced in 0x00030101. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030101 -#undef __HYPERVISOR_sched_op -#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat -#endif - -/* New event-channel and physdev hypercalls introduced in 0x00030202. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030202 -#undef __HYPERVISOR_event_channel_op -#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat -#undef __HYPERVISOR_physdev_op -#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat -#endif - -/* New platform_op hypercall introduced in 0x00030204. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030204 -#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op -#endif - -/* - * VIRTUAL INTERRUPTS - * - * Virtual interrupts that a guest OS may receive from Xen. - * - * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a - * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. - * The latter can be allocated only once per guest: they must initially be - * allocated to VCPU0 but can subsequently be re-bound. - */ -/* ` enum virq { */ -#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ -#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ -#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ -#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ -#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ -#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ -#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ -#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ -#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ -#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ -#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ -#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ - -/* Architecture-specific VIRQ definitions. */ -#define VIRQ_ARCH_0 16 -#define VIRQ_ARCH_1 17 -#define VIRQ_ARCH_2 18 -#define VIRQ_ARCH_3 19 -#define VIRQ_ARCH_4 20 -#define VIRQ_ARCH_5 21 -#define VIRQ_ARCH_6 22 -#define VIRQ_ARCH_7 23 -/* ` } */ - -#define NR_VIRQS 24 - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], - * ` unsigned count, unsigned *done_out, - * ` unsigned foreigndom) - * ` - * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). - * @count is the length of the above array. - * @pdone is an output parameter indicating number of completed operations - * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this - * hypercall invocation. Can be DOMID_SELF. - * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced - * in this hypercall invocation. The value of this field - * (x) encodes the PFD as follows: - * x == 0 => PFD == DOMID_SELF - * x != 0 => PFD == x - 1 - * - * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. - * ------------- - * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table belonging to PFD. If updating an L1 table, - * and the new table entry is valid/present, the mapped frame must belong to - * FD. If attempting to map an I/O page then the caller assumes the privilege - * of the FD. - * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. - * FD == DOMID_XEN: Map restricted areas of Xen's heap space. - * ptr[:2] -- Machine address of the page-table entry to modify. - * val -- Value to write. - * - * There also certain implicit requirements when using this hypercall. The - * pages that make up a pagetable must be mapped read-only in the guest. - * This prevents uncontrolled guest updates to the pagetable. Xen strictly - * enforces this, and will disallow any pagetable update which will end up - * mapping pagetable page RW, and will disallow using any writable page as a - * pagetable. In practice it means that when constructing a page table for a - * process, thread, etc, we MUST be very dilligient in following these rules: - * 1). Start with top-level page (PGD or in Xen language: L4). Fill out - * the entries. - * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD - * or L2). - * 3). Start filling out the PTE table (L1) with the PTE entries. Once - * done, make sure to set each of those entries to RO (so writeable bit - * is unset). Once that has been completed, set the PMD (L2) for this - * PTE table as RO. - * 4). When completed with all of the PMD (L2) entries, and all of them have - * been set to RO, make sure to set RO the PUD (L3). Do the same - * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. - * 5). Now before you can use those pages (so setting the cr3), you MUST also - * pin them so that the hypervisor can verify the entries. This is done - * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame - * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( - * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be - * issued. - * For 32-bit guests, the L4 is not used (as there is less pagetables), so - * instead use L3. - * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE - * hypercall. Also if so desired the OS can also try to write to the PTE - * and be trapped by the hypervisor (as the PTE entry is RO). - * - * To deallocate the pages, the operations are the reverse of the steps - * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the - * pagetable MUST not be in use (meaning that the cr3 is not set to it). - * - * ptr[1:0] == MMU_MACHPHYS_UPDATE: - * Updates an entry in the machine->pseudo-physical mapping table. - * ptr[:2] -- Machine address within the frame whose mapping to modify. - * The frame must belong to the FD, if one is specified. - * val -- Value to write into the mapping entry. - * - * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: - * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed - * with those in @val. - * - * @val is usually the machine frame number along with some attributes. - * The attributes by default follow the architecture defined bits. Meaning that - * if this is a X86_64 machine and four page table layout is used, the layout - * of val is: - * - 63 if set means No execute (NX) - * - 46-13 the machine frame number - * - 12 available for guest - * - 11 available for guest - * - 10 available for guest - * - 9 available for guest - * - 8 global - * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) - * - 6 dirty - * - 5 accessed - * - 4 page cached disabled - * - 3 page write through - * - 2 userspace accessible - * - 1 writeable - * - 0 present - * - * The one bits that does not fit with the default layout is the PAGE_PSE - * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the - * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB - * (or 2MB) instead of using the PAGE_PSE bit. - * - * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen - * using it as the Page Attribute Table (PAT) bit - for details on it please - * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of - * pages instead of using MTRRs. - * - * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): - * PAT4 PAT0 - * +-----+-----+----+----+----+-----+----+----+ - * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux - * +-----+-----+----+----+----+-----+----+----+ - * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) - * +-----+-----+----+----+----+-----+----+----+ - * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen - * +-----+-----+----+----+----+-----+----+----+ - * - * The lookup of this index table translates to looking up - * Bit 7, Bit 4, and Bit 3 of val entry: - * - * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). - * - * If all bits are off, then we are using PAT0. If bit 3 turned on, - * then we are using PAT1, if bit 3 and bit 4, then PAT2.. - * - * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means - * that if a guest that follows Linux's PAT setup and would like to set Write - * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is - * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the - * caching as: - * - * WB = none (so PAT0) - * WC = PWT (bit 3 on) - * UC = PWT | PCD (bit 3 and 4 are on). - * - * To make it work with Xen, it needs to translate the WC bit as so: - * - * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 - * - * And to translate back it would: - * - * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. - */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ -#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ - -/* - * MMU EXTENDED OPERATIONS - * - * ` enum neg_errnoval - * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], - * ` unsigned int count, - * ` unsigned int *pdone, - * ` unsigned int foreigndom) - */ -/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. - * A foreigndom (FD) can be specified (or DOMID_SELF for none). - * Where the FD has some effect, it is described below. - * - * cmd: MMUEXT_(UN)PIN_*_TABLE - * mfn: Machine frame number to be (un)pinned as a p.t. page. - * The frame must belong to the FD, if one is specified. - * - * cmd: MMUEXT_NEW_BASEPTR - * mfn: Machine frame number of new page-table base to install in MMU. - * - * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] - * mfn: Machine frame number of new page-table base to install in MMU - * when in user space. - * - * cmd: MMUEXT_TLB_FLUSH_LOCAL - * No additional arguments. Flushes local TLB. - * - * cmd: MMUEXT_INVLPG_LOCAL - * linear_addr: Linear address to be flushed from the local TLB. - * - * cmd: MMUEXT_TLB_FLUSH_MULTI - * vcpumask: Pointer to bitmap of VCPUs to be flushed. - * - * cmd: MMUEXT_INVLPG_MULTI - * linear_addr: Linear address to be flushed. - * vcpumask: Pointer to bitmap of VCPUs to be flushed. - * - * cmd: MMUEXT_TLB_FLUSH_ALL - * No additional arguments. Flushes all VCPUs' TLBs. - * - * cmd: MMUEXT_INVLPG_ALL - * linear_addr: Linear address to be flushed from all VCPUs' TLBs. - * - * cmd: MMUEXT_FLUSH_CACHE - * No additional arguments. Writes back and flushes cache contents. - * - * cmd: MMUEXT_FLUSH_CACHE_GLOBAL - * No additional arguments. Writes back and flushes cache contents - * on all CPUs in the system. - * - * cmd: MMUEXT_SET_LDT - * linear_addr: Linear address of LDT base (NB. must be page-aligned). - * nr_ents: Number of entries in LDT. - * - * cmd: MMUEXT_CLEAR_PAGE - * mfn: Machine frame number to be cleared. - * - * cmd: MMUEXT_COPY_PAGE - * mfn: Machine frame number of the destination page. - * src_mfn: Machine frame number of the source page. - * - * cmd: MMUEXT_[UN]MARK_SUPER - * mfn: Machine frame number of head of superpage to be [un]marked. - */ -/* ` enum mmuext_cmd { */ -#define MMUEXT_PIN_L1_TABLE 0 -#define MMUEXT_PIN_L2_TABLE 1 -#define MMUEXT_PIN_L3_TABLE 2 -#define MMUEXT_PIN_L4_TABLE 3 -#define MMUEXT_UNPIN_TABLE 4 -#define MMUEXT_NEW_BASEPTR 5 -#define MMUEXT_TLB_FLUSH_LOCAL 6 -#define MMUEXT_INVLPG_LOCAL 7 -#define MMUEXT_TLB_FLUSH_MULTI 8 -#define MMUEXT_INVLPG_MULTI 9 -#define MMUEXT_TLB_FLUSH_ALL 10 -#define MMUEXT_INVLPG_ALL 11 -#define MMUEXT_FLUSH_CACHE 12 -#define MMUEXT_SET_LDT 13 -#define MMUEXT_NEW_USER_BASEPTR 15 -#define MMUEXT_CLEAR_PAGE 16 -#define MMUEXT_COPY_PAGE 17 -#define MMUEXT_FLUSH_CACHE_GLOBAL 18 -#define MMUEXT_MARK_SUPER 19 -#define MMUEXT_UNMARK_SUPER 20 -/* ` } */ - -#ifndef __ASSEMBLY__ -struct mmuext_op { - unsigned int cmd; /* => enum mmuext_cmd */ - union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR - * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ - xen_pfn_t mfn; - /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ - ULONG_PTR linear_addr; - } arg1; - union { - /* SET_LDT */ - unsigned int nr_ents; - /* TLB_FLUSH_MULTI, INVLPG_MULTI */ -#if __XEN_INTERFACE_VERSION__ >= 0x00030205 - XEN_GUEST_HANDLE(const_void) vcpumask; -#else - const void *vcpumask; -#endif - /* COPY_PAGE */ - xen_pfn_t src_mfn; - } arg2; -}; -typedef struct mmuext_op mmuext_op_t; -DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); -#endif - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_update_va_mapping(ULONG_PTR va, u64 val, - * ` enum uvm_flags flags) - * ` - * ` enum neg_errnoval - * ` HYPERVISOR_update_va_mapping_otherdomain(ULONG_PTR va, u64 val, - * ` enum uvm_flags flags, - * ` domid_t domid) - * ` - * ` @va: The virtual address whose mapping we want to change - * ` @val: The new page table entry, must contain a machine address - * ` @flags: Control TLB flushes - */ -/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ -/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ -/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ -/* ` enum uvm_flags { */ -#define UVMF_NONE (0UL<<0) /* No flushing at all. */ -#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ -#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ -#define UVMF_FLUSHTYPE_MASK (3UL<<0) -#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ -#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ -#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ -/* ` } */ - -/* - * Commands to HYPERVISOR_console_io(). - */ -#define CONSOLEIO_write 0 -#define CONSOLEIO_read 1 - -/* - * Commands to HYPERVISOR_vm_assist(). - */ -#define VMASST_CMD_enable 0 -#define VMASST_CMD_disable 1 - -/* x86/32 guests: simulate full 4GB segment limits. */ -#define VMASST_TYPE_4gb_segments 0 - -/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ -#define VMASST_TYPE_4gb_segments_notify 1 - -/* - * x86 guests: support writes to bottom-level PTEs. - * NB1. Page-directory entries cannot be written. - * NB2. Guest must continue to remove all writable mappings of PTEs. - */ -#define VMASST_TYPE_writable_pagetables 2 - -/* x86/PAE guests: support PDPTs above 4GB. */ -#define VMASST_TYPE_pae_extended_cr3 3 - -#define MAX_VMASST_TYPE 3 - -#ifndef __ASSEMBLY__ - -typedef uint16_t domid_t; - -/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ -#define DOMID_FIRST_RESERVED (0x7FF0U) - -/* DOMID_SELF is used in certain contexts to refer to oneself. */ -#define DOMID_SELF (0x7FF0U) - -/* - * DOMID_IO is used to restrict page-table updates to mapping I/O memory. - * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO - * is useful to ensure that no mappings to the OS's own heap are accidentally - * installed. (e.g., in Linux this could cause havoc as reference counts - * aren't adjusted on the I/O-mapping code path). - * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can - * be specified by any calling domain. - */ -#define DOMID_IO (0x7FF1U) - -/* - * DOMID_XEN is used to allow privileged domains to map restricted parts of - * Xen's heap space (e.g., the machine_to_phys table). - * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if - * the caller is privileged. - */ -#define DOMID_XEN (0x7FF2U) - -/* - * DOMID_COW is used as the owner of sharable pages */ -#define DOMID_COW (0x7FF3U) - -/* DOMID_INVALID is used to identify pages with unknown owner. */ -#define DOMID_INVALID (0x7FF4U) - -/* Idle domain. */ -#define DOMID_IDLE (0x7FFFU) - -/* - * Send an array of these to HYPERVISOR_mmu_update(). - * NB. The fields are natural pointer/address size for this architecture. - */ -struct mmu_update { - uint64_t ptr; /* Machine address of PTE. */ - uint64_t val; /* New contents of PTE. */ -}; -typedef struct mmu_update mmu_update_t; -DEFINE_XEN_GUEST_HANDLE(mmu_update_t); - -/* - * ` enum neg_errnoval - * ` HYPERVISOR_multicall(multicall_entry_t call_list[], - * ` unsigned int nr_calls); - * - * NB. The fields are natural register size for this architecture. - */ -struct multicall_entry { - ULONG_PTR op, result; - ULONG_PTR args[6]; -}; -typedef struct multicall_entry multicall_entry_t; -DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); - -/* - * Event channel endpoints per domain: - * 1024 if a LONG_PTR is 32 bits; 4096 if a LONG_PTR is 64 bits. - */ -#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) - -struct vcpu_time_info { - /* - * Updates to the following values are preceded and followed by an - * increment of 'version'. The guest can therefore detect updates by - * looking for changes to 'version'. If the least-significant bit of - * the version number is set then an update is in progress and the guest - * must wait to read a consistent set of values. - * The correct way to interact with the version number is similar to - * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. - */ - uint32_t version; - uint32_t pad0; - uint64_t tsc_timestamp; /* TSC at last update of time vals. */ - uint64_t system_time; /* Time, in nanosecs, since boot. */ - /* - * Current system time: - * system_time + - * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) - * CPU frequency (Hz): - * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift - */ - uint32_t tsc_to_system_mul; - int8_t tsc_shift; - int8_t pad1[3]; -}; /* 32 bytes */ -typedef struct vcpu_time_info vcpu_time_info_t; - -struct vcpu_info { - /* - * 'evtchn_upcall_pending' is written non-zero by Xen to indicate - * a pending notification for a particular VCPU. It is then cleared - * by the guest OS /before/ checking for pending work, thus avoiding - * a set-and-check race. Note that the mask is only accessed by Xen - * on the CPU that is currently hosting the VCPU. This means that the - * pending and mask flags can be updated by the guest without special - * synchronisation (i.e., no need for the x86 LOCK prefix). - * This may seem suboptimal because if the pending flag is set by - * a different CPU then an IPI may be scheduled even when the mask - * is set. However, note: - * 1. The task of 'interrupt holdoff' is covered by the per-event- - * channel mask bits. A 'noisy' event that is continually being - * triggered can be masked at source at this very precise - * granularity. - * 2. The main purpose of the per-VCPU mask is therefore to restrict - * reentrant execution: whether for concurrency control, or to - * prevent unbounded stack usage. Whatever the purpose, we expect - * that the mask will be asserted only for short periods at a time, - * and so the likelihood of a 'spurious' IPI is suitably small. - * The mask is read before making an event upcall to the guest: a - * non-zero mask therefore guarantees that the VCPU will not receive - * an upcall activation. The mask is cleared when the VCPU requests - * to block: this avoids wakeup-waiting races. - */ - uint8_t evtchn_upcall_pending; -#ifdef XEN_HAVE_PV_UPCALL_MASK - uint8_t evtchn_upcall_mask; -#else /* XEN_HAVE_PV_UPCALL_MASK */ - uint8_t pad0; -#endif /* XEN_HAVE_PV_UPCALL_MASK */ - xen_ulong_t evtchn_pending_sel; - struct arch_vcpu_info arch; - struct vcpu_time_info time; -}; /* 64 bytes (x86) */ -#ifndef __XEN__ -typedef struct vcpu_info vcpu_info_t; -#endif - -/* - * `incontents 200 startofday_shared Start-of-day shared data structure - * Xen/kernel shared data -- pointer provided in start_info. - * - * This structure is defined to be both smaller than a page, and the - * only data on the shared page, but may vary in actual size even within - * compatible Xen versions; guests should not rely on the size - * of this structure remaining constant. - */ -struct shared_info { - struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; - - /* - * A domain can create "event channels" on which it can send and receive - * asynchronous event notifications. There are three classes of event that - * are delivered by this mechanism: - * 1. Bi-directional inter- and intra-domain connections. Domains must - * arrange out-of-band to set up a connection (usually by allocating - * an unbound 'listener' port and avertising that via a storage service - * such as xenstore). - * 2. Physical interrupts. A domain with suitable hardware-access - * privileges can bind an event-channel port to a physical interrupt - * source. - * 3. Virtual interrupts ('events'). A domain can bind an event-channel - * port to a virtual interrupt source, such as the virtual-timer - * device or the emergency console. - * - * Event channels are addressed by a "port index". Each channel is - * associated with two bits of information: - * 1. PENDING -- notifies the domain that there is a pending notification - * to be processed. This bit is cleared by the guest. - * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING - * will cause an asynchronous upcall to be scheduled. This bit is only - * updated by the guest. It is read-only within Xen. If a channel - * becomes pending while the channel is masked then the 'edge' is lost - * (i.e., when the channel is unmasked, the guest must manually handle - * pending notifications as no upcall will be scheduled by Xen). - * - * To expedite scanning of pending notifications, any 0->1 pending - * transition on an unmasked channel causes a corresponding bit in a - * per-vcpu selector word to be set. Each bit in the selector covers a - * 'C LONG_PTR' in the PENDING bitfield array. - */ - xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; - xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; - - /* - * Wallclock time: updated only by control software. Guests should base - * their gettimeofday() syscall on this wallclock-base value. - */ - uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ - uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ - uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ - - struct arch_shared_info arch; - -}; -#ifndef __XEN__ -typedef struct shared_info shared_info_t; -#endif - -/* - * `incontents 200 startofday Start-of-day memory layout - * - * 1. The domain is started within contiguous virtual-memory region. - * 2. The contiguous region ends on an aligned 4MB boundary. - * 3. This the order of bootstrap elements in the initial virtual region: - * a. relocated kernel image - * b. initial ram disk [mod_start, mod_len] - * c. list of allocated page frames [mfn_list, nr_pages] - * (unless relocated due to XEN_ELFNOTE_INIT_P2M) - * d. start_info_t structure [register ESI (x86)] - * e. bootstrap page tables [pt_base and CR3 (x86)] - * f. bootstrap stack [register ESP (x86)] - * 4. Bootstrap elements are packed together, but each is 4kB-aligned. - * 5. The initial ram disk may be omitted. - * 6. The list of page frames forms a contiguous 'pseudo-physical' memory - * layout for the domain. In particular, the bootstrap virtual-memory - * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 7. All bootstrap elements are mapped read-writable for the guest OS. The - * only exception is the bootstrap page table, which is mapped read-only. - * 8. There is guaranteed to be at least 512kB padding after the final - * bootstrap element. If necessary, the bootstrap virtual region is - * extended by an extra 4MB to ensure this. - * - * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page - * table layout") a bug caused the pt_base (3.e above) and cr3 to not point - * to the start of the guest page tables (it was offset by two pages). - * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU - * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got - * allocated in the order: 'first L1','first L2', 'first L3', so the offset - * to the page table base is by two pages back. The initial domain if it is - * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the - * pages preceding pt_base and mark them as reserved/unused. - */ -#ifdef XEN_HAVE_PV_GUEST_ENTRY -struct start_info { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - char magic[32]; /* "xen-<version>-<platform>". */ - ULONG_PTR nr_pages; /* Total pages allocated to this domain. */ - ULONG_PTR shared_info; /* MACHINE address of shared info struct. */ - uint32_t flags; /* SIF_xxx flags. */ - xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ - uint32_t store_evtchn; /* Event channel for store communication. */ - union { - struct { - xen_pfn_t mfn; /* MACHINE page number of console page. */ - uint32_t evtchn; /* Event channel for console page. */ - } domU; - struct { - uint32_t info_off; /* Offset of console_info struct. */ - uint32_t info_size; /* Size of console_info struct from start.*/ - } dom0; - } console; - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - ULONG_PTR pt_base; /* VIRTUAL address of page directory. */ - ULONG_PTR nr_pt_frames; /* Number of bootstrap p.t. frames. */ - ULONG_PTR mfn_list; /* VIRTUAL address of page-frame list. */ - ULONG_PTR mod_start; /* VIRTUAL address of pre-loaded module */ - /* (PFN of pre-loaded module if */ - /* SIF_MOD_START_PFN set in flags). */ - ULONG_PTR mod_len; /* Size (bytes) of pre-loaded module. */ -#define MAX_GUEST_CMDLINE 1024 - int8_t cmd_line[MAX_GUEST_CMDLINE]; - /* The pfn range here covers both page table and p->m table frames. */ - ULONG_PTR first_p2m_pfn;/* 1st pfn forming initial P->M table. */ - ULONG_PTR nr_p2m_frames;/* # of pfns forming initial P->M table. */ -}; -typedef struct start_info start_info_t; - -/* New console union for dom0 introduced in 0x00030203. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030203 -#define console_mfn console.domU.mfn -#define console_evtchn console.domU.evtchn -#endif -#endif /* XEN_HAVE_PV_GUEST_ENTRY */ - -/* These flags are passed in the 'flags' field of start_info_t. */ -#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ -#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ -#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ -#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ -#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ - -/* - * A multiboot module is a package containing modules very similar to a - * multiboot module array. The only differences are: - * - the array of module descriptors is by convention simply at the beginning - * of the multiboot module, - * - addresses in the module descriptors are based on the beginning of the - * multiboot module, - * - the number of modules is determined by a termination descriptor that has - * mod_start == 0. - * - * This permits to both build it statically and reference it in a configuration - * file, and let the PV guest easily rebase the addresses to virtual addresses - * and at the same time count the number of modules. - */ -struct xen_multiboot_mod_list -{ - /* Address of first byte of the module */ - uint32_t mod_start; - /* Address of last byte of the module (inclusive) */ - uint32_t mod_end; - /* Address of zero-terminated command line */ - uint32_t cmdline; - /* Unused, must be zero */ - uint32_t pad; -}; -/* - * `incontents 200 startofday_dom0_console Dom0_console - * - * The console structure in start_info.console.dom0 - * - * This structure includes a variety of information required to - * have a working VGA/VESA console. - */ -typedef struct dom0_vga_console_info { - uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ -#define XEN_VGATYPE_TEXT_MODE_3 0x03 -#define XEN_VGATYPE_VESA_LFB 0x23 -#define XEN_VGATYPE_EFI_LFB 0x70 - - union { - struct { - /* Font height, in pixels. */ - uint16_t font_height; - /* Cursor location (column, row). */ - uint16_t cursor_x, cursor_y; - /* Number of rows and columns (dimensions in characters). */ - uint16_t rows, columns; - } text_mode_3; - - struct { - /* Width and height, in pixels. */ - uint16_t width, height; - /* Bytes per scan line. */ - uint16_t bytes_per_line; - /* Bits per pixel. */ - uint16_t bits_per_pixel; - /* LFB physical address, and size (in units of 64kB). */ - uint32_t lfb_base; - uint32_t lfb_size; - /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ - uint8_t red_pos, red_size; - uint8_t green_pos, green_size; - uint8_t blue_pos, blue_size; - uint8_t rsvd_pos, rsvd_size; -#if __XEN_INTERFACE_VERSION__ >= 0x00030206 - /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ - uint32_t gbl_caps; - /* Mode attributes (offset 0x0, VESA command 0x4f01). */ - uint16_t mode_attrs; -#endif - } vesa_lfb; - } u; -} dom0_vga_console_info_t; -#define xen_vga_console_info dom0_vga_console_info -#define xen_vga_console_info_t dom0_vga_console_info_t - -typedef uint8_t xen_domain_handle_t[16]; - -/* Turn a plain number into a C ULONG_PTR constant. */ -#define __mk_unsigned_long(x) x ## UL -#define mk_unsigned_long(x) __mk_unsigned_long(x) - -__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); -__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); -__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); -__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); - -#else /* __ASSEMBLY__ */ - -/* In assembly code we cannot use C numeric constant suffixes. */ -#define mk_unsigned_long(x) x - -#endif /* !__ASSEMBLY__ */ - -/* Default definitions for macros used by domctl/sysctl. */ -#if defined(__XEN__) || defined(__XEN_TOOLS__) - -#ifndef uint64_aligned_t -#define uint64_aligned_t uint64_t -#endif -#ifndef XEN_GUEST_HANDLE_64 -#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) -#endif - -#ifndef __ASSEMBLY__ -struct xenctl_bitmap { - XEN_GUEST_HANDLE_64(uint8) bitmap; - uint32_t nr_bits; -}; -#endif - -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ - -#endif /* __XEN_PUBLIC_XEN_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/xen/errno.h b/include/xen/xen/errno.h new file mode 100644 index 0000000..3178466 --- /dev/null +++ b/include/xen/xen/errno.h @@ -0,0 +1,20 @@ +#ifndef __XEN_ERRNO_H__ +#define __XEN_ERRNO_H__ + +#include <public/errno.h> + +#ifndef __ASSEMBLY__ + +#define XEN_ERRNO(name, value) name = XEN_##name, +enum { +#include <public/errno.h> +}; + +#else /* !__ASSEMBLY__ */ + +#define XEN_ERRNO(name, value) .equ name, XEN_##name +#include <public/errno.h> + +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_ERRNO_H__ */ diff --git a/vs2012/xenvif/xenvif.vcxproj b/vs2012/xenvif/xenvif.vcxproj index 4120018..ee18fc6 100644 --- a/vs2012/xenvif/xenvif.vcxproj +++ b/vs2012/xenvif/xenvif.vcxproj @@ -28,7 +28,7 @@ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <PropertyGroup> - <IncludePath>..\..\include;$(IncludePath)</IncludePath> + <IncludePath>..\..\include;..\..\include\xen;$(IncludePath)</IncludePath> <RunCodeAnalysis>true</RunCodeAnalysis> <EnableInf2cat>false</EnableInf2cat> <CustomBuildBeforeTargets>ClCompile;StampInf</CustomBuildBeforeTargets> diff --git a/vs2013/xenvif/xenvif.vcxproj b/vs2013/xenvif/xenvif.vcxproj index 0e774b7..ef117e5 100644 --- a/vs2013/xenvif/xenvif.vcxproj +++ b/vs2013/xenvif/xenvif.vcxproj @@ -61,7 +61,6 @@ <Import Project="..\targets.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <PropertyGroup> - <IncludePath>..\..\include;$(IncludePath)</IncludePath> <RunCodeAnalysis>true</RunCodeAnalysis> <EnableInf2cat>false</EnableInf2cat> <CustomBuildBeforeTargets>ClCompile;StampInf</CustomBuildBeforeTargets> @@ -79,12 +78,16 @@ <Inputs>..\..\src\xenvif.inf;..\..\include\version.hx</Inputs> </CustomBuildStep> <ClCompile> + <AdditionalIncludeDirectories>$(WindowsSdkDir)\include\km;..\..\include;..\..\include\xen;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <PreprocessorDefinitions>__MODULE__="XENVIF";POOL_NX_OPTIN=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <WarningLevel>EnableAllWarnings</WarningLevel> <DisableSpecificWarnings>4711;4548;4820;4668;4255;6001;6054;28196;%(DisableSpecificWarnings)</DisableSpecificWarnings> <MultiProcessorCompilation>true</MultiProcessorCompilation> <EnablePREfast>true</EnablePREfast> </ClCompile> + <ResourceCompile> + <AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> <Link> <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers> <AdditionalDependencies>$(DDK_LIB_PATH)/libcntpr.lib;$(DDK_LIB_PATH)/aux_klib.lib;$(DDK_LIB_PATH)/ksecdd.lib;%(AdditionalDependencies)</AdditionalDependencies> -- 2.1.1 _______________________________________________ win-pv-devel mailing list win-pv-devel@xxxxxxxxxxxxxxxxxxxx http://lists.xenproject.org/cgi-bin/mailman/listinfo/win-pv-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.