Xen project Mailing List

[Xen-changelog] [linux-2.6.18-xen] sync Xen public headers to 4.7.0 level

From: Xen patchbot-linux-2.6.18-xen <patchbot@xxxxxxx>

Date: Wed, 06 Jul 2016 08:22:02 +0000

Delivery-date: Wed, 06 Jul 2016 08:22:10 +0000

List-id: "Change log for Mercurial $receive only$" <xen-changelog.lists.xen.org>

# HG changeset patch # User Jan Beulich # Date 1467792338 -7200 # Wed Jul 06 10:05:38 2016 +0200 # Node ID c03c2bd4ba0b616fa98a9dafc8ec6ed9bc691865 # Parent 200d580e141c1dc8bc4436073f54be7f50393acd sync Xen public headers to 4.7.0 level --- diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-arm.h --- a/include/xen/interface/arch-arm.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-arm.h Wed Jul 06 10:05:38 2016 +0200 @@ -173,7 +173,7 @@ typedef union { type *p; unsigned long q; } \ __guest_handle_ ## name; \ typedef union { type *p; uint64_aligned_t q; } \ - __guest_handle_64_ ## name; + __guest_handle_64_ ## name /* * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field @@ -195,9 +195,6 @@ _sxghr_tmp->q = 0; \ _sxghr_tmp->p = val; \ } while ( 0 ) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__GNUC__) && !defined(__STRICT_ANSI__) @@ -397,7 +394,7 @@ typedef uint64_t xen_callback_t; #define GUEST_GICD_BASE 0x03001000ULL #define GUEST_GICD_SIZE 0x00001000ULL #define GUEST_GICC_BASE 0x03002000ULL -#define GUEST_GICC_SIZE 0x00000100ULL +#define GUEST_GICC_SIZE 0x00002000ULL /* vGIC v3 mappings */ #define GUEST_GICV3_GICD_BASE 0x03001000ULL diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/cpufeatureset.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/xen/interface/arch-x86/cpufeatureset.h Wed Jul 06 10:05:38 2016 +0200 @@ -0,0 +1,246 @@ +/* + * arch-x86/cpufeatureset.h + * + * CPU featureset definitions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2015, 2016 Citrix Systems, Inc. + */ + +/* + * There are two expected ways of including this header. + * + * 1) The "default" case (expected from tools etc). + * + * Simply #include <public/arch-x86/cpufeatureset.h> + * + * In this circumstance, normal header guards apply and the includer shall get + * an enumeration in the XEN_X86_FEATURE_xxx namespace. + * + * 2) The special case where the includer provides XEN_CPUFEATURE() in scope. + * + * In this case, no inclusion guards apply and the caller is responsible for + * their XEN_CPUFEATURE() being appropriate in the included context. + */ + +#ifndef XEN_CPUFEATURE + +/* + * Includer has not provided a custom XEN_CPUFEATURE(). Arrange for normal + * header guards, an enum and constants in the XEN_X86_FEATURE_xxx namespace. + */ +#ifndef __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__ + +#define XEN_CPUFEATURESET_DEFAULT_INCLUDE + +#define XEN_CPUFEATURE(name, value) XEN_X86_FEATURE_##name = value, +enum { + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__ */ +#endif /* !XEN_CPUFEATURE */ + + +#ifdef XEN_CPUFEATURE +/* + * A featureset is a bitmap of x86 features, represented as a collection of + * 32bit words. + * + * Words are as specified in vendors programming manuals, and shall not + * contain any synthesied values. New words may be added to the end of + * featureset. + * + * All featureset words currently originate from leaves specified for the + * CPUID instruction, but this is not preclude other sources of information. + */ + +/* + * Attribute syntax: + * + * Attributes for a particular feature are provided as characters before the + * first space in the comment immediately following the feature value. Note - + * none of these attributes form part of the Xen public ABI. + * + * Special: '!' + * This bit has special properties and is not a straight indication of a + * piece of new functionality. Xen will handle these differently, + * and may override toolstack settings completely. + * + * Applicability to guests: 'A', 'S' or 'H' + * 'A' = All guests. + * 'S' = All HVM guests (not PV guests). + * 'H' = HVM HAP guests (not PV or HVM Shadow guests). + */ + +/* Intel-defined CPU features, CPUID level 0x00000001.edx, word 0 */ +XEN_CPUFEATURE(FPU, 0*32+ 0) /*A Onboard FPU */ +XEN_CPUFEATURE(VME, 0*32+ 1) /*S Virtual Mode Extensions */ +XEN_CPUFEATURE(DE, 0*32+ 2) /*A Debugging Extensions */ +XEN_CPUFEATURE(PSE, 0*32+ 3) /*S Page Size Extensions */ +XEN_CPUFEATURE(TSC, 0*32+ 4) /*A Time Stamp Counter */ +XEN_CPUFEATURE(MSR, 0*32+ 5) /*A Model-Specific Registers, RDMSR, WRMSR */ +XEN_CPUFEATURE(PAE, 0*32+ 6) /*A Physical Address Extensions */ +XEN_CPUFEATURE(MCE, 0*32+ 7) /*A Machine Check Architecture */ +XEN_CPUFEATURE(CX8, 0*32+ 8) /*A CMPXCHG8 instruction */ +XEN_CPUFEATURE(APIC, 0*32+ 9) /*!A Onboard APIC */ +XEN_CPUFEATURE(SEP, 0*32+11) /*A SYSENTER/SYSEXIT */ +XEN_CPUFEATURE(MTRR, 0*32+12) /*S Memory Type Range Registers */ +XEN_CPUFEATURE(PGE, 0*32+13) /*S Page Global Enable */ +XEN_CPUFEATURE(MCA, 0*32+14) /*A Machine Check Architecture */ +XEN_CPUFEATURE(CMOV, 0*32+15) /*A CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +XEN_CPUFEATURE(PAT, 0*32+16) /*A Page Attribute Table */ +XEN_CPUFEATURE(PSE36, 0*32+17) /*S 36-bit PSEs */ +XEN_CPUFEATURE(CLFLUSH, 0*32+19) /*A CLFLUSH instruction */ +XEN_CPUFEATURE(DS, 0*32+21) /* Debug Store */ +XEN_CPUFEATURE(ACPI, 0*32+22) /*A ACPI via MSR */ +XEN_CPUFEATURE(MMX, 0*32+23) /*A Multimedia Extensions */ +XEN_CPUFEATURE(FXSR, 0*32+24) /*A FXSAVE and FXRSTOR instructions */ +XEN_CPUFEATURE(SSE, 0*32+25) /*A Streaming SIMD Extensions */ +XEN_CPUFEATURE(SSE2, 0*32+26) /*A Streaming SIMD Extensions-2 */ +XEN_CPUFEATURE(HTT, 0*32+28) /*!A Hyper-Threading Technology */ +XEN_CPUFEATURE(TM1, 0*32+29) /* Thermal Monitor 1 */ +XEN_CPUFEATURE(PBE, 0*32+31) /* Pending Break Enable */ + +/* Intel-defined CPU features, CPUID level 0x00000001.ecx, word 1 */ +XEN_CPUFEATURE(SSE3, 1*32+ 0) /*A Streaming SIMD Extensions-3 */ +XEN_CPUFEATURE(PCLMULQDQ, 1*32+ 1) /*A Carry-less mulitplication */ +XEN_CPUFEATURE(DTES64, 1*32+ 2) /* 64-bit Debug Store */ +XEN_CPUFEATURE(MONITOR, 1*32+ 3) /* Monitor/Mwait support */ +XEN_CPUFEATURE(DSCPL, 1*32+ 4) /* CPL Qualified Debug Store */ +XEN_CPUFEATURE(VMX, 1*32+ 5) /*S Virtual Machine Extensions */ +XEN_CPUFEATURE(SMX, 1*32+ 6) /* Safer Mode Extensions */ +XEN_CPUFEATURE(EIST, 1*32+ 7) /* Enhanced SpeedStep */ +XEN_CPUFEATURE(TM2, 1*32+ 8) /* Thermal Monitor 2 */ +XEN_CPUFEATURE(SSSE3, 1*32+ 9) /*A Supplemental Streaming SIMD Extensions-3 */ +XEN_CPUFEATURE(FMA, 1*32+12) /*A Fused Multiply Add */ +XEN_CPUFEATURE(CX16, 1*32+13) /*A CMPXCHG16B */ +XEN_CPUFEATURE(XTPR, 1*32+14) /* Send Task Priority Messages */ +XEN_CPUFEATURE(PDCM, 1*32+15) /* Perf/Debug Capability MSR */ +XEN_CPUFEATURE(PCID, 1*32+17) /*H Process Context ID */ +XEN_CPUFEATURE(DCA, 1*32+18) /* Direct Cache Access */ +XEN_CPUFEATURE(SSE4_1, 1*32+19) /*A Streaming SIMD Extensions 4.1 */ +XEN_CPUFEATURE(SSE4_2, 1*32+20) /*A Streaming SIMD Extensions 4.2 */ +XEN_CPUFEATURE(X2APIC, 1*32+21) /*!A Extended xAPIC */ +XEN_CPUFEATURE(MOVBE, 1*32+22) /*A movbe instruction */ +XEN_CPUFEATURE(POPCNT, 1*32+23) /*A POPCNT instruction */ +XEN_CPUFEATURE(TSC_DEADLINE, 1*32+24) /*S TSC Deadline Timer */ +XEN_CPUFEATURE(AESNI, 1*32+25) /*A AES instructions */ +XEN_CPUFEATURE(XSAVE, 1*32+26) /*A XSAVE/XRSTOR/XSETBV/XGETBV */ +XEN_CPUFEATURE(OSXSAVE, 1*32+27) /*! OSXSAVE */ +XEN_CPUFEATURE(AVX, 1*32+28) /*A Advanced Vector Extensions */ +XEN_CPUFEATURE(F16C, 1*32+29) /*A Half-precision convert instruction */ +XEN_CPUFEATURE(RDRAND, 1*32+30) /*A Digital Random Number Generator */ +XEN_CPUFEATURE(HYPERVISOR, 1*32+31) /*!A Running under some hypervisor */ + +/* AMD-defined CPU features, CPUID level 0x80000001.edx, word 2 */ +XEN_CPUFEATURE(SYSCALL, 2*32+11) /*A SYSCALL/SYSRET */ +XEN_CPUFEATURE(NX, 2*32+20) /*A Execute Disable */ +XEN_CPUFEATURE(MMXEXT, 2*32+22) /*A AMD MMX extensions */ +XEN_CPUFEATURE(FFXSR, 2*32+25) /*A FFXSR instruction optimizations */ +XEN_CPUFEATURE(PAGE1GB, 2*32+26) /*H 1Gb large page support */ +XEN_CPUFEATURE(RDTSCP, 2*32+27) /*S RDTSCP */ +XEN_CPUFEATURE(LM, 2*32+29) /*A Long Mode (x86-64) */ +XEN_CPUFEATURE(3DNOWEXT, 2*32+30) /*A AMD 3DNow! extensions */ +XEN_CPUFEATURE(3DNOW, 2*32+31) /*A 3DNow! */ + +/* AMD-defined CPU features, CPUID level 0x80000001.ecx, word 3 */ +XEN_CPUFEATURE(LAHF_LM, 3*32+ 0) /*A LAHF/SAHF in long mode */ +XEN_CPUFEATURE(CMP_LEGACY, 3*32+ 1) /*!A If yes HyperThreading not valid */ +XEN_CPUFEATURE(SVM, 3*32+ 2) /*S Secure virtual machine */ +XEN_CPUFEATURE(EXTAPIC, 3*32+ 3) /* Extended APIC space */ +XEN_CPUFEATURE(CR8_LEGACY, 3*32+ 4) /*S CR8 in 32-bit mode */ +XEN_CPUFEATURE(ABM, 3*32+ 5) /*A Advanced bit manipulation */ +XEN_CPUFEATURE(SSE4A, 3*32+ 6) /*A SSE-4A */ +XEN_CPUFEATURE(MISALIGNSSE, 3*32+ 7) /*A Misaligned SSE mode */ +XEN_CPUFEATURE(3DNOWPREFETCH, 3*32+ 8) /*A 3DNow prefetch instructions */ +XEN_CPUFEATURE(OSVW, 3*32+ 9) /* OS Visible Workaround */ +XEN_CPUFEATURE(IBS, 3*32+10) /*S Instruction Based Sampling */ +XEN_CPUFEATURE(XOP, 3*32+11) /*A extended AVX instructions */ +XEN_CPUFEATURE(SKINIT, 3*32+12) /* SKINIT/STGI instructions */ +XEN_CPUFEATURE(WDT, 3*32+13) /* Watchdog timer */ +XEN_CPUFEATURE(LWP, 3*32+15) /*S Light Weight Profiling */ +XEN_CPUFEATURE(FMA4, 3*32+16) /*A 4 operands MAC instructions */ +XEN_CPUFEATURE(NODEID_MSR, 3*32+19) /* NodeId MSR */ +XEN_CPUFEATURE(TBM, 3*32+21) /*A trailing bit manipulations */ +XEN_CPUFEATURE(TOPOEXT, 3*32+22) /* topology extensions CPUID leafs */ +XEN_CPUFEATURE(DBEXT, 3*32+26) /*A data breakpoint extension */ +XEN_CPUFEATURE(MONITORX, 3*32+29) /* MONITOR extension (MONITORX/MWAITX) */ + +/* Intel-defined CPU features, CPUID level 0x0000000D:1.eax, word 4 */ +XEN_CPUFEATURE(XSAVEOPT, 4*32+ 0) /*A XSAVEOPT instruction */ +XEN_CPUFEATURE(XSAVEC, 4*32+ 1) /*A XSAVEC/XRSTORC instructions */ +XEN_CPUFEATURE(XGETBV1, 4*32+ 2) /*A XGETBV with %ecx=1 */ +XEN_CPUFEATURE(XSAVES, 4*32+ 3) /*S XSAVES/XRSTORS instructions */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */ +XEN_CPUFEATURE(FSGSBASE, 5*32+ 0) /*A {RD,WR}{FS,GS}BASE instructions */ +XEN_CPUFEATURE(TSC_ADJUST, 5*32+ 1) /*S TSC_ADJUST MSR available */ +XEN_CPUFEATURE(SGX, 5*32+ 2) /* Software Guard extensions */ +XEN_CPUFEATURE(BMI1, 5*32+ 3) /*A 1st bit manipulation extensions */ +XEN_CPUFEATURE(HLE, 5*32+ 4) /*A Hardware Lock Elision */ +XEN_CPUFEATURE(AVX2, 5*32+ 5) /*A AVX2 instructions */ +XEN_CPUFEATURE(FDP_EXCP_ONLY, 5*32+ 6) /*! x87 FDP only updated on exception. */ +XEN_CPUFEATURE(SMEP, 5*32+ 7) /*S Supervisor Mode Execution Protection */ +XEN_CPUFEATURE(BMI2, 5*32+ 8) /*A 2nd bit manipulation extensions */ +XEN_CPUFEATURE(ERMS, 5*32+ 9) /*A Enhanced REP MOVSB/STOSB */ +XEN_CPUFEATURE(INVPCID, 5*32+10) /*H Invalidate Process Context ID */ +XEN_CPUFEATURE(RTM, 5*32+11) /*A Restricted Transactional Memory */ +XEN_CPUFEATURE(PQM, 5*32+12) /* Platform QoS Monitoring */ +XEN_CPUFEATURE(NO_FPU_SEL, 5*32+13) /*! FPU CS/DS stored as zero */ +XEN_CPUFEATURE(MPX, 5*32+14) /*S Memory Protection Extensions */ +XEN_CPUFEATURE(PQE, 5*32+15) /* Platform QoS Enforcement */ +XEN_CPUFEATURE(RDSEED, 5*32+18) /*A RDSEED instruction */ +XEN_CPUFEATURE(ADX, 5*32+19) /*A ADCX, ADOX instructions */ +XEN_CPUFEATURE(SMAP, 5*32+20) /*S Supervisor Mode Access Prevention */ +XEN_CPUFEATURE(CLFLUSHOPT, 5*32+23) /*A CLFLUSHOPT instruction */ +XEN_CPUFEATURE(CLWB, 5*32+24) /*A CLWB instruction */ +XEN_CPUFEATURE(SHA, 5*32+29) /*A SHA1 & SHA256 instructions */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */ +XEN_CPUFEATURE(PREFETCHWT1, 6*32+ 0) /*A PREFETCHWT1 instruction */ +XEN_CPUFEATURE(PKU, 6*32+ 3) /*H Protection Keys for Userspace */ +XEN_CPUFEATURE(OSPKE, 6*32+ 4) /*! OS Protection Keys Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ +XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */ +XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ + +/* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ +XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ + +#endif /* XEN_CPUFEATURE */ + +/* Clean up from a default include. Close the enum (for C). */ +#ifdef XEN_CPUFEATURESET_DEFAULT_INCLUDE +#undef XEN_CPUFEATURESET_DEFAULT_INCLUDE +#undef XEN_CPUFEATURE +}; + +#endif /* XEN_CPUFEATURESET_DEFAULT_INCLUDE */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/hvm/save.h --- a/include/xen/interface/arch-x86/hvm/save.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-x86/hvm/save.h Wed Jul 06 10:05:38 2016 +0200 @@ -47,7 +47,9 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct /* * Processor * - * Compat: Pre-3.4 didn't have msr_tsc_aux + * Compat: + * - Pre-3.4 didn't have msr_tsc_aux + * - Pre-4.7 didn't have fpu_initialised */ struct hvm_hw_cpu { @@ -157,6 +159,11 @@ struct hvm_hw_cpu { }; /* error code for pending event */ uint32_t error_code; + +#define _XEN_X86_FPU_INITIALISED 0 +#define XEN_X86_FPU_INITIALISED (1U<<_XEN_X86_FPU_INITIALISED) + uint32_t flags; + uint32_t pad0; }; struct hvm_hw_cpu_compat { @@ -268,19 +275,26 @@ struct hvm_hw_cpu_compat { uint32_t error_code; }; -static inline int _hvm_hw_fix_cpu(void *h) { +static inline int _hvm_hw_fix_cpu(void *h, uint32_t size) { union hvm_hw_cpu_union { struct hvm_hw_cpu nat; struct hvm_hw_cpu_compat cmp; } *ucpu = (union hvm_hw_cpu_union *)h; - /* If we copy from the end backwards, we should - * be able to do the modification in-place */ - ucpu->nat.error_code = ucpu->cmp.error_code; - ucpu->nat.pending_event = ucpu->cmp.pending_event; - ucpu->nat.tsc = ucpu->cmp.tsc; - ucpu->nat.msr_tsc_aux = 0; + if ( size == sizeof(struct hvm_hw_cpu_compat) ) + { + /* + * If we copy from the end backwards, we should + * be able to do the modification in-place. + */ + ucpu->nat.error_code = ucpu->cmp.error_code; + ucpu->nat.pending_event = ucpu->cmp.pending_event; + ucpu->nat.tsc = ucpu->cmp.tsc; + ucpu->nat.msr_tsc_aux = 0; + } + /* Mimic the old behaviour by unconditionally setting fpu_initialised. */ + ucpu->nat.flags = XEN_X86_FPU_INITIALISED; return 0; } @@ -550,12 +564,11 @@ struct hvm_hw_cpu_xsave { struct { struct { char x[512]; } fpu_sse; - struct { + struct hvm_hw_cpu_xsave_hdr { uint64_t xstate_bv; /* Updated by XRSTOR */ - uint64_t reserved[7]; + uint64_t xcomp_bv; /* Updated by XRSTOR{C,S} */ + uint64_t reserved[6]; } xsave_hdr; /* The 64-byte header */ - - struct { char x[0]; } ymm; /* YMM */ } save_area; }; @@ -575,7 +588,9 @@ struct hvm_viridian_domain_context { DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); struct hvm_viridian_vcpu_context { - uint64_t apic_assist; + uint64_t apic_assist_msr; + uint8_t apic_assist_vector; + uint8_t _pad[7]; }; DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/xen-mca.h --- a/include/xen/interface/arch-x86/xen-mca.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-x86/xen-mca.h Wed Jul 06 10:05:38 2016 +0200 @@ -1,11 +1,11 @@ /****************************************************************************** * arch-x86/mca.h - * + * * Contributed by Advanced Micro Devices, Inc. * Author: Christoph Egger <Christoph.Egger@xxxxxxx> * * Guest OS machine check interface to x86 Xen. - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -156,7 +156,7 @@ struct mcinfo_msr { }; /* contains mc information from other - * or additional mc MSRs */ + * or additional mc MSRs */ struct mcinfo_extended { struct mcinfo_common common; @@ -193,10 +193,10 @@ struct mcinfo_extended { /* L3 cache disable Action */ #define MC_ACTION_CACHE_SHRINK (0x1 << 2) -/* Below interface used between XEN/DOM0 for passing XEN's recovery action - * information to DOM0. +/* Below interface used between XEN/DOM0 for passing XEN's recovery action + * information to DOM0. * usage Senario: After offlining broken page, XEN might pass its page offline - * recovery action result to DOM0. DOM0 will save the information in + * recovery action result to DOM0. DOM0 will save the information in * non-volatile memory for further proactive actions, such as offlining the * easy broken page earlier when doing next reboot. */ @@ -255,8 +255,8 @@ DEFINE_XEN_GUEST_HANDLE(mc_info_t); #define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ struct mcinfo_logical_cpu { - uint32_t mc_cpunr; - uint32_t mc_chipid; + uint32_t mc_cpunr; + uint32_t mc_chipid; uint16_t mc_coreid; uint16_t mc_threadid; uint32_t mc_apicid; @@ -281,7 +281,7 @@ typedef struct mcinfo_logical_cpu xen_mc DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); -/* +/* * OS's should use these instead of writing their own lookup function * each with its own bugs and drawbacks. * We use macros instead of static inline functions to allow guests @@ -388,16 +388,19 @@ struct xen_mc_physcpuinfo { #define XEN_MC_msrinject 4 #define MC_MSRINJ_MAXMSRS 8 struct xen_mc_msrinject { - /* IN */ - uint32_t mcinj_cpunr; /* target processor id */ - uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ - uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ - uint32_t _pad0; - struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ + domid_t mcinj_domid; /* valid only if MC_MSRINJ_F_GPADDR is + present in mcinj_flags */ + uint16_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; }; /* Flags for mcinj_flags above; bits 16-31 are reserved */ #define MC_MSRINJ_F_INTERPOSE 0x1 +#define MC_MSRINJ_F_GPADDR 0x2 #define XEN_MC_mceinject 5 struct xen_mc_mceinject { diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/xen-x86_32.h --- a/include/xen/interface/arch-x86/xen-x86_32.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-x86/xen-x86_32.h Wed Jul 06 10:05:38 2016 +0200 @@ -58,34 +58,31 @@ #define __HYPERVISOR_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_END_PAE 0xF6800000 -#define HYPERVISOR_VIRT_START_PAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) -#define MACH2PHYS_VIRT_START_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) -#define MACH2PHYS_VIRT_END_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) +#define HYPERVISOR_VIRT_START_PAE xen_mk_ulong(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE xen_mk_ulong(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE xen_mk_ulong(__MACH2PHYS_VIRT_END_PAE) /* Non-PAE bounds are obsolete. */ #define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 #define HYPERVISOR_VIRT_START_NONPAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) + xen_mk_ulong(__HYPERVISOR_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_START_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) + xen_mk_ulong(__MACH2PHYS_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_END_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + xen_mk_ulong(__MACH2PHYS_VIRT_END_NONPAE) #define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE #define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE #define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE #ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START) #endif -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_VIRT_START xen_mk_ulong(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END xen_mk_ulong(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/xen-x86_64.h --- a/include/xen/interface/arch-x86/xen-x86_64.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-x86/xen-x86_64.h Wed Jul 06 10:05:38 2016 +0200 @@ -76,12 +76,12 @@ #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 #ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END xen_mk_ulong(__HYPERVISOR_VIRT_END) #endif -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_VIRT_START xen_mk_ulong(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END xen_mk_ulong(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/arch-x86/xen.h --- a/include/xen/interface/arch-x86/xen.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/arch-x86/xen.h Wed Jul 06 10:05:38 2016 +0200 @@ -54,9 +54,6 @@ #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) #define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) #define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__i386__) @@ -268,7 +265,31 @@ typedef struct arch_shared_info arch_sha * XEN_DOMCTL_INTERFACE_VERSION. */ struct xen_arch_domainconfig { - char dummy; +#define _XEN_X86_EMU_LAPIC 0 +#define XEN_X86_EMU_LAPIC (1U<<_XEN_X86_EMU_LAPIC) +#define _XEN_X86_EMU_HPET 1 +#define XEN_X86_EMU_HPET (1U<<_XEN_X86_EMU_HPET) +#define _XEN_X86_EMU_PM 2 +#define XEN_X86_EMU_PM (1U<<_XEN_X86_EMU_PM) +#define _XEN_X86_EMU_RTC 3 +#define XEN_X86_EMU_RTC (1U<<_XEN_X86_EMU_RTC) +#define _XEN_X86_EMU_IOAPIC 4 +#define XEN_X86_EMU_IOAPIC (1U<<_XEN_X86_EMU_IOAPIC) +#define _XEN_X86_EMU_PIC 5 +#define XEN_X86_EMU_PIC (1U<<_XEN_X86_EMU_PIC) +#define _XEN_X86_EMU_VGA 6 +#define XEN_X86_EMU_VGA (1U<<_XEN_X86_EMU_VGA) +#define _XEN_X86_EMU_IOMMU 7 +#define XEN_X86_EMU_IOMMU (1U<<_XEN_X86_EMU_IOMMU) +#define _XEN_X86_EMU_PIT 8 +#define XEN_X86_EMU_PIT (1U<<_XEN_X86_EMU_PIT) + +#define XEN_X86_EMU_ALL (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET | \ + XEN_X86_EMU_PM | XEN_X86_EMU_RTC | \ + XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC | \ + XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU | \ + XEN_X86_EMU_PIT) + uint32_t emulation_flags; }; #endif diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/domctl.h --- a/include/xen/interface/domctl.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/domctl.h Wed Jul 06 10:05:38 2016 +0200 @@ -63,6 +63,9 @@ struct xen_domctl_createdomain { /* Is this a PVH guest (as opposed to an HVM or PV guest)? */ #define _XEN_DOMCTL_CDF_pvh_guest 4 #define XEN_DOMCTL_CDF_pvh_guest (1U<<_XEN_DOMCTL_CDF_pvh_guest) + /* Is this a xenstore domain? */ +#define _XEN_DOMCTL_CDF_xs_domain 5 +#define XEN_DOMCTL_CDF_xs_domain (1U<<_XEN_DOMCTL_CDF_xs_domain) uint32_t flags; struct xen_arch_domainconfig config; }; @@ -97,6 +100,9 @@ struct xen_domctl_getdomaininfo { /* domain is PVH */ #define _XEN_DOMINF_pvh_guest 7 #define XEN_DOMINF_pvh_guest (1U<<_XEN_DOMINF_pvh_guest) +/* domain is a xenstore domain */ +#define _XEN_DOMINF_xs_domain 8 +#define XEN_DOMINF_xs_domain (1U<<_XEN_DOMINF_xs_domain) /* XEN_DOMINF_shutdown guest-supplied code. */ #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 @@ -184,8 +190,11 @@ struct xen_domctl_getpageframeinfo3 { #define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ #define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 - /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ + /* + * No longer supported, was equiv. to ENABLE with mode flags + * ENABLE_REFCOUNT and ENABLE_TRANSLATE: #define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 + */ /* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ /* @@ -208,6 +217,13 @@ struct xen_domctl_getpageframeinfo3 { */ #define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) +/* Mode flags for XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK}. */ + /* + * This is the final iteration: Requesting to include pages mapped + * writably by the hypervisor in the dirty bitmap. + */ +#define XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL (1 << 0) + struct xen_domctl_shadow_op_stats { uint32_t fault_count; uint32_t dirty_count; @@ -219,8 +235,9 @@ struct xen_domctl_shadow_op { /* IN variables. */ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ - /* OP_ENABLE */ - uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + /* OP_ENABLE: XEN_DOMCTL_SHADOW_ENABLE_* */ + /* OP_PEAK / OP_CLEAN: XEN_DOMCTL_SHADOW_LOGDIRTY_* */ + uint32_t mode; /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ uint32_t mb; /* Shadow memory allocation in MB */ @@ -290,6 +307,9 @@ struct xen_domctl_vcpuaffinity { /* Set/get the soft affinity for vcpu */ #define _XEN_VCPUAFFINITY_SOFT 1 #define XEN_VCPUAFFINITY_SOFT (1U<<_XEN_VCPUAFFINITY_SOFT) + /* Undo SCHEDOP_pin_override */ +#define _XEN_VCPUAFFINITY_FORCE 2 +#define XEN_VCPUAFFINITY_FORCE (1U<<_XEN_VCPUAFFINITY_FORCE) uint32_t flags; /* * IN/OUT variables. @@ -330,24 +350,63 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_v #define XEN_SCHEDULER_ARINC653 7 #define XEN_SCHEDULER_RTDS 8 -/* Set or get info? */ +typedef struct xen_domctl_sched_credit { + uint16_t weight; + uint16_t cap; +} xen_domctl_sched_credit_t; + +typedef struct xen_domctl_sched_credit2 { + uint16_t weight; +} xen_domctl_sched_credit2_t; + +typedef struct xen_domctl_sched_rtds { + uint32_t period; + uint32_t budget; +} xen_domctl_sched_rtds_t; + +typedef struct xen_domctl_schedparam_vcpu { + union { + xen_domctl_sched_credit_t credit; + xen_domctl_sched_credit2_t credit2; + xen_domctl_sched_rtds_t rtds; + } u; + uint32_t vcpuid; +} xen_domctl_schedparam_vcpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_schedparam_vcpu_t); + +/* + * Set or get info? + * For schedulers supporting per-vcpu settings (e.g., RTDS): + * XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus; + * XEN_DOMCTL_SCHEDOP_getinfo gets default params; + * XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo sets (gets) params of vcpus; + * + * For schedulers not supporting per-vcpu settings: + * XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus; + * XEN_DOMCTL_SCHEDOP_getinfo gets domain-wise params; + * XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo returns error; + */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 +#define XEN_DOMCTL_SCHEDOP_putvcpuinfo 2 +#define XEN_DOMCTL_SCHEDOP_getvcpuinfo 3 struct xen_domctl_scheduler_op { uint32_t sched_id; /* XEN_SCHEDULER_* */ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ + /* IN/OUT */ union { - struct xen_domctl_sched_credit { - uint16_t weight; - uint16_t cap; - } credit; - struct xen_domctl_sched_credit2 { - uint16_t weight; - } credit2; - struct xen_domctl_sched_rtds { - uint32_t period; - uint32_t budget; - } rtds; + xen_domctl_sched_credit_t credit; + xen_domctl_sched_credit2_t credit2; + xen_domctl_sched_rtds_t rtds; + struct { + XEN_GUEST_HANDLE_64(xen_domctl_schedparam_vcpu_t) vcpus; + /* + * IN: Number of elements in vcpus array. + * OUT: Number of processed elements of vcpus array. + */ + uint32_t nr_vcpus; + uint32_t padding; + } v; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; @@ -556,8 +615,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_ /* Bind machine I/O address range -> HVM address range. */ -/* If this returns -E2BIG lower nr_mfns value. */ /* XEN_DOMCTL_memory_mapping */ +/* Returns + - zero success, everything done + - -E2BIG passed in nr_mfns value too large for the implementation + - positive partial success for the first <result> page frames (with + <result> less than nr_mfns), requiring re-invocation by the + caller after updating inputs + - negative error; other than -E2BIG +*/ #define DPCI_ADD_MAPPING 1 #define DPCI_REMOVE_MAPPING 0 struct xen_domctl_memory_mapping { @@ -1050,6 +1116,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_c #define XEN_DOMCTL_MONITOR_OP_ENABLE 0 #define XEN_DOMCTL_MONITOR_OP_DISABLE 1 #define XEN_DOMCTL_MONITOR_OP_GET_CAPABILITIES 2 +#define XEN_DOMCTL_MONITOR_OP_EMULATE_EACH_REP 3 #define XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG 0 #define XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR 1 @@ -1099,6 +1166,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_monit struct xen_domctl_psr_cat_op { #define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM 0 #define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM 1 +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CODE 2 +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_DATA 3 +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CODE 4 +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_DATA 5 uint32_t cmd; /* IN: XEN_DOMCTL_PSR_CAT_OP_* */ uint32_t target; /* IN */ uint64_t data; /* IN/OUT */ @@ -1182,6 +1253,7 @@ struct xen_domctl { #define XEN_DOMCTL_psr_cmt_op 75 #define XEN_DOMCTL_monitor_op 77 #define XEN_DOMCTL_psr_cat_op 78 +#define XEN_DOMCTL_soft_reset 79 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/elfnote.h --- a/include/xen/interface/elfnote.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/elfnote.h Wed Jul 06 10:05:38 2016 +0200 @@ -200,9 +200,19 @@ #define XEN_ELFNOTE_SUPPORTED_FEATURES 17 /* + * Physical entry point into the kernel. + * + * 32bit entry point into the kernel. When requested to launch the + * guest kernel in a HVM container, Xen will use this entry point to + * launch the guest in 32bit protected mode with paging disabled. + * Ignored otherwise. + */ +#define XEN_ELFNOTE_PHYS32_ENTRY 18 + +/* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY /* * System information exported through crash notes. diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/errno.h --- a/include/xen/interface/errno.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/errno.h Wed Jul 06 10:05:38 2016 +0200 @@ -1,21 +1,49 @@ +/* + * There are two expected ways of including this header. + * + * 1) The "default" case (expected from tools etc). + * + * Simply #include <public/errno.h> + * + * In this circumstance, normal header guards apply and the includer shall get + * an enumeration in the XEN_xxx namespace, appropriate for C or assembly. + * + * 2) The special case where the includer provides a XEN_ERRNO() in scope. + * + * In this case, no inclusion guards apply and the caller is responsible for + * their XEN_ERRNO() being appropriate in the included context. The header + * will unilaterally #undef XEN_ERRNO(). + */ + +#ifndef XEN_ERRNO + +/* + * Includer has not provided a custom XEN_ERRNO(). Arrange for normal header + * guards, an automatic enum (for C code) and constants in the XEN_xxx + * namespace. + */ #ifndef __XEN_PUBLIC_ERRNO_H__ +#define __XEN_PUBLIC_ERRNO_H__ + +#define XEN_ERRNO_DEFAULT_INCLUDE #ifndef __ASSEMBLY__ #define XEN_ERRNO(name, value) XEN_##name = value, enum xen_errno { -#else /* !__ASSEMBLY__ */ +#elif __XEN_INTERFACE_VERSION__ < 0x00040700 #define XEN_ERRNO(name, value) .equ XEN_##name, value #endif /* __ASSEMBLY__ */ +#endif /* __XEN_PUBLIC_ERRNO_H__ */ +#endif /* !XEN_ERRNO */ + /* ` enum neg_errnoval { [ -Efoo for each Efoo in the list below ] } */ /* ` enum errnoval { */ -#endif /* __XEN_PUBLIC_ERRNO_H__ */ - #ifdef XEN_ERRNO /* @@ -42,6 +70,7 @@ XEN_ERRNO(ENOEXEC, 8) /* Exec format er XEN_ERRNO(EBADF, 9) /* Bad file number */ XEN_ERRNO(ECHILD, 10) /* No child processes */ XEN_ERRNO(EAGAIN, 11) /* Try again */ +XEN_ERRNO(EWOULDBLOCK, 11) /* Operation would block. Aliases EAGAIN */ XEN_ERRNO(ENOMEM, 12) /* Out of memory */ XEN_ERRNO(EACCES, 13) /* Permission denied */ XEN_ERRNO(EFAULT, 14) /* Bad address */ @@ -49,16 +78,20 @@ XEN_ERRNO(EBUSY, 16) /* Device or resour XEN_ERRNO(EEXIST, 17) /* File exists */ XEN_ERRNO(EXDEV, 18) /* Cross-device link */ XEN_ERRNO(ENODEV, 19) /* No such device */ +XEN_ERRNO(EISDIR, 21) /* Is a directory */ XEN_ERRNO(EINVAL, 22) /* Invalid argument */ XEN_ERRNO(ENFILE, 23) /* File table overflow */ XEN_ERRNO(EMFILE, 24) /* Too many open files */ XEN_ERRNO(ENOSPC, 28) /* No space left on device */ +XEN_ERRNO(EROFS, 30) /* Read-only file system */ XEN_ERRNO(EMLINK, 31) /* Too many links */ XEN_ERRNO(EDOM, 33) /* Math argument out of domain of func */ XEN_ERRNO(ERANGE, 34) /* Math result not representable */ XEN_ERRNO(EDEADLK, 35) /* Resource deadlock would occur */ +XEN_ERRNO(EDEADLOCK, 35) /* Resource deadlock would occur. Aliases EDEADLK */ XEN_ERRNO(ENAMETOOLONG, 36) /* File name too long */ XEN_ERRNO(ENOLCK, 37) /* No record locks available */ +XEN_ERRNO(ENOTEMPTY, 39) /* Directory not empty */ XEN_ERRNO(ENOSYS, 38) /* Function not implemented */ XEN_ERRNO(ENODATA, 61) /* No data available */ XEN_ERRNO(ETIME, 62) /* Timer expired */ @@ -79,17 +112,13 @@ XEN_ERRNO(ETIMEDOUT, 110) /* Connection #undef XEN_ERRNO #endif /* XEN_ERRNO */ - -#ifndef __XEN_PUBLIC_ERRNO_H__ -#define __XEN_PUBLIC_ERRNO_H__ - /* ` } */ +/* Clean up from a default include. Close the enum (for C). */ +#ifdef XEN_ERRNO_DEFAULT_INCLUDE +#undef XEN_ERRNO_DEFAULT_INCLUDE #ifndef __ASSEMBLY__ }; #endif -#define XEN_EWOULDBLOCK XEN_EAGAIN /* Operation would block */ -#define XEN_EDEADLOCK XEN_EDEADLK /* Resource deadlock would occur */ - -#endif /* __XEN_PUBLIC_ERRNO_H__ */ +#endif /* XEN_ERRNO_DEFAULT_INCLUDE */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/event_channel.h --- a/include/xen/interface/event_channel.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/event_channel.h Wed Jul 06 10:05:38 2016 +0200 @@ -85,7 +85,7 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); * is allocated in <dom> and returned as <port>. * NOTES: * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. - * 2. <rdom> may be DOMID_SELF, allowing loopback connections. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. */ struct evtchn_alloc_unbound { /* IN parameters */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/grant_table.h --- a/include/xen/interface/grant_table.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/grant_table.h Wed Jul 06 10:05:38 2016 +0200 @@ -43,7 +43,7 @@ * table are identified by grant references. A grant reference is an * integer, which indexes into the grant table. It acts as a * capability which the grantee can use to perform operations on the - * granterâ??s memory. + * granter's memory. * * This capability-based system allows shared-memory communications * between unprivileged domains. A grant reference also encapsulates diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/hvm/hvm_op.h --- a/include/xen/interface/hvm/hvm_op.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/hvm/hvm_op.h Wed Jul 06 10:05:38 2016 +0200 @@ -83,7 +83,12 @@ typedef enum { HVMMEM_ram_rw, /* Normal read/write guest RAM */ HVMMEM_ram_ro, /* Read-only; writes are discarded */ HVMMEM_mmio_dm, /* Reads and write go to the device model */ +#if __XEN_INTERFACE_VERSION__ < 0x00040700 HVMMEM_mmio_write_dm /* Read-only; writes go to the device model */ +#else + HVMMEM_unused /* Placeholder; setting memory to this type + will fail for code after 4.7.0 */ +#endif } hvmmem_type_t; /* Following tools-only interfaces may change in future. */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/hvm/hvm_vcpu.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/xen/interface/hvm/hvm_vcpu.h Wed Jul 06 10:05:38 2016 +0200 @@ -0,0 +1,144 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2015, Roger Pau Monne <roger.pau@xxxxxxxxxx> + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__ +#define __XEN_PUBLIC_HVM_HVM_VCPU_H__ + +#include "../xen.h" + +struct vcpu_hvm_x86_32 { + uint32_t eax; + uint32_t ecx; + uint32_t edx; + uint32_t ebx; + uint32_t esp; + uint32_t ebp; + uint32_t esi; + uint32_t edi; + uint32_t eip; + uint32_t eflags; + + uint32_t cr0; + uint32_t cr3; + uint32_t cr4; + + uint32_t pad1; + + /* + * EFER should only be used to set the NXE bit (if required) + * when starting a vCPU in 32bit mode with paging enabled or + * to set the LME/LMA bits in order to start the vCPU in + * compatibility mode. + */ + uint64_t efer; + + uint32_t cs_base; + uint32_t ds_base; + uint32_t ss_base; + uint32_t es_base; + uint32_t tr_base; + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t ss_limit; + uint32_t es_limit; + uint32_t tr_limit; + uint16_t cs_ar; + uint16_t ds_ar; + uint16_t ss_ar; + uint16_t es_ar; + uint16_t tr_ar; + + uint16_t pad2[3]; +}; + +/* + * The layout of the _ar fields of the segment registers is the + * following: + * + * Bits [0,3]: type (bits 40-43). + * Bit 4: s (descriptor type, bit 44). + * Bit [5,6]: dpl (descriptor privilege level, bits 45-46). + * Bit 7: p (segment-present, bit 47). + * Bit 8: avl (available for system software, bit 52). + * Bit 9: l (64-bit code segment, bit 53). + * Bit 10: db (meaning depends on the segment, bit 54). + * Bit 11: g (granularity, bit 55) + * Bits [12,15]: unused, must be blank. + * + * A more complete description of the meaning of this fields can be + * obtained from the Intel SDM, Volume 3, section 3.4.5. + */ + +struct vcpu_hvm_x86_64 { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr3; + uint64_t cr4; + uint64_t efer; + + /* + * Using VCPU_HVM_MODE_64B implies that the vCPU is launched + * directly in long mode, so the cached parts of the segment + * registers get set to match that environment. + * + * If the user wants to launch the vCPU in compatibility mode + * the 32-bit structure should be used instead. + */ +}; + +struct vcpu_hvm_context { +#define VCPU_HVM_MODE_32B 0 /* 32bit fields of the structure will be used. */ +#define VCPU_HVM_MODE_64B 1 /* 64bit fields of the structure will be used. */ + uint32_t mode; + + uint32_t pad; + + /* CPU registers. */ + union { + struct vcpu_hvm_x86_32 x86_32; + struct vcpu_hvm_x86_64 x86_64; + } cpu_regs; +}; +typedef struct vcpu_hvm_context vcpu_hvm_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_hvm_context_t); + +#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/hvm/params.h --- a/include/xen/interface/hvm/params.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/hvm/params.h Wed Jul 06 10:05:38 2016 +0200 @@ -29,18 +29,44 @@ * Parameter space for HVMOP_{set,get}_param. */ +#define HVM_PARAM_CALLBACK_IRQ 0 /* * How should CPU0 event-channel notifications be delivered? - * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). - * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: - * Domain = val[47:32], Bus = val[31:16], - * DevFn = val[15: 8], IntX = val[ 1: 0] - * val[63:56] == 2: val[7:0] is a vector number, check for - * XENFEAT_hvm_callback_vector to know if this delivery - * method is available. + * * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: */ -#define HVM_PARAM_CALLBACK_IRQ 0 + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#if defined(__i386__) || defined(__x86_64__) +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. + */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#endif /* * These are not used by Xen. They are here for convenience of HVM-guest @@ -103,11 +129,21 @@ #define _HVMPV_reference_tsc 3 #define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) +/* Use Hypercall for remote TLB flush */ +#define _HVMPV_hcall_remote_tlb_flush 4 +#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush) + +/* Use APIC assist */ +#define _HVMPV_apic_assist 5 +#define HVMPV_apic_assist (1 << _HVMPV_apic_assist) + #define HVMPV_feature_mask \ - (HVMPV_base_freq | \ - HVMPV_no_freq | \ - HVMPV_time_ref_count | \ - HVMPV_reference_tsc) + (HVMPV_base_freq | \ + HVMPV_no_freq | \ + HVMPV_time_ref_count | \ + HVMPV_reference_tsc | \ + HVMPV_hcall_remote_tlb_flush | \ + HVMPV_apic_assist) #endif @@ -197,6 +233,28 @@ /* Boolean: Enable altp2m */ #define HVM_PARAM_ALTP2M 35 -#define HVM_NR_PARAMS 36 +/* + * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to + * save/restore. This is a workaround for a hardware limitation that + * does not allow the full FIP/FDP and FCS/FDS to be restored. + * + * Valid values are: + * + * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU + * has FPCSDS feature). + * + * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of + * FIP/FDP. + * + * 0: allow hypervisor to choose based on the value of FIP/FDP + * (default if CPU does not have FPCSDS). + * + * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU + * never saves FCS/FDS and this parameter should be left at the + * default of 8. + */ +#define HVM_PARAM_X87_FIP_WIDTH 36 + +#define HVM_NR_PARAMS 37 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/hvm/save.h --- a/include/xen/interface/hvm/save.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/hvm/save.h Wed Jul 06 10:05:38 2016 +0200 @@ -63,13 +63,15 @@ struct hvm_save_descriptor { #ifdef __XEN__ # define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ - static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \ - struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h, uint32_t size) \ + { return _fix(h, size); } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; } # include <xen/lib.h> /* BUG() */ # define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ - static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h, uint32_t size) \ + { BUG(); return -1; } \ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \ struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; } #else @@ -89,7 +91,7 @@ struct hvm_save_descriptor { # define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x))) # define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1) -# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst) +# define HVM_SAVE_FIX_COMPAT(_x, _dst, _size) __HVM_SAVE_FIX_COMPAT_##_x(_dst, _size) #endif /* diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/blkif.h --- a/include/xen/interface/io/blkif.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/blkif.h Wed Jul 06 10:05:38 2016 +0200 @@ -89,15 +89,23 @@ * Values: string * * A free formatted string providing sufficient information for the - * backend driver to open the backing device. (e.g. the path to the - * file or block device representing the backing store.) + * hotplug script to attach the device and provide a suitable + * handler (ie: a block device) for blkback to use. * * physical-device * Values: "MAJOR:MINOR" + * Notes: 11 * * MAJOR and MINOR are the major number and minor number of the * backing device respectively. * + * physical-device-path + * Values: path string + * + * A string that contains the absolute path to the disk image. On + * NetBSD and Linux this is always a block device, while on FreeBSD + * it can be either a block device or a regular file. + * * type * Values: "file", "phy", "tap" * @@ -391,6 +399,103 @@ * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. *(10) The discard-secure property may be present and will be set to 1 if the * backing device supports secure discard. + *(11) Only used by Linux and NetBSD. + */ + +/* + * Multiple hardware queues/rings: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vbd, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues" with the number they wish to use, which must be + * greater than zero, and no more than the value reported by the backend in + * "multi-queue-max-queues". + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel and ring-ref keys, instead writing those keys under sub-keys + * having the name "queue-N" where N is the integer ID of the queue/ring for + * which those keys belong. Queues are indexed from zero. + * For example, a frontend with two queues must write the following set of + * queue-related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + * It is also possible to use multiple queues/rings together with + * feature multi-page ring buffer. + * For example, a frontend requests two queues/rings and the size of each ring + * buffer is two pages must write the following set of related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/ring-page-order = "1" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + */ + +/* + * Multiple hardware queues/rings: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vbd, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues" with the number they wish to use, which must be + * greater than zero, and no more than the value reported by the backend in + * "multi-queue-max-queues". + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel and ring-ref keys, instead writing those keys under sub-keys + * having the name "queue-N" where N is the integer ID of the queue/ring for + * which those keys belong. Queues are indexed from zero. + * For example, a frontend with two queues must write the following set of + * queue-related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + * It is also possible to use multiple queues/rings together with + * feature multi-page ring buffer. + * For example, a frontend requests two queues/rings and the size of each ring + * buffer is two pages must write the following set of related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/ring-page-order = "1" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * */ /* diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/libxenvchan.h --- a/include/xen/interface/io/libxenvchan.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/libxenvchan.h Wed Jul 06 10:05:38 2016 +0200 @@ -10,18 +10,23 @@ * * @section LICENSE * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; If not, see <http://www.gnu.org/licenses/>. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. * * @section DESCRIPTION * diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/netif.h --- a/include/xen/interface/io/netif.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/netif.h Wed Jul 06 10:05:38 2016 +0200 @@ -1,8 +1,8 @@ /****************************************************************************** * netif.h - * + * * Unified network-device I/O interface for Xen guest OSes. - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -136,35 +136,512 @@ */ /* - * "feature-multicast-control" advertises the capability to filter ethernet - * multicast packets in the backend. To enable use of this capability the - * frontend must set "request-multicast-control" before moving into the - * connected state. + * "feature-multicast-control" and "feature-dynamic-multicast-control" + * advertise the capability to filter ethernet multicast packets in the + * backend. If the frontend wishes to take advantage of this feature then + * it may set "request-multicast-control". If the backend only advertises + * "feature-multicast-control" then "request-multicast-control" must be set + * before the frontend moves into the connected state. The backend will + * sample the value on this state transition and any subsequent change in + * value will have no effect. However, if the backend also advertises + * "feature-dynamic-multicast-control" then "request-multicast-control" + * may be set by the frontend at any time. In this case, the backend will + * watch the value and re-sample on watch events. * - * If "request-multicast-control" is set then the backend transmit side should - * no longer flood multicast packets to the frontend, it should instead drop any - * multicast packet that does not match in a filter list. The list is - * amended by the frontend by sending dummy transmit requests containing - * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as specified below. - * Once enabled by the frontend, the feature cannot be disabled except by - * closing and re-connecting to the backend. + * If the sampled value of "request-multicast-control" is set then the + * backend transmit side should no longer flood multicast packets to the + * frontend, it should instead drop any multicast packet that does not + * match in a filter list. + * The list is amended by the frontend by sending dummy transmit requests + * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as + * specified below. + * Note that the filter list may be amended even if the sampled value of + * "request-multicast-control" is not set, however the filter should only + * be applied if it is set. */ /* - * This is the 'wire' format for packets: - * Request 1: netif_tx_request_t -- NETTXF_* (any flags) - * [Request 2: netif_extra_info_t] (only if request 1 has NETTXF_extra_info) - * [Request 3: netif_extra_info_t] (only if request 2 has XEN_NETIF_EXTRA_MORE) - * Request 4: netif_tx_request_t -- NETTXF_more_data - * Request 5: netif_tx_request_t -- NETTXF_more_data - * ... - * Request N: netif_tx_request_t -- 0 + * Control ring + * ============ + * + * Some features, such as hashing (detailed below), require a + * significant amount of out-of-band data to be passed from frontend to + * backend. Use of xenstore is not suitable for large quantities of data + * because of quota limitations and so a dedicated 'control ring' is used. + * The ability of the backend to use a control ring is advertised by + * setting: + * + * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1" + * + * The frontend provides a control ring to the backend by setting: + * + * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref> + * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port> + * + * where <gref> is the grant reference of the shared page used to + * implement the control ring and <port> is an event channel to be used + * as a mailbox interrupt. These keys must be set before the frontend + * moves into the connected state. + * + * The control ring uses a fixed request/response message size and is + * balanced (i.e. one request to one response), so operationally it is much + * the same as a transmit or receive ring. + * Note that there is no requirement that responses are issued in the same + * order as requests. */ /* + * Hash types + * ========== + * + * For the purposes of the definitions below, 'Packet[]' is an array of + * octets containing an IP packet without options, 'Array[X..Y]' means a + * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is + * used to indicate concatenation of arrays. + */ + +/* + * A hash calculated over an IP version 4 header as follows: + * + * Buffer[0..8] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + * + * Result = Hash(Buffer, 8) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) + +/* + * A hash calculated over an IP version 4 header and TCP header as + * follows: + * + * Buffer[0..12] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + + * Packet[20..21] (source port) + + * Packet[22..23] (destination port) + * + * Result = Hash(Buffer, 12) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + +/* + * A hash calculated over an IP version 6 header as follows: + * + * Buffer[0..32] = Packet[8..23] (source address ) + + * Packet[24..39] (destination address) + * + * Result = Hash(Buffer, 32) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) + +/* + * A hash calculated over an IP version 6 header and TCP header as + * follows: + * + * Buffer[0..36] = Packet[8..23] (source address) + + * Packet[24..39] (destination address) + + * Packet[40..41] (source port) + + * Packet[42..43] (destination port) + * + * Result = Hash(Buffer, 36) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + +/* + * Hash algorithms + * =============== + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 + +/* + * Toeplitz hash: + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 + +/* + * This algorithm uses a 'key' as well as the data buffer itself. + * (Buffer[] and Key[] are treated as shift-registers where the MSB of + * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1] + * is the 'right-most'). + * + * Value = 0 + * For number of bits in Buffer[] + * If (left-most bit of Buffer[] is 1) + * Value ^= left-most 32 bits of Key[] + * Key[] << 1 + * Buffer[] << 1 + * + * The code below is provided for convenience where an operating system + * does not already provide an implementation. + */ +#ifdef XEN_NETIF_DEFINE_TOEPLITZ +static uint32_t xen_netif_toeplitz_hash(const uint8_t *key, + unsigned int keylen, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int keyi, bufi; + uint64_t prefix = 0; + uint64_t hash = 0; + + /* Pre-load prefix with the first 8 bytes of the key */ + for (keyi = 0; keyi < 8; keyi++) { + prefix <<= 8; + prefix |= (keyi < keylen) ? key[keyi] : 0; + } + + for (bufi = 0; bufi < buflen; bufi++) { + uint8_t byte = buf[bufi]; + unsigned int bit; + + for (bit = 0; bit < 8; bit++) { + if (byte & 0x80) + hash ^= prefix; + prefix <<= 1; + byte <<=1; + } + + /* + * 'prefix' has now been left-shifted by 8, so + * OR in the next byte. + */ + prefix |= (keyi < keylen) ? key[keyi] : 0; + keyi++; + } + + /* The valid part of the hash is in the upper 32 bits. */ + return hash >> 32; +} +#endif /* XEN_NETIF_DEFINE_TOEPLITZ */ + +/* + * Control requests (struct xen_netif_ctrl_request) + * ================================================ + * + * All requests have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | data[0] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data[1] | data[2] | + * +-----+-----+-----+-----+-----------------------+ + * + * id: the request identifier, echoed in response. + * type: the type of request (see below) + * data[]: any data associated with the request (determined by type) + */ + +struct xen_netif_ctrl_request { + uint16_t id; + uint16_t type; + +#define XEN_NETIF_CTRL_TYPE_INVALID 0 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 + + uint32_t data[3]; +}; + +/* + * Control responses (struct xen_netif_ctrl_response) + * ================================================== + * + * All responses have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data | + * +-----+-----+-----+-----+ + * + * id: the corresponding request identifier + * type: the type of the corresponding request + * status: the status of request processing + * data: any data associated with the response (determined by type and + * status) + */ + +struct xen_netif_ctrl_response { + uint16_t id; + uint16_t type; + uint32_t status; + +#define XEN_NETIF_CTRL_STATUS_SUCCESS 0 +#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 +#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 +#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 + + uint32_t data; +}; + +/* + * Control messages + * ================ + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * -------------------------------------- + * + * This is sent by the frontend to set the desired hash algorithm. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not + * supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables + * hashing and the backend is free to choose how it steers packets + * to queues (which is the default behaviour). + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to query the types of hash supported by + * the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = supported hash types (if operation was successful) + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to set the types of hash that the backend + * should calculate. (See above for hash type definitions). + * Note that the 'maximal' type of hash should always be chosen. For + * example, if the frontend sets both IPV4 and IPV4_TCP hash types then + * the latter hash type should be calculated for any TCP packet and the + * former only calculated for non-TCP packets. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag + * value is invalid or + * unsupported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * Also, setting data[0] to zero disables hashing and the backend + * is free to choose how it steers packets to queues. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * -------------------------------- + * + * This is sent by the frontend to set the key of the hash if the algorithm + * requires it. (See hash algorithms above). + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * data[0] = grant reference of page containing the key (assumed to + * start at beginning of grant) + * data[1] = size of key in octets + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Any key octets not specified are assumed to be zero (the key + * is assumed to be empty by default) and specifying a new key + * invalidates any previous key, hence specifying a key size of + * zero will clear the key (which ensures that the calculated hash + * will always be zero). + * The maximum size of key is algorithm and backend specific, but + * is also limited by the single grant reference. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to query the maximum size of mapping + * table supported by the backend. The size is specified in terms of + * table entries. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the mapping table + * (if operation was successful) or zero if a mapping table is + * not supported (i.e. hash mapping is done only by modular + * arithmetic). + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * ------------------------------------- + * + * This is sent by the frontend to set the actual size of the mapping + * table to be used by the backend. The size is specified in terms of + * table entries. + * Any previous table is invalidated by this message and any new table + * is assumed to be zero filled. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * data[0] = number of entries in mapping table + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Setting data[0] to 0 means that hash mapping should be done + * using modular arithmetic. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * ------------------------------------ + * + * This is sent by the frontend to set the content of the table mapping + * hash value to queue number. The backend should calculate the hash from + * the packet header, use it as an index into the table (modulo the size + * of the table) and then steer the packet to the queue number found at + * that index. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * data[0] = grant reference of page containing the mapping (sub-)table + * (assumed to start at beginning of grant) + * data[1] = size of (sub-)table in entries + * data[2] = offset, in entries, of sub-table within overall table + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content + * is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: The overall table has the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[0] | mapping[1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | . | + * | . | + * | . | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[N-2] | mapping[N-1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * message and each mapping must specifies a queue between 0 and + * "multi-queue-num-queues" (see above). + * The backend may support a mapping table larger than can be + * mapped by a single grant reference. Thus sub-tables within a + * larger table can be individually set by sending multiple messages + * with differing offset values. Specifying a new sub-table does not + * invalidate any table data outside that range. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + */ + +DEFINE_RING_TYPES(xen_netif_ctrl, + struct xen_netif_ctrl_request, + struct xen_netif_ctrl_response); + +/* * Guest transmit * ============== * + * This is the 'wire' format for transmit (frontend -> backend) packets: + * + * Fragment 1: netif_tx_request_t - flags = NETTXF_* + * size = total packet size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETTXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_tx_request_t - (only if fragment N-1 flags include + * NETTXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for receive + * (backend -> frontend) packets. Specifically, in a multi-fragment + * packet the actual size of fragment 1 can only be determined by + * subtracting the sizes of fragments 2..N from the total packet size. + * * Ring slot size is 12 octets, however not all request/response * structs use the full size. * @@ -200,6 +677,29 @@ * Guest receive * ============= * + * This is the 'wire' format for receive (backend -> frontend) packets: + * + * Fragment 1: netif_rx_request_t - flags = NETRXF_* + * size = fragment size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETRXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_rx_request_t - (only if fragment N-1 flags include + * NETRXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for transmit + * (frontend -> backend) packets. Specifically, in a multi-fragment + * packet the size of the packet can only be determined by summing the + * sizes of fragments 1..N. + * * Ring slot size is 8 octets. * * rx request (netif_rx_request_t) @@ -226,15 +726,29 @@ * flags: NETRXF_* * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. * + * NOTE: Historically, to support GSO on the frontend receive side, Linux + * netfront does not make use of the rx response id (because, as + * described below, extra info structures overlay the id field). + * Instead it assumes that responses always appear in the same ring + * slot as their corresponding request. Thus, to maintain + * compatibility, backends must make sure this is the case. + * * Extra Info * ========== * - * Can be present if initial request has NET{T,R}XF_extra_info, or - * previous extra request has XEN_NETIF_EXTRA_MORE. + * Can be present if initial request or response has NET{T,R}XF_extra_info, + * or previous extra request has XEN_NETIF_EXTRA_MORE. * * The struct therefore needs to fit into either a tx or rx slot and * is therefore limited to 8 octets. * + * NOTE: Because extra info data overlays the usual request/response + * structures, there is no id information in the opposite direction. + * So, if an extra info overlays an rx response the frontend can + * assume that it is in the same ring slot as the request that was + * consumed to make the slot available, and the backend must ensure + * this assumption is true. + * * extra info (netif_extra_info_t) * ------------------------------- * @@ -242,7 +756,7 @@ * * 0 1 2 3 4 5 6 7 octet * +-----+-----+-----+-----+-----+-----+-----+-----+ - * |type |flags| type specfic data | + * |type |flags| type specific data | * +-----+-----+-----+-----+-----+-----+-----+-----+ * | padding for tx | * +-----+-----+-----+-----+ @@ -250,7 +764,8 @@ * type: XEN_NETIF_EXTRA_TYPE_* * flags: XEN_NETIF_EXTRA_FLAG_* * padding for tx: present only in the tx case due to 8 octet limit - * from rx case. Not shown in type specific entries below. + * from rx case. Not shown in type specific entries + * below. * * XEN_NETIF_EXTRA_TYPE_GSO: * @@ -261,9 +776,14 @@ * * type: Must be XEN_NETIF_EXTRA_TYPE_GSO * flags: XEN_NETIF_EXTRA_FLAG_* - * size: Maximum payload size of each segment. - * type: XEN_NETIF_GSO_TYPE_* - * features: EN_NETIF_GSO_FEAT_* + * size: Maximum payload size of each segment. For example, + * for TCP this is just the path MSS. + * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of + * the packet and any extra features required to segment the + * packet properly. + * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO + * features required to process this packet, such as ECN + * support for TCPv4. * * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: * @@ -275,6 +795,25 @@ * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} * flags: XEN_NETIF_EXTRA_FLAG_* * addr: address to add/remove + * + * XEN_NETIF_EXTRA_TYPE_HASH: + * + * A backend that supports teoplitz hashing is assumed to accept + * this type of extra info in transmit packets. + * A frontend that enables hashing is assumed to accept + * this type of extra info in receive packets. + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags|htype| alg |LSB ---- value ---- MSB| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_HASH + * flags: XEN_NETIF_EXTRA_FLAG_* + * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) + * alg: The algorithm used to calculate the hash (one of + * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) + * value: Hash value */ /* Protocol checksum field is blank in the packet (hardware offload)? */ @@ -295,11 +834,11 @@ #define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct netif_tx_request { - grant_ref_t gref; /* Reference to buffer page */ - uint16_t offset; /* Offset within buffer page */ - uint16_t flags; /* NETTXF_* */ - uint16_t id; /* Echoed in response message. */ - uint16_t size; /* Packet size in bytes. */ + grant_ref_t gref; + uint16_t offset; + uint16_t flags; + uint16_t id; + uint16_t size; }; typedef struct netif_tx_request netif_tx_request_t; @@ -308,7 +847,8 @@ typedef struct netif_tx_request netif_tx #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ -#define XEN_NETIF_EXTRA_TYPE_MAX (4) +#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ +#define XEN_NETIF_EXTRA_TYPE_MAX (5) /* netif_extra_info_t flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) @@ -324,43 +864,23 @@ typedef struct netif_tx_request netif_tx * netif_rx_response_t for compatibility. */ struct netif_extra_info { - uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ - uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ - + uint8_t type; + uint8_t flags; union { - /* - * XEN_NETIF_EXTRA_TYPE_GSO: - */ struct { - /* - * Maximum payload size of each segment. For example, for TCP this - * is just the path MSS. - */ uint16_t size; - - /* - * GSO type. This determines the protocol of the packet and any - * extra features required to segment the packet properly. - */ - uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ - - /* Future expansion. */ + uint8_t type; uint8_t pad; - - /* - * GSO features. This specifies any extra GSO features required - * to process this packet, such as ECN support for TCPv4. - */ - uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + uint16_t features; } gso; - - /* - * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: - */ struct { - uint8_t addr[6]; /* Address to add/remove. */ + uint8_t addr[6]; } mcast; - + struct { + uint8_t type; + uint8_t algorithm; + uint8_t value[4]; + } hash; uint16_t pad[3]; } u; }; @@ -368,14 +888,14 @@ typedef struct netif_extra_info netif_ex struct netif_tx_response { uint16_t id; - int16_t status; /* NETIF_RSP_* */ + int16_t status; }; typedef struct netif_tx_response netif_tx_response_t; struct netif_rx_request { uint16_t id; /* Echoed in response message. */ uint16_t pad; - grant_ref_t gref; /* Reference to incoming granted frame */ + grant_ref_t gref; }; typedef struct netif_rx_request netif_rx_request_t; @@ -395,11 +915,15 @@ typedef struct netif_rx_request netif_rx #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) +/* Packet has GSO prefix. Deprecated but included for compatibility */ +#define _NETRXF_gso_prefix (4) +#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) + struct netif_rx_response { uint16_t id; - uint16_t offset; /* Offset in page of start of received packet */ - uint16_t flags; /* NETRXF_* */ - int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ + uint16_t offset; + uint16_t flags; + int16_t status; }; typedef struct netif_rx_response netif_rx_response_t; diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/ring.h --- a/include/xen/interface/io/ring.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/ring.h Wed Jul 06 10:05:38 2016 +0200 @@ -212,6 +212,20 @@ typedef struct __name##_back_ring __name #define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) +/* + * Get a local copy of a request. + * + * Use this in preference to RING_GET_REQUEST() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this + * to be ineffective where _req is a struct which consists of only bitfields. + */ +#define RING_COPY_REQUEST(_r, _idx, _req) do { \ + /* Use volatile to force the copy into _req. */ \ + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +} while (0) + #define RING_GET_RESPONSE(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/usbif.h --- a/include/xen/interface/io/usbif.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/usbif.h Wed Jul 06 10:05:38 2016 +0200 @@ -187,6 +187,7 @@ struct usbif_urb_request { /* basic urb parameter */ uint32_t pipe; uint16_t transfer_flags; +#define USBIF_SHORT_NOT_OK 0x0001 uint16_t buffer_length; union { uint8_t ctrl[8]; /* setup_packet (Ctrl) */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/io/vscsiif.h --- a/include/xen/interface/io/vscsiif.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/io/vscsiif.h Wed Jul 06 10:05:38 2016 +0200 @@ -60,7 +60,7 @@ * * A string specifying the backend device: either a 4-tuple "h:c:t:l" * (host, controller, target, lun, all integers), or a WWN (e.g. - * "naa.60014054ac780582"). + * "naa.60014054ac780582:0"). * * v-dev * Values: string @@ -104,6 +104,75 @@ * response structures. */ +/* + * Xenstore format in practice + * =========================== + * + * The backend driver uses a single_host:many_devices notation to manage domU + * devices. Everything is stored in /local/domain/<backend_domid>/backend/vscsi/. + * The xenstore layout looks like this (dom0 is assumed to be the backend_domid): + * + * <domid>/<vhost>/feature-host = "0" + * <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0" + * <domid>/<vhost>/frontend-id = "<domid>" + * <domid>/<vhost>/online = "1" + * <domid>/<vhost>/state = "4" + * <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1" or "naa.wwn:lun" + * <domid>/<vhost>/vscsi-devs/dev-0/state = "4" + * <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0" + * <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2" + * <domid>/<vhost>/vscsi-devs/dev-1/state = "4" + * <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0" + * + * The frontend driver maintains its state in + * /local/domain/<domid>/device/vscsi/. + * + * <vhost>/backend = "/local/domain/0/backend/vscsi/<domid>/<vhost>" + * <vhost>/backend-id = "0" + * <vhost>/event-channel = "20" + * <vhost>/ring-ref = "43" + * <vhost>/state = "4" + * <vhost>/vscsi-devs/dev-0/state = "4" + * <vhost>/vscsi-devs/dev-1/state = "4" + * + * In addition to the entries for backend and frontend these flags are stored + * for the toolstack: + * + * <domid>/<vhost>/vscsi-devs/dev-1/p-devname = "/dev/$device" + * <domid>/<vhost>/libxl_ctrl_index = "0" + * + * + * Backend/frontend protocol + * ========================= + * + * To create a vhost along with a device: + * <domid>/<vhost>/feature-host = "0" + * <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0" + * <domid>/<vhost>/frontend-id = "<domid>" + * <domid>/<vhost>/online = "1" + * <domid>/<vhost>/state = "1" + * <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1" + * <domid>/<vhost>/vscsi-devs/dev-0/state = "1" + * <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0" + * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-0/state become 4 + * + * To add another device to a vhost: + * <domid>/<vhost>/state = "7" + * <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2" + * <domid>/<vhost>/vscsi-devs/dev-1/state = "1" + * <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0" + * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-1/state become 4 + * + * To remove a device from a vhost: + * <domid>/<vhost>/state = "7" + * <domid>/<vhost>/vscsi-devs/dev-1/state = "5" + * Wait for <domid>/<vhost>/state to become 4 + * Wait for <domid>/<vhost>/vscsi-devs/dev-1/state become 6 + * Remove <domid>/<vhost>/vscsi-devs/dev-1/{state,p-dev,v-dev,p-devname} + * Remove <domid>/<vhost>/vscsi-devs/dev-1/ + * + */ + /* Requests from the frontend to the backend */ /* diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/memory.h --- a/include/xen/interface/memory.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/memory.h Wed Jul 06 10:05:38 2016 +0200 @@ -220,6 +220,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_map #define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, * XENMEM_add_to_physmap_batch only. */ +#define XENMAPSPACE_dev_mmio 5 /* device mmio region + ARM only; the region is mapped in + Stage-2 using the memory attribute + "Device-nGnRE" (previously named + "Device" on ARMv7) */ /* ` } */ /* @@ -258,7 +263,15 @@ struct xen_add_to_physmap_batch { /* Number of pages to go through */ uint16_t size; - domid_t foreign_domid; /* IFF gmfn_foreign */ + +#if __XEN_INTERFACE_VERSION__ < 0x00040700 + domid_t foreign_domid; /* IFF gmfn_foreign. Should be 0 for other spaces. */ +#else + union xen_add_to_physmap_batch_extra { + domid_t foreign_domid; /* gmfn_foreign */ + uint16_t res0; /* All the other spaces. Should be 0 */ + } u; +#endif /* Indexes into space being mapped. */ XEN_GUEST_HANDLE(xen_ulong_t) idxs; @@ -390,8 +403,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_o #define XENMEM_access_op 21 #define XENMEM_access_op_set_access 0 #define XENMEM_access_op_get_access 1 -#define XENMEM_access_op_enable_emulate 2 -#define XENMEM_access_op_disable_emulate 3 +/* + * XENMEM_access_op_enable_emulate and XENMEM_access_op_disable_emulate are + * currently unused, but since they have been in use please do not reuse them. + * + * #define XENMEM_access_op_enable_emulate 2 + * #define XENMEM_access_op_disable_emulate 3 + */ typedef enum { XENMEM_access_n, @@ -520,7 +538,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_ /* * XENMEM_claim_pages flags - the are no flags at this time. - * The zero value is appropiate. + * The zero value is appropriate. */ /* diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/pmu.h --- a/include/xen/interface/pmu.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/pmu.h Wed Jul 06 10:05:38 2016 +0200 @@ -84,9 +84,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t /* * PMU features: - * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD) + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD) + * - XENPMU_FEATURE_IPC_ONLY: Restrict PMCs to the most minimum set possible. + * Instructions, cycles, and ref cycles. Can be + * used to calculate instructions-per-cycle (IPC) + * (ignored on AMD). + * - XENPMU_FEATURE_ARCH_ONLY: Restrict PMCs to the Intel Pre-Defined + * Architectural Performance Events exposed by + * cpuid and listed in the Intel developer's manual + * (ignored on AMD). */ -#define XENPMU_FEATURE_INTEL_BTS 1 +#define XENPMU_FEATURE_INTEL_BTS (1<<0) +#define XENPMU_FEATURE_IPC_ONLY (1<<1) +#define XENPMU_FEATURE_ARCH_ONLY (1<<2) /* * Shared PMU data between hypervisor and PV(H) domains. diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/sched.h --- a/include/xen/interface/sched.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/sched.h Wed Jul 06 10:05:38 2016 +0200 @@ -118,6 +118,18 @@ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. */ #define SCHEDOP_watchdog 6 + +/* + * Override the current vcpu affinity by pinning it to one physical cpu or + * undo this override restoring the previous affinity. + * @arg == pointer to sched_pin_override_t structure. + * + * A negative pcpu value will undo a previous pin override and restore the + * previous cpu affinity. + * This call is allowed for the hardware domain only and requires the cpu + * to be part of the domain's cpupool. + */ +#define SCHEDOP_pin_override 7 /* ` } */ struct sched_shutdown { @@ -148,6 +160,12 @@ struct sched_watchdog { typedef struct sched_watchdog sched_watchdog_t; DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); +struct sched_pin_override { + int32_t pcpu; +}; +typedef struct sched_pin_override sched_pin_override_t; +DEFINE_XEN_GUEST_HANDLE(sched_pin_override_t); + /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does @@ -159,7 +177,16 @@ DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ -#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */ + +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 +#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */ /* ` } */ #endif /* __XEN_PUBLIC_SCHED_H__ */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/sysctl.h --- a/include/xen/interface/sysctl.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/sysctl.h Wed Jul 06 10:05:38 2016 +0200 @@ -36,7 +36,7 @@ #include "physdev.h" #include "tmem.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000C +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000D /* * Read console content from Xen buffer ring. @@ -559,6 +559,42 @@ struct xen_sysctl_cpupool_op { typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); +/* + * Error return values of cpupool operations: + * + * -EADDRINUSE: + * XEN_SYSCTL_CPUPOOL_OP_RMCPU: A vcpu is temporarily pinned to the cpu + * which is to be removed from a cpupool. + * -EADDRNOTAVAIL: + * XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: A previous + * request to remove a cpu from a cpupool was terminated with -EAGAIN + * and has not been retried using the same parameters. + * -EAGAIN: + * XEN_SYSCTL_CPUPOOL_OP_RMCPU: The cpu can't be removed from the cpupool + * as it is active in the hypervisor. A retry will succeed soon. + * -EBUSY: + * XEN_SYSCTL_CPUPOOL_OP_DESTROY, XEN_SYSCTL_CPUPOOL_OP_RMCPU: A cpupool + * can't be destroyed or the last cpu can't be removed as there is still + * a running domain in that cpupool. + * -EEXIST: + * XEN_SYSCTL_CPUPOOL_OP_CREATE: A cpupool_id was specified and is already + * existing. + * -EINVAL: + * XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: An illegal + * cpu was specified (cpu does not exist). + * XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN: An illegal domain was specified + * (domain id illegal or not suitable for operation). + * -ENODEV: + * XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: The specified + * cpu is either not free (add) or not member of the specified cpupool + * (remove). + * -ENOENT: + * all: The cpupool with the specified cpupool_id doesn't exist. + * + * Some common error return values like -ENOMEM and -EFAULT are possible for + * all the operations. + */ + #define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 /* * This structure is used to pass a new ARINC653 schedule from a @@ -705,6 +741,8 @@ struct xen_sysctl_psr_cat_op { struct { uint32_t cbm_len; /* OUT: CBM length */ uint32_t cos_max; /* OUT: Maximum COS */ +#define XEN_SYSCTL_PSR_CAT_L3_CDP (1u << 0) + uint32_t flags; /* OUT: CAT flags */ } l3_info; } u; }; @@ -764,6 +802,235 @@ struct xen_sysctl_tmem_op { typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t); +/* + * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific) + * + * Return hardware capabilities concerning masking or faulting of the cpuid + * instruction for PV guests. + */ +struct xen_sysctl_cpu_levelling_caps { +#define XEN_SYSCTL_CPU_LEVELCAP_faulting (1ul << 0) /* CPUID faulting */ +#define XEN_SYSCTL_CPU_LEVELCAP_ecx (1ul << 1) /* 0x00000001.ecx */ +#define XEN_SYSCTL_CPU_LEVELCAP_edx (1ul << 2) /* 0x00000001.edx */ +#define XEN_SYSCTL_CPU_LEVELCAP_extd_ecx (1ul << 3) /* 0x80000001.ecx */ +#define XEN_SYSCTL_CPU_LEVELCAP_extd_edx (1ul << 4) /* 0x80000001.edx */ +#define XEN_SYSCTL_CPU_LEVELCAP_xsave_eax (1ul << 5) /* 0x0000000D:1.eax */ +#define XEN_SYSCTL_CPU_LEVELCAP_thermal_ecx (1ul << 6) /* 0x00000006.ecx */ +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax (1ul << 7) /* 0x00000007:0.eax */ +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_ebx (1ul << 8) /* 0x00000007:0.ebx */ + uint32_t caps; +}; +typedef struct xen_sysctl_cpu_levelling_caps xen_sysctl_cpu_levelling_caps_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_levelling_caps_t); + +/* + * XEN_SYSCTL_get_cpu_featureset (x86 specific) + * + * Return information about featuresets available on this host. + * - Raw: The real cpuid values. + * - Host: The values Xen is using, (after command line overrides, etc). + * - PV: Maximum set of features which can be given to a PV guest. + * - HVM: Maximum set of features which can be given to a HVM guest. + */ +struct xen_sysctl_cpu_featureset { +#define XEN_SYSCTL_cpu_featureset_raw 0 +#define XEN_SYSCTL_cpu_featureset_host 1 +#define XEN_SYSCTL_cpu_featureset_pv 2 +#define XEN_SYSCTL_cpu_featureset_hvm 3 + uint32_t index; /* IN: Which featureset to query? */ + uint32_t nr_features; /* IN/OUT: Number of entries in/written to + * 'features', or the maximum number of features if + * the guest handle is NULL. NB. All featuresets + * come from the same numberspace, so have the same + * maximum length. */ + XEN_GUEST_HANDLE_64(uint32) features; /* OUT: */ +}; +typedef struct xen_sysctl_featureset xen_sysctl_featureset_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_featureset_t); + +/* + * XEN_SYSCTL_LIVEPATCH_op + * + * Refer to the docs/unstable/misc/livepatch.markdown + * for the design details of this hypercall. + * + * There are four sub-ops: + * XEN_SYSCTL_LIVEPATCH_UPLOAD (0) + * XEN_SYSCTL_LIVEPATCH_GET (1) + * XEN_SYSCTL_LIVEPATCH_LIST (2) + * XEN_SYSCTL_LIVEPATCH_ACTION (3) + * + * The normal sequence of sub-ops is to: + * 1) XEN_SYSCTL_LIVEPATCH_UPLOAD to upload the payload. If errors STOP. + * 2) XEN_SYSCTL_LIVEPATCH_GET to check the `->rc`. If -XEN_EAGAIN spin. + * If zero go to next step. + * 3) XEN_SYSCTL_LIVEPATCH_ACTION with LIVEPATCH_ACTION_APPLY to apply the patch. + * 4) XEN_SYSCTL_LIVEPATCH_GET to check the `->rc`. If in -XEN_EAGAIN spin. + * If zero exit with success. + */ + +#define LIVEPATCH_PAYLOAD_VERSION 1 +/* + * .livepatch.funcs structure layout defined in the `Payload format` + * section in the Live Patch design document. + * + * We guard this with __XEN__ as toolstacks SHOULD not use it. + */ +#ifdef __XEN__ +struct livepatch_func { + const char *name; /* Name of function to be patched. */ + void *new_addr; + void *old_addr; + uint32_t new_size; + uint32_t old_size; + uint8_t version; /* MUST be LIVEPATCH_PAYLOAD_VERSION. */ + uint8_t opaque[31]; +}; +typedef struct livepatch_func livepatch_func_t; +#endif + +/* + * Structure describing an ELF payload. Uniquely identifies the + * payload. Should be human readable. + * Recommended length is upto XEN_LIVEPATCH_NAME_SIZE. + * Includes the NUL terminator. + */ +#define XEN_LIVEPATCH_NAME_SIZE 128 +struct xen_livepatch_name { + XEN_GUEST_HANDLE_64(char) name; /* IN: pointer to name. */ + uint16_t size; /* IN: size of name. May be upto + XEN_LIVEPATCH_NAME_SIZE. */ + uint16_t pad[3]; /* IN: MUST be zero. */ +}; +typedef struct xen_livepatch_name xen_livepatch_name_t; +DEFINE_XEN_GUEST_HANDLE(xen_livepatch_name_t); + +/* + * Upload a payload to the hypervisor. The payload is verified + * against basic checks and if there are any issues the proper return code + * will be returned. The payload is not applied at this time - that is + * controlled by XEN_SYSCTL_LIVEPATCH_ACTION. + * + * The return value is zero if the payload was succesfully uploaded. + * Otherwise an EXX return value is provided. Duplicate `name` are not + * supported. + * + * The payload at this point is verified against basic checks. + * + * The `payload` is the ELF payload as mentioned in the `Payload format` + * section in the Live Patch design document. + */ +#define XEN_SYSCTL_LIVEPATCH_UPLOAD 0 +struct xen_sysctl_livepatch_upload { + xen_livepatch_name_t name; /* IN, name of the patch. */ + uint64_t size; /* IN, size of the ELF file. */ + XEN_GUEST_HANDLE_64(uint8) payload; /* IN, the ELF file. */ +}; +typedef struct xen_sysctl_livepatch_upload xen_sysctl_livepatch_upload_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_livepatch_upload_t); + +/* + * Retrieve an status of an specific payload. + * + * Upon completion the `struct xen_livepatch_status` is updated. + * + * The return value is zero on success and XEN_EXX on failure. This operation + * is synchronous and does not require preemption. + */ +#define XEN_SYSCTL_LIVEPATCH_GET 1 + +struct xen_livepatch_status { +#define LIVEPATCH_STATE_CHECKED 1 +#define LIVEPATCH_STATE_APPLIED 2 + uint32_t state; /* OUT: LIVEPATCH_STATE_*. */ + int32_t rc; /* OUT: 0 if no error, otherwise -XEN_EXX. */ +}; +typedef struct xen_livepatch_status xen_livepatch_status_t; +DEFINE_XEN_GUEST_HANDLE(xen_livepatch_status_t); + +struct xen_sysctl_livepatch_get { + xen_livepatch_name_t name; /* IN, name of the payload. */ + xen_livepatch_status_t status; /* IN/OUT, state of it. */ +}; +typedef struct xen_sysctl_livepatch_get xen_sysctl_livepatch_get_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_livepatch_get_t); + +/* + * Retrieve an array of abbreviated status and names of payloads that are + * loaded in the hypervisor. + * + * If the hypercall returns an positive number, it is the number (up to `nr`) + * of the payloads returned, along with `nr` updated with the number of remaining + * payloads, `version` updated (it may be the same across hypercalls. If it + * varies the data is stale and further calls could fail). The `status`, + * `name`, and `len`' are updated at their designed index value (`idx`) with + * the returned value of data. + * + * If the hypercall returns E2BIG the `nr` is too big and should be + * lowered. The upper limit of `nr` is left to the implemention. + * + * Note that due to the asynchronous nature of hypercalls the domain might have + * added or removed the number of payloads making this information stale. It is + * the responsibility of the toolstack to use the `version` field to check + * between each invocation. if the version differs it should discard the stale + * data and start from scratch. It is OK for the toolstack to use the new + * `version` field. + */ +#define XEN_SYSCTL_LIVEPATCH_LIST 2 +struct xen_sysctl_livepatch_list { + uint32_t version; /* OUT: Hypervisor stamps value. + If varies between calls, we are + * getting stale data. */ + uint32_t idx; /* IN: Index into hypervisor list. */ + uint32_t nr; /* IN: How many status, name, and len + should fill out. Can be zero to get + amount of payloads and version. + OUT: How many payloads left. */ + uint32_t pad; /* IN: Must be zero. */ + XEN_GUEST_HANDLE_64(xen_livepatch_status_t) status; /* OUT. Must have enough + space allocate for nr of them. */ + XEN_GUEST_HANDLE_64(char) name; /* OUT: Array of names. Each member + MUST XEN_LIVEPATCH_NAME_SIZE in size. + Must have nr of them. */ + XEN_GUEST_HANDLE_64(uint32) len; /* OUT: Array of lengths of name's. + Must have nr of them. */ +}; +typedef struct xen_sysctl_livepatch_list xen_sysctl_livepatch_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_livepatch_list_t); + +/* + * Perform an operation on the payload structure referenced by the `name` field. + * The operation request is asynchronous and the status should be retrieved + * by using either XEN_SYSCTL_LIVEPATCH_GET or XEN_SYSCTL_LIVEPATCH_LIST hypercall. + */ +#define XEN_SYSCTL_LIVEPATCH_ACTION 3 +struct xen_sysctl_livepatch_action { + xen_livepatch_name_t name; /* IN, name of the patch. */ +#define LIVEPATCH_ACTION_UNLOAD 1 +#define LIVEPATCH_ACTION_REVERT 2 +#define LIVEPATCH_ACTION_APPLY 3 +#define LIVEPATCH_ACTION_REPLACE 4 + uint32_t cmd; /* IN: LIVEPATCH_ACTION_*. */ + uint32_t timeout; /* IN: Zero if no timeout. */ + /* Or upper bound of time (ms) */ + /* for operation to take. */ +}; +typedef struct xen_sysctl_livepatch_action xen_sysctl_livepatch_action_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_livepatch_action_t); + +struct xen_sysctl_livepatch_op { + uint32_t cmd; /* IN: XEN_SYSCTL_LIVEPATCH_*. */ + uint32_t pad; /* IN: Always zero. */ + union { + xen_sysctl_livepatch_upload_t upload; + xen_sysctl_livepatch_list_t list; + xen_sysctl_livepatch_get_t get; + xen_sysctl_livepatch_action_t action; + } u; +}; +typedef struct xen_sysctl_livepatch_op xen_sysctl_livepatch_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_livepatch_op_t); + struct xen_sysctl { uint32_t cmd; #define XEN_SYSCTL_readconsole 1 @@ -789,6 +1056,9 @@ struct xen_sysctl { #define XEN_SYSCTL_pcitopoinfo 22 #define XEN_SYSCTL_psr_cat_op 23 #define XEN_SYSCTL_tmem_op 24 +#define XEN_SYSCTL_get_cpu_levelling_caps 25 +#define XEN_SYSCTL_get_cpu_featureset 26 +#define XEN_SYSCTL_livepatch_op 27 uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ union { struct xen_sysctl_readconsole readconsole; @@ -814,6 +1084,9 @@ struct xen_sysctl { struct xen_sysctl_psr_cmt_op psr_cmt_op; struct xen_sysctl_psr_cat_op psr_cat_op; struct xen_sysctl_tmem_op tmem_op; + struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps; + struct xen_sysctl_cpu_featureset cpu_featureset; + struct xen_sysctl_livepatch_op livepatch; uint8_t pad[128]; } u; }; diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/trace.h --- a/include/xen/interface/trace.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/trace.h Wed Jul 06 10:05:38 2016 +0200 @@ -85,6 +85,9 @@ ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \ (_e & TRC_SCHED_EVT_MASK) ) +/* Trace classes for DOM0 operations */ +#define TRC_DOM0_DOMOPS 0x00041000 /* Domains manipulations */ + /* Trace classes for Hardware */ #define TRC_HW_PM 0x00801000 /* Power management traces */ #define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ @@ -113,6 +116,9 @@ #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) #define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) +#define TRC_DOM0_DOM_ADD (TRC_DOM0_DOMOPS + 1) +#define TRC_DOM0_DOM_REM (TRC_DOM0_DOMOPS + 2) + #define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) #define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) #define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/vcpu.h --- a/include/xen/interface/vcpu.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/vcpu.h Wed Jul 06 10:05:38 2016 +0200 @@ -41,8 +41,10 @@ * Initialise a VCPU. Each VCPU can be initialised only once. A * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. * - * @extra_arg == pointer to vcpu_guest_context structure containing initial - * state for the VCPU. + * @extra_arg == For PV or ARM guests this is a pointer to a vcpu_guest_context + * structure containing the initial state for the VCPU. For x86 + * HVM based guests this is a pointer to a vcpu_hvm_context + * structure. */ #define VCPUOP_initialise 0 diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/version.h --- a/include/xen/interface/version.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/version.h Wed Jul 06 10:05:38 2016 +0200 @@ -30,7 +30,8 @@ #include "xen.h" -/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ +/* NB. All ops return zero on success, except XENVER_{version,pagesize} + * XENVER_{version,pagesize,build_id} */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 @@ -77,12 +78,31 @@ typedef struct xen_feature_info xen_feat /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 -/* arg == xen_domain_handle_t. */ +/* arg == xen_domain_handle_t. + * + * The toolstack fills it out for guest consumption. It is intended to hold + * the UUID of the guest. + */ #define XENVER_guest_handle 8 #define XENVER_commandline 9 typedef char xen_commandline_t[1024]; +/* + * Return value is the number of bytes written, or XEN_Exx on error. + * Calling with empty parameter returns the size of build_id. + */ +#define XENVER_build_id 10 +struct xen_build_id { + uint32_t len; /* IN: size of buf[]. */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + unsigned char buf[]; +#elif defined(__GNUC__) + unsigned char buf[1]; /* OUT: Variable length buffer with build_id. */ +#endif +}; +typedef struct xen_build_id xen_build_id_t; + #endif /* __XEN_PUBLIC_VERSION_H__ */ /* diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/vm_event.h --- a/include/xen/interface/vm_event.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/vm_event.h Wed Jul 06 10:05:38 2016 +0200 @@ -89,6 +89,13 @@ * by the altp2m_idx response field if possible. */ #define VM_EVENT_FLAG_ALTERNATE_P2M (1 << 7) +/* + * Set the vCPU registers to the values in the vm_event response. + * At the moment x86-only, applies to EAX-EDX, ESP, EBP, ESI, EDI, R8-R15, + * EFLAGS, and EIP. + * Requires the vCPU to be paused already (synchronous events only). + */ +#define VM_EVENT_FLAG_SET_REGISTERS (1 << 8) /* * Reasons for the vm event request diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/xen-compat.h --- a/include/xen/interface/xen-compat.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/xen-compat.h Wed Jul 06 10:05:38 2016 +0200 @@ -27,7 +27,7 @@ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040700 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. */ diff -r 200d580e141c -r c03c2bd4ba0b include/xen/interface/xen.h --- a/include/xen/interface/xen.h Fri Jun 24 10:57:59 2016 +0200 +++ b/include/xen/interface/xen.h Wed Jul 06 10:05:38 2016 +0200 @@ -54,6 +54,19 @@ DEFINE_XEN_GUEST_HANDLE(void); DEFINE_XEN_GUEST_HANDLE(uint64_t); DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); + +/* Turn a plain number into a C unsigned (long) constant. */ +#define __xen_mk_uint(x) x ## U +#define __xen_mk_ulong(x) x ## UL +#define xen_mk_uint(x) __xen_mk_uint(x) +#define xen_mk_ulong(x) __xen_mk_ulong(x) + +#else + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define xen_mk_uint(x) x +#define xen_mk_ulong(x) x + #endif /* @@ -453,13 +466,13 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); /* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ /* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ /* ` enum uvm_flags { */ -#define UVMF_NONE (0UL<<0) /* No flushing at all. */ -#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ -#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ -#define UVMF_FLUSHTYPE_MASK (3UL<<0) -#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ -#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ -#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +#define UVMF_NONE (xen_mk_ulong(0)<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (xen_mk_ulong(1)<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (xen_mk_ulong(2)<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (xen_mk_ulong(3)<<0) +#define UVMF_MULTI (xen_mk_ulong(0)<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (xen_mk_ulong(0)<<2) /* Flush local TLB. */ +#define UVMF_ALL (xen_mk_ulong(1)<<2) /* Flush all TLBs. */ /* ` } */ /* @@ -491,6 +504,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #define VMASST_TYPE_pae_extended_cr3 3 /* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 + +/* * x86/64 guests: strictly hide M2P from user mode. * This allows the guest to control respective hypervisor behavior: * - when not set, L4 tables get created with the respective slot blank, @@ -506,15 +527,11 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #define MAX_VMASST_TYPE 3 #endif -#ifndef __ASSEMBLY__ - -typedef uint16_t domid_t; - /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ -#define DOMID_FIRST_RESERVED (0x7FF0U) +#define DOMID_FIRST_RESERVED xen_mk_uint(0x7FF0) /* DOMID_SELF is used in certain contexts to refer to oneself. */ -#define DOMID_SELF (0x7FF0U) +#define DOMID_SELF xen_mk_uint(0x7FF0) /* * DOMID_IO is used to restrict page-table updates to mapping I/O memory. @@ -525,7 +542,7 @@ typedef uint16_t domid_t; * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can * be specified by any calling domain. */ -#define DOMID_IO (0x7FF1U) +#define DOMID_IO xen_mk_uint(0x7FF1) /* * DOMID_XEN is used to allow privileged domains to map restricted parts of @@ -533,17 +550,21 @@ typedef uint16_t domid_t; * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if * the caller is privileged. */ -#define DOMID_XEN (0x7FF2U) +#define DOMID_XEN xen_mk_uint(0x7FF2) /* * DOMID_COW is used as the owner of sharable pages */ -#define DOMID_COW (0x7FF3U) +#define DOMID_COW xen_mk_uint(0x7FF3) /* DOMID_INVALID is used to identify pages with unknown owner. */ -#define DOMID_INVALID (0x7FF4U) +#define DOMID_INVALID xen_mk_uint(0x7FF4) /* Idle domain. */ -#define DOMID_IDLE (0x7FFFU) +#define DOMID_IDLE xen_mk_uint(0x7FFF) + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; /* * Send an array of these to HYPERVISOR_mmu_update(). @@ -603,10 +624,18 @@ struct vcpu_time_info { */ uint32_t tsc_to_system_mul; int8_t tsc_shift; +#if __XEN_INTERFACE_VERSION__ > 0x040600 + uint8_t flags; + uint8_t pad1[2]; +#else int8_t pad1[3]; +#endif }; /* 32 bytes */ typedef struct vcpu_time_info vcpu_time_info_t; +#define XEN_PVCLOCK_TSC_STABLE_BIT (1 << 0) +#define XEN_PVCLOCK_GUEST_STOPPED (1 << 1) + struct vcpu_info { /* * 'evtchn_upcall_pending' is written non-zero by Xen to indicate @@ -786,6 +815,52 @@ struct start_info { }; typedef struct start_info start_info_t; +/* + * Start of day structure passed to PVH guests in %ebx. + * + * NOTE: nothing will be loaded at physical address 0, so a 0 value in any + * of the address fields should be treated as not present. + * + * 0 +----------------+ + * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE + * | | ("xEn3" with the 0x80 bit of the "E" set). + * 4 +----------------+ + * | version | Version of this structure. Current version is 0. New + * | | versions are guaranteed to be backwards-compatible. + * 8 +----------------+ + * | flags | SIF_xxx flags. + * 12 +----------------+ + * | nr_modules | Number of modules passed to the kernel. + * 16 +----------------+ + * | modlist_paddr | Physical address of an array of modules + * | | (layout of the structure below). + * 24 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 32 +----------------+ + * | rsdp_paddr | Physical address of the RSDP ACPI data structure. + * 40 +----------------+ + * + * The layout of each entry in the module structure is the following: + * + * 0 +----------------+ + * | paddr | Physical address of the module. + * 8 +----------------+ + * | size | Size of the module in bytes. + * 16 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 24 +----------------+ + * | reserved | + * 32 +----------------+ + * + * The address and sizes are always a 64bit little endian unsigned integer. + * + * NB: Xen on x86 will always try to place all the data below the 4GiB + * boundary. + */ +#define XEN_HVM_START_MAGIC_VALUE 0x336ec578 + /* New console union for dom0 introduced in 0x00030203. */ #if __XEN_INTERFACE_VERSION__ < 0x00030203 #define console_mfn console.domU.mfn @@ -880,20 +955,11 @@ typedef struct dom0_vga_console_info { typedef uint8_t xen_domain_handle_t[16]; -/* Turn a plain number into a C unsigned long constant. */ -#define __mk_unsigned_long(x) x ## UL -#define mk_unsigned_long(x) __mk_unsigned_long(x) - __DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); __DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); __DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); __DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); -#else /* __ASSEMBLY__ */ - -/* In assembly code we cannot use C numeric constant suffixes. */ -#define mk_unsigned_long(x) x - #endif /* !__ASSEMBLY__ */ /* Default definitions for macros used by domctl/sysctl. */

_______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx https://lists.xenproject.org/xen-changelog

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.