[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [MINIOS PATCH] Update public headers from Xen



Wei Liu, on mer. 04 oct. 2017 14:43:30 +0100, wrote:
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>

I only verified that this matches the headers in the Xen staging tree, but
that is probably sufficient for this change.

Acked-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxx>

> ---
>  include/xen/arch-arm.h               |  94 +++-
>  include/xen/arch-x86/cpufeatureset.h | 263 +++++++++++
>  include/xen/arch-x86/cpuid.h         |  32 +-
>  include/xen/arch-x86/hvm/save.h      | 101 ++--
>  include/xen/arch-x86/pmu.h           | 167 +++++++
>  include/xen/arch-x86/xen-mca.h       |  60 +--
>  include/xen/arch-x86/xen-x86_32.h    |  69 ++-
>  include/xen/arch-x86/xen-x86_64.h    |  87 +++-
>  include/xen/arch-x86/xen.h           |  74 ++-
>  include/xen/domctl.h                 | 555 +++++++++++++---------
>  include/xen/elfnote.h                |   4 +-
>  include/xen/errno.h                  | 124 +++++
>  include/xen/event_channel.h          |   2 +-
>  include/xen/features.h               |   3 +
>  include/xen/gcov.h                   | 115 -----
>  include/xen/grant_table.h            |  19 +-
>  include/xen/hvm/dm_op.h              | 417 +++++++++++++++++
>  include/xen/hvm/e820.h               |   8 +-
>  include/xen/hvm/hvm_info_table.h     |  10 +
>  include/xen/hvm/hvm_op.h             | 340 +++++---------
>  include/xen/hvm/hvm_vcpu.h           | 144 ++++++
>  include/xen/hvm/hvm_xs_strings.h     |   3 +
>  include/xen/hvm/ioreq.h              |  22 +-
>  include/xen/hvm/params.h             | 129 +++++-
>  include/xen/hvm/save.h               |  10 +-
>  include/xen/io/9pfs.h                |  49 ++
>  include/xen/io/blkif.h               |  74 ++-
>  include/xen/io/console.h             |   6 +
>  include/xen/io/displif.h             | 864 +++++++++++++++++++++++++++++++++++
>  include/xen/io/kbdif.h               | 472 +++++++++++++++++--
>  include/xen/io/libxenvchan.h         |  26 +-
>  include/xen/io/netif.h               | 772 ++++++++++++++++++++++++++++---
>  include/xen/io/protocols.h           |   2 +
>  include/xen/io/pvcalls.h             | 153 +++++++
>  include/xen/io/ring.h                | 176 ++++++-
>  include/xen/io/sndif.h               | 803 ++++++++++++++++++++++++++++++++
>  include/xen/io/usbif.h               | 146 +++++-
>  include/xen/io/vscsiif.h             |  74 ++-
>  include/xen/io/xs_wire.h             |  10 +-
>  include/xen/kexec.h                  |  14 +
>  include/xen/mem_event.h              | 134 ------
>  include/xen/memory.h                 | 118 ++++-
>  include/xen/physdev.h                |   7 +
>  include/xen/platform.h               |  95 +++-
>  include/xen/pmu.h                    | 143 ++++++
>  include/xen/sched.h                  |  29 +-
>  include/xen/sysctl.h                 | 649 +++++++++++++++++++++-----
>  include/xen/tmem.h                   |  64 +--
>  include/xen/trace.h                  |  10 +-
>  include/xen/vcpu.h                   |  12 +-
>  include/xen/version.h                |  24 +-
>  include/xen/vm_event.h               | 378 +++++++++++++++
>  include/xen/xen-compat.h             |   2 +-
>  include/xen/xen.h                    | 144 ++++--
>  include/xen/xenoprof.h               |   2 +-
>  include/xen/xsm/flask_op.h           |  17 +-
>  56 files changed, 7043 insertions(+), 1278 deletions(-)
>  create mode 100644 include/xen/arch-x86/cpufeatureset.h
>  create mode 100644 include/xen/arch-x86/pmu.h
>  create mode 100644 include/xen/errno.h
>  delete mode 100644 include/xen/gcov.h
>  create mode 100644 include/xen/hvm/dm_op.h
>  create mode 100644 include/xen/hvm/hvm_vcpu.h
>  create mode 100644 include/xen/io/9pfs.h
>  create mode 100644 include/xen/io/displif.h
>  create mode 100644 include/xen/io/pvcalls.h
>  create mode 100644 include/xen/io/sndif.h
>  delete mode 100644 include/xen/mem_event.h
>  create mode 100644 include/xen/pmu.h
>  create mode 100644 include/xen/vm_event.h
> 
> diff --git a/include/xen/arch-arm.h b/include/xen/arch-arm.h
> index 124fc90..5708cd2 100644
> --- a/include/xen/arch-arm.h
> +++ b/include/xen/arch-arm.h
> @@ -61,15 +61,15 @@
>   *
>   * All memory which is shared with other entities in the system
>   * (including the hypervisor and other guests) must reside in memory
> - * which is mapped as Normal Inner-cacheable. This applies to:
> + * which is mapped as Normal Inner Write-Back Outer Write-Back Inner-Shareable.
> + * This applies to:
>   *  - hypercall arguments passed via a pointer to guest memory.
>   *  - memory shared via the grant table mechanism (including PV I/O
>   *    rings etc).
>   *  - memory shared with the hypervisor (struct shared_info, struct
>   *    vcpu_info, the grant table, etc).
>   *
> - * Any Inner cache allocation strategy (Write-Back, Write-Through etc)
> - * is acceptable. There is no restriction on the Outer-cacheability.
> + * Any cache allocation hints are acceptable.
>   */
>  
>  /*
> @@ -165,6 +165,7 @@
>  
>  #define XEN_HYPERCALL_TAG   0XEA1
>  
> +#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
>  #define uint64_aligned_t uint64_t __attribute__((aligned(8)))
>  
>  #ifndef __ASSEMBLY__
> @@ -172,7 +173,7 @@
>      typedef union { type *p; unsigned long q; }                 \
>          __guest_handle_ ## name;                                \
>      typedef union { type *p; uint64_aligned_t q; }              \
> -        __guest_handle_64_ ## name;
> +        __guest_handle_64_ ## name
>  
>  /*
>   * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
> @@ -194,9 +195,6 @@
>          _sxghr_tmp->q = 0;                                  \
>          _sxghr_tmp->p = val;                                \
>      } while ( 0 )
> -#ifdef __XEN_TOOLS__
> -#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
> -#endif
>  #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
>  
>  #if defined(__GNUC__) && !defined(__STRICT_ANSI__)
> @@ -297,7 +295,35 @@ struct vcpu_guest_context {
>  };
>  typedef struct vcpu_guest_context vcpu_guest_context_t;
>  DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
> -#endif
> +
> +/*
> + * struct xen_arch_domainconfig's ABI is covered by
> + * XEN_DOMCTL_INTERFACE_VERSION.
> + */
> +#define XEN_DOMCTL_CONFIG_GIC_NATIVE    0
> +#define XEN_DOMCTL_CONFIG_GIC_V2        1
> +#define XEN_DOMCTL_CONFIG_GIC_V3        2
> +struct xen_arch_domainconfig {
> +    /* IN/OUT */
> +    uint8_t gic_version;
> +    /* IN */
> +    uint32_t nr_spis;
> +    /*
> +     * OUT
> +     * Based on the property clock-frequency in the DT timer node.
> +     * The property may be present when the bootloader/firmware doesn't
> +     * set correctly CNTFRQ which hold the timer frequency.
> +     *
> +     * As it's not possible to trap this register, we have to replicate
> +     * the value in the guest DT.
> +     *
> +     * = 0 => property not present
> +     * > 0 => Value of the property
> +     *
> +     */
> +    uint32_t clock_frequency;
> +};
> +#endif /* __XEN__ || __XEN_TOOLS__ */
>  
>  struct arch_vcpu_info {
>  };
> @@ -365,38 +391,46 @@ typedef uint64_t xen_callback_t;
>   */
>  
>  /* vGIC v2 mappings */
> -#define GUEST_GICD_BASE   0x03001000ULL
> -#define GUEST_GICD_SIZE   0x00001000ULL
> -#define GUEST_GICC_BASE   0x03002000ULL
> -#define GUEST_GICC_SIZE   0x00000100ULL
> +#define GUEST_GICD_BASE   xen_mk_ullong(0x03001000)
> +#define GUEST_GICD_SIZE   xen_mk_ullong(0x00001000)
> +#define GUEST_GICC_BASE   xen_mk_ullong(0x03002000)
> +#define GUEST_GICC_SIZE   xen_mk_ullong(0x00002000)
>  
>  /* vGIC v3 mappings */
> -#define GUEST_GICV3_GICD_BASE      0x03001000ULL
> -#define GUEST_GICV3_GICD_SIZE      0x00010000ULL
> +#define GUEST_GICV3_GICD_BASE      xen_mk_ullong(0x03001000)
> +#define GUEST_GICV3_GICD_SIZE      xen_mk_ullong(0x00010000)
>  
> -#define GUEST_GICV3_RDIST_STRIDE   0x20000ULL
> +#define GUEST_GICV3_RDIST_STRIDE   xen_mk_ullong(0x00020000)
>  #define GUEST_GICV3_RDIST_REGIONS  1
>  
> -#define GUEST_GICV3_GICR0_BASE     0x03020000ULL    /* vCPU0 - vCPU7 */
> -#define GUEST_GICV3_GICR0_SIZE     0x00100000ULL
> +#define GUEST_GICV3_GICR0_BASE     xen_mk_ullong(0x03020000) /* vCPU0..127 */
> +#define GUEST_GICV3_GICR0_SIZE     xen_mk_ullong(0x01000000)
> +
> +/* ACPI tables physical address */
> +#define GUEST_ACPI_BASE 0x20000000ULL
> +#define GUEST_ACPI_SIZE 0x02000000ULL
> +
> +/* PL011 mappings */
> +#define GUEST_PL011_BASE    0x22000000ULL
> +#define GUEST_PL011_SIZE    0x00001000ULL
>  
>  /*
>   * 16MB == 4096 pages reserved for guest to use as a region to map its
>   * grant table in.
>   */
> -#define GUEST_GNTTAB_BASE 0x38000000ULL
> -#define GUEST_GNTTAB_SIZE 0x01000000ULL
> +#define GUEST_GNTTAB_BASE xen_mk_ullong(0x38000000)
> +#define GUEST_GNTTAB_SIZE xen_mk_ullong(0x01000000)
>  
> -#define GUEST_MAGIC_BASE  0x39000000ULL
> -#define GUEST_MAGIC_SIZE  0x01000000ULL
> +#define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
> +#define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
>  
>  #define GUEST_RAM_BANKS   2
>  
> -#define GUEST_RAM0_BASE   0x40000000ULL /* 3GB of low RAM @ 1GB */
> -#define GUEST_RAM0_SIZE   0xc0000000ULL
> +#define GUEST_RAM0_BASE   xen_mk_ullong(0x40000000) /* 3GB of low RAM @ 1GB */
> +#define GUEST_RAM0_SIZE   xen_mk_ullong(0xc0000000)
>  
> -#define GUEST_RAM1_BASE   0x0200000000ULL /* 1016GB of RAM @ 8GB */
> -#define GUEST_RAM1_SIZE   0xfe00000000ULL
> +#define GUEST_RAM1_BASE   xen_mk_ullong(0x0200000000) /* 1016GB of RAM @ 8GB */
> +#define GUEST_RAM1_SIZE   xen_mk_ullong(0xfe00000000)
>  
>  #define GUEST_RAM_BASE    GUEST_RAM0_BASE /* Lowest RAM address */
>  /* Largest amount of actual RAM, not including holes */
> @@ -405,12 +439,17 @@ typedef uint64_t xen_callback_t;
>  #define GUEST_RAM_BANK_BASES   { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
>  #define GUEST_RAM_BANK_SIZES   { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
>  
> +/* Current supported guest VCPUs */
> +#define GUEST_MAX_VCPUS 128
> +
>  /* Interrupts */
>  #define GUEST_TIMER_VIRT_PPI    27
>  #define GUEST_TIMER_PHYS_S_PPI  29
>  #define GUEST_TIMER_PHYS_NS_PPI 30
>  #define GUEST_EVTCHN_PPI        31
>  
> +#define GUEST_VPL011_SPI        32
> +
>  /* PSCI functions */
>  #define PSCI_cpu_suspend 0
>  #define PSCI_cpu_off     1
> @@ -419,6 +458,11 @@ typedef uint64_t xen_callback_t;
>  
>  #endif
>  
> +#ifndef __ASSEMBLY__
> +/* Stub definition of PMU structure */
> +typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t;
> +#endif
> +
>  #endif /*  __XEN_PUBLIC_ARCH_ARM_H__ */
>  
>  /*
> diff --git a/include/xen/arch-x86/cpufeatureset.h b/include/xen/arch-x86/cpufeatureset.h
> new file mode 100644
> index 0000000..0ee3ea3
> --- /dev/null
> +++ b/include/xen/arch-x86/cpufeatureset.h
> @@ -0,0 +1,263 @@
> +/*
> + * arch-x86/cpufeatureset.h
> + *
> + * CPU featureset definitions
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2015, 2016 Citrix Systems, Inc.
> + */
> +
> +/*
> + * There are two expected ways of including this header.
> + *
> + * 1) The "default" case (expected from tools etc).
> + *
> + * Simply #include <public/arch-x86/cpufeatureset.h>
> + *
> + * In this circumstance, normal header guards apply and the includer shall 
> get
> + * an enumeration in the XEN_X86_FEATURE_xxx namespace.
> + *
> + * 2) The special case where the includer provides XEN_CPUFEATURE() in scope.
> + *
> + * In this case, no inclusion guards apply and the caller is responsible for
> + * their XEN_CPUFEATURE() being appropriate in the included context.
> + */
> +
> +#ifndef XEN_CPUFEATURE
> +
> +/*
> + * Includer has not provided a custom XEN_CPUFEATURE().  Arrange for normal
> + * header guards, an enum and constants in the XEN_X86_FEATURE_xxx namespace.
> + */
> +#ifndef __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__
> +#define __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__
> +
> +#define XEN_CPUFEATURESET_DEFAULT_INCLUDE
> +
> +#define XEN_CPUFEATURE(name, value) XEN_X86_FEATURE_##name = value,
> +enum {
> +
> +#endif /* __XEN_PUBLIC_ARCH_X86_CPUFEATURESET_H__ */
> +#endif /* !XEN_CPUFEATURE */
> +
> +
> +#ifdef XEN_CPUFEATURE
> +/*
> + * A featureset is a bitmap of x86 features, represented as a collection of
> + * 32bit words.
> + *
> + * Words are as specified in vendors programming manuals, and shall not
> + * contain any synthesised values.  New words may be added to the end of
> + * featureset.
> + *
> + * All featureset words currently originate from leaves specified for the
> + * CPUID instruction, but this does not preclude other sources of information.
> + */
> +
> +/*
> + * Attribute syntax:
> + *
> + * Attributes for a particular feature are provided as characters before the
> + * first space in the comment immediately following the feature value.  Note 
> -
> + * none of these attributes form part of the Xen public ABI.
> + *
> + * Special: '!'
> + *   This bit has special properties and is not a straight indication of a
> + *   piece of new functionality.  Xen will handle these differently,
> + *   and may override toolstack settings completely.
> + *
> + * Applicability to guests: 'A', 'S' or 'H'
> + *   'A' = All guests.
> + *   'S' = All HVM guests (not PV guests).
> + *   'H' = HVM HAP guests (not PV or HVM Shadow guests).
> + */
> +
> +/* Intel-defined CPU features, CPUID level 0x00000001.edx, word 0 */
> +XEN_CPUFEATURE(FPU,           0*32+ 0) /*A  Onboard FPU */
> +XEN_CPUFEATURE(VME,           0*32+ 1) /*S  Virtual Mode Extensions */
> +XEN_CPUFEATURE(DE,            0*32+ 2) /*A  Debugging Extensions */
> +XEN_CPUFEATURE(PSE,           0*32+ 3) /*S  Page Size Extensions */
> +XEN_CPUFEATURE(TSC,           0*32+ 4) /*A  Time Stamp Counter */
> +XEN_CPUFEATURE(MSR,           0*32+ 5) /*A  Model-Specific Registers, RDMSR, 
> WRMSR */
> +XEN_CPUFEATURE(PAE,           0*32+ 6) /*A  Physical Address Extensions */
> +XEN_CPUFEATURE(MCE,           0*32+ 7) /*A  Machine Check Architecture */
> +XEN_CPUFEATURE(CX8,           0*32+ 8) /*A  CMPXCHG8 instruction */
> +XEN_CPUFEATURE(APIC,          0*32+ 9) /*!A Onboard APIC */
> +XEN_CPUFEATURE(SEP,           0*32+11) /*A  SYSENTER/SYSEXIT */
> +XEN_CPUFEATURE(MTRR,          0*32+12) /*S  Memory Type Range Registers */
> +XEN_CPUFEATURE(PGE,           0*32+13) /*S  Page Global Enable */
> +XEN_CPUFEATURE(MCA,           0*32+14) /*A  Machine Check Architecture */
> +XEN_CPUFEATURE(CMOV,          0*32+15) /*A  CMOV instruction (FCMOVCC and 
> FCOMI too if FPU present) */
> +XEN_CPUFEATURE(PAT,           0*32+16) /*A  Page Attribute Table */
> +XEN_CPUFEATURE(PSE36,         0*32+17) /*S  36-bit PSEs */
> +XEN_CPUFEATURE(CLFLUSH,       0*32+19) /*A  CLFLUSH instruction */
> +XEN_CPUFEATURE(DS,            0*32+21) /*   Debug Store */
> +XEN_CPUFEATURE(ACPI,          0*32+22) /*A  ACPI via MSR */
> +XEN_CPUFEATURE(MMX,           0*32+23) /*A  Multimedia Extensions */
> +XEN_CPUFEATURE(FXSR,          0*32+24) /*A  FXSAVE and FXRSTOR instructions 
> */
> +XEN_CPUFEATURE(SSE,           0*32+25) /*A  Streaming SIMD Extensions */
> +XEN_CPUFEATURE(SSE2,          0*32+26) /*A  Streaming SIMD Extensions-2 */
> +XEN_CPUFEATURE(SS,            0*32+27) /*A  CPU self snoop */
> +XEN_CPUFEATURE(HTT,           0*32+28) /*!A Hyper-Threading Technology */
> +XEN_CPUFEATURE(TM1,           0*32+29) /*   Thermal Monitor 1 */
> +XEN_CPUFEATURE(PBE,           0*32+31) /*   Pending Break Enable */
> +
> +/* Intel-defined CPU features, CPUID level 0x00000001.ecx, word 1 */
> +XEN_CPUFEATURE(SSE3,          1*32+ 0) /*A  Streaming SIMD Extensions-3 */
> +XEN_CPUFEATURE(PCLMULQDQ,     1*32+ 1) /*A  Carry-less multiplication */
> +XEN_CPUFEATURE(DTES64,        1*32+ 2) /*   64-bit Debug Store */
> +XEN_CPUFEATURE(MONITOR,       1*32+ 3) /*   Monitor/Mwait support */
> +XEN_CPUFEATURE(DSCPL,         1*32+ 4) /*   CPL Qualified Debug Store */
> +XEN_CPUFEATURE(VMX,           1*32+ 5) /*S  Virtual Machine Extensions */
> +XEN_CPUFEATURE(SMX,           1*32+ 6) /*   Safer Mode Extensions */
> +XEN_CPUFEATURE(EIST,          1*32+ 7) /*   Enhanced SpeedStep */
> +XEN_CPUFEATURE(TM2,           1*32+ 8) /*   Thermal Monitor 2 */
> +XEN_CPUFEATURE(SSSE3,         1*32+ 9) /*A  Supplemental Streaming SIMD 
> Extensions-3 */
> +XEN_CPUFEATURE(FMA,           1*32+12) /*A  Fused Multiply Add */
> +XEN_CPUFEATURE(CX16,          1*32+13) /*A  CMPXCHG16B */
> +XEN_CPUFEATURE(XTPR,          1*32+14) /*   Send Task Priority Messages */
> +XEN_CPUFEATURE(PDCM,          1*32+15) /*   Perf/Debug Capability MSR */
> +XEN_CPUFEATURE(PCID,          1*32+17) /*H  Process Context ID */
> +XEN_CPUFEATURE(DCA,           1*32+18) /*   Direct Cache Access */
> +XEN_CPUFEATURE(SSE4_1,        1*32+19) /*A  Streaming SIMD Extensions 4.1 */
> +XEN_CPUFEATURE(SSE4_2,        1*32+20) /*A  Streaming SIMD Extensions 4.2 */
> +XEN_CPUFEATURE(X2APIC,        1*32+21) /*!A Extended xAPIC */
> +XEN_CPUFEATURE(MOVBE,         1*32+22) /*A  movbe instruction */
> +XEN_CPUFEATURE(POPCNT,        1*32+23) /*A  POPCNT instruction */
> +XEN_CPUFEATURE(TSC_DEADLINE,  1*32+24) /*S  TSC Deadline Timer */
> +XEN_CPUFEATURE(AESNI,         1*32+25) /*A  AES instructions */
> +XEN_CPUFEATURE(XSAVE,         1*32+26) /*A  XSAVE/XRSTOR/XSETBV/XGETBV */
> +XEN_CPUFEATURE(OSXSAVE,       1*32+27) /*!  OSXSAVE */
> +XEN_CPUFEATURE(AVX,           1*32+28) /*A  Advanced Vector Extensions */
> +XEN_CPUFEATURE(F16C,          1*32+29) /*A  Half-precision convert 
> instruction */
> +XEN_CPUFEATURE(RDRAND,        1*32+30) /*A  Digital Random Number Generator 
> */
> +XEN_CPUFEATURE(HYPERVISOR,    1*32+31) /*!A Running under some hypervisor */
> +
> +/* AMD-defined CPU features, CPUID level 0x80000001.edx, word 2 */
> +XEN_CPUFEATURE(SYSCALL,       2*32+11) /*A  SYSCALL/SYSRET */
> +XEN_CPUFEATURE(NX,            2*32+20) /*A  Execute Disable */
> +XEN_CPUFEATURE(MMXEXT,        2*32+22) /*A  AMD MMX extensions */
> +XEN_CPUFEATURE(FFXSR,         2*32+25) /*A  FFXSR instruction optimizations 
> */
> +XEN_CPUFEATURE(PAGE1GB,       2*32+26) /*H  1Gb large page support */
> +XEN_CPUFEATURE(RDTSCP,        2*32+27) /*S  RDTSCP */
> +XEN_CPUFEATURE(LM,            2*32+29) /*A  Long Mode (x86-64) */
> +XEN_CPUFEATURE(3DNOWEXT,      2*32+30) /*A  AMD 3DNow! extensions */
> +XEN_CPUFEATURE(3DNOW,         2*32+31) /*A  3DNow! */
> +
> +/* AMD-defined CPU features, CPUID level 0x80000001.ecx, word 3 */
> +XEN_CPUFEATURE(LAHF_LM,       3*32+ 0) /*A  LAHF/SAHF in long mode */
> +XEN_CPUFEATURE(CMP_LEGACY,    3*32+ 1) /*!A If yes HyperThreading not valid 
> */
> +XEN_CPUFEATURE(SVM,           3*32+ 2) /*S  Secure virtual machine */
> +XEN_CPUFEATURE(EXTAPIC,       3*32+ 3) /*   Extended APIC space */
> +XEN_CPUFEATURE(CR8_LEGACY,    3*32+ 4) /*S  CR8 in 32-bit mode */
> +XEN_CPUFEATURE(ABM,           3*32+ 5) /*A  Advanced bit manipulation */
> +XEN_CPUFEATURE(SSE4A,         3*32+ 6) /*A  SSE-4A */
> +XEN_CPUFEATURE(MISALIGNSSE,   3*32+ 7) /*A  Misaligned SSE mode */
> +XEN_CPUFEATURE(3DNOWPREFETCH, 3*32+ 8) /*A  3DNow prefetch instructions */
> +XEN_CPUFEATURE(OSVW,          3*32+ 9) /*   OS Visible Workaround */
> +XEN_CPUFEATURE(IBS,           3*32+10) /*   Instruction Based Sampling */
> +XEN_CPUFEATURE(XOP,           3*32+11) /*A  extended AVX instructions */
> +XEN_CPUFEATURE(SKINIT,        3*32+12) /*   SKINIT/STGI instructions */
> +XEN_CPUFEATURE(WDT,           3*32+13) /*   Watchdog timer */
> +XEN_CPUFEATURE(LWP,           3*32+15) /*S  Light Weight Profiling */
> +XEN_CPUFEATURE(FMA4,          3*32+16) /*A  4 operands MAC instructions */
> +XEN_CPUFEATURE(NODEID_MSR,    3*32+19) /*   NodeId MSR */
> +XEN_CPUFEATURE(TBM,           3*32+21) /*A  trailing bit manipulations */
> +XEN_CPUFEATURE(TOPOEXT,       3*32+22) /*   topology extensions CPUID leafs 
> */
> +XEN_CPUFEATURE(DBEXT,         3*32+26) /*A  data breakpoint extension */
> +XEN_CPUFEATURE(MONITORX,      3*32+29) /*   MONITOR extension 
> (MONITORX/MWAITX) */
> +
> +/* Intel-defined CPU features, CPUID level 0x0000000D:1.eax, word 4 */
> +XEN_CPUFEATURE(XSAVEOPT,      4*32+ 0) /*A  XSAVEOPT instruction */
> +XEN_CPUFEATURE(XSAVEC,        4*32+ 1) /*A  XSAVEC/XRSTORC instructions */
> +XEN_CPUFEATURE(XGETBV1,       4*32+ 2) /*A  XGETBV with %ecx=1 */
> +XEN_CPUFEATURE(XSAVES,        4*32+ 3) /*S  XSAVES/XRSTORS instructions */
> +
> +/* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */
> +XEN_CPUFEATURE(FSGSBASE,      5*32+ 0) /*A  {RD,WR}{FS,GS}BASE instructions 
> */
> +XEN_CPUFEATURE(TSC_ADJUST,    5*32+ 1) /*S  TSC_ADJUST MSR available */
> +XEN_CPUFEATURE(SGX,           5*32+ 2) /*   Software Guard extensions */
> +XEN_CPUFEATURE(BMI1,          5*32+ 3) /*A  1st bit manipulation extensions 
> */
> +XEN_CPUFEATURE(HLE,           5*32+ 4) /*A  Hardware Lock Elision */
> +XEN_CPUFEATURE(AVX2,          5*32+ 5) /*A  AVX2 instructions */
> +XEN_CPUFEATURE(FDP_EXCP_ONLY, 5*32+ 6) /*!  x87 FDP only updated on 
> exception. */
> +XEN_CPUFEATURE(SMEP,          5*32+ 7) /*S  Supervisor Mode Execution 
> Protection */
> +XEN_CPUFEATURE(BMI2,          5*32+ 8) /*A  2nd bit manipulation extensions 
> */
> +XEN_CPUFEATURE(ERMS,          5*32+ 9) /*A  Enhanced REP MOVSB/STOSB */
> +XEN_CPUFEATURE(INVPCID,       5*32+10) /*H  Invalidate Process Context ID */
> +XEN_CPUFEATURE(RTM,           5*32+11) /*A  Restricted Transactional Memory 
> */
> +XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
> +XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
> +XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
> +XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions 
> */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword 
> Instrs */
> +XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
> +XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
> +XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access 
> Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply 
> Add */
> +XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
> +XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal 
> Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection 
> Instrs */
> +XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word 
> Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions 
> */
> +
> +/* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
> +XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation 
> Instrs */
> +XEN_CPUFEATURE(UMIP,          6*32+ 2) /*S  User Mode Instruction Prevention 
> */
> +XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
> +XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
> +XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
> +XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
> +
> +/* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
> +XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
> +XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface 
> */
> +
> +/* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
> +XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
> +
> +/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
> +XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network 
> Instructions */
> +XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation 
> Single Precision */
> +
> +#endif /* XEN_CPUFEATURE */
> +
> +/* Clean up from a default include.  Close the enum (for C). */
> +#ifdef XEN_CPUFEATURESET_DEFAULT_INCLUDE
> +#undef XEN_CPUFEATURESET_DEFAULT_INCLUDE
> +#undef XEN_CPUFEATURE
> +};
> +
> +#endif /* XEN_CPUFEATURESET_DEFAULT_INCLUDE */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/arch-x86/cpuid.h b/include/xen/arch-x86/cpuid.h
> index d709340..eb76875 100644
> --- a/include/xen/arch-x86/cpuid.h
> +++ b/include/xen/arch-x86/cpuid.h
> @@ -73,11 +73,27 @@
>  #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
>  #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
>  
> +/*
> + * Leaf 4 (0x40000x03)
> + * Sub-leaf 0: EAX: bit 0: emulated tsc
> + *                  bit 1: host tsc is known to be reliable
> + *                  bit 2: RDTSCP instruction available
> + *             EBX: tsc_mode: 0=default (emulate if necessary), 1=emulate,
> + *                            2=no emulation, 3=no emulation + TSC_AUX support
> + *             ECX: guest tsc frequency in kHz
> + *             EDX: guest tsc incarnation (migration count)
> + * Sub-leaf 1: EAX: tsc offset low part
> + *             EBX: tsc offset high part
> + *             ECX: multiplicator for tsc->ns conversion
> + *             EDX: shift amount for tsc->ns conversion
> + * Sub-leaf 2: EAX: host tsc frequency in kHz
> + */
> +
>  /*
>   * Leaf 5 (0x40000x04)
>   * HVM-specific features
> - * EAX: Features
> - * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
> + * Sub-leaf 0: EAX: Features
> + * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
>   */
>  #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
>  #define XEN_HVM_CPUID_X2APIC_VIRT      (1u << 1) /* Virtualized x2APIC accesses */
> @@ -85,6 +101,16 @@
>  #define XEN_HVM_CPUID_IOMMU_MAPPINGS   (1u << 2)
>  #define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
>  
> -#define XEN_CPUID_MAX_NUM_LEAVES 4
> +/*
> + * Leaf 6 (0x40000x05)
> + * PV-specific parameters
> + * Sub-leaf 0: EAX: max available sub-leaf
> + * Sub-leaf 0: EBX: bits 0-7: max machine address width
> + */
> +
> +/* Max. address width in bits taking memory hotplug into account. */
> +#define XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK (0xffu << 0)
> +
> +#define XEN_CPUID_MAX_NUM_LEAVES 5
>  
>  #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
> diff --git a/include/xen/arch-x86/hvm/save.h b/include/xen/arch-x86/hvm/save.h
> index efb0b62..fd7bf3f 100644
> --- a/include/xen/arch-x86/hvm/save.h
> +++ b/include/xen/arch-x86/hvm/save.h
> @@ -47,7 +47,9 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
>  /*
>   * Processor
>   *
> - * Compat: Pre-3.4 didn't have msr_tsc_aux
> + * Compat:
> + *     - Pre-3.4 didn't have msr_tsc_aux
> + *     - Pre-4.7 didn't have fpu_initialised
>   */
>  
>  struct hvm_hw_cpu {
> @@ -133,7 +135,7 @@ struct hvm_hw_cpu {
>      uint64_t shadow_gs;
>  
>      /* msr content saved/restored. */
> -    uint64_t msr_flags;
> +    uint64_t msr_flags; /* Obsolete, ignored. */
>      uint64_t msr_lstar;
>      uint64_t msr_star;
>      uint64_t msr_cstar;
> @@ -157,6 +159,11 @@ struct hvm_hw_cpu {
>      };
>      /* error code for pending event */
>      uint32_t error_code;
> +
> +#define _XEN_X86_FPU_INITIALISED        0
> +#define XEN_X86_FPU_INITIALISED         (1U<<_XEN_X86_FPU_INITIALISED)
> +    uint32_t flags;
> +    uint32_t pad0;
>  };
>  
>  struct hvm_hw_cpu_compat {
> @@ -242,7 +249,7 @@ struct hvm_hw_cpu_compat {
>      uint64_t shadow_gs;
>  
>      /* msr content saved/restored. */
> -    uint64_t msr_flags;
> +    uint64_t msr_flags; /* Obsolete, ignored. */
>      uint64_t msr_lstar;
>      uint64_t msr_star;
>      uint64_t msr_cstar;
> @@ -268,19 +275,26 @@ struct hvm_hw_cpu_compat {
>      uint32_t error_code;
>  };
>  
> -static inline int _hvm_hw_fix_cpu(void *h) {
> +static inline int _hvm_hw_fix_cpu(void *h, uint32_t size) {
>  
>      union hvm_hw_cpu_union {
>          struct hvm_hw_cpu nat;
>          struct hvm_hw_cpu_compat cmp;
>      } *ucpu = (union hvm_hw_cpu_union *)h;
>  
> -    /* If we copy from the end backwards, we should
> -     * be able to do the modification in-place */
> -    ucpu->nat.error_code = ucpu->cmp.error_code;
> -    ucpu->nat.pending_event = ucpu->cmp.pending_event;
> -    ucpu->nat.tsc = ucpu->cmp.tsc;
> -    ucpu->nat.msr_tsc_aux = 0;
> +    if ( size == sizeof(struct hvm_hw_cpu_compat) )
> +    {
> +        /*
> +         * If we copy from the end backwards, we should
> +         * be able to do the modification in-place.
> +         */
> +        ucpu->nat.error_code = ucpu->cmp.error_code;
> +        ucpu->nat.pending_event = ucpu->cmp.pending_event;
> +        ucpu->nat.tsc = ucpu->cmp.tsc;
> +        ucpu->nat.msr_tsc_aux = 0;
> +    }
> +    /* Mimic the old behaviour by unconditionally setting fpu_initialised. */
> +    ucpu->nat.flags = XEN_X86_FPU_INITIALISED;
>  
>      return 0;
>  }
> @@ -347,30 +361,41 @@ DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
>   * IO-APIC
>   */
>  
> +union vioapic_redir_entry
> +{
> +    uint64_t bits;
> +    struct {
> +        uint8_t vector;
> +        uint8_t delivery_mode:3;
> +        uint8_t dest_mode:1;
> +        uint8_t delivery_status:1;
> +        uint8_t polarity:1;
> +        uint8_t remote_irr:1;
> +        uint8_t trig_mode:1;
> +        uint8_t mask:1;
> +        uint8_t reserve:7;
> +        uint8_t reserved[4];
> +        uint8_t dest_id;
> +    } fields;
> +};
> +
>  #define VIOAPIC_NUM_PINS  48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */
>  
> -struct hvm_hw_vioapic {
> -    uint64_t base_address;
> -    uint32_t ioregsel;
> -    uint32_t id;
> -    union vioapic_redir_entry
> -    {
> -        uint64_t bits;
> -        struct {
> -            uint8_t vector;
> -            uint8_t delivery_mode:3;
> -            uint8_t dest_mode:1;
> -            uint8_t delivery_status:1;
> -            uint8_t polarity:1;
> -            uint8_t remote_irr:1;
> -            uint8_t trig_mode:1;
> -            uint8_t mask:1;
> -            uint8_t reserve:7;
> -            uint8_t reserved[4];
> -            uint8_t dest_id;
> -        } fields;
> -    } redirtbl[VIOAPIC_NUM_PINS];
> -};
> +#define XEN_HVM_VIOAPIC(name, cnt)                      \
> +    struct name {                                       \
> +        uint64_t base_address;                          \
> +        uint32_t ioregsel;                              \
> +        uint32_t id;                                    \
> +        union vioapic_redir_entry redirtbl[cnt];        \
> +    }
> +
> +XEN_HVM_VIOAPIC(hvm_hw_vioapic, VIOAPIC_NUM_PINS);
> +
> +#ifndef __XEN__
> +#undef XEN_HVM_VIOAPIC
> +#else
> +#undef VIOAPIC_NUM_PINS
> +#endif
>  
>  DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
>  
> @@ -550,12 +575,11 @@ struct hvm_hw_cpu_xsave {
>      struct {
>          struct { char x[512]; } fpu_sse;
>  
> -        struct {
> +        struct hvm_hw_cpu_xsave_hdr {
>              uint64_t xstate_bv;         /* Updated by XRSTOR */
> -            uint64_t reserved[7];
> +            uint64_t xcomp_bv;          /* Updated by XRSTOR{C,S} */
> +            uint64_t reserved[6];
>          } xsave_hdr;                    /* The 64-byte header */
> -
> -        struct { char x[0]; } ymm;    /* YMM */
>      } save_area;
>  };
>  
> @@ -575,7 +599,9 @@ struct hvm_viridian_domain_context {
>  DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
>  
>  struct hvm_viridian_vcpu_context {
> -    uint64_t apic_assist;
> +    uint64_t vp_assist_msr;
> +    uint8_t  vp_assist_vector;
> +    uint8_t  _pad[7];
>  };
>  
>  DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context);
> @@ -584,6 +610,7 @@ struct hvm_vmce_vcpu {
>      uint64_t caps;
>      uint64_t mci_ctl2_bank0;
>      uint64_t mci_ctl2_bank1;
> +    uint64_t mcg_ext_ctl;
>  };
>  
>  DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);
> diff --git a/include/xen/arch-x86/pmu.h b/include/xen/arch-x86/pmu.h
> new file mode 100644
> index 0000000..68ebf12
> --- /dev/null
> +++ b/include/xen/arch-x86/pmu.h
> @@ -0,0 +1,167 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +#ifndef __XEN_PUBLIC_ARCH_X86_PMU_H__
> +#define __XEN_PUBLIC_ARCH_X86_PMU_H__
> +
> +/* x86-specific PMU definitions */
> +
> +/* AMD PMU registers and structures */
> +struct xen_pmu_amd_ctxt {
> +    /*
> +     * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
> +     * For PV(H) guests these fields are RO.
> +     */
> +    uint32_t counters;
> +    uint32_t ctrls;
> +
> +    /* Counter MSRs */
> +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
> +    uint64_t regs[];
> +#elif defined(__GNUC__)
> +    uint64_t regs[0];
> +#endif
> +};
> +typedef struct xen_pmu_amd_ctxt xen_pmu_amd_ctxt_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_amd_ctxt_t);
> +
> +/* Intel PMU registers and structures */
> +struct xen_pmu_cntr_pair {
> +    uint64_t counter;
> +    uint64_t control;
> +};
> +typedef struct xen_pmu_cntr_pair xen_pmu_cntr_pair_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t);
> +
> +struct xen_pmu_intel_ctxt {
> +   /*
> +    * Offsets to fixed and architectural counter MSRs (relative to
> +    * xen_pmu_arch.c.intel).
> +    * For PV(H) guests these fields are RO.
> +    */
> +    uint32_t fixed_counters;
> +    uint32_t arch_counters;
> +
> +    /* PMU registers */
> +    uint64_t global_ctrl;
> +    uint64_t global_ovf_ctrl;
> +    uint64_t global_status;
> +    uint64_t fixed_ctrl;
> +    uint64_t ds_area;
> +    uint64_t pebs_enable;
> +    uint64_t debugctl;
> +
> +    /* Fixed and architectural counter MSRs */
> +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
> +    uint64_t regs[];
> +#elif defined(__GNUC__)
> +    uint64_t regs[0];
> +#endif
> +};
> +typedef struct xen_pmu_intel_ctxt xen_pmu_intel_ctxt_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_intel_ctxt_t);
> +
> +/* Sampled domain's registers */
> +struct xen_pmu_regs {
> +    uint64_t ip;
> +    uint64_t sp;
> +    uint64_t flags;
> +    uint16_t cs;
> +    uint16_t ss;
> +    uint8_t cpl;
> +    uint8_t pad[3];
> +};
> +typedef struct xen_pmu_regs xen_pmu_regs_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
> +
> +/* PMU flags */
> +#define PMU_CACHED         (1<<0) /* PMU MSRs are cached in the context */
> +#define PMU_SAMPLE_USER    (1<<1) /* Sample is from user or kernel mode */
> +#define PMU_SAMPLE_REAL    (1<<2) /* Sample is from realmode */
> +#define PMU_SAMPLE_PV      (1<<3) /* Sample from a PV guest */
> +
> +/*
> + * Architecture-specific information describing state of the processor at
> + * the time of PMU interrupt.
> + * Fields of this structure marked as RW for guest should only be written by
> + * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
> + * hypervisor during PMU interrupt). Hypervisor will read updated data in
> + * XENPMU_flush hypercall and clear PMU_CACHED bit.
> + */
> +struct xen_pmu_arch {
> +    union {
> +        /*
> +         * Processor's registers at the time of interrupt.
> +         * WO for hypervisor, RO for guests.
> +         */
> +        struct xen_pmu_regs regs;
> +        /* Padding for adding new registers to xen_pmu_regs in the future */
> +#define XENPMU_REGS_PAD_SZ  64
> +        uint8_t pad[XENPMU_REGS_PAD_SZ];
> +    } r;
> +
> +    /* WO for hypervisor, RO for guest */
> +    uint64_t pmu_flags;
> +
> +    /*
> +     * APIC LVTPC register.
> +     * RW for both hypervisor and guest.
> +     * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
> +     * during XENPMU_flush or XENPMU_lvtpc_set.
> +     */
> +    union {
> +        uint32_t lapic_lvtpc;
> +        uint64_t pad;
> +    } l;
> +
> +    /*
> +     * Vendor-specific PMU registers.
> +     * RW for both hypervisor and guest (see exceptions above).
> +     * Guest's updates to this field are verified and then loaded by the
> +     * hypervisor into hardware during XENPMU_flush
> +     */
> +    union {
> +        struct xen_pmu_amd_ctxt amd;
> +        struct xen_pmu_intel_ctxt intel;
> +
> +        /*
> +         * Padding for contexts (fixed parts only, does not include MSR banks
> +         * that are specified by offsets)
> +         */
> +#define XENPMU_CTXT_PAD_SZ  128
> +        uint8_t pad[XENPMU_CTXT_PAD_SZ];
> +    } c;
> +};
> +typedef struct xen_pmu_arch xen_pmu_arch_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t);
> +
> +#endif /* __XEN_PUBLIC_ARCH_X86_PMU_H__ */
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> +
> diff --git a/include/xen/arch-x86/xen-mca.h b/include/xen/arch-x86/xen-mca.h
> index 04382ed..dc35267 100644
> --- a/include/xen/arch-x86/xen-mca.h
> +++ b/include/xen/arch-x86/xen-mca.h
> @@ -1,11 +1,11 @@
>  /******************************************************************************
>   * arch-x86/mca.h
> - * 
> + *
>   * Contributed by Advanced Micro Devices, Inc.
>   * Author: Christoph Egger <Christoph.Egger@xxxxxxx>
>   *
>   * Guest OS machine check interface to x86 Xen.
> - * 
> + *
>   * Permission is hereby granted, free of charge, to any person obtaining a copy
>   * of this software and associated documentation files (the "Software"), to
>   * deal in the Software without restriction, including without limitation the
> @@ -88,6 +88,8 @@
>  #define XEN_MC_NOTDELIVERED 0x10
>  /* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
>  
> +/* Applicable to all mc_vcpuid fields below. */
> +#define XEN_MC_VCPUID_INVALID 0xffff
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -156,7 +158,7 @@ struct mcinfo_msr {
>  };
>  
>  /* contains mc information from other
> - * or additional mc MSRs */ 
> + * or additional mc MSRs */
>  struct mcinfo_extended {
>      struct mcinfo_common common;
>  
> @@ -193,10 +195,10 @@ struct mcinfo_extended {
>  /* L3 cache disable Action */
>  #define MC_ACTION_CACHE_SHRINK (0x1 << 2)
>  
> -/* Below interface used between XEN/DOM0 for passing XEN's recovery action 
> - * information to DOM0. 
> +/* Below interface used between XEN/DOM0 for passing XEN's recovery action
> + * information to DOM0.
>   * usage Senario: After offlining broken page, XEN might pass its page offline
> - * recovery action result to DOM0. DOM0 will save the information in 
> + * recovery action result to DOM0. DOM0 will save the information in
>   * non-volatile memory for further proactive actions, such as offlining the
>   * easy broken page earlier when doing next reboot.
>  */
> @@ -255,8 +257,8 @@ DEFINE_XEN_GUEST_HANDLE(mc_info_t);
>  #define MC_CAPS_AMD_ECX      6       /* cpuid level 0x80000001 (%ecx) */
>  
>  struct mcinfo_logical_cpu {
> -    uint32_t mc_cpunr;          
> -    uint32_t mc_chipid; 
> +    uint32_t mc_cpunr;
> +    uint32_t mc_chipid;
>      uint16_t mc_coreid;
>      uint16_t mc_threadid;
>      uint32_t mc_apicid;
> @@ -281,7 +283,7 @@ typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
>  
>  
> -/* 
> +/*
>   * OS's should use these instead of writing their own lookup function
>   * each with its own bugs and drawbacks.
>   * We use macros instead of static inline functions to allow guests
> @@ -312,8 +314,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
>          struct mcinfo_common *_mic;                             \
>                                                                  \
>          found = 0;                                              \
> -     (_ret) = NULL;                                          \
> -     if (_mi == NULL) break;                                 \
> +        (_ret) = NULL;                                          \
> +        if (_mi == NULL) break;                                 \
>          _mic = x86_mcinfo_first(_mi);                           \
>          for (i = 0; i < x86_mcinfo_nentries(_mi); i++) {        \
>              if (_mic->type == (_type)) {                        \
> @@ -345,8 +347,8 @@ struct xen_mc_fetch {
>      /* IN/OUT variables. */
>      uint32_t flags;  /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
>                             XEN_MC_ACK if ack'ing an earlier fetch */
> -                     /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
> -                        XEN_MC_NODATA, XEN_MC_NOMATCH */
> +                       /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
> +                          XEN_MC_NODATA, XEN_MC_NOMATCH */
>      uint32_t _pad0;
>      uint64_t fetch_id;       /* OUT: id for ack, IN: id we are ack'ing */
>  
> @@ -378,30 +380,33 @@ DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
>  
>  #define XEN_MC_physcpuinfo 3
>  struct xen_mc_physcpuinfo {
> -     /* IN/OUT */
> -     uint32_t ncpus;
> -     uint32_t _pad0;
> -     /* OUT */
> -     XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
> +    /* IN/OUT */
> +    uint32_t ncpus;
> +    uint32_t _pad0;
> +    /* OUT */
> +    XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
>  };
>  
>  #define XEN_MC_msrinject    4
>  #define MC_MSRINJ_MAXMSRS       8
>  struct xen_mc_msrinject {
> -       /* IN */
> -     uint32_t mcinj_cpunr;           /* target processor id */
> -     uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
> -     uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
> -     uint32_t _pad0;
> -     struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
> +    /* IN */
> +    uint32_t mcinj_cpunr;           /* target processor id */
> +    uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
> +    uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
> +    domid_t  mcinj_domid;           /* valid only if MC_MSRINJ_F_GPADDR is
> +                                       present in mcinj_flags */
> +    uint16_t _pad0;
> +    struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
>  };
>  
>  /* Flags for mcinj_flags above; bits 16-31 are reserved */
>  #define MC_MSRINJ_F_INTERPOSE   0x1
> +#define MC_MSRINJ_F_GPADDR      0x2
>  
>  #define XEN_MC_mceinject    5
>  struct xen_mc_mceinject {
> -     unsigned int mceinj_cpunr;      /* target processor id */
> +    unsigned int mceinj_cpunr;      /* target processor id */
>  };
>  
>  #if defined(__XEN__) || defined(__XEN_TOOLS__)
> @@ -409,12 +414,13 @@ struct xen_mc_mceinject {
>  #define XEN_MC_INJECT_TYPE_MASK     0x7
>  #define XEN_MC_INJECT_TYPE_MCE      0x0
>  #define XEN_MC_INJECT_TYPE_CMCI     0x1
> +#define XEN_MC_INJECT_TYPE_LMCE     0x2
>  
>  #define XEN_MC_INJECT_CPU_BROADCAST 0x8
>  
>  struct xen_mc_inject_v2 {
> -     uint32_t flags;
> -     struct xenctl_bitmap cpumap;
> +    uint32_t flags;
> +    struct xenctl_bitmap cpumap;
>  };
>  #endif
>  
> diff --git a/include/xen/arch-x86/xen-x86_32.h b/include/xen/arch-x86/xen-x86_32.h
> index 6339727..aa388b7 100644
> --- a/include/xen/arch-x86/xen-x86_32.h
> +++ b/include/xen/arch-x86/xen-x86_32.h
> @@ -55,43 +55,38 @@
>  #define FLAT_USER_DS    FLAT_RING3_DS
>  #define FLAT_USER_SS    FLAT_RING3_SS
>  
> -#ifdef CONFIG_PARAVIRT
>  #define __HYPERVISOR_VIRT_START_PAE    0xF5800000
>  #define __MACH2PHYS_VIRT_START_PAE     0xF5800000
>  #define __MACH2PHYS_VIRT_END_PAE       0xF6800000
> -#define HYPERVISOR_VIRT_START_PAE      \
> -    mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
> -#define MACH2PHYS_VIRT_START_PAE       \
> -    mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
> -#define MACH2PHYS_VIRT_END_PAE         \
> -    mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
> +#define HYPERVISOR_VIRT_START_PAE      xen_mk_ulong(__HYPERVISOR_VIRT_START_PAE)
> +#define MACH2PHYS_VIRT_START_PAE       xen_mk_ulong(__MACH2PHYS_VIRT_START_PAE)
> +#define MACH2PHYS_VIRT_END_PAE         xen_mk_ulong(__MACH2PHYS_VIRT_END_PAE)
>  
>  /* Non-PAE bounds are obsolete. */
>  #define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
>  #define __MACH2PHYS_VIRT_START_NONPAE  0xFC000000
>  #define __MACH2PHYS_VIRT_END_NONPAE    0xFC400000
>  #define HYPERVISOR_VIRT_START_NONPAE   \
> -    mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
> +    xen_mk_ulong(__HYPERVISOR_VIRT_START_NONPAE)
>  #define MACH2PHYS_VIRT_START_NONPAE    \
> -    mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
> +    xen_mk_ulong(__MACH2PHYS_VIRT_START_NONPAE)
>  #define MACH2PHYS_VIRT_END_NONPAE      \
> -    mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
> +    xen_mk_ulong(__MACH2PHYS_VIRT_END_NONPAE)
>  
>  #define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
>  #define __MACH2PHYS_VIRT_START  __MACH2PHYS_VIRT_START_PAE
>  #define __MACH2PHYS_VIRT_END    __MACH2PHYS_VIRT_END_PAE
>  
>  #ifndef HYPERVISOR_VIRT_START
> -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
> +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
>  #endif
>  
> -#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
> -#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
> +#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
> +#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
>  #define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
>  #ifndef machine_to_phys_mapping
>  #define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
>  #endif
> -#endif
>  
>  /* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
>  #if defined(__XEN__) || defined(__XEN_TOOLS__)
> @@ -106,6 +101,7 @@
>      do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
>           (hnd).p = val;                                     \
>      } while ( 0 )
> +#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
>  #define uint64_aligned_t uint64_t __attribute__((aligned(8)))
>  #define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
>  #define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
> @@ -113,22 +109,44 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#if defined(XEN_GENERATING_COMPAT_HEADERS)
> +/* nothing */
> +#elif defined(__XEN__) || defined(__XEN_TOOLS__)
> +/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax). */
> +#define __DECL_REG_LO8(which) union { \
> +    uint32_t e ## which ## x; \
> +    uint16_t which ## x; \
> +    struct { \
> +        uint8_t which ## l; \
> +        uint8_t which ## h; \
> +    }; \
> +}
> +#define __DECL_REG_LO16(name) union { \
> +    uint32_t e ## name, _e ## name; \
> +    uint16_t name; \
> +}
> +#else
> +/* Other sources must always use the proper 32-bit name (e.g., eax). */
> +#define __DECL_REG_LO8(which) uint32_t e ## which ## x
> +#define __DECL_REG_LO16(name) uint32_t e ## name
> +#endif
> +
>  struct cpu_user_regs {
> -    uint32_t ebx;
> -    uint32_t ecx;
> -    uint32_t edx;
> -    uint32_t esi;
> -    uint32_t edi;
> -    uint32_t ebp;
> -    uint32_t eax;
> +    __DECL_REG_LO8(b);
> +    __DECL_REG_LO8(c);
> +    __DECL_REG_LO8(d);
> +    __DECL_REG_LO16(si);
> +    __DECL_REG_LO16(di);
> +    __DECL_REG_LO16(bp);
> +    __DECL_REG_LO8(a);
>      uint16_t error_code;    /* private */
>      uint16_t entry_vector;  /* private */
> -    uint32_t eip;
> +    __DECL_REG_LO16(ip);
>      uint16_t cs;
>      uint8_t  saved_upcall_mask;
>      uint8_t  _pad0;
> -    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
> -    uint32_t esp;
> +    __DECL_REG_LO16(flags); /* eflags.IF == !saved_upcall_mask */
> +    __DECL_REG_LO16(sp);
>      uint16_t ss, _pad1;
>      uint16_t es, _pad2;
>      uint16_t ds, _pad3;
> @@ -138,6 +156,9 @@ struct cpu_user_regs {
>  typedef struct cpu_user_regs cpu_user_regs_t;
>  DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
>  
> +#undef __DECL_REG_LO8
> +#undef __DECL_REG_LO16
> +
>  /*
>   * Page-directory addresses above 4GB do not fit into architectural %cr3.
>   * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
> diff --git a/include/xen/arch-x86/xen-x86_64.h b/include/xen/arch-x86/xen-x86_64.h
> index 2c00111..978f8cb 100644
> --- a/include/xen/arch-x86/xen-x86_64.h
> +++ b/include/xen/arch-x86/xen-x86_64.h
> @@ -70,24 +70,22 @@
>  #define FLAT_USER_SS32 FLAT_RING3_SS32
>  #define FLAT_USER_SS   FLAT_USER_SS64
>  
> -#ifdef CONFIG_PARAVIRT
>  #define __HYPERVISOR_VIRT_START 0xFFFF800000000000
>  #define __HYPERVISOR_VIRT_END   0xFFFF880000000000
>  #define __MACH2PHYS_VIRT_START  0xFFFF800000000000
>  #define __MACH2PHYS_VIRT_END    0xFFFF804000000000
>  
>  #ifndef HYPERVISOR_VIRT_START
> -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
> -#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
> +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
> +#define HYPERVISOR_VIRT_END   xen_mk_ulong(__HYPERVISOR_VIRT_END)
>  #endif
>  
> -#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
> -#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
> +#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
> +#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
>  #define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
>  #ifndef machine_to_phys_mapping
>  #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
>  #endif
> -#endif
>  
>  /*
>   * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
> @@ -132,7 +130,35 @@ struct iret_context {
>      /* Bottom of iret stack frame. */
>  };
>  
> -#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
> +#if defined(__XEN__) || defined(__XEN_TOOLS__)
> +/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax/rax). */
> +#define __DECL_REG_LOHI(which) union { \
> +    uint64_t r ## which ## x; \
> +    uint32_t e ## which ## x; \
> +    uint16_t which ## x; \
> +    struct { \
> +        uint8_t which ## l; \
> +        uint8_t which ## h; \
> +    }; \
> +}
> +#define __DECL_REG_LO8(name) union { \
> +    uint64_t r ## name; \
> +    uint32_t e ## name; \
> +    uint16_t name; \
> +    uint8_t name ## l; \
> +}
> +#define __DECL_REG_LO16(name) union { \
> +    uint64_t r ## name; \
> +    uint32_t e ## name; \
> +    uint16_t name; \
> +}
> +#define __DECL_REG_HI(num) union { \
> +    uint64_t r ## num; \
> +    uint32_t r ## num ## d; \
> +    uint16_t r ## num ## w; \
> +    uint8_t r ## num ## b; \
> +}
> +#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
>  /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
>  #define __DECL_REG(name) union { \
>      uint64_t r ## name, e ## name; \
> @@ -143,30 +169,37 @@ struct iret_context {
>  #define __DECL_REG(name) uint64_t r ## name
>  #endif
>  
> +#ifndef __DECL_REG_LOHI
> +#define __DECL_REG_LOHI(name) __DECL_REG(name ## x)
> +#define __DECL_REG_LO8        __DECL_REG
> +#define __DECL_REG_LO16       __DECL_REG
> +#define __DECL_REG_HI(num)    uint64_t r ## num
> +#endif
> +
>  struct cpu_user_regs {
> -    uint64_t r15;
> -    uint64_t r14;
> -    uint64_t r13;
> -    uint64_t r12;
> -    __DECL_REG(bp);
> -    __DECL_REG(bx);
> -    uint64_t r11;
> -    uint64_t r10;
> -    uint64_t r9;
> -    uint64_t r8;
> -    __DECL_REG(ax);
> -    __DECL_REG(cx);
> -    __DECL_REG(dx);
> -    __DECL_REG(si);
> -    __DECL_REG(di);
> +    __DECL_REG_HI(15);
> +    __DECL_REG_HI(14);
> +    __DECL_REG_HI(13);
> +    __DECL_REG_HI(12);
> +    __DECL_REG_LO8(bp);
> +    __DECL_REG_LOHI(b);
> +    __DECL_REG_HI(11);
> +    __DECL_REG_HI(10);
> +    __DECL_REG_HI(9);
> +    __DECL_REG_HI(8);
> +    __DECL_REG_LOHI(a);
> +    __DECL_REG_LOHI(c);
> +    __DECL_REG_LOHI(d);
> +    __DECL_REG_LO8(si);
> +    __DECL_REG_LO8(di);
>      uint32_t error_code;    /* private */
>      uint32_t entry_vector;  /* private */
> -    __DECL_REG(ip);
> +    __DECL_REG_LO16(ip);
>      uint16_t cs, _pad0[1];
>      uint8_t  saved_upcall_mask;
>      uint8_t  _pad1[3];
> -    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
> -    __DECL_REG(sp);
> +    __DECL_REG_LO16(flags); /* rflags.IF == !saved_upcall_mask */
> +    __DECL_REG_LO8(sp);
>      uint16_t ss, _pad2[3];
>      uint16_t es, _pad3[3];
>      uint16_t ds, _pad4[3];
> @@ -177,6 +210,10 @@ typedef struct cpu_user_regs cpu_user_regs_t;
>  DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
>  
>  #undef __DECL_REG
> +#undef __DECL_REG_LOHI
> +#undef __DECL_REG_LO8
> +#undef __DECL_REG_LO16
> +#undef __DECL_REG_HI
>  
>  #define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
>  #define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
> diff --git a/include/xen/arch-x86/xen.h b/include/xen/arch-x86/xen.h
> index c5e880b..ff91831 100644
> --- a/include/xen/arch-x86/xen.h
> +++ b/include/xen/arch-x86/xen.h
> @@ -54,13 +54,20 @@
>  #define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
>  #define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
>  #define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
> -#ifdef __XEN_TOOLS__
> -#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
> -#endif
>  #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
>  
>  #if defined(__i386__)
> +# ifdef __XEN__
> +__DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x
> +__DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; }
> +# endif
>  #include "xen-x86_32.h"
> +# ifdef __XEN__
> +__UnDeF__ __DECL_REG_LO8
> +__UnDeF__ __DECL_REG_LO16
> +__DeFiNe__ __DECL_REG_LO8(which) e ## which ## x
> +__DeFiNe__ __DECL_REG_LO16(name) e ## name
> +# endif
>  #elif defined(__x86_64__)
>  #include "xen-x86_64.h"
>  #endif
> @@ -152,17 +159,15 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
>  typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
>  
>  /*
> - * The following is all CPU context. Note that the fpu_ctxt block is filled 
> + * The following is all CPU context. Note that the fpu_ctxt block is filled
>   * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
>   *
> - * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
> - * for HVM and PVH guests, not all information in this structure is updated:
> - *
> - * - For HVM guests, the structures read include: fpu_ctxt (if
> - * VGCT_I387_VALID is set), flags, user_regs, debugreg[*]
> + * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all
> + * information in this structure is updated, the fields read include: fpu_ctxt
> + * (if VGCT_I387_VALID is set), flags, user_regs and debugreg[*].
>   *
> - * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
> - * set cr3. All other fields not used should be set to 0.
> + * Note: VCPUOP_initialise for HVM guests is non-symmetric with
> + * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h
>   */
>  struct vcpu_guest_context {
>      /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
> @@ -255,9 +260,56 @@ struct arch_shared_info {
>      unsigned long p2m_cr3;         /* cr3 value of the p2m address space */
>      unsigned long p2m_vaddr;       /* virtual address of the p2m list */
>      unsigned long p2m_generation;  /* generation count of p2m mapping */
> +#ifdef __i386__
> +    /* There's no room for this field in the generic structure. */
> +    uint32_t wc_sec_hi;
> +#endif
>  };
>  typedef struct arch_shared_info arch_shared_info_t;
>  
> +#if defined(__XEN__) || defined(__XEN_TOOLS__)
> +/*
> + * struct xen_arch_domainconfig's ABI is covered by
> + * XEN_DOMCTL_INTERFACE_VERSION.
> + */
> +struct xen_arch_domainconfig {
> +#define _XEN_X86_EMU_LAPIC          0
> +#define XEN_X86_EMU_LAPIC           (1U<<_XEN_X86_EMU_LAPIC)
> +#define _XEN_X86_EMU_HPET           1
> +#define XEN_X86_EMU_HPET            (1U<<_XEN_X86_EMU_HPET)
> +#define _XEN_X86_EMU_PM             2
> +#define XEN_X86_EMU_PM              (1U<<_XEN_X86_EMU_PM)
> +#define _XEN_X86_EMU_RTC            3
> +#define XEN_X86_EMU_RTC             (1U<<_XEN_X86_EMU_RTC)
> +#define _XEN_X86_EMU_IOAPIC         4
> +#define XEN_X86_EMU_IOAPIC          (1U<<_XEN_X86_EMU_IOAPIC)
> +#define _XEN_X86_EMU_PIC            5
> +#define XEN_X86_EMU_PIC             (1U<<_XEN_X86_EMU_PIC)
> +#define _XEN_X86_EMU_VGA            6
> +#define XEN_X86_EMU_VGA             (1U<<_XEN_X86_EMU_VGA)
> +#define _XEN_X86_EMU_IOMMU          7
> +#define XEN_X86_EMU_IOMMU           (1U<<_XEN_X86_EMU_IOMMU)
> +#define _XEN_X86_EMU_PIT            8
> +#define XEN_X86_EMU_PIT             (1U<<_XEN_X86_EMU_PIT)
> +#define _XEN_X86_EMU_USE_PIRQ       9
> +#define XEN_X86_EMU_USE_PIRQ        (1U<<_XEN_X86_EMU_USE_PIRQ)
> +
> +#define XEN_X86_EMU_ALL             (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET |  \
> +                                     XEN_X86_EMU_PM | XEN_X86_EMU_RTC |      \
> +                                     XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC |  \
> +                                     XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU |   \
> +                                     XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ)
> +    uint32_t emulation_flags;
> +};
> +
> +/* Location of online VCPU bitmap. */
> +#define XEN_ACPI_CPU_MAP             0xaf00
> +#define XEN_ACPI_CPU_MAP_LEN         ((HVM_MAX_VCPUS + 7) / 8)
> +
> +/* GPE0 bit set during CPU hotplug */
> +#define XEN_ACPI_GPE0_CPUHP_BIT      2
> +#endif
> +
>  #endif /* !__ASSEMBLY__ */
>  
>  /*
> diff --git a/include/xen/domctl.h b/include/xen/domctl.h
> index 57e2ed7..8853445 100644
> --- a/include/xen/domctl.h
> +++ b/include/xen/domctl.h
> @@ -33,11 +33,12 @@
>  #endif
>  
>  #include "xen.h"
> +#include "event_channel.h"
>  #include "grant_table.h"
>  #include "hvm/save.h"
>  #include "memory.h"
>  
> -#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
> +#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000e
>  
>  /*
>   * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
> @@ -60,26 +61,12 @@ struct xen_domctl_createdomain {
>   /* Disable out-of-sync shadow page tables? */
>  #define _XEN_DOMCTL_CDF_oos_off       3
>  #define XEN_DOMCTL_CDF_oos_off        (1U<<_XEN_DOMCTL_CDF_oos_off)
> - /* Is this a PVH guest (as opposed to an HVM or PV guest)? */
> -#define _XEN_DOMCTL_CDF_pvh_guest     4
> -#define XEN_DOMCTL_CDF_pvh_guest      (1U<<_XEN_DOMCTL_CDF_pvh_guest)
> + /* Is this a xenstore domain? */
> +#define _XEN_DOMCTL_CDF_xs_domain     4
> +#define XEN_DOMCTL_CDF_xs_domain      (1U<<_XEN_DOMCTL_CDF_xs_domain)
>      uint32_t flags;
> +    struct xen_arch_domainconfig config;
>  };
> -typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
> -
> -#if defined(__arm__) || defined(__aarch64__)
> -#define XEN_DOMCTL_CONFIG_GIC_DEFAULT   0
> -#define XEN_DOMCTL_CONFIG_GIC_V2        1
> -#define XEN_DOMCTL_CONFIG_GIC_V3        2
> -/* XEN_DOMCTL_configure_domain */
> -struct xen_domctl_arm_configuredomain {
> -    /* IN/OUT parameters */
> -    uint8_t gic_version;
> -};
> -typedef struct xen_domctl_arm_configuredomain xen_domctl_arm_configuredomain_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_arm_configuredomain_t);
> -#endif
>  
>  /* XEN_DOMCTL_getdomaininfo */
>  struct xen_domctl_getdomaininfo {
> @@ -106,9 +93,12 @@ struct xen_domctl_getdomaininfo {
>   /* Being debugged.  */
>  #define _XEN_DOMINF_debugged  6
>  #define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
> -/* domain is PVH */
> -#define _XEN_DOMINF_pvh_guest 7
> -#define XEN_DOMINF_pvh_guest  (1U<<_XEN_DOMINF_pvh_guest)
> +/* domain is a xenstore domain */
> +#define _XEN_DOMINF_xs_domain 7
> +#define XEN_DOMINF_xs_domain  (1U<<_XEN_DOMINF_xs_domain)
> +/* domain has hardware assisted paging */
> +#define _XEN_DOMINF_hap       8
> +#define XEN_DOMINF_hap        (1U<<_XEN_DOMINF_hap)
>   /* XEN_DOMINF_shutdown guest-supplied code.  */
>  #define XEN_DOMINF_shutdownmask 255
>  #define XEN_DOMINF_shutdownshift 16
> @@ -142,8 +132,6 @@ struct xen_domctl_getmemlist {
>      /* OUT variables. */
>      uint64_aligned_t num_pfns;
>  };
> -typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
>  
>  
>  /* XEN_DOMCTL_getpageframeinfo */
> @@ -161,27 +149,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
>  #define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
>  #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
>  
> -struct xen_domctl_getpageframeinfo {
> -    /* IN variables. */
> -    uint64_aligned_t gmfn; /* GMFN to query */
> -    /* OUT variables. */
> -    /* Is the page PINNED to a type? */
> -    uint32_t type;         /* see above type defs */
> -};
> -typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
> -
> -
> -/* XEN_DOMCTL_getpageframeinfo2 */
> -struct xen_domctl_getpageframeinfo2 {
> -    /* IN variables. */
> -    uint64_aligned_t num;
> -    /* IN/OUT variables. */
> -    XEN_GUEST_HANDLE_64(uint32) array;
> -};
> -typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
> -
>  /* XEN_DOMCTL_getpageframeinfo3 */
>  struct xen_domctl_getpageframeinfo3 {
>      /* IN variables. */
> @@ -217,8 +184,11 @@ struct xen_domctl_getpageframeinfo3 {
>  #define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST       1
>   /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
>  #define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY   2
> - /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
> + /*
> +  * No longer supported, was equiv. to ENABLE with mode flags
> +  * ENABLE_REFCOUNT and ENABLE_TRANSLATE:
>  #define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE  3
> +  */
>  
>  /* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
>   /*
> @@ -241,19 +211,25 @@ struct xen_domctl_getpageframeinfo3 {
>    */
>  #define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL  (1 << 4)
>  
> +/* Mode flags for XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK}. */
> + /*
> +  * This is the final iteration: Requesting to include pages mapped
> +  * writably by the hypervisor in the dirty bitmap.
> +  */
> +#define XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL   (1 << 0)
> +
>  struct xen_domctl_shadow_op_stats {
>      uint32_t fault_count;
>      uint32_t dirty_count;
>  };
> -typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
>  
>  struct xen_domctl_shadow_op {
>      /* IN variables. */
>      uint32_t       op;       /* XEN_DOMCTL_SHADOW_OP_* */
>  
> -    /* OP_ENABLE */
> -    uint32_t       mode;     /* XEN_DOMCTL_SHADOW_ENABLE_* */
> +    /* OP_ENABLE: XEN_DOMCTL_SHADOW_ENABLE_* */
> +    /* OP_PEAK / OP_CLEAN: XEN_DOMCTL_SHADOW_LOGDIRTY_* */
> +    uint32_t       mode;
>  
>      /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
>      uint32_t       mb;       /* Shadow memory allocation in MB */
> @@ -263,8 +239,6 @@ struct xen_domctl_shadow_op {
>      uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
>      struct xen_domctl_shadow_op_stats stats;
>  };
> -typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
>  
>  
>  /* XEN_DOMCTL_max_mem */
> @@ -272,8 +246,6 @@ struct xen_domctl_max_mem {
>      /* IN variables. */
>      uint64_aligned_t max_memkb;
>  };
> -typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
>  
>  
>  /* XEN_DOMCTL_setvcpucontext */
> @@ -282,8 +254,6 @@ struct xen_domctl_vcpucontext {
>      uint32_t              vcpu;                  /* IN */
>      XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
>  };
> -typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
>  
>  
>  /* XEN_DOMCTL_getvcpuinfo */
> @@ -297,8 +267,6 @@ struct xen_domctl_getvcpuinfo {
>      uint64_aligned_t cpu_time;        /* total cpu time consumed (ns) */
>      uint32_t cpu;                     /* current mapping   */
>  };
> -typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
>  
>  
>  /* Get/set the NUMA node(s) with which the guest has affinity with. */
> @@ -307,8 +275,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
>  struct xen_domctl_nodeaffinity {
>      struct xenctl_bitmap nodemap;/* IN */
>  };
> -typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
>  
>  
>  /* Get/set which physical cpus a vcpu can execute on. */
> @@ -323,6 +289,9 @@ struct xen_domctl_vcpuaffinity {
>   /* Set/get the soft affinity for vcpu */
>  #define _XEN_VCPUAFFINITY_SOFT  1
>  #define XEN_VCPUAFFINITY_SOFT   (1U<<_XEN_VCPUAFFINITY_SOFT)
> + /* Undo SCHEDOP_pin_override */
> +#define _XEN_VCPUAFFINITY_FORCE 2
> +#define XEN_VCPUAFFINITY_FORCE  (1U<<_XEN_VCPUAFFINITY_FORCE)
>      uint32_t flags;
>      /*
>       * IN/OUT variables.
> @@ -343,71 +312,95 @@ struct xen_domctl_vcpuaffinity {
>      struct xenctl_bitmap cpumap_hard;
>      struct xenctl_bitmap cpumap_soft;
>  };
> -typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
>  
>  
>  /* XEN_DOMCTL_max_vcpus */
>  struct xen_domctl_max_vcpus {
>      uint32_t max;           /* maximum number of vcpus */
>  };
> -typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
>  
>  
>  /* XEN_DOMCTL_scheduler_op */
>  /* Scheduler types. */
> -#define XEN_SCHEDULER_SEDF     4
> +/* #define XEN_SCHEDULER_SEDF  4 (Removed) */
>  #define XEN_SCHEDULER_CREDIT   5
>  #define XEN_SCHEDULER_CREDIT2  6
>  #define XEN_SCHEDULER_ARINC653 7
>  #define XEN_SCHEDULER_RTDS     8
> +#define XEN_SCHEDULER_NULL     9
> +
> +struct xen_domctl_sched_credit {
> +    uint16_t weight;
> +    uint16_t cap;
> +};
> +
> +struct xen_domctl_sched_credit2 {
> +    uint16_t weight;
> +    uint16_t cap;
> +};
> +
> +struct xen_domctl_sched_rtds {
> +    uint32_t period;
> +    uint32_t budget;
> +};
> +
> +typedef struct xen_domctl_schedparam_vcpu {
> +    union {
> +        struct xen_domctl_sched_credit credit;
> +        struct xen_domctl_sched_credit2 credit2;
> +        struct xen_domctl_sched_rtds rtds;
> +    } u;
> +    uint32_t vcpuid;
> +} xen_domctl_schedparam_vcpu_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_schedparam_vcpu_t);
>  
> -/* Set or get info? */
> +/*
> + * Set or get info?
> + * For schedulers supporting per-vcpu settings (e.g., RTDS):
> + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
> + *  XEN_DOMCTL_SCHEDOP_getinfo gets default params;
> + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo sets (gets) params of vcpus;
> + *
> + * For schedulers not supporting per-vcpu settings:
> + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
> + *  XEN_DOMCTL_SCHEDOP_getinfo gets domain-wise params;
> + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo returns error;
> + */
>  #define XEN_DOMCTL_SCHEDOP_putinfo 0
>  #define XEN_DOMCTL_SCHEDOP_getinfo 1
> +#define XEN_DOMCTL_SCHEDOP_putvcpuinfo 2
> +#define XEN_DOMCTL_SCHEDOP_getvcpuinfo 3
>  struct xen_domctl_scheduler_op {
>      uint32_t sched_id;  /* XEN_SCHEDULER_* */
>      uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */
> +    /* IN/OUT */
>      union {
> -        struct xen_domctl_sched_sedf {
> -            uint64_aligned_t period;
> -            uint64_aligned_t slice;
> -            uint64_aligned_t latency;
> -            uint32_t extratime;
> -            uint32_t weight;
> -        } sedf;
> -        struct xen_domctl_sched_credit {
> -            uint16_t weight;
> -            uint16_t cap;
> -        } credit;
> -        struct xen_domctl_sched_credit2 {
> -            uint16_t weight;
> -        } credit2;
> -        struct xen_domctl_sched_rtds {
> -            uint32_t period;
> -            uint32_t budget;
> -        } rtds;
> +        struct xen_domctl_sched_credit credit;
> +        struct xen_domctl_sched_credit2 credit2;
> +        struct xen_domctl_sched_rtds rtds;
> +        struct {
> +            XEN_GUEST_HANDLE_64(xen_domctl_schedparam_vcpu_t) vcpus;
> +            /*
> +             * IN: Number of elements in vcpus array.
> +             * OUT: Number of processed elements of vcpus array.
> +             */
> +            uint32_t nr_vcpus;
> +            uint32_t padding;
> +        } v;
>      } u;
>  };
> -typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
>  
>  
>  /* XEN_DOMCTL_setdomainhandle */
>  struct xen_domctl_setdomainhandle {
>      xen_domain_handle_t handle;
>  };
> -typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
>  
>  
>  /* XEN_DOMCTL_setdebugging */
>  struct xen_domctl_setdebugging {
>      uint8_t enable;
>  };
> -typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
>  
>  
>  /* XEN_DOMCTL_irq_permission */
> @@ -415,8 +408,6 @@ struct xen_domctl_irq_permission {
>      uint8_t pirq;
>      uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
>  };
> -typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
>  
>  
>  /* XEN_DOMCTL_iomem_permission */
> @@ -425,8 +416,6 @@ struct xen_domctl_iomem_permission {
>      uint64_aligned_t nr_mfns;  /* number of pages in range (>0) */
>      uint8_t  allow_access;     /* allow (!0) or deny (0) access to range? */
>  };
> -typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
>  
>  
>  /* XEN_DOMCTL_ioport_permission */
> @@ -435,42 +424,34 @@ struct xen_domctl_ioport_permission {
>      uint32_t nr_ports;                /* size of port range */
>      uint8_t  allow_access;            /* allow or deny access to range? */
>  };
> -typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
>  
>  
>  /* XEN_DOMCTL_hypercall_init */
>  struct xen_domctl_hypercall_init {
>      uint64_aligned_t  gmfn;           /* GMFN to be initialised */
>  };
> -typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
>  
>  
>  /* XEN_DOMCTL_settimeoffset */
>  struct xen_domctl_settimeoffset {
> -    int32_t  time_offset_seconds; /* applied to domain wallclock time */
> +    int64_aligned_t time_offset_seconds; /* applied to domain wallclock time */
>  };
> -typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
>  
>  /* XEN_DOMCTL_gethvmcontext */
>  /* XEN_DOMCTL_sethvmcontext */
> -typedef struct xen_domctl_hvmcontext {
> +struct xen_domctl_hvmcontext {
>      uint32_t size; /* IN/OUT: size of buffer / bytes filled */
>      XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
>                                          * gethvmcontext with NULL
>                                          * buffer to get size req'd */
> -} xen_domctl_hvmcontext_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
> +};
>  
>  
>  /* XEN_DOMCTL_set_address_size */
>  /* XEN_DOMCTL_get_address_size */
> -typedef struct xen_domctl_address_size {
> +struct xen_domctl_address_size {
>      uint32_t size;
> -} xen_domctl_address_size_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
> +};
>  
>  
>  /* XEN_DOMCTL_sendtrigger */
> @@ -483,19 +464,40 @@ struct xen_domctl_sendtrigger {
>      uint32_t  trigger;  /* IN */
>      uint32_t  vcpu;     /* IN */
>  };
> -typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
>  
>  
> -/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
> +/* Assign a device to a guest. Sets up IOMMU structures. */
>  /* XEN_DOMCTL_assign_device */
> -/* XEN_DOMCTL_test_assign_device */
> -/* XEN_DOMCTL_deassign_device */
> +/*
> + * XEN_DOMCTL_test_assign_device: Pass DOMID_INVALID to find out whether the
> + * given device is assigned to any DomU at all. Pass a specific domain ID to
> + * find out whether the given device can be assigned to that domain.
> + */
> +/*
> + * XEN_DOMCTL_deassign_device: The behavior of this DOMCTL differs
> + * between the different type of device:
> + *  - PCI device (XEN_DOMCTL_DEV_PCI) will be reassigned to DOM0
> + *  - DT device (XEN_DOMCTL_DEV_DT) will left unassigned. DOM0
> + *  will have to call XEN_DOMCTL_assign_device in order to use the
> + *  device.
> + */
> +#define XEN_DOMCTL_DEV_PCI      0
> +#define XEN_DOMCTL_DEV_DT       1
>  struct xen_domctl_assign_device {
> -    uint32_t  machine_sbdf;   /* machine PCI ID of assigned device */
> +    /* IN */
> +    uint32_t dev;   /* XEN_DOMCTL_DEV_* */
> +    uint32_t flags;
> +#define XEN_DOMCTL_DEV_RDM_RELAXED      1 /* assign only */
> +    union {
> +        struct {
> +            uint32_t machine_sbdf;   /* machine PCI ID of assigned device */
> +        } pci;
> +        struct {
> +            uint32_t size; /* Length of the path */
> +            XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> +        } dt;
> +    } u;
>  };
> -typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
>  
>  /* Retrieve sibling devices infomation of machine_sbdf */
>  /* XEN_DOMCTL_get_device_group */
> @@ -505,22 +507,20 @@ struct xen_domctl_get_device_group {
>      uint32_t  num_sdevs;        /* OUT */
>      XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
>  };
> -typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
>  
>  /* Pass-through interrupts: bind real irq -> hvm devfn. */
>  /* XEN_DOMCTL_bind_pt_irq */
>  /* XEN_DOMCTL_unbind_pt_irq */
> -typedef enum pt_irq_type_e {
> +enum pt_irq_type {
>      PT_IRQ_TYPE_PCI,
>      PT_IRQ_TYPE_ISA,
>      PT_IRQ_TYPE_MSI,
>      PT_IRQ_TYPE_MSI_TRANSLATE,
> -} pt_irq_type_t;
> +    PT_IRQ_TYPE_SPI,    /* ARM: valid range 32-1019 */
> +};
>  struct xen_domctl_bind_pt_irq {
>      uint32_t machine_irq;
> -    pt_irq_type_t irq_type;
> -    uint32_t hvm_domid;
> +    uint32_t irq_type; /* enum pt_irq_type */
>  
>      union {
>          struct {
> @@ -534,16 +534,32 @@ struct xen_domctl_bind_pt_irq {
>          struct {
>              uint8_t gvec;
>              uint32_t gflags;
> +#define XEN_DOMCTL_VMSI_X86_DEST_ID_MASK 0x0000ff
> +#define XEN_DOMCTL_VMSI_X86_RH_MASK      0x000100
> +#define XEN_DOMCTL_VMSI_X86_DM_MASK      0x000200
> +#define XEN_DOMCTL_VMSI_X86_DELIV_MASK   0x007000
> +#define XEN_DOMCTL_VMSI_X86_TRIG_MASK    0x008000
> +#define XEN_DOMCTL_VMSI_X86_UNMASKED     0x010000
> +
>              uint64_aligned_t gtable;
>          } msi;
> +        struct {
> +            uint16_t spi;
> +        } spi;
>      } u;
>  };
> -typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
>  
>  
>  /* Bind machine I/O address range -> HVM address range. */
>  /* XEN_DOMCTL_memory_mapping */
> +/* Returns
> +   - zero     success, everything done
> +   - -E2BIG   passed in nr_mfns value too large for the implementation
> +   - positive partial success for the first <result> page frames (with
> +              <result> less than nr_mfns), requiring re-invocation by the
> +              caller after updating inputs
> +   - negative error; other than -E2BIG
> +*/
>  #define DPCI_ADD_MAPPING         1
>  #define DPCI_REMOVE_MAPPING      0
>  struct xen_domctl_memory_mapping {
> @@ -553,8 +569,6 @@ struct xen_domctl_memory_mapping {
>      uint32_t add_mapping;       /* add or remove mapping */
>      uint32_t padding;           /* padding for 64-bit aligned structure */
>  };
> -typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
>  
>  
>  /* Bind machine I/O port range -> HVM I/O port range. */
> @@ -565,8 +579,6 @@ struct xen_domctl_ioport_mapping {
>      uint32_t nr_ports;        /* size of port range */
>      uint32_t add_mapping;     /* add or remove mapping */
>  };
> -typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
>  
>  
>  /*
> @@ -585,8 +597,6 @@ struct xen_domctl_pin_mem_cacheattr {
>      uint64_aligned_t start, end;
>      uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
>  };
> -typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
>  
>  
>  /* XEN_DOMCTL_set_ext_vcpucontext */
> @@ -618,8 +628,6 @@ struct xen_domctl_ext_vcpucontext {
>  #endif
>  #endif
>  };
> -typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
>  
>  /*
>   * Set the target domain for a domain
> @@ -628,8 +636,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
>  struct xen_domctl_set_target {
>      domid_t target;
>  };
> -typedef struct xen_domctl_set_target xen_domctl_set_target_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
>  
>  #if defined(__i386__) || defined(__x86_64__)
>  # define XEN_CPUID_INPUT_UNUSED  0xFFFFFFFF
> @@ -641,8 +647,6 @@ struct xen_domctl_cpuid {
>    uint32_t ecx;
>    uint32_t edx;
>  };
> -typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
>  #endif
>  
>  /*
> @@ -665,8 +669,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
>  struct xen_domctl_subscribe {
>      uint32_t port; /* IN */
>  };
> -typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
>  
>  /*
>   * Define the maximum machine address size which should be allocated
> @@ -687,41 +689,34 @@ struct xen_domctl_debug_op {
>      uint32_t op;   /* IN */
>      uint32_t vcpu; /* IN */
>  };
> -typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
>  
>  /*
>   * Request a particular record from the HVM context
>   */
>  /* XEN_DOMCTL_gethvmcontext_partial */
> -typedef struct xen_domctl_hvmcontext_partial {
> +struct xen_domctl_hvmcontext_partial {
>      uint32_t type;                      /* IN: Type of record required */
>      uint32_t instance;                  /* IN: Instance of that type */
> +    uint64_aligned_t bufsz;             /* IN: size of buffer */
>      XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
> -} xen_domctl_hvmcontext_partial_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
> +};
>  
>  /* XEN_DOMCTL_disable_migrate */
> -typedef struct xen_domctl_disable_migrate {
> +struct xen_domctl_disable_migrate {
>      uint32_t disable; /* IN: 1: disable migration and restore */
> -} xen_domctl_disable_migrate_t;
> +};
>  
>  
>  /* XEN_DOMCTL_gettscinfo */
>  /* XEN_DOMCTL_settscinfo */
> -struct xen_guest_tsc_info {
> +struct xen_domctl_tsc_info {
> +    /* IN/OUT */
>      uint32_t tsc_mode;
>      uint32_t gtsc_khz;
>      uint32_t incarnation;
>      uint32_t pad;
>      uint64_aligned_t elapsed_nsec;
>  };
> -typedef struct xen_guest_tsc_info xen_guest_tsc_info_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t);
> -typedef struct xen_domctl_tsc_info {
> -    XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */
> -    xen_guest_tsc_info_t info; /* IN */
> -} xen_domctl_tsc_info_t;
>  
>  /* XEN_DOMCTL_gdbsx_guestmemio      guest mem io */
>  struct xen_domctl_gdbsx_memio {
> @@ -750,10 +745,21 @@ struct xen_domctl_gdbsx_domstatus {
>  };
>  
>  /*
> - * Memory event operations
> + * VM event operations
>   */
>  
> -/* XEN_DOMCTL_mem_event_op */
> +/* XEN_DOMCTL_vm_event_op */
> +
> +/*
> + * There are currently three rings available for VM events:
> + * sharing, monitor and paging. This hypercall allows one to
> + * control these rings (enable/disable), as well as to signal
> + * to the hypervisor to pull responses (resume) from the given
> + * ring.
> + */
> +#define XEN_VM_EVENT_ENABLE               0
> +#define XEN_VM_EVENT_DISABLE              1
> +#define XEN_VM_EVENT_RESUME               2
>  
>  /*
>   * Domain memory paging
> @@ -762,42 +768,38 @@ struct xen_domctl_gdbsx_domstatus {
>   * pager<->hypervisor interface. Use XENMEM_paging_op*
>   * to perform per-page operations.
>   *
> - * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several
> + * The XEN_VM_EVENT_PAGING_ENABLE domctl returns several
>   * non-standard error codes to indicate why paging could not be enabled:
>   * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
>   * EMLINK - guest has iommu passthrough enabled
>   * EXDEV  - guest has PoD enabled
>   * EBUSY  - guest has or had paging enabled, ring buffer still active
>   */
> -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING            1
> -
> -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE     0
> -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE    1
> +#define XEN_DOMCTL_VM_EVENT_OP_PAGING            1
>  
>  /*
> - * Access permissions.
> + * Monitor helper.
>   *
>   * As with paging, use the domctl for teardown/setup of the
>   * helper<->hypervisor interface.
>   *
> - * There are HVM hypercalls to set the per-page access permissions of every
> - * page in a domain.  When one of these permissions--independent, read, 
> - * write, and execute--is violated, the VCPU is paused and a memory event 
> - * is sent with what happened.  (See public/mem_event.h) .
> + * The monitor interface can be used to register for various VM events. For
> + * example, there are HVM hypercalls to set the per-page access permissions
> + * of every page in a domain.  When one of these permissions--independent,
> + * read, write, and execute--is violated, the VCPU is paused and a memory event
> + * is sent with what happened. The memory event handler can then resume the
> + * VCPU and redo the access with a XEN_VM_EVENT_RESUME option.
>   *
> - * The memory event handler can then resume the VCPU and redo the access 
> - * with a XENMEM_access_op_resume hypercall.
> + * See public/vm_event.h for the list of available events that can be
> + * subscribed to via the monitor interface.
>   *
> - * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several
> + * The XEN_VM_EVENT_MONITOR_* domctls returns
>   * non-standard error codes to indicate why access could not be enabled:
>   * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
>   * EBUSY  - guest has or had access enabled, ring buffer still active
> + *
>   */
> -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS                        2
> -
> -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE                 0
> -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE                1
> -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION   2
> +#define XEN_DOMCTL_VM_EVENT_OP_MONITOR           2
>  
>  /*
>   * Sharing ENOMEM helper.
> @@ -812,21 +814,16 @@ struct xen_domctl_gdbsx_domstatus {
>   * Note that shring can be turned on (as per the domctl below)
>   * *without* this ring being setup.
>   */
> -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING           3
> -
> -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE    0
> -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE   1
> +#define XEN_DOMCTL_VM_EVENT_OP_SHARING           3
>  
>  /* Use for teardown/setup of helper<->hypervisor interface for paging, 
>   * access and sharing.*/
> -struct xen_domctl_mem_event_op {
> -    uint32_t       op;           /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
> -    uint32_t       mode;         /* XEN_DOMCTL_MEM_EVENT_OP_* */
> +struct xen_domctl_vm_event_op {
> +    uint32_t       op;           /* XEN_VM_EVENT_* */
> +    uint32_t       mode;         /* XEN_DOMCTL_VM_EVENT_OP_* */
>  
>      uint32_t port;              /* OUT: event channel for ring */
>  };
> -typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
>  
>  /*
>   * Memory sharing operations
> @@ -842,8 +839,6 @@ struct xen_domctl_mem_sharing_op {
>          uint8_t enable;                   /* CONTROL */
>      } u;
>  };
> -typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
>  
>  struct xen_domctl_audit_p2m {
>      /* OUT error counts */
> @@ -851,14 +846,10 @@ struct xen_domctl_audit_p2m {
>      uint64_t m2p_bad;
>      uint64_t p2m_bad;
>  };
> -typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t);
>  
>  struct xen_domctl_set_virq_handler {
>      uint32_t virq; /* IN */
>  };
> -typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t);
>  
>  #if defined(__i386__) || defined(__x86_64__)
>  /* XEN_DOMCTL_setvcpuextstate */
> @@ -881,8 +872,6 @@ struct xen_domctl_vcpuextstate {
>      uint64_aligned_t         size;
>      XEN_GUEST_HANDLE_64(uint64) buffer;
>  };
> -typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
>  #endif
>  
>  /* XEN_DOMCTL_set_access_required: sets whether a memory event listener
> @@ -892,14 +881,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
>  struct xen_domctl_set_access_required {
>      uint8_t access_required;
>  };
> -typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
>  
>  struct xen_domctl_set_broken_page_p2m {
>      uint64_aligned_t pfn;
>  };
> -typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>  
>  /*
>   * XEN_DOMCTL_set_max_evtchn: sets the maximum event channel port
> @@ -909,8 +894,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>  struct xen_domctl_set_max_evtchn {
>      uint32_t max_port;
>  };
> -typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
>  
>  /*
>   * ARM: Clean and invalidate caches associated with given region of
> @@ -920,8 +903,6 @@ struct xen_domctl_cacheflush {
>      /* IN: page range to flush. */
>      xen_pfn_t start_pfn, nr_pfns;
>  };
> -typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
>  
>  #if defined(__i386__) || defined(__x86_64__)
>  struct xen_domctl_vcpu_msr {
> @@ -954,36 +935,42 @@ struct xen_domctl_vcpu_msrs {
>      uint32_t msr_count;                              /* IN/OUT */
>      XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */
>  };
> -typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
>  #endif
>  
> -/*
> - * Use in XEN_DOMCTL_setvnumainfo to set
> - * vNUMA domain topology.
> - */
> +/* XEN_DOMCTL_setvnumainfo: specifies a virtual NUMA topology for the guest */
>  struct xen_domctl_vnuma {
> +    /* IN: number of vNUMA nodes to setup. Shall be greater than 0 */
>      uint32_t nr_vnodes;
> +    /* IN: number of memory ranges to setup */
>      uint32_t nr_vmemranges;
> +    /*
> +     * IN: number of vCPUs of the domain (used as size of the vcpu_to_vnode
> +     * array declared below). Shall be equal to the domain's max_vcpus.
> +     */
>      uint32_t nr_vcpus;
> -    uint32_t pad;
> +    uint32_t pad;                                  /* must be zero */
> +
> +    /*
> +     * IN: array for specifying the distances of the vNUMA nodes
> +     * between each others. Shall have nr_vnodes*nr_vnodes elements.
> +     */
>      XEN_GUEST_HANDLE_64(uint) vdistance;
> +    /*
> +     * IN: array for specifying to what vNUMA node each vCPU belongs.
> +     * Shall have nr_vcpus elements.
> +     */
>      XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> -
>      /*
> -     * vnodes to physical NUMA nodes mask.
> -     * This kept on per-domain basis for
> -     * interested consumers, such as numa aware ballooning.
> +     * IN: array for specifying on what physical NUMA node each vNUMA
> +     * node is placed. Shall have nr_vnodes elements.
>       */
>      XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
> -
>      /*
> -     * memory rages for each vNUMA node
> +     * IN: array for specifying the memory ranges. Shall have
> +     * nr_vmemranges elements.
>       */
>      XEN_GUEST_HANDLE_64(xen_vmemrange_t) vmemrange;
>  };
> -typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>  
>  struct xen_domctl_psr_cmt_op {
>  #define XEN_DOMCTL_PSR_CMT_OP_DETACH         0
> @@ -992,8 +979,127 @@ struct xen_domctl_psr_cmt_op {
>      uint32_t cmd;
>      uint32_t data;
>  };
> -typedef struct xen_domctl_psr_cmt_op xen_domctl_psr_cmt_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
> +
> +/*  XEN_DOMCTL_MONITOR_*
> + *
> + * Enable/disable monitoring various VM events.
> + * This domctl configures what events will be reported to helper apps
> + * via the ring buffer "MONITOR". The ring has to be first enabled
> + * with the domctl XEN_DOMCTL_VM_EVENT_OP_MONITOR.
> + *
> + * GET_CAPABILITIES can be used to determine which of these features is
> + * available on a given platform.
> + *
> + * NOTICE: mem_access events are also delivered via the "MONITOR" ring buffer;
> + * however, enabling/disabling those events is performed with the use of
> + * memory_op hypercalls!
> + */
> +#define XEN_DOMCTL_MONITOR_OP_ENABLE            0
> +#define XEN_DOMCTL_MONITOR_OP_DISABLE           1
> +#define XEN_DOMCTL_MONITOR_OP_GET_CAPABILITIES  2
> +#define XEN_DOMCTL_MONITOR_OP_EMULATE_EACH_REP  3
> +
> +#define XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG         0
> +#define XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR            1
> +#define XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP            2
> +#define XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT   3
> +#define XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST         4
> +#define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       5
> +#define XEN_DOMCTL_MONITOR_EVENT_CPUID                 6
> +#define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       7
> +#define XEN_DOMCTL_MONITOR_EVENT_INTERRUPT             8
> +#define XEN_DOMCTL_MONITOR_EVENT_DESC_ACCESS           9
> +#define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
> +
> +struct xen_domctl_monitor_op {
> +    uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> +
> +    /*
> +     * When used with ENABLE/DISABLE this has to be set to
> +     * the requested XEN_DOMCTL_MONITOR_EVENT_* value.
> +     * With GET_CAPABILITIES this field returns a bitmap of
> +     * events supported by the platform, in the format
> +     * (1 << XEN_DOMCTL_MONITOR_EVENT_*).
> +     */
> +    uint32_t event;
> +
> +    /*
> +     * Further options when issuing XEN_DOMCTL_MONITOR_OP_ENABLE.
> +     */
> +    union {
> +        struct {
> +            /* Which control register */
> +            uint8_t index;
> +            /* Pause vCPU until response */
> +            uint8_t sync;
> +            /* Send event only on a change of value */
> +            uint8_t onchangeonly;
> +            /* Allignment padding */
> +            uint8_t pad1;
> +            uint32_t pad2;
> +            /*
> +             * Send event only if the changed bit in the control register
> +             * is not masked.
> +             */
> +            uint64_aligned_t bitmask;
> +        } mov_to_cr;
> +
> +        struct {
> +            uint32_t msr;
> +        } mov_to_msr;
> +
> +        struct {
> +            /* Pause vCPU until response */
> +            uint8_t sync;
> +            uint8_t allow_userspace;
> +        } guest_request;
> +
> +        struct {
> +            /* Pause vCPU until response */
> +            uint8_t sync;
> +        } debug_exception;
> +    } u;
> +};
> +
> +struct xen_domctl_psr_cat_op {
> +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM     0
> +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM     1
> +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CODE    2
> +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_DATA    3
> +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CODE    4
> +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_DATA    5
> +#define XEN_DOMCTL_PSR_CAT_OP_SET_L2_CBM     6
> +#define XEN_DOMCTL_PSR_CAT_OP_GET_L2_CBM     7
> +    uint32_t cmd;       /* IN: XEN_DOMCTL_PSR_CAT_OP_* */
> +    uint32_t target;    /* IN */
> +    uint64_t data;      /* IN/OUT */
> +};
> +
> +struct xen_domctl_set_gnttab_limits {
> +    uint32_t grant_frames;     /* IN */
> +    uint32_t maptrack_frames;  /* IN */
> +};
> +
> +/* XEN_DOMCTL_vuart_op */
> +struct xen_domctl_vuart_op {
> +#define XEN_DOMCTL_VUART_OP_INIT  0
> +        uint32_t cmd;           /* XEN_DOMCTL_VUART_OP_* */
> +#define XEN_DOMCTL_VUART_TYPE_VPL011 0
> +        uint32_t type;          /* IN - type of vuart.
> +                                 *      Currently only vpl011 supported.
> +                                 */
> +        uint64_aligned_t  gfn;  /* IN - guest gfn to be used as a
> +                                 *      ring buffer.
> +                                 */
> +        domid_t console_domid;  /* IN - domid of domain running the
> +                                 *      backend console.
> +                                 */
> +        uint8_t pad[2];
> +        evtchn_port_t evtchn;   /* OUT - remote port of the event
> +                                 *       channel used for sending
> +                                 *       ring buffer events.
> +                                 */
> +};
>  
>  struct xen_domctl {
>      uint32_t cmd;
> @@ -1003,8 +1109,8 @@ struct xen_domctl {
>  #define XEN_DOMCTL_unpausedomain                  4
>  #define XEN_DOMCTL_getdomaininfo                  5
>  #define XEN_DOMCTL_getmemlist                     6
> -#define XEN_DOMCTL_getpageframeinfo               7
> -#define XEN_DOMCTL_getpageframeinfo2              8
> +/* #define XEN_DOMCTL_getpageframeinfo            7 Obsolete - use getpageframeinfo3 */
> +/* #define XEN_DOMCTL_getpageframeinfo2           8 Obsolete - use getpageframeinfo3 */
>  #define XEN_DOMCTL_setvcpuaffinity                9
>  #define XEN_DOMCTL_shadow_op                     10
>  #define XEN_DOMCTL_max_mem                       11
> @@ -1049,7 +1155,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_suppress_spurious_page_faults 53
>  #define XEN_DOMCTL_debug_op                      54
>  #define XEN_DOMCTL_gethvmcontext_partial         55
> -#define XEN_DOMCTL_mem_event_op                  56
> +#define XEN_DOMCTL_vm_event_op                   56
>  #define XEN_DOMCTL_mem_sharing_op                57
>  #define XEN_DOMCTL_disable_migrate               58
>  #define XEN_DOMCTL_gettscinfo                    59
> @@ -1069,7 +1175,11 @@ struct xen_domctl {
>  #define XEN_DOMCTL_set_vcpu_msrs                 73
>  #define XEN_DOMCTL_setvnumainfo                  74
>  #define XEN_DOMCTL_psr_cmt_op                    75
> -#define XEN_DOMCTL_arm_configure_domain          76
> +#define XEN_DOMCTL_monitor_op                    77
> +#define XEN_DOMCTL_psr_cat_op                    78
> +#define XEN_DOMCTL_soft_reset                    79
> +#define XEN_DOMCTL_set_gnttab_limits             80
> +#define XEN_DOMCTL_vuart_op                      81
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1078,13 +1188,8 @@ struct xen_domctl {
>      domid_t  domain;
>      union {
>          struct xen_domctl_createdomain      createdomain;
> -#if defined(__arm__) || defined(__aarch64__)
> -        struct xen_domctl_arm_configuredomain configuredomain;
> -#endif
>          struct xen_domctl_getdomaininfo     getdomaininfo;
>          struct xen_domctl_getmemlist        getmemlist;
> -        struct xen_domctl_getpageframeinfo  getpageframeinfo;
> -        struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
>          struct xen_domctl_getpageframeinfo3 getpageframeinfo3;
>          struct xen_domctl_nodeaffinity      nodeaffinity;
>          struct xen_domctl_vcpuaffinity      vcpuaffinity;
> @@ -1117,7 +1222,7 @@ struct xen_domctl {
>          struct xen_domctl_set_target        set_target;
>          struct xen_domctl_subscribe         subscribe;
>          struct xen_domctl_debug_op          debug_op;
> -        struct xen_domctl_mem_event_op      mem_event_op;
> +        struct xen_domctl_vm_event_op       vm_event_op;
>          struct xen_domctl_mem_sharing_op    mem_sharing_op;
>  #if defined(__i386__) || defined(__x86_64__)
>          struct xen_domctl_cpuid             cpuid;
> @@ -1135,6 +1240,10 @@ struct xen_domctl {
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          struct xen_domctl_vnuma             vnuma;
>          struct xen_domctl_psr_cmt_op        psr_cmt_op;
> +        struct xen_domctl_monitor_op        monitor_op;
> +        struct xen_domctl_psr_cat_op        psr_cat_op;
> +        struct xen_domctl_set_gnttab_limits set_gnttab_limits;
> +        struct xen_domctl_vuart_op          vuart_op;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff --git a/include/xen/elfnote.h b/include/xen/elfnote.h
> index 353985f..936aa65 100644
> --- a/include/xen/elfnote.h
> +++ b/include/xen/elfnote.h
> @@ -173,7 +173,9 @@
>   * The (non-default) location the initial phys-to-machine map should be
>   * placed at by the hypervisor (Dom0) or the tools (DomU).
>   * The kernel must be prepared for this mapping to be established using
> - * large pages, despite such otherwise not being available to guests.
> + * large pages, despite such otherwise not being available to guests. Note
> + * that these large pages may be misaligned in PFN space (they'll obviously
> + * be aligned in MFN and virtual address spaces).
>   * The kernel must also be able to handle the page table pages used for
>   * this mapping not being accessible through the initial mapping.
>   * (Only x86-64 supports this at present.)
> diff --git a/include/xen/errno.h b/include/xen/errno.h
> new file mode 100644
> index 0000000..305c112
> --- /dev/null
> +++ b/include/xen/errno.h
> @@ -0,0 +1,124 @@
> +/*
> + * There are two expected ways of including this header.
> + *
> + * 1) The "default" case (expected from tools etc).
> + *
> + * Simply #include <public/errno.h>
> + *
> + * In this circumstance, normal header guards apply and the includer shall get
> + * an enumeration in the XEN_xxx namespace, appropriate for C or assembly.
> + *
> + * 2) The special case where the includer provides a XEN_ERRNO() in scope.
> + *
> + * In this case, no inclusion guards apply and the caller is responsible for
> + * their XEN_ERRNO() being appropriate in the included context.  The header
> + * will unilaterally #undef XEN_ERRNO().
> + */
> +
> +#ifndef XEN_ERRNO
> +
> +/*
> + * Includer has not provided a custom XEN_ERRNO().  Arrange for normal header
> + * guards, an automatic enum (for C code) and constants in the XEN_xxx
> + * namespace.
> + */
> +#ifndef __XEN_PUBLIC_ERRNO_H__
> +#define __XEN_PUBLIC_ERRNO_H__
> +
> +#define XEN_ERRNO_DEFAULT_INCLUDE
> +
> +#ifndef __ASSEMBLY__
> +
> +#define XEN_ERRNO(name, value) XEN_##name = value,
> +enum xen_errno {
> +
> +#elif __XEN_INTERFACE_VERSION__ < 0x00040700
> +
> +#define XEN_ERRNO(name, value) .equ XEN_##name, value
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* __XEN_PUBLIC_ERRNO_H__ */
> +#endif /* !XEN_ERRNO */
> +
> +/* ` enum neg_errnoval {  [ -Efoo for each Efoo in the list below ]  } */
> +/* ` enum errnoval { */
> +
> +#ifdef XEN_ERRNO
> +
> +/*
> + * Values originating from x86 Linux. Please consider using respective
> + * values when adding new definitions here.
> + *
> + * The set of identifiers to be added here shouldn't extend beyond what
> + * POSIX mandates (see e.g.
> + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html)
> + * with the exception that we support some optional (XSR) values
> + * specified there (but no new ones should be added).
> + */
> +
> +XEN_ERRNO(EPERM,      1)     /* Operation not permitted */
> +XEN_ERRNO(ENOENT,     2)     /* No such file or directory */
> +XEN_ERRNO(ESRCH,      3)     /* No such process */
> +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. */
> +XEN_ERRNO(EINTR,      4)     /* Interrupted system call */
> +#endif
> +XEN_ERRNO(EIO,                5)     /* I/O error */
> +XEN_ERRNO(ENXIO,      6)     /* No such device or address */
> +XEN_ERRNO(E2BIG,      7)     /* Arg list too long */
> +XEN_ERRNO(ENOEXEC,    8)     /* Exec format error */
> +XEN_ERRNO(EBADF,      9)     /* Bad file number */
> +XEN_ERRNO(ECHILD,    10)     /* No child processes */
> +XEN_ERRNO(EAGAIN,    11)     /* Try again */
> +XEN_ERRNO(EWOULDBLOCK,       11)     /* Operation would block.  Aliases EAGAIN */
> +XEN_ERRNO(ENOMEM,    12)     /* Out of memory */
> +XEN_ERRNO(EACCES,    13)     /* Permission denied */
> +XEN_ERRNO(EFAULT,    14)     /* Bad address */
> +XEN_ERRNO(EBUSY,     16)     /* Device or resource busy */
> +XEN_ERRNO(EEXIST,    17)     /* File exists */
> +XEN_ERRNO(EXDEV,     18)     /* Cross-device link */
> +XEN_ERRNO(ENODEV,    19)     /* No such device */
> +XEN_ERRNO(EISDIR,    21)     /* Is a directory */
> +XEN_ERRNO(EINVAL,    22)     /* Invalid argument */
> +XEN_ERRNO(ENFILE,    23)     /* File table overflow */
> +XEN_ERRNO(EMFILE,    24)     /* Too many open files */
> +XEN_ERRNO(ENOSPC,    28)     /* No space left on device */
> +XEN_ERRNO(EROFS,     30)     /* Read-only file system */
> +XEN_ERRNO(EMLINK,    31)     /* Too many links */
> +XEN_ERRNO(EDOM,              33)     /* Math argument out of domain of func */
> +XEN_ERRNO(ERANGE,    34)     /* Math result not representable */
> +XEN_ERRNO(EDEADLK,   35)     /* Resource deadlock would occur */
> +XEN_ERRNO(EDEADLOCK, 35)     /* Resource deadlock would occur. Aliases EDEADLK */
> +XEN_ERRNO(ENAMETOOLONG,      36)     /* File name too long */
> +XEN_ERRNO(ENOLCK,    37)     /* No record locks available */
> +XEN_ERRNO(ENOSYS,    38)     /* Function not implemented */
> +XEN_ERRNO(ENOTEMPTY, 39)     /* Directory not empty */
> +XEN_ERRNO(ENODATA,   61)     /* No data available */
> +XEN_ERRNO(ETIME,     62)     /* Timer expired */
> +XEN_ERRNO(EBADMSG,   74)     /* Not a data message */
> +XEN_ERRNO(EOVERFLOW, 75)     /* Value too large for defined data type */
> +XEN_ERRNO(EILSEQ,    84)     /* Illegal byte sequence */
> +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. */
> +XEN_ERRNO(ERESTART,  85)     /* Interrupted system call should be restarted */
> +#endif
> +XEN_ERRNO(ENOTSOCK,  88)     /* Socket operation on non-socket */
> +XEN_ERRNO(EOPNOTSUPP,        95)     /* Operation not supported on transport endpoint */
> +XEN_ERRNO(EADDRINUSE,        98)     /* Address already in use */
> +XEN_ERRNO(EADDRNOTAVAIL, 99) /* Cannot assign requested address */
> +XEN_ERRNO(ENOBUFS,   105)    /* No buffer space available */
> +XEN_ERRNO(EISCONN,   106)    /* Transport endpoint is already connected */
> +XEN_ERRNO(ENOTCONN,  107)    /* Transport endpoint is not connected */
> +XEN_ERRNO(ETIMEDOUT, 110)    /* Connection timed out */
> +
> +#undef XEN_ERRNO
> +#endif /* XEN_ERRNO */
> +/* ` } */
> +
> +/* Clean up from a default include.  Close the enum (for C). */
> +#ifdef XEN_ERRNO_DEFAULT_INCLUDE
> +#undef XEN_ERRNO_DEFAULT_INCLUDE
> +#ifndef __ASSEMBLY__
> +};
> +#endif
> +
> +#endif /* XEN_ERRNO_DEFAULT_INCLUDE */
> diff --git a/include/xen/event_channel.h b/include/xen/event_channel.h
> index 05e531d..44c549d 100644
> --- a/include/xen/event_channel.h
> +++ b/include/xen/event_channel.h
> @@ -85,7 +85,7 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
>   * is allocated in <dom> and returned as <port>.
>   * NOTES:
>   *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
> - *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
> + *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
>   */
>  struct evtchn_alloc_unbound {
>      /* IN parameters */
> diff --git a/include/xen/features.h b/include/xen/features.h
> index 16d92aa..2110b04 100644
> --- a/include/xen/features.h
> +++ b/include/xen/features.h
> @@ -99,6 +99,9 @@
>  #define XENFEAT_grant_map_identity        12
>   */
>  
> +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
> +#define XENFEAT_memory_op_vnode_supported 13
> +
>  #define XENFEAT_NR_SUBMAPS 1
>  
>  #endif /* __XEN_PUBLIC_FEATURES_H__ */
> diff --git a/include/xen/gcov.h b/include/xen/gcov.h
> deleted file mode 100644
> index 1b29b48..0000000
> --- a/include/xen/gcov.h
> +++ /dev/null
> @@ -1,115 +0,0 @@
> -/******************************************************************************
> - * gcov.h
> - *
> - * Coverage structures exported by Xen.
> - * Structure is different from Gcc one.
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> - * of this software and associated documentation files (the "Software"), to
> - * deal in the Software without restriction, including without limitation the
> - * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> - * sell copies of the Software, and to permit persons to whom the Software is
> - * furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice shall be included in
> - * all copies or substantial portions of the Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - *
> - * Copyright (c) 2013, Citrix Systems R&D Ltd.
> - */
> -
> -#ifndef __XEN_PUBLIC_GCOV_H__
> -#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__
> -
> -#define XENCOV_COUNTERS         5
> -#define XENCOV_TAG_BASE         0x58544300u
> -#define XENCOV_TAG_FILE         (XENCOV_TAG_BASE+0x46u)
> -#define XENCOV_TAG_FUNC         (XENCOV_TAG_BASE+0x66u)
> -#define XENCOV_TAG_COUNTER(n)   (XENCOV_TAG_BASE+0x30u+((n)&0xfu))
> -#define XENCOV_TAG_END          (XENCOV_TAG_BASE+0x2eu)
> -#define XENCOV_IS_TAG_COUNTER(n) \
> -    ((n) >= XENCOV_TAG_COUNTER(0) && (n) < 
> XENCOV_TAG_COUNTER(XENCOV_COUNTERS))
> -#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0))
> -
> -/*
> - * The main structure for the blob is
> - * BLOB := FILE.. END
> - * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS
> - * FILENAME := LEN characters
> - *   characters are padded to 32 bit
> - * LEN := 32 bit value
> - * COUNTERS := TAG_COUNTER(n) NUM COUNTER..
> - * NUM := 32 bit valie
> - * COUNTER := 64 bit value
> - * FUNCTIONS := TAG_FUNC NUM FUNCTION..
> - * FUNCTION := IDENT CHECKSUM NUM_COUNTERS
> - *
> - * All tagged structures are aligned to 8 bytes
> - */
> -
> -/**
> - * File information
> - * Prefixed with XENCOV_TAG_FILE and a string with filename
> - * Aligned to 8 bytes
> - */
> -struct xencov_file
> -{
> -    uint32_t tag; /* XENCOV_TAG_FILE */
> -    uint32_t version;
> -    uint32_t stamp;
> -    uint32_t fn_len;
> -    char filename[1];
> -};
> -
> -
> -/**
> - * Counters information
> - * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1)
> - * Aligned to 8 bytes
> - */
> -struct xencov_counter
> -{
> -    uint32_t tag; /* XENCOV_TAG_COUNTER(n) */
> -    uint32_t num;
> -    uint64_t values[1];
> -};
> -
> -/**
> - * Information for each function
> - * Number of counter is equal to the number of counter structures got before
> - */
> -struct xencov_function
> -{
> -    uint32_t ident;
> -    uint32_t checksum;
> -    uint32_t num_counters[1];
> -};
> -
> -/**
> - * Information for all functions
> - * Aligned to 8 bytes
> - */
> -struct xencov_functions
> -{
> -    uint32_t tag; /* XENCOV_TAG_FUNC */
> -    uint32_t num;
> -    struct xencov_function xencov_function[1];
> -};
> -
> -/**
> - * Terminator
> - */
> -struct xencov_end
> -{
> -    uint32_t tag; /* XENCOV_TAG_END */
> -};
> -
> -#endif /* __XEN_PUBLIC_GCOV_H__ */
> -
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index 20d4e77..018036e 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -43,7 +43,7 @@
>   * table are identified by grant references. A grant reference is an
>   * integer, which indexes into the grant table. It acts as a
>   * capability which the grantee can use to perform operations on the
> - * granter’s memory.
> + * granter's memory.
>   *
>   * This capability-based system allows shared-memory communications
>   * between unprivileged domains. A grant reference also encapsulates
> @@ -134,8 +134,10 @@ struct grant_entry_v1 {
>      /* The domain being granted foreign privileges. [GST] */
>      domid_t  domid;
>      /*
> -     * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
> -     * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
> +     * GTF_permit_access: GFN that @domid is allowed to map and access. [GST]
> +     * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST]
> +     * GTF_transfer_completed: MFN whose ownership transferred by @domid
> +     *                         (non-translated guests only). [XEN]
>       */
>      uint32_t frame;
>  };
> @@ -321,7 +323,7 @@ typedef uint32_t grant_handle_t;
>  /*
>   * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
>   * by devices and/or host CPUs. If successful, <handle> is a tracking number
> - * that must be presented later to destroy the mapping(s). On error, <handle>
> + * that must be presented later to destroy the mapping(s). On error, <status>
>   * is a negative status code.
>   * NOTES:
>   *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
> @@ -409,12 +411,13 @@ typedef struct gnttab_dump_table gnttab_dump_table_t;
>  DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
>  
>  /*
> - * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
> - * foreign domain has previously registered its interest in the transfer via
> - * <domid, ref>.
> + * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The foreign domain
> + * has previously registered its interest in the transfer via <domid, ref>.
>   *
>   * Note that, even if the transfer fails, the specified page no longer belongs
>   * to the calling domain *unless* the error is GNTST_bad_page.
> + *
> + * Note further that only PV guests can use this operation.
>   */
>  struct gnttab_transfer {
>      /* IN parameters. */
> @@ -453,7 +456,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
>  
>  struct gnttab_copy {
>      /* IN parameters. */
> -    struct {
> +    struct gnttab_copy_ptr {
>          union {
>              grant_ref_t ref;
>              xen_pfn_t   gmfn;
> diff --git a/include/xen/hvm/dm_op.h b/include/xen/hvm/dm_op.h
> new file mode 100644
> index 0000000..6bbab5f
> --- /dev/null
> +++ b/include/xen/hvm/dm_op.h
> @@ -0,0 +1,417 @@
> +/*
> + * Copyright (c) 2016, Citrix Systems Inc
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __XEN_PUBLIC_HVM_DM_OP_H__
> +#define __XEN_PUBLIC_HVM_DM_OP_H__
> +
> +#include "../xen.h"
> +
> +#if defined(__XEN__) || defined(__XEN_TOOLS__)
> +
> +#include "../event_channel.h"
> +
> +#ifndef uint64_aligned_t
> +#define uint64_aligned_t uint64_t
> +#endif
> +
> +/*
> + * IOREQ Servers
> + *
> + * The interface between an I/O emulator and Xen is called an IOREQ Server.
> + * A domain supports a single 'legacy' IOREQ Server which is instantiated if
> + * parameter...
> + *
> + * HVM_PARAM_IOREQ_PFN is read (to get the gfn containing the synchronous
> + * ioreq structures), or...
> + * HVM_PARAM_BUFIOREQ_PFN is read (to get the gfn containing the buffered
> + * ioreq ring), or...
> + * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses
> + * to request buffered I/O emulation).
> + *
> + * The following hypercalls facilitate the creation of IOREQ Servers for
> + * 'secondary' emulators which are invoked to implement port I/O, memory, or
> + * PCI config space ranges which they explicitly register.
> + */
> +
> +typedef uint16_t ioservid_t;
> +
> +/*
> + * XEN_DMOP_create_ioreq_server: Instantiate a new IOREQ Server for a
> + *                               secondary emulator.
> + *
> + * The <id> handed back is unique for the target domain. The value of
> + * <handle_bufioreq> should be one of HVM_IOREQSRV_BUFIOREQ_* defined in
> + * hvm_op.h. If the value is HVM_IOREQSRV_BUFIOREQ_OFF then the buffered
> + * ioreq ring will not be allocated and hence all emulation requests to
> + * this server will be synchronous.
> + */
> +#define XEN_DMOP_create_ioreq_server 1
> +
> +struct xen_dm_op_create_ioreq_server {
> +    /* IN - should server handle buffered ioreqs */
> +    uint8_t handle_bufioreq;
> +    uint8_t pad[3];
> +    /* OUT - server id */
> +    ioservid_t id;
> +};
> +
> +/*
> + * XEN_DMOP_get_ioreq_server_info: Get all the information necessary to
> + *                                 access IOREQ Server <id>.
> + *
> + * The emulator needs to map the synchronous ioreq structures and buffered
> + * ioreq ring (if it exists) that Xen uses to request emulation. These are
> + * hosted in the target domain's gmfns <ioreq_gfn> and <bufioreq_gfn>
> + * respectively. In addition, if the IOREQ Server is handling buffered
> + * emulation requests, the emulator needs to bind to event channel
> + * <bufioreq_port> to listen for them. (The event channels used for
> + * synchronous emulation requests are specified in the per-CPU ioreq
> + * structures in <ioreq_gfn>).
> + * If the IOREQ Server is not handling buffered emulation requests then the
> + * values handed back in <bufioreq_gfn> and <bufioreq_port> will both be 0.
> + */
> +#define XEN_DMOP_get_ioreq_server_info 2
> +
> +struct xen_dm_op_get_ioreq_server_info {
> +    /* IN - server id */
> +    ioservid_t id;
> +    uint16_t pad;
> +    /* OUT - buffered ioreq port */
> +    evtchn_port_t bufioreq_port;
> +    /* OUT - sync ioreq gfn */
> +    uint64_aligned_t ioreq_gfn;
> +    /* OUT - buffered ioreq gfn */
> +    uint64_aligned_t bufioreq_gfn;
> +};
> +
> +/*
> + * XEN_DMOP_map_io_range_to_ioreq_server: Register an I/O range for
> + *                                        emulation by the client of
> + *                                        IOREQ Server <id>.
> + * XEN_DMOP_unmap_io_range_from_ioreq_server: Deregister an I/O range
> + *                                            previously registered for
> + *                                            emulation by the client of
> + *                                            IOREQ Server <id>.
> + *
> + * There are three types of I/O that can be emulated: port I/O, memory
> + * accesses and PCI config space accesses. The <type> field denotes which
> + * type of range the <start> and <end> (inclusive) fields are specifying.
> + * PCI config space ranges are specified by segment/bus/device/function
> + * values which should be encoded using the XEN_DMOP_PCI_SBDF helper macro
> + * below.
> + *
> + * NOTE: unless an emulation request falls entirely within a range mapped
> + * by a secondary emulator, it will not be passed to that emulator.
> + */
> +#define XEN_DMOP_map_io_range_to_ioreq_server 3
> +#define XEN_DMOP_unmap_io_range_from_ioreq_server 4
> +
> +struct xen_dm_op_ioreq_server_range {
> +    /* IN - server id */
> +    ioservid_t id;
> +    uint16_t pad;
> +    /* IN - type of range */
> +    uint32_t type;
> +# define XEN_DMOP_IO_RANGE_PORT   0 /* I/O port range */
> +# define XEN_DMOP_IO_RANGE_MEMORY 1 /* MMIO range */
> +# define XEN_DMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range */
> +    /* IN - inclusive start and end of range */
> +    uint64_aligned_t start, end;
> +};
> +
> +#define XEN_DMOP_PCI_SBDF(s,b,d,f) \
> +     ((((s) & 0xffff) << 16) |  \
> +      (((b) & 0xff) << 8) |     \
> +      (((d) & 0x1f) << 3) |     \
> +      ((f) & 0x07))
> +
> +/*
> + * XEN_DMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id>
> + *
> + * The IOREQ Server will not be passed any emulation requests until it is
> + * in the enabled state.
> + * Note that the contents of the ioreq_gfn and bufioreq_gfn (see
> + * XEN_DMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server
> + * is in the enabled state.
> + */
> +#define XEN_DMOP_set_ioreq_server_state 5
> +
> +struct xen_dm_op_set_ioreq_server_state {
> +    /* IN - server id */
> +    ioservid_t id;
> +    /* IN - enabled? */
> +    uint8_t enabled;
> +    uint8_t pad;
> +};
> +
> +/*
> + * XEN_DMOP_destroy_ioreq_server: Destroy the IOREQ Server <id>.
> + *
> + * Any registered I/O ranges will be automatically deregistered.
> + */
> +#define XEN_DMOP_destroy_ioreq_server 6
> +
> +struct xen_dm_op_destroy_ioreq_server {
> +    /* IN - server id */
> +    ioservid_t id;
> +    uint16_t pad;
> +};
> +
> +/*
> + * XEN_DMOP_track_dirty_vram: Track modifications to the specified pfn
> + *                            range.
> + *
> + * NOTE: The bitmap passed back to the caller is passed in a
> + *       secondary buffer.
> + */
> +#define XEN_DMOP_track_dirty_vram 7
> +
> +struct xen_dm_op_track_dirty_vram {
> +    /* IN - number of pages to be tracked */
> +    uint32_t nr;
> +    uint32_t pad;
> +    /* IN - first pfn to track */
> +    uint64_aligned_t first_pfn;
> +};
> +
> +/*
> + * XEN_DMOP_set_pci_intx_level: Set the logical level of one of a domain's
> + *                              PCI INTx pins.
> + */
> +#define XEN_DMOP_set_pci_intx_level 8
> +
> +struct xen_dm_op_set_pci_intx_level {
> +    /* IN - PCI INTx identification (domain:bus:device:intx) */
> +    uint16_t domain;
> +    uint8_t bus, device, intx;
> +    /* IN - Level: 0 -> deasserted, 1 -> asserted */
> +    uint8_t  level;
> +};
> +
> +/*
> + * XEN_DMOP_set_isa_irq_level: Set the logical level of one of a domain's
> + *                             ISA IRQ lines.
> + */
> +#define XEN_DMOP_set_isa_irq_level 9
> +
> +struct xen_dm_op_set_isa_irq_level {
> +    /* IN - ISA IRQ (0-15) */
> +    uint8_t  isa_irq;
> +    /* IN - Level: 0 -> deasserted, 1 -> asserted */
> +    uint8_t  level;
> +};
> +
> +/*
> + * XEN_DMOP_set_pci_link_route: Map a PCI INTx line to an IRQ line.
> + */
> +#define XEN_DMOP_set_pci_link_route 10
> +
> +struct xen_dm_op_set_pci_link_route {
> +    /* PCI INTx line (0-3) */
> +    uint8_t  link;
> +    /* ISA IRQ (1-15) or 0 -> disable link */
> +    uint8_t  isa_irq;
> +};
> +
> +/*
> + * XEN_DMOP_modified_memory: Notify that a set of pages were modified by
> + *                           an emulator.
> + *
> + * DMOP buf 1 contains an array of xen_dm_op_modified_memory_extent with
> + * @nr_extents entries.
> + *
> + * On error, @nr_extents will contain the index+1 of the extent that
> + * had the error.  It is not defined if or which pages may have been
> + * marked as dirty, in this event.
> + */
> +#define XEN_DMOP_modified_memory 11
> +
> +struct xen_dm_op_modified_memory {
> +    /*
> +     * IN - Number of extents to be processed
> +     * OUT - returns n+1 for failing extent
> +     */
> +    uint32_t nr_extents;
> +    /* IN/OUT - Must be set to 0 */
> +    uint32_t opaque;
> +};
> +
> +struct xen_dm_op_modified_memory_extent {
> +    /* IN - number of contiguous pages modified */
> +    uint32_t nr;
> +    uint32_t pad;
> +    /* IN - first pfn modified */
> +    uint64_aligned_t first_pfn;
> +};
> +
> +/*
> + * XEN_DMOP_set_mem_type: Notify that a region of memory is to be treated
> + *                        in a specific way. (See definition of
> + *                        hvmmem_type_t).
> + *
> + * NOTE: In the event of a continuation (return code -ERESTART), the
> + *       @first_pfn is set to the value of the pfn of the remaining
> + *       region and @nr reduced to the size of the remaining region.
> + */
> +#define XEN_DMOP_set_mem_type 12
> +
> +struct xen_dm_op_set_mem_type {
> +    /* IN - number of contiguous pages */
> +    uint32_t nr;
> +    /* IN - new hvmmem_type_t of region */
> +    uint16_t mem_type;
> +    uint16_t pad;
> +    /* IN - first pfn in region */
> +    uint64_aligned_t first_pfn;
> +};
> +
> +/*
> + * XEN_DMOP_inject_event: Inject an event into a VCPU, which will
> + *                        get taken up when it is next scheduled.
> + *
> + * Note that the caller should know enough of the state of the CPU before
> + * injecting, to know what the effect of injecting the event will be.
> + */
> +#define XEN_DMOP_inject_event 13
> +
> +struct xen_dm_op_inject_event {
> +    /* IN - index of vCPU */
> +    uint32_t vcpuid;
> +    /* IN - interrupt vector */
> +    uint8_t vector;
> +    /* IN - event type (XEN_DMOP_EVENT_*) */
> +    uint8_t type;
> +/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */
> +# define XEN_DMOP_EVENT_ext_int    0 /* external interrupt */
> +# define XEN_DMOP_EVENT_nmi        2 /* nmi */
> +# define XEN_DMOP_EVENT_hw_exc     3 /* hardware exception */
> +# define XEN_DMOP_EVENT_sw_int     4 /* software interrupt (CD nn) */
> +# define XEN_DMOP_EVENT_pri_sw_exc 5 /* ICEBP (F1) */
> +# define XEN_DMOP_EVENT_sw_exc     6 /* INT3 (CC), INTO (CE) */
> +    /* IN - instruction length */
> +    uint8_t insn_len;
> +    uint8_t pad0;
> +    /* IN - error code (or ~0 to skip) */
> +    uint32_t error_code;
> +    uint32_t pad1;
> +    /* IN - CR2 for page faults */
> +    uint64_aligned_t cr2;
> +};
> +
> +/*
> + * XEN_DMOP_inject_msi: Inject an MSI for an emulated device.
> + */
> +#define XEN_DMOP_inject_msi 14
> +
> +struct xen_dm_op_inject_msi {
> +    /* IN - MSI data (lower 32 bits) */
> +    uint32_t data;
> +    uint32_t pad;
> +    /* IN - MSI address (0xfeexxxxx) */
> +    uint64_aligned_t addr;
> +};
> +
> +/*
> + * XEN_DMOP_map_mem_type_to_ioreq_server : map or unmap the IOREQ Server <id>
> + *                                      to specific memory type <type>
> + *                                      for specific accesses <flags>
> + *
> + * For now, flags only accept the value of XEN_DMOP_IOREQ_MEM_ACCESS_WRITE,
> + * which means only write operations are to be forwarded to an ioreq server.
> + * Support for the emulation of read operations can be added when an ioreq
> + * server has such requirement in future.
> + */
> +#define XEN_DMOP_map_mem_type_to_ioreq_server 15
> +
> +struct xen_dm_op_map_mem_type_to_ioreq_server {
> +    ioservid_t id;      /* IN - ioreq server id */
> +    uint16_t type;      /* IN - memory type */
> +    uint32_t flags;     /* IN - types of accesses to be forwarded to the
> +                           ioreq server. flags with 0 means to unmap the
> +                           ioreq server */
> +
> +#define XEN_DMOP_IOREQ_MEM_ACCESS_READ (1u << 0)
> +#define XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1)
> +
> +    uint64_t opaque;    /* IN/OUT - only used for hypercall continuation,
> +                           has to be set to zero by the caller */
> +};
> +
> +struct xen_dm_op {
> +    uint32_t op;
> +    uint32_t pad;
> +    union {
> +        struct xen_dm_op_create_ioreq_server create_ioreq_server;
> +        struct xen_dm_op_get_ioreq_server_info get_ioreq_server_info;
> +        struct xen_dm_op_ioreq_server_range map_io_range_to_ioreq_server;
> +        struct xen_dm_op_ioreq_server_range unmap_io_range_from_ioreq_server;
> +        struct xen_dm_op_set_ioreq_server_state set_ioreq_server_state;
> +        struct xen_dm_op_destroy_ioreq_server destroy_ioreq_server;
> +        struct xen_dm_op_track_dirty_vram track_dirty_vram;
> +        struct xen_dm_op_set_pci_intx_level set_pci_intx_level;
> +        struct xen_dm_op_set_isa_irq_level set_isa_irq_level;
> +        struct xen_dm_op_set_pci_link_route set_pci_link_route;
> +        struct xen_dm_op_modified_memory modified_memory;
> +        struct xen_dm_op_set_mem_type set_mem_type;
> +        struct xen_dm_op_inject_event inject_event;
> +        struct xen_dm_op_inject_msi inject_msi;
> +        struct xen_dm_op_map_mem_type_to_ioreq_server
> +                map_mem_type_to_ioreq_server;
> +    } u;
> +};
> +
> +#endif /* __XEN__ || __XEN_TOOLS__ */
> +
> +struct xen_dm_op_buf {
> +    XEN_GUEST_HANDLE(void) h;
> +    xen_ulong_t size;
> +};
> +typedef struct xen_dm_op_buf xen_dm_op_buf_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_dm_op_buf_t);
> +
> +/* ` enum neg_errnoval
> + * ` HYPERVISOR_dm_op(domid_t domid,
> + * `                  unsigned int nr_bufs,
> + * `                  xen_dm_op_buf_t bufs[])
> + * `
> + *
> + * @domid is the domain the hypercall operates on.
> + * @nr_bufs is the number of buffers in the @bufs array.
> + * @bufs points to an array of buffers where @bufs[0] contains a struct
> + * xen_dm_op, describing the specific device model operation and its
> + * parameters.
> + * @bufs[1..] may be referenced in the parameters for the purposes of
> + * passing extra information to or from the domain.
> + */
> +
> +#endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/hvm/e820.h b/include/xen/hvm/e820.h
> index 5bdc227..4c42f33 100644
> --- a/include/xen/hvm/e820.h
> +++ b/include/xen/hvm/e820.h
> @@ -1,4 +1,3 @@
> -
>  /*
>   * Permission is hereby granted, free of charge, to any person obtaining a copy
>   * of this software and associated documentation files (the "Software"), to
> @@ -17,11 +16,15 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2006, Keir Fraser
>   */
>  
>  #ifndef __XEN_PUBLIC_HVM_E820_H__
>  #define __XEN_PUBLIC_HVM_E820_H__
>  
> +#include "../xen.h"
> +
>  /* E820 location in HVM virtual address space. */
>  #define HVM_E820_PAGE        0x00090000
>  #define HVM_E820_NR_OFFSET   0x000001E8
> @@ -29,6 +32,7 @@
>  
>  #define HVM_BELOW_4G_RAM_END        0xF0000000
>  #define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
> -#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
> +#define HVM_BELOW_4G_MMIO_LENGTH    ((xen_mk_ullong(1) << 32) - \
> +                                     HVM_BELOW_4G_MMIO_START)
>  
>  #endif /* __XEN_PUBLIC_HVM_E820_H__ */
> diff --git a/include/xen/hvm/hvm_info_table.h b/include/xen/hvm/hvm_info_table.h
> index 36085fa..08c252e 100644
> --- a/include/xen/hvm/hvm_info_table.h
> +++ b/include/xen/hvm/hvm_info_table.h
> @@ -20,6 +20,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2006, Keir Fraser
>   */
>  
>  #ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
> @@ -32,6 +34,14 @@
>  /* Maximum we can support with current vLAPIC ID mapping. */
>  #define HVM_MAX_VCPUS        128
>  
> +/*
> + * In some cases SMP HVM guests may require knowledge of Xen's idea of vCPU ids
> + * for their vCPUs. For example, HYPERVISOR_vcpu_op and some EVTCHNOP_*
> + * hypercalls take vcpu id as a parameter. It is valid for HVM guests to assume
> + * that Xen's vCPU id always equals to ACPI (not APIC!) id in MADT table which
> + * is always present for SMP guests.
> + */
> +
>  struct hvm_info_table {
>      char        signature[8]; /* "HVM INFO" */
>      uint32_t    length;
> diff --git a/include/xen/hvm/hvm_op.h b/include/xen/hvm/hvm_op.h
> index cde3571..0bdafdf 100644
> --- a/include/xen/hvm/hvm_op.h
> +++ b/include/xen/hvm/hvm_op.h
> @@ -16,6 +16,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2007, Keir Fraser
>   */
>  
>  #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
> @@ -36,6 +38,8 @@ struct xen_hvm_param {
>  typedef struct xen_hvm_param xen_hvm_param_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
>  
> +#if __XEN_INTERFACE_VERSION__ < 0x00040900
> +
>  /* Set the logical level of one of a domain's PCI INTx wires. */
>  #define HVMOP_set_pci_intx_level  2
>  struct xen_hvm_set_pci_intx_level {
> @@ -74,6 +78,8 @@ struct xen_hvm_set_pci_link_route {
>  typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
>  
> +#endif /* __XEN_INTERFACE_VERSION__ < 0x00040900 */
> +
>  /* Flushes all VCPU TLBs: @arg must be NULL. */
>  #define HVMOP_flush_tlbs          5
>  
> @@ -81,58 +87,21 @@ typedef enum {
>      HVMMEM_ram_rw,             /* Normal read/write guest RAM */
>      HVMMEM_ram_ro,             /* Read-only; writes are discarded */
>      HVMMEM_mmio_dm,            /* Reads and write go to the device model */
> -    HVMMEM_mmio_write_dm       /* Read-only; writes go to the device model */
> +#if __XEN_INTERFACE_VERSION__ < 0x00040700
> +    HVMMEM_mmio_write_dm,      /* Read-only; writes go to the device model */
> +#else
> +    HVMMEM_unused,             /* Placeholder; setting memory to this type
> +                                  will fail for code after 4.7.0 */
> +#endif
> +    HVMMEM_ioreq_server        /* Memory type claimed by an ioreq server; type
> +                                  changes to this value are only allowed after
> +                                  an ioreq server has claimed its ownership.
> +                                  Only pages with HVMMEM_ram_rw are allowed to
> +                                  change to this type; conversely, pages with
> +                                  this type are only allowed to be changed back
> +                                  to HVMMEM_ram_rw. */
>  } hvmmem_type_t;
>  
> -/* Following tools-only interfaces may change in future. */
> -#if defined(__XEN__) || defined(__XEN_TOOLS__)
> -
> -/* Track dirty VRAM. */
> -#define HVMOP_track_dirty_vram    6
> -struct xen_hvm_track_dirty_vram {
> -    /* Domain to be tracked. */
> -    domid_t  domid;
> -    /* Number of pages to track. */
> -    uint32_t nr;
> -    /* First pfn to track. */
> -    uint64_aligned_t first_pfn;
> -    /* OUT variable. */
> -    /* Dirty bitmap buffer. */
> -    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
> -};
> -typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
> -
> -/* Notify that some pages got modified by the Device Model. */
> -#define HVMOP_modified_memory    7
> -struct xen_hvm_modified_memory {
> -    /* Domain to be updated. */
> -    domid_t  domid;
> -    /* Number of pages. */
> -    uint32_t nr;
> -    /* First pfn. */
> -    uint64_aligned_t first_pfn;
> -};
> -typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t);
> -
> -#define HVMOP_set_mem_type    8
> -/* Notify that a region of memory is to be treated in a specific way. */
> -struct xen_hvm_set_mem_type {
> -    /* Domain to be updated. */
> -    domid_t domid;
> -    /* Memory type */
> -    uint16_t hvmmem_type;
> -    /* Number of pages. */
> -    uint32_t nr;
> -    /* First pfn. */
> -    uint64_aligned_t first_pfn;
> -};
> -typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
> -
> -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> -
>  /* Hint from PV drivers for pagetable destruction. */
>  #define HVMOP_pagetable_dying        9
>  struct xen_hvm_pagetable_dying {
> @@ -170,38 +139,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
>  /* Deprecated by XENMEM_access_op_get_access */
>  #define HVMOP_get_mem_access        13
>  
> -#define HVMOP_inject_trap            14
> -/* Inject a trap into a VCPU, which will get taken up on the next
> - * scheduling of it. Note that the caller should know enough of the
> - * state of the CPU before injecting, to know what the effect of
> - * injecting the trap will be.
> - */
> -struct xen_hvm_inject_trap {
> -    /* Domain to be queried. */
> -    domid_t domid;
> -    /* VCPU */
> -    uint32_t vcpuid;
> -    /* Vector number */
> -    uint32_t vector;
> -    /* Trap type (HVMOP_TRAP_*) */
> -    uint32_t type;
> -/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */
> -# define HVMOP_TRAP_ext_int    0 /* external interrupt */
> -# define HVMOP_TRAP_nmi        2 /* nmi */
> -# define HVMOP_TRAP_hw_exc     3 /* hardware exception */
> -# define HVMOP_TRAP_sw_int     4 /* software interrupt (CD nn) */
> -# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */
> -# define HVMOP_TRAP_sw_exc     6 /* INT3 (CC), INTO (CE) */
> -    /* Error code, or ~0u to skip */
> -    uint32_t error_code;
> -    /* Intruction length */
> -    uint32_t insn_len;
> -    /* CR2 for page faults */
> -    uint64_aligned_t cr2;
> -};
> -typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t);
> -
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
>  #define HVMOP_get_mem_type    15
> @@ -221,152 +158,18 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
>  /* Following tools-only interfaces may change in future. */
>  #if defined(__XEN__) || defined(__XEN_TOOLS__)
>  
> -/* MSI injection for emulated devices */
> -#define HVMOP_inject_msi         16
> -struct xen_hvm_inject_msi {
> -    /* Domain to be injected */
> -    domid_t   domid;
> -    /* Data -- lower 32 bits */
> -    uint32_t  data;
> -    /* Address (0xfeexxxxx) */
> -    uint64_t  addr;
> -};
> -typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
> -
>  /*
> - * IOREQ Servers
> - *
> - * The interface between an I/O emulator an Xen is called an IOREQ Server.
> - * A domain supports a single 'legacy' IOREQ Server which is instantiated if
> - * parameter...
> - *
> - * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous
> - * ioreq structures), or...
> - * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered
> - * ioreq ring), or...
> - * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses
> - * to request buffered I/O emulation).
> - * 
> - * The following hypercalls facilitate the creation of IOREQ Servers for
> - * 'secondary' emulators which are invoked to implement port I/O, memory, or
> - * PCI config space ranges which they explicitly register.
> + * Definitions relating to DMOP_create_ioreq_server. (Defined here for
> + * backwards compatibility).
>   */
>  
> -typedef uint16_t ioservid_t;
> -
> +#define HVM_IOREQSRV_BUFIOREQ_OFF    0
> +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
>  /*
> - * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary
> - *                            emulator servicing domain <domid>.
> - *
> - * The <id> handed back is unique for <domid>. If <handle_bufioreq> is zero
> - * the buffered ioreq ring will not be allocated and hence all emulation
> - * requestes to this server will be synchronous.
> + * Use this when read_pointer gets updated atomically and
> + * the pointer pair gets read atomically:
>   */
> -#define HVMOP_create_ioreq_server 17
> -struct xen_hvm_create_ioreq_server {
> -    domid_t domid;           /* IN - domain to be serviced */
> -    uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
> -    ioservid_t id;           /* OUT - server id */
> -};
> -typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_create_ioreq_server_t);
> -
> -/*
> - * HVMOP_get_ioreq_server_info: Get all the information necessary to access
> - *                              IOREQ Server <id>. 
> - *
> - * The emulator needs to map the synchronous ioreq structures and buffered
> - * ioreq ring (if it exists) that Xen uses to request emulation. These are
> - * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
> - * respectively. In addition, if the IOREQ Server is handling buffered
> - * emulation requests, the emulator needs to bind to event channel
> - * <bufioreq_port> to listen for them. (The event channels used for
> - * synchronous emulation requests are specified in the per-CPU ioreq
> - * structures in <ioreq_pfn>).
> - * If the IOREQ Server is not handling buffered emulation requests then the
> - * values handed back in <bufioreq_pfn> and <bufioreq_port> will both be 0.
> - */
> -#define HVMOP_get_ioreq_server_info 18
> -struct xen_hvm_get_ioreq_server_info {
> -    domid_t domid;                 /* IN - domain to be serviced */
> -    ioservid_t id;                 /* IN - server id */
> -    evtchn_port_t bufioreq_port;   /* OUT - buffered ioreq port */
> -    uint64_aligned_t ioreq_pfn;    /* OUT - sync ioreq pfn */
> -    uint64_aligned_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
> -};
> -typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t);
> -
> -/*
> - * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain <domid>
> - *                                   for emulation by the client of IOREQ
> - *                                   Server <id>
> - * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of <domid>
> - *                                       for emulation by the client of IOREQ
> - *                                       Server <id>
> - *
> - * There are three types of I/O that can be emulated: port I/O, memory accesses
> - * and PCI config space accesses. The <type> field denotes which type of range
> - * the <start> and <end> (inclusive) fields are specifying.
> - * PCI config space ranges are specified by segment/bus/device/function values
> - * which should be encoded using the HVMOP_PCI_SBDF helper macro below.
> - *
> - * NOTE: unless an emulation request falls entirely within a range mapped
> - * by a secondary emulator, it will not be passed to that emulator.
> - */
> -#define HVMOP_map_io_range_to_ioreq_server 19
> -#define HVMOP_unmap_io_range_from_ioreq_server 20
> -struct xen_hvm_io_range {
> -    domid_t domid;               /* IN - domain to be serviced */
> -    ioservid_t id;               /* IN - server id */
> -    uint32_t type;               /* IN - type of range */
> -# define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
> -# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
> -# define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range */
> -    uint64_aligned_t start, end; /* IN - inclusive start and end of range */
> -};
> -typedef struct xen_hvm_io_range xen_hvm_io_range_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_io_range_t);
> -
> -#define HVMOP_PCI_SBDF(s,b,d,f)                 \
> -     ((((s) & 0xffff) << 16) |                   \
> -      (((b) & 0xff) << 8) |                      \
> -      (((d) & 0x1f) << 3) |                      \
> -      ((f) & 0x07))
> -
> -/*
> - * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id> servicing domain
> - *                             <domid>.
> - *
> - * Any registered I/O ranges will be automatically deregistered.
> - */
> -#define HVMOP_destroy_ioreq_server 21
> -struct xen_hvm_destroy_ioreq_server {
> -    domid_t domid; /* IN - domain to be serviced */
> -    ioservid_t id; /* IN - server id */
> -};
> -typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_destroy_ioreq_server_t);
> -
> -/*
> - * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id> servicing
> - *                               domain <domid>.
> - *
> - * The IOREQ Server will not be passed any emulation requests until it is in the
> - * enabled state.
> - * Note that the contents of the ioreq_pfn and bufioreq_fn (see
> - * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in
> - * the enabled state.
> - */
> -#define HVMOP_set_ioreq_server_state 22
> -struct xen_hvm_set_ioreq_server_state {
> -    domid_t domid;   /* IN - domain to be serviced */
> -    ioservid_t id;   /* IN - server id */
> -    uint8_t enabled; /* IN - enabled? */    
> -};
> -typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
> +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
>  
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
> @@ -389,6 +192,97 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t);
>  
>  #endif /* defined(__i386__) || defined(__x86_64__) */
>  
> +#define HVMOP_guest_request_vm_event 24
> +
> +/* HVMOP_altp2m: perform altp2m state operations */
> +#define HVMOP_altp2m 25
> +
> +#define HVMOP_ALTP2M_INTERFACE_VERSION 0x00000001
> +
> +struct xen_hvm_altp2m_domain_state {
> +    /* IN or OUT variable on/off */
> +    uint8_t state;
> +};
> +typedef struct xen_hvm_altp2m_domain_state xen_hvm_altp2m_domain_state_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_domain_state_t);
> +
> +struct xen_hvm_altp2m_vcpu_enable_notify {
> +    uint32_t vcpu_id;
> +    uint32_t pad;
> +    /* #VE info area gfn */
> +    uint64_t gfn;
> +};
> +typedef struct xen_hvm_altp2m_vcpu_enable_notify xen_hvm_altp2m_vcpu_enable_notify_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_enable_notify_t);
> +
> +struct xen_hvm_altp2m_view {
> +    /* IN/OUT variable */
> +    uint16_t view;
> +    /* Create view only: default access type
> +     * NOTE: currently ignored */
> +    uint16_t hvmmem_default_access; /* xenmem_access_t */
> +};
> +typedef struct xen_hvm_altp2m_view xen_hvm_altp2m_view_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_view_t);
> +
> +struct xen_hvm_altp2m_set_mem_access {
> +    /* view */
> +    uint16_t view;
> +    /* Memory type */
> +    uint16_t hvmmem_access; /* xenmem_access_t */
> +    uint32_t pad;
> +    /* gfn */
> +    uint64_t gfn;
> +};
> +typedef struct xen_hvm_altp2m_set_mem_access xen_hvm_altp2m_set_mem_access_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_set_mem_access_t);
> +
> +struct xen_hvm_altp2m_change_gfn {
> +    /* view */
> +    uint16_t view;
> +    uint16_t pad1;
> +    uint32_t pad2;
> +    /* old gfn */
> +    uint64_t old_gfn;
> +    /* new gfn, INVALID_GFN (~0UL) means revert */
> +    uint64_t new_gfn;
> +};
> +typedef struct xen_hvm_altp2m_change_gfn xen_hvm_altp2m_change_gfn_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_change_gfn_t);
> +
> +struct xen_hvm_altp2m_op {
> +    uint32_t version;   /* HVMOP_ALTP2M_INTERFACE_VERSION */
> +    uint32_t cmd;
> +/* Get/set the altp2m state for a domain */
> +#define HVMOP_altp2m_get_domain_state     1
> +#define HVMOP_altp2m_set_domain_state     2
> +/* Set the current VCPU to receive altp2m event notifications */
> +#define HVMOP_altp2m_vcpu_enable_notify   3
> +/* Create a new view */
> +#define HVMOP_altp2m_create_p2m           4
> +/* Destroy a view */
> +#define HVMOP_altp2m_destroy_p2m          5
> +/* Switch view for an entire domain */
> +#define HVMOP_altp2m_switch_p2m           6
> +/* Notify that a page of memory is to have specific access types */
> +#define HVMOP_altp2m_set_mem_access       7
> +/* Change a p2m entry to have a different gfn->mfn mapping */
> +#define HVMOP_altp2m_change_gfn           8
> +    domid_t domain;
> +    uint16_t pad1;
> +    uint32_t pad2;
> +    union {
> +        struct xen_hvm_altp2m_domain_state       domain_state;
> +        struct xen_hvm_altp2m_vcpu_enable_notify enable_notify;
> +        struct xen_hvm_altp2m_view               view;
> +        struct xen_hvm_altp2m_set_mem_access     set_mem_access;
> +        struct xen_hvm_altp2m_change_gfn         change_gfn;
> +        uint8_t pad[64];
> +    } u;
> +};
> +typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t);
> +
>  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
>  
>  /*
> diff --git a/include/xen/hvm/hvm_vcpu.h b/include/xen/hvm/hvm_vcpu.h
> new file mode 100644
> index 0000000..d21abf1
> --- /dev/null
> +++ b/include/xen/hvm/hvm_vcpu.h
> @@ -0,0 +1,144 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2015, Roger Pau Monne <roger.pau@xxxxxxxxxx>
> + */
> +
> +#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__
> +#define __XEN_PUBLIC_HVM_HVM_VCPU_H__
> +
> +#include "../xen.h"
> +
> +struct vcpu_hvm_x86_32 {
> +    uint32_t eax;
> +    uint32_t ecx;
> +    uint32_t edx;
> +    uint32_t ebx;
> +    uint32_t esp;
> +    uint32_t ebp;
> +    uint32_t esi;
> +    uint32_t edi;
> +    uint32_t eip;
> +    uint32_t eflags;
> +
> +    uint32_t cr0;
> +    uint32_t cr3;
> +    uint32_t cr4;
> +
> +    uint32_t pad1;
> +
> +    /*
> +     * EFER should only be used to set the NXE bit (if required)
> +     * when starting a vCPU in 32bit mode with paging enabled or
> +     * to set the LME/LMA bits in order to start the vCPU in
> +     * compatibility mode.
> +     */
> +    uint64_t efer;
> +
> +    uint32_t cs_base;
> +    uint32_t ds_base;
> +    uint32_t ss_base;
> +    uint32_t es_base;
> +    uint32_t tr_base;
> +    uint32_t cs_limit;
> +    uint32_t ds_limit;
> +    uint32_t ss_limit;
> +    uint32_t es_limit;
> +    uint32_t tr_limit;
> +    uint16_t cs_ar;
> +    uint16_t ds_ar;
> +    uint16_t ss_ar;
> +    uint16_t es_ar;
> +    uint16_t tr_ar;
> +
> +    uint16_t pad2[3];
> +};
> +
> +/*
> + * The layout of the _ar fields of the segment registers is the
> + * following:
> + *
> + * Bits   [0,3]: type (bits 40-43).
> + * Bit        4: s    (descriptor type, bit 44).
> + * Bit    [5,6]: dpl  (descriptor privilege level, bits 45-46).
> + * Bit        7: p    (segment-present, bit 47).
> + * Bit        8: avl  (available for system software, bit 52).
> + * Bit        9: l    (64-bit code segment, bit 53).
> + * Bit       10: db   (meaning depends on the segment, bit 54).
> + * Bit       11: g    (granularity, bit 55)
> + * Bits [12,15]: unused, must be blank.
> + *
> + * A more complete description of the meaning of this fields can be
> + * obtained from the Intel SDM, Volume 3, section 3.4.5.
> + */
> +
> +struct vcpu_hvm_x86_64 {
> +    uint64_t rax;
> +    uint64_t rcx;
> +    uint64_t rdx;
> +    uint64_t rbx;
> +    uint64_t rsp;
> +    uint64_t rbp;
> +    uint64_t rsi;
> +    uint64_t rdi;
> +    uint64_t rip;
> +    uint64_t rflags;
> +
> +    uint64_t cr0;
> +    uint64_t cr3;
> +    uint64_t cr4;
> +    uint64_t efer;
> +
> +    /*
> +     * Using VCPU_HVM_MODE_64B implies that the vCPU is launched
> +     * directly in long mode, so the cached parts of the segment
> +     * registers get set to match that environment.
> +     *
> +     * If the user wants to launch the vCPU in compatibility mode
> +     * the 32-bit structure should be used instead.
> +     */
> +};
> +
> +struct vcpu_hvm_context {
> +#define VCPU_HVM_MODE_32B 0  /* 32bit fields of the structure will be used. */
> +#define VCPU_HVM_MODE_64B 1  /* 64bit fields of the structure will be used. */
> +    uint32_t mode;
> +
> +    uint32_t pad;
> +
> +    /* CPU registers. */
> +    union {
> +        struct vcpu_hvm_x86_32 x86_32;
> +        struct vcpu_hvm_x86_64 x86_64;
> +    } cpu_regs;
> +};
> +typedef struct vcpu_hvm_context vcpu_hvm_context_t;
> +DEFINE_XEN_GUEST_HANDLE(vcpu_hvm_context_t);
> +
> +#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/hvm/hvm_xs_strings.h b/include/xen/hvm/hvm_xs_strings.h
> index 8aec935..fea1dd4 100644
> --- a/include/xen/hvm/hvm_xs_strings.h
> +++ b/include/xen/hvm/hvm_xs_strings.h
> @@ -20,6 +20,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2013, Citrix Systems
>   */
>  
>  #ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
> @@ -69,6 +71,7 @@
>  #define HVM_XS_SYSTEM_SERIAL_NUMBER    "bios-strings/system-serial-number"
>  #define HVM_XS_ENCLOSURE_MANUFACTURER  "bios-strings/enclosure-manufacturer"
>  #define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number"
> +#define HVM_XS_ENCLOSURE_ASSET_TAG     "bios-strings/enclosure-asset-tag"
>  #define HVM_XS_BATTERY_MANUFACTURER    "bios-strings/battery-manufacturer"
>  #define HVM_XS_BATTERY_DEVICE_NAME     "bios-strings/battery-device-name"
>  
> diff --git a/include/xen/hvm/ioreq.h b/include/xen/hvm/ioreq.h
> index 5b5fedf..d309d12 100644
> --- a/include/xen/hvm/ioreq.h
> +++ b/include/xen/hvm/ioreq.h
> @@ -83,8 +83,17 @@ typedef struct buf_ioreq buf_ioreq_t;
>  
>  #define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
>  struct buffered_iopage {
> -    unsigned int read_pointer;
> -    unsigned int write_pointer;
> +#ifdef __XEN__
> +    union bufioreq_pointers {
> +        struct {
> +#endif
> +            uint32_t read_pointer;
> +            uint32_t write_pointer;
> +#ifdef __XEN__
> +        };
> +        uint64_t full;
> +    } ptrs;
> +#endif
>      buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
>  }; /* NB. Size of this structure must be no greater than one page. */
>  typedef struct buffered_iopage buffered_iopage_t;
> @@ -94,14 +103,19 @@ typedef struct buffered_iopage buffered_iopage_t;
>   * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION.
>   */
>  
> -/* Version 0 (default): Traditional Xen locations. */
> +/*
> + * Version 0 (default): Traditional (obsolete) Xen locations.
> + *
> + * These are now only used for compatibility with VMs migrated
> + * from older Xen versions.
> + */
>  #define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40
>  #define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04)
>  #define ACPI_PM_TMR_BLK_ADDRESS_V0   (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08)
>  #define ACPI_GPE0_BLK_ADDRESS_V0     (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20)
>  #define ACPI_GPE0_BLK_LEN_V0         0x08
>  
> -/* Version 1: Locations preferred by modern Qemu. */
> +/* Version 1: Locations preferred by modern Qemu (including Qemu-trad). */
>  #define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000
>  #define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04)
>  #define ACPI_PM_TMR_BLK_ADDRESS_V1   (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08)
> diff --git a/include/xen/hvm/params.h b/include/xen/hvm/params.h
> index a2d43bc..2ec2e7c 100644
> --- a/include/xen/hvm/params.h
> +++ b/include/xen/hvm/params.h
> @@ -16,6 +16,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2007, Keir Fraser
>   */
>  
>  #ifndef __XEN_PUBLIC_HVM_PARAMS_H__
> @@ -27,18 +29,47 @@
>   * Parameter space for HVMOP_{set,get}_param.
>   */
>  
> +#define HVM_PARAM_CALLBACK_IRQ 0
> +#define HVM_PARAM_CALLBACK_IRQ_TYPE_MASK xen_mk_ullong(0xFF00000000000000)
>  /*
>   * How should CPU0 event-channel notifications be delivered?
> - * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
> - * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
> - *                  Domain = val[47:32], Bus  = val[31:16],
> - *                  DevFn  = val[15: 8], IntX = val[ 1: 0]
> - * val[63:56] == 2: val[7:0] is a vector number, check for
> - *                  XENFEAT_hvm_callback_vector to know if this delivery
> - *                  method is available.
> + *
>   * If val == 0 then CPU0 event-channel notifications are not delivered.
> + * If val != 0, val[63:56] encodes the type, as follows:
> + */
> +
> +#define HVM_PARAM_CALLBACK_TYPE_GSI      0
> +/*
> + * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases val == 0,
> + * and disables all notifications.
> + */
> +
> +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
> +/*
> + * val[55:0] is a delivery PCI INTx line:
> + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0]
>   */
> -#define HVM_PARAM_CALLBACK_IRQ 0
> +
> +#if defined(__i386__) || defined(__x86_64__)
> +#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
> +/*
> + * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector to know
> + * if this delivery method is available.
> + */
> +#elif defined(__arm__) || defined(__aarch64__)
> +#define HVM_PARAM_CALLBACK_TYPE_PPI      2
> +/*
> + * val[55:16] needs to be zero.
> + * val[15:8] is interrupt flag of the PPI used by event-channel:
> + *  bit 8: the PPI is edge(1) or level(0) triggered
> + *  bit 9: the PPI is active low(1) or high(0)
> + * val[7:0] is a PPI number used by event-channel.
> + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
> + * the notification is handled by the interrupt controller.
> + */
> +#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK      0xFF00
> +#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL 2
> +#endif
>  
>  /*
>   * These are not used by Xen. They are here for convenience of HVM-guest
> @@ -96,11 +127,26 @@
>  #define _HVMPV_reference_tsc 3
>  #define HVMPV_reference_tsc  (1 << _HVMPV_reference_tsc)
>  
> +/* Use Hypercall for remote TLB flush */
> +#define _HVMPV_hcall_remote_tlb_flush 4
> +#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush)
> +
> +/* Use APIC assist */
> +#define _HVMPV_apic_assist 5
> +#define HVMPV_apic_assist (1 << _HVMPV_apic_assist)
> +
> +/* Enable crash MSRs */
> +#define _HVMPV_crash_ctl 6
> +#define HVMPV_crash_ctl (1 << _HVMPV_crash_ctl)
> +
>  #define HVMPV_feature_mask \
> -     (HVMPV_base_freq | \
> -      HVMPV_no_freq | \
> -      HVMPV_time_ref_count | \
> -      HVMPV_reference_tsc)
> +        (HVMPV_base_freq | \
> +         HVMPV_no_freq | \
> +         HVMPV_time_ref_count | \
> +         HVMPV_reference_tsc | \
> +         HVMPV_hcall_remote_tlb_flush | \
> +         HVMPV_apic_assist | \
> +         HVMPV_crash_ctl)
>  
>  #endif
>  
> @@ -162,8 +208,7 @@
>   */
>  #define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
>  
> -/* Enable blocking memory events, async or sync (pause vcpu until response) 
> - * onchangeonly indicates messages only on a change of value */
> +/* Deprecated */
>  #define HVM_PARAM_MEMORY_EVENT_CR0          20
>  #define HVM_PARAM_MEMORY_EVENT_CR3          21
>  #define HVM_PARAM_MEMORY_EVENT_CR4          22
> @@ -171,18 +216,12 @@
>  #define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP  25
>  #define HVM_PARAM_MEMORY_EVENT_MSR          30
>  
> -#define HVMPME_MODE_MASK       (3 << 0)
> -#define HVMPME_mode_disabled   0
> -#define HVMPME_mode_async      1
> -#define HVMPME_mode_sync       2
> -#define HVMPME_onchangeonly    (1 << 2)
> -
>  /* Boolean: Enable nestedhvm (hvm only) */
>  #define HVM_PARAM_NESTEDHVM    24
>  
>  /* Params for the mem event rings */
>  #define HVM_PARAM_PAGING_RING_PFN   27
> -#define HVM_PARAM_ACCESS_RING_PFN   28
> +#define HVM_PARAM_MONITOR_RING_PFN  28
>  #define HVM_PARAM_SHARING_RING_PFN  29
>  
>  /* SHUTDOWN_* action in case of a triple fault */
> @@ -194,6 +233,52 @@
>  /* Location of the VM Generation ID in guest physical address space. */
>  #define HVM_PARAM_VM_GENERATION_ID_ADDR 34
>  
> -#define HVM_NR_PARAMS          35
> +/*
> + * Set mode for altp2m:
> + *  disabled: don't activate altp2m (default)
> + *  mixed: allow access to all altp2m ops for both in-guest and external tools
> + *  external: allow access to external privileged tools only
> + *  limited: guest only has limited access (ie. control VMFUNC and #VE)
> + */
> +#define HVM_PARAM_ALTP2M       35
> +#define XEN_ALTP2M_disabled      0
> +#define XEN_ALTP2M_mixed         1
> +#define XEN_ALTP2M_external      2
> +#define XEN_ALTP2M_limited       3
> +
> +/*
> + * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to
> + * save/restore.  This is a workaround for a hardware limitation that
> + * does not allow the full FIP/FDP and FCS/FDS to be restored.
> + *
> + * Valid values are:
> + *
> + * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU
> + *    has FPCSDS feature).
> + *
> + * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of
> + *    FIP/FDP.
> + *
> + * 0: allow hypervisor to choose based on the value of FIP/FDP
> + *    (default if CPU does not have FPCSDS).
> + *
> + * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU
> + * never saves FCS/FDS and this parameter should be left at the
> + * default of 8.
> + */
> +#define HVM_PARAM_X87_FIP_WIDTH 36
> +
> +/*
> + * TSS (and its size) used on Intel when CR0.PE=0. The address occupies
> + * the low 32 bits, while the size is in the high 32 ones.
> + */
> +#define HVM_PARAM_VM86_TSS_SIZED 37
> +
> +/* Enable MCA capabilities. */
> +#define HVM_PARAM_MCA_CAP 38
> +#define XEN_HVM_MCA_CAP_LMCE   (xen_mk_ullong(1) << 0)
> +#define XEN_HVM_MCA_CAP_MASK   XEN_HVM_MCA_CAP_LMCE
> +
> +#define HVM_NR_PARAMS 39
>  
>  #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
> diff --git a/include/xen/hvm/save.h b/include/xen/hvm/save.h
> index cc8b5fd..0bd240d 100644
> --- a/include/xen/hvm/save.h
> +++ b/include/xen/hvm/save.h
> @@ -63,13 +63,15 @@ struct hvm_save_descriptor {
>  
>  #ifdef __XEN__
>  # define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix)     \
> -    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); 
> } \
> -    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \
> +    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h, uint32_t size)  \
> +        { return _fix(h, size); }                                         \
> +    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];};  \
>      struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; }                   
>  
>  # include <xen/lib.h> /* BUG() */
>  # define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                         \
> -    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return 
> -1; } \
> +    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h, uint32_t size) \
> +        { BUG(); return -1; }                                            \
>      struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \
>      struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; }                   
>  #else
> @@ -89,7 +91,7 @@ struct hvm_save_descriptor {
>  # define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x)))
>  
>  # define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x 
> *)(0))->cpt)-1)
> -# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst)
> +# define HVM_SAVE_FIX_COMPAT(_x, _dst, _size) 
> __HVM_SAVE_FIX_COMPAT_##_x(_dst, _size)
>  #endif
>  
>  /* 
> diff --git a/include/xen/io/9pfs.h b/include/xen/io/9pfs.h
> new file mode 100644
> index 0000000..4bfd5d4
> --- /dev/null
> +++ b/include/xen/io/9pfs.h
> @@ -0,0 +1,49 @@
> +/*
> + * 9pfs.h -- Xen 9PFS transport
> + *
> + * Refer to docs/misc/9pfs.markdown for the specification
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2017 Stefano Stabellini <stefano@xxxxxxxxxxx>
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_9PFS_H__
> +#define __XEN_PUBLIC_IO_9PFS_H__
> +
> +#include "../grant_table.h"
> +#include "ring.h"
> +
> +/*
> + * See docs/misc/9pfs.markdown in xen.git for the full specification:
> + * https://xenbits.xen.org/docs/unstable/misc/9pfs.html
> + */
> +DEFINE_XEN_FLEX_RING_AND_INTF(xen_9pfs);
> +
> +#endif
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/io/blkif.h b/include/xen/io/blkif.h
> index 6baf7fb..15a71e3 100644
> --- a/include/xen/io/blkif.h
> +++ b/include/xen/io/blkif.h
> @@ -89,8 +89,22 @@
>   *      Values:         string
>   *
>   *      A free formatted string providing sufficient information for the
> - *      backend driver to open the backing device.  (e.g. the path to the
> - *      file or block device representing the backing store.)
> + *      hotplug script to attach the device and provide a suitable
> + *      handler (ie: a block device) for blkback to use.
> + *
> + * physical-device
> + *      Values:         "MAJOR:MINOR"
> + *      Notes: 11
> + *
> + *      MAJOR and MINOR are the major number and minor number of the
> + *      backing device respectively.
> + *
> + * physical-device-path
> + *      Values:         path string
> + *
> + *      A string that contains the absolute path to the disk image. On
> + *      NetBSD and Linux this is always a block device, while on FreeBSD
> + *      it can be either a block device or a regular file.
>   *
>   * type
>   *      Values:         "file", "phy", "tap"
> @@ -202,10 +216,9 @@
>   *      Default Value:  1
>   *
>   *      This optional property, set by the toolstack, instructs the backend
> - *      to offer discard to the frontend. If the property is missing the
> - *      backend should offer discard if the backing storage actually supports
> - *      it. This optional property, set by the toolstack, requests that the
> - *      backend offer, or not offer, discard to the frontend.
> + *      to offer (or not to offer) discard to the frontend. If the property
> + *      is missing the backend should offer discard if the backing storage
> + *      actually supports it.
>   *
>   * discard-alignment
>   *      Values:         <uint32_t>
> @@ -385,6 +398,55 @@
>   *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
>   *(10) The discard-secure property may be present and will be set to 1 if the
>   *     backing device supports secure discard.
> + *(11) Only used by Linux and NetBSD.
> + */
> +
> +/*
> + * Multiple hardware queues/rings:
> + * If supported, the backend will write the key "multi-queue-max-queues" to
> + * the directory for that vbd, and set its value to the maximum supported
> + * number of queues.
> + * Frontends that are aware of this feature and wish to use it can write the
> + * key "multi-queue-num-queues" with the number they wish to use, which must be
> + * greater than zero, and no more than the value reported by the backend in
> + * "multi-queue-max-queues".
> + *
> + * For frontends requesting just one queue, the usual event-channel and
> + * ring-ref keys are written as before, simplifying the backend processing
> + * to avoid distinguishing between a frontend that doesn't understand the
> + * multi-queue feature, and one that does, but requested only one queue.
> + *
> + * Frontends requesting two or more queues must not write the toplevel
> + * event-channel and ring-ref keys, instead writing those keys under sub-keys
> + * having the name "queue-N" where N is the integer ID of the queue/ring for
> + * which those keys belong. Queues are indexed from zero.
> + * For example, a frontend with two queues must write the following set of
> + * queue-related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
> + * It is also possible to use multiple queues/rings together with
> + * feature multi-page ring buffer.
> + * For example, a frontend that requests two queues/rings, each with a ring
> + * buffer of two pages, must write the following set of related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/ring-page-order = "1"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
>   */
>  
>  /*
> diff --git a/include/xen/io/console.h b/include/xen/io/console.h
> index e2cd97f..0f0711f 100644
> --- a/include/xen/io/console.h
> +++ b/include/xen/io/console.h
> @@ -27,6 +27,8 @@
>  #ifndef __XEN_PUBLIC_IO_CONSOLE_H__
>  #define __XEN_PUBLIC_IO_CONSOLE_H__
>  
> +#include "ring.h"
> +
>  typedef uint32_t XENCONS_RING_IDX;
>  
>  #define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
> @@ -38,6 +40,10 @@ struct xencons_interface {
>      XENCONS_RING_IDX out_cons, out_prod;
>  };
>  
> +#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
> +DEFINE_XEN_FLEX_RING(xencons);
> +#endif
> +
>  #endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
>  
>  /*
> diff --git a/include/xen/io/displif.h b/include/xen/io/displif.h
> new file mode 100644
> index 0000000..8a94f1f
> --- /dev/null
> +++ b/include/xen/io/displif.h
> @@ -0,0 +1,864 @@
> +/******************************************************************************
> + * displif.h
> + *
> + * Unified display device I/O interface for Xen guest OSes.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2016-2017 EPAM Systems Inc.
> + *
> + * Authors: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> + *          Oleksandr Grytsov <oleksandr_grytsov@xxxxxxxx>
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_DISPLIF_H__
> +#define __XEN_PUBLIC_IO_DISPLIF_H__
> +
> +#include "ring.h"
> +#include "../grant_table.h"
> +
> +/*
> + 
> ******************************************************************************
> + *                           Protocol version
> + 
> ******************************************************************************
> + */
> +#define XENDISPL_PROTOCOL_VERSION     "1"
> +
> +/*
> + 
> ******************************************************************************
> + *                  Main features provided by the protocol
> + 
> ******************************************************************************
> + * This protocol aims to provide a unified protocol which fits more
> + * sophisticated use-cases than a framebuffer device can handle. At the
> + * moment basic functionality is supported with the intention to be extended:
> + *  o multiple dynamically allocated/destroyed framebuffers
> + *  o buffers of arbitrary sizes
> + *  o buffer allocation at either back or front end
> + *  o better configuration options including multiple display support
> + *
> + * Note: existing fbif can be used together with displif running at the
> + * same time, e.g. on Linux one provides framebuffer and another DRM/KMS
> + *
> + * Note: display resolution (XenStore's "resolution" property) defines
> + * visible area of the virtual display. At the same time resolution of
> + * the display and frame buffers may differ: buffers can be smaller, equal
> + * or bigger than the visible area. This is to enable use-cases, where backend
> + * may do some post-processing of the display and frame buffers supplied,
> + * e.g. those buffers can be just a part of the final composition.
> + *
> + 
> ******************************************************************************
> + *                        Direction of improvements
> + 
> ******************************************************************************
> + * Future extensions to the existing protocol may include:
> + *  o display/connector cloning
> + *  o allocation of objects other than display buffers
> + *  o plane/overlay support
> + *  o scaling support
> + *  o rotation support
> + *
> + 
> ******************************************************************************
> + *                  Feature and Parameter Negotiation
> + 
> ******************************************************************************
> + *
> + * Front->back notifications: when enqueuing a new request, sending a
> + * notification can be made conditional on xendispl_req (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Backends must set
> + * xendispl_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
> + *
> + * Back->front notifications: when enqueuing a new response, sending a
> + * notification can be made conditional on xendispl_resp (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Frontends must set
> + * xendispl_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
> + *
> + * The two halves of a para-virtual display driver utilize nodes within
> + * XenStore to communicate capabilities and to negotiate operating parameters.
> + * This section enumerates these nodes which reside in the respective front and
> + * backend portions of XenStore, following the XenBus convention.
> + *
> + * All data in XenStore is stored as strings. Nodes specifying numeric
> + * values are encoded in decimal. Integer value ranges listed below are
> + * expressed as fixed sized integer types capable of storing the conversion
> + * of a properly formatted node string, without loss of information.
> + *
> + 
> ******************************************************************************
> + *                        Example configuration
> + 
> ******************************************************************************
> + *
> + * Note: depending on the use-case backend can expose more display connectors
> + * than the underlying HW physically has by employing SW graphics compositors
> + *
> + * This is an example of backend and frontend configuration:
> + *
> + *--------------------------------- Backend 
> -----------------------------------
> + *
> + * /local/domain/0/backend/vdispl/1/0/frontend-id = "1"
> + * /local/domain/0/backend/vdispl/1/0/frontend = 
> "/local/domain/1/device/vdispl/0"
> + * /local/domain/0/backend/vdispl/1/0/state = "4"
> + * /local/domain/0/backend/vdispl/1/0/versions = "1,2"
> + *
> + *--------------------------------- Frontend 
> ----------------------------------
> + *
> + * /local/domain/1/device/vdispl/0/backend-id = "0"
> + * /local/domain/1/device/vdispl/0/backend = 
> "/local/domain/0/backend/vdispl/1/0"
> + * /local/domain/1/device/vdispl/0/state = "4"
> + * /local/domain/1/device/vdispl/0/version = "1"
> + * /local/domain/1/device/vdispl/0/be-alloc = "1"
> + *
> + *-------------------------- Connector 0 configuration 
> ------------------------
> + *
> + * /local/domain/1/device/vdispl/0/0/resolution = "1920x1080"
> + * /local/domain/1/device/vdispl/0/0/req-ring-ref = "2832"
> + * /local/domain/1/device/vdispl/0/0/req-event-channel = "15"
> + * /local/domain/1/device/vdispl/0/0/evt-ring-ref = "387"
> + * /local/domain/1/device/vdispl/0/0/evt-event-channel = "16"
> + *
> + *-------------------------- Connector 1 configuration 
> ------------------------
> + *
> + * /local/domain/1/device/vdispl/0/1/resolution = "800x600"
> + * /local/domain/1/device/vdispl/0/1/req-ring-ref = "2833"
> + * /local/domain/1/device/vdispl/0/1/req-event-channel = "17"
> + * /local/domain/1/device/vdispl/0/1/evt-ring-ref = "388"
> + * /local/domain/1/device/vdispl/0/1/evt-event-channel = "18"
> + *
> + 
> ******************************************************************************
> + *                            Backend XenBus Nodes
> + 
> ******************************************************************************
> + *
> + *----------------------------- Protocol version 
> ------------------------------
> + *
> + * versions
> + *      Values:         <string>
> + *
> + *      List of XENDISPL_LIST_SEPARATOR separated protocol versions supported
> + *      by the backend. For example "1,2,3".
> + *
> + 
> ******************************************************************************
> + *                            Frontend XenBus Nodes
> + 
> ******************************************************************************
> + *
> + *-------------------------------- Addressing 
> ---------------------------------
> + *
> + * dom-id
> + *      Values:         <uint16_t>
> + *
> + *      Domain identifier.
> + *
> + * dev-id
> + *      Values:         <uint16_t>
> + *
> + *      Device identifier.
> + *
> + * conn-idx
> + *      Values:         <uint8_t>
> + *
> + *      Zero based contiguous index of the connector.
> + *      /local/domain/<dom-id>/device/vdispl/<dev-id>/<conn-idx>/...
> + *
> + *----------------------------- Protocol version 
> ------------------------------
> + *
> + * version
> + *      Values:         <string>
> + *
> + *      Protocol version, chosen among the ones supported by the backend.
> + *
> + *------------------------- Backend buffer allocation 
> -------------------------
> + *
> + * be-alloc
> + *      Values:         "0", "1"
> + *
> + *      If value is set to "1", then backend can be a buffer provider/allocator
> + *      for this domain during XENDISPL_OP_DBUF_CREATE operation (see below
> + *      for negotiation).
> + *      If value is not "1" or is omitted, frontend must allocate buffers itself.
> + *
> + *----------------------------- Connector settings 
> ----------------------------
> + *
> + * resolution
> + *      Values:         <width, uint32_t>x<height, uint32_t>
> + *
> + *      Width and height of the connector in pixels separated by
> + *      XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the
> + *      display.
> + *
> + *------------------ Connector Request Transport Parameters 
> -------------------
> + *
> + * This communication path is used to deliver requests from frontend to backend
> + * and get the corresponding responses from backend to frontend,
> + * set up per connector.
> + *
> + * req-event-channel
> + *      Values:         <uint32_t>
> + *
> + *      The identifier of the Xen connector's control event channel
> + *      used to signal activity in the ring buffer.
> + *
> + * req-ring-ref
> + *      Values:         <uint32_t>
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      a sole page of connector's control ring buffer.
> + *
> + *------------------- Connector Event Transport Parameters 
> --------------------
> + *
> + * This communication path is used to deliver asynchronous events from backend
> + * to frontend, set up per connector.
> + *
> + * evt-event-channel
> + *      Values:         <uint32_t>
> + *
> + *      The identifier of the Xen connector's event channel
> + *      used to signal activity in the ring buffer.
> + *
> + * evt-ring-ref
> + *      Values:         <uint32_t>
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      a sole page of connector's event ring buffer.
> + */
> +
> +/*
> + 
> ******************************************************************************
> + *                               STATE DIAGRAMS
> + 
> ******************************************************************************
> + *
> + * Tool stack creates front and back state nodes with initial state
> + * XenbusStateInitialising.
> + * Tool stack creates and sets up frontend display configuration
> + * nodes per domain.
> + *
> + *-------------------------------- Normal flow 
> --------------------------------
> + *
> + * Front                                Back
> + * =================================    =====================================
> + * XenbusStateInitialising              XenbusStateInitialising
> + *                                       o Query backend device 
> identification
> + *                                         data.
> + *                                       o Open and validate backend device.
> + *                                                |
> + *                                                |
> + *                                                V
> + *                                      XenbusStateInitWait
> + *
> + * o Query frontend configuration
> + * o Allocate and initialize
> + *   event channels per configured
> + *   connector.
> + * o Publish transport parameters
> + *   that will be in effect during
> + *   this connection.
> + *              |
> + *              |
> + *              V
> + * XenbusStateInitialised
> + *
> + *                                       o Query frontend transport 
> parameters.
> + *                                       o Connect to the event channels.
> + *                                                |
> + *                                                |
> + *                                                V
> + *                                      XenbusStateConnected
> + *
> + *  o Create and initialize OS
> + *    virtual display connectors
> + *    as per configuration.
> + *              |
> + *              |
> + *              V
> + * XenbusStateConnected
> + *
> + *                                      XenbusStateUnknown
> + *                                      XenbusStateClosed
> + *                                      XenbusStateClosing
> + * o Remove virtual display device
> + * o Remove event channels
> + *              |
> + *              |
> + *              V
> + * XenbusStateClosed
> + *
> + *------------------------------- Recovery flow 
> -------------------------------
> + *
> + * In case of frontend unrecoverable errors backend handles that as
> + * if frontend goes into the XenbusStateClosed state.
> + *
> + * In case of backend unrecoverable errors frontend tries removing
> + * the virtualized device. If this is possible at the moment of error,
> + * then frontend goes into the XenbusStateInitialising state and is ready for
> + * new connection with backend. If the virtualized device is still in use and
> + * cannot be removed, then frontend goes into the XenbusStateReconfiguring state
> + * until either the virtualized device is removed or backend initiates a new
> + * connection. On the virtualized device removal frontend goes into the
> + * XenbusStateInitialising state.
> + *
> + * Note on XenbusStateReconfiguring state of the frontend: if backend has
> + * unrecoverable errors then frontend cannot send requests to the backend
> + * and thus cannot provide functionality of the virtualized device anymore.
> + * After backend is back to normal the virtualized device may still hold some
> + * state: configuration in use, allocated buffers, client application state etc.
> + * In most cases, this will require frontend to implement complex recovery
> + * reconnect logic. Instead, by going into XenbusStateReconfiguring state,
> + * frontend will make sure no new clients of the virtualized device are
> + * accepted, allow existing client(s) to exit gracefully by signaling error
> + * state etc.
> + * Once all the clients are gone frontend can reinitialize the virtualized
> + * device and get into XenbusStateInitialising state again signaling the
> + * backend that a new connection can be made.
> + *
> + * There are multiple conditions possible under which frontend will go from
> + * XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS
> + * specific. For example:
> + * 1. The underlying OS framework may provide callbacks to signal that the last
> + *    client of the virtualized device has gone and the device can be removed
> + * 2. Frontend can schedule a deferred work (timer/tasklet/workqueue)
> + *    to periodically check if this is the right time to re-try removal of
> + *    the virtualized device.
> + * 3. By any other means.
> + *
> + 
> ******************************************************************************
> + *                             REQUEST CODES
> + 
> ******************************************************************************
> + * Request codes [0; 15] are reserved and must not be used
> + */
> +
> +#define XENDISPL_OP_DBUF_CREATE       0x10
> +#define XENDISPL_OP_DBUF_DESTROY      0x11
> +#define XENDISPL_OP_FB_ATTACH         0x12
> +#define XENDISPL_OP_FB_DETACH         0x13
> +#define XENDISPL_OP_SET_CONFIG        0x14
> +#define XENDISPL_OP_PG_FLIP           0x15
> +
> +/*
> + 
> ******************************************************************************
> + *                                 EVENT CODES
> + 
> ******************************************************************************
> + */
> +#define XENDISPL_EVT_PG_FLIP          0x00
> +
> +/*
> + 
> ******************************************************************************
> + *               XENSTORE FIELD AND PATH NAME STRINGS, HELPERS
> + 
> ******************************************************************************
> + */
> +#define XENDISPL_DRIVER_NAME          "vdispl"
> +
> +#define XENDISPL_LIST_SEPARATOR       ","
> +#define XENDISPL_RESOLUTION_SEPARATOR "x"
> +
> +#define XENDISPL_FIELD_BE_VERSIONS    "versions"
> +#define XENDISPL_FIELD_FE_VERSION     "version"
> +#define XENDISPL_FIELD_REQ_RING_REF   "req-ring-ref"
> +#define XENDISPL_FIELD_REQ_CHANNEL    "req-event-channel"
> +#define XENDISPL_FIELD_EVT_RING_REF   "evt-ring-ref"
> +#define XENDISPL_FIELD_EVT_CHANNEL    "evt-event-channel"
> +#define XENDISPL_FIELD_RESOLUTION     "resolution"
> +#define XENDISPL_FIELD_BE_ALLOC       "be-alloc"
> +
> +/*
> + 
> ******************************************************************************
> + *                          STATUS RETURN CODES
> + 
> ******************************************************************************
> + *
> + * Status return code is zero on success and -XEN_EXX on failure.
> + *
> + 
> ******************************************************************************
> + *                              Assumptions
> + 
> ******************************************************************************
> + * o usage of grant reference 0 as invalid grant reference:
> + *   grant reference 0 is valid, but never exposed to a PV driver,
> + *   because of the fact it is already in use/reserved by the PV console.
> + * o all references in this document to page sizes must be treated
> + *   as pages of size XEN_PAGE_SIZE unless otherwise noted.
> + *
> + 
> ******************************************************************************
> + *       Description of the protocol between frontend and backend driver
> + 
> ******************************************************************************
> + *
> + * The two halves of a Para-virtual display driver communicate with
> + * each other using shared pages and event channels.
> + * Shared page contains a ring with request/response packets.
> + *
> + * All reserved fields in the structures below must be 0.
> + * Display buffer's cookie of value 0 is treated as invalid.
> + * Framebuffer's cookie of value 0 is treated as invalid.
> + *
> + * For all request/response/event packets that use cookies:
> + *   dbuf_cookie - uint64_t, unique to guest domain value used by the backend
> + *     to map remote display buffer to its local one
> + *   fb_cookie - uint64_t, unique to guest domain value used by the backend
> + *     to map remote framebuffer to its local one
> + *
> + *---------------------------------- Requests 
> ---------------------------------
> + *
> + * All requests/responses, which are not connector specific, must be sent over
> + * control ring of the connector which has the index value of 0:
> + *   /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
> + *
> + * All request packets have the same length (64 octets)
> + * All request packets have common header:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |    operation   |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + *   id - uint16_t, private guest value, echoed in response
> + *   operation - uint8_t, operation code, XENDISPL_OP_???
> + *
> + * Request dbuf creation - request creation of a display buffer.
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |_OP_DBUF_CREATE |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie low 32-bit                      | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie high 32-bit                     | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                               width                               | 20
> + * +----------------+----------------+----------------+----------------+
> + * |                               height                              | 24
> + * +----------------+----------------+----------------+----------------+
> + * |                                bpp                                | 28
> + * +----------------+----------------+----------------+----------------+
> + * |                             buffer_sz                             | 32
> + * +----------------+----------------+----------------+----------------+
> + * |                               flags                               | 36
> + * +----------------+----------------+----------------+----------------+
> + * |                           gref_directory                          | 40
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 44
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Must be sent over control ring of the connector which has the index
> + * value of 0:
> + *   /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
> + * All unused bits in flags field must be set to 0.
> + *
> + * An attempt to create multiple display buffers with the same dbuf_cookie is
> + * an error. dbuf_cookie can be re-used after destroying the corresponding
> + * display buffer.
> + *
> + * Width and height of the display buffers can be smaller, equal or bigger
> + * than the connector's resolution. Depth/pixel format of the individual
> + * buffers can differ as well.
> + *
> + * width - uint32_t, width in pixels
> + * height - uint32_t, height in pixels
> + * bpp - uint32_t, bits per pixel
> + * buffer_sz - uint32_t, buffer size to be allocated, octets
> + * flags - uint32_t, flags of the operation
> + *   o XENDISPL_DBUF_FLG_REQ_ALLOC - if set, then backend is requested
> + *     to allocate the buffer with the parameters provided in this request.
> + *     Page directory is handled as follows:
> + *       Frontend on request:
> + *         o allocates pages for the directory (gref_directory,
> + *           gref_dir_next_page(s))
> + *         o grants permissions for the pages of the directory to the backend
> + *         o sets gref_dir_next_page fields
> + *       Backend on response:
> + *         o grants permissions for the pages of the buffer allocated to
> + *           the frontend
> + *         o fills in page directory with grant references
> + *           (gref[] in struct xendispl_page_directory)
> + * gref_directory - grant_ref_t, a reference to the first shared page
> + *   describing shared buffer references. At least one page exists. If shared
> + *   buffer size (buffer_sz) exceeds what can be addressed by this single 
> page,
> + *   then reference to the next page must be supplied (see gref_dir_next_page
> + *   below)
> + */
> +
> +#define XENDISPL_DBUF_FLG_REQ_ALLOC       (1 << 0)
> +
> +struct xendispl_dbuf_create_req {
> +    uint64_t dbuf_cookie;
> +    uint32_t width;
> +    uint32_t height;
> +    uint32_t bpp;
> +    uint32_t buffer_sz;
> +    uint32_t flags;
> +    grant_ref_t gref_directory;
> +};
> +
> +/*
> + * Shared page for XENDISPL_OP_DBUF_CREATE buffer descriptor (gref_directory 
> in
> + * the request) employs a list of pages, describing all pages of the shared
> + * data buffer:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |                        gref_dir_next_page                         | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                              gref[0]                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                              gref[i]                              | 
> i*4+8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             gref[N - 1]                           | 
> N*4+4
> + * +----------------+----------------+----------------+----------------+
> + *
> + * gref_dir_next_page - grant_ref_t, reference to the next page describing
> + *   page directory. Must be 0 if there are no more pages in the list.
> + * gref[i] - grant_ref_t, reference to a shared page of the buffer
> + *   allocated at XENDISPL_OP_DBUF_CREATE
> + *
> + * Number of grant_ref_t entries in the whole page directory is not
> + * passed, but instead can be calculated as:
> + *   num_grefs_total = (XENDISPL_OP_DBUF_CREATE.buffer_sz + XEN_PAGE_SIZE - 
> 1) /
> + *       XEN_PAGE_SIZE
> + */
> +
> +struct xendispl_page_directory {
> +    grant_ref_t gref_dir_next_page;
> +    grant_ref_t gref[1]; /* Variable length */
> +};
> +
> +/*
> + * Request dbuf destruction - destroy a previously allocated display buffer:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |_OP_DBUF_DESTROY|   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie low 32-bit                      | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie high 32-bit                     | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Must be sent over control ring of the connector which has the index
> + * value of 0:
> + *   /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
> + */
> +
> +struct xendispl_dbuf_destroy_req {
> +    uint64_t dbuf_cookie;
> +};
> +
> +/*
> + * Request framebuffer attachment - request attachment of a framebuffer to
> + * previously created display buffer.
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                | _OP_FB_ATTACH  |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie low 32-bit                      | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                       dbuf_cookie high 32-bit                     | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie low 32-bit                       | 20
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie high 32-bit                      | 24
> + * +----------------+----------------+----------------+----------------+
> + * |                               width                               | 28
> + * +----------------+----------------+----------------+----------------+
> + * |                               height                              | 32
> + * +----------------+----------------+----------------+----------------+
> + * |                            pixel_format                           | 36
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Must be sent over control ring of the connector which has the index
> + * value of 0:
> + *   /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
> + * Width and height can be smaller, equal or bigger than the connector's
> + * resolution.
> + *
> + * An attempt to create multiple frame buffers with the same fb_cookie is
> + * an error. fb_cookie can be re-used after destroying the corresponding
> + * frame buffer.
> + *
> + * width - uint32_t, width in pixels
> + * height - uint32_t, height in pixels
> + * pixel_format - uint32_t, pixel format of the framebuffer, FOURCC code
> + */
> +
> +struct xendispl_fb_attach_req {
> +    uint64_t dbuf_cookie;
> +    uint64_t fb_cookie;
> +    uint32_t width;
> +    uint32_t height;
> +    uint32_t pixel_format;
> +};
> +
> +/*
> + * Request framebuffer detach - detach a previously
> + * attached framebuffer from the display buffer in request:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |  _OP_FB_DETACH |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie low 32-bit                       | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie high 32-bit                      | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Must be sent over control ring of the connector which has the index
> + * value of 0:
> + *   /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
> + */
> +
> +struct xendispl_fb_detach_req {
> +    uint64_t fb_cookie;
> +};
> +
> +/*
> + * Request configuration set/reset - request to set or reset
> + * the configuration/mode of the display:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                | _OP_SET_CONFIG |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie low 32-bit                       | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie high 32-bit                      | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                                 x                                 | 20
> + * +----------------+----------------+----------------+----------------+
> + * |                                 y                                 | 24
> + * +----------------+----------------+----------------+----------------+
> + * |                               width                               | 28
> + * +----------------+----------------+----------------+----------------+
> + * |                               height                              | 32
> + * +----------------+----------------+----------------+----------------+
> + * |                                bpp                                | 36
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Pass all zeros to reset, otherwise command is treated as
> + * configuration set.
> + * Framebuffer's cookie defines which framebuffer/dbuf must be
> + * displayed while enabling display (applying configuration).
> + * x, y, width and height are bound by the connector's resolution and must 
> not
> + * exceed it.
> + *
> + * x - uint32_t, starting position in pixels by X axis
> + * y - uint32_t, starting position in pixels by Y axis
> + * width - uint32_t, width in pixels
> + * height - uint32_t, height in pixels
> + * bpp - uint32_t, bits per pixel
> + */
> +
> +struct xendispl_set_config_req {
> +    uint64_t fb_cookie;
> +    uint32_t x;
> +    uint32_t y;
> +    uint32_t width;
> +    uint32_t height;
> +    uint32_t bpp;
> +};
> +
> +/*
> + * Request page flip - request to flip a page identified by the framebuffer
> + * cookie:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                | _OP_PG_FLIP    |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie low 32-bit                       | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie high 32-bit                      | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + */
> +
> +struct xendispl_page_flip_req {
> +    uint64_t fb_cookie;
> +};
> +
> +/*
> + *---------------------------------- Responses 
> --------------------------------
> + *
> + * All response packets have the same length (64 octets)
> + *
> + * All response packets have common header:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |            reserved             | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                              status                               | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 12
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + *
> + * id - uint16_t, private guest value, echoed from request
> + * status - int32_t, response status, zero on success and -XEN_EXX on failure
> + *
> + *----------------------------------- Events 
> ----------------------------------
> + *
> + * Events are sent via a shared page allocated by the front and propagated by
> + *   evt-event-channel/evt-ring-ref XenStore entries
> + * All event packets have the same length (64 octets)
> + * All event packets have common header:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |      type      |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + *
> + * id - uint16_t, event id, may be used by front
> + * type - uint8_t, type of the event
> + *
> + *
> + * Page flip complete event - event from back to front on page flip 
> completed:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |   _EVT_PG_FLIP |   reserved     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie low 32-bit                       | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                        fb_cookie high 32-bit                      | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 64
> + * +----------------+----------------+----------------+----------------+
> + */
> +
> +struct xendispl_pg_flip_evt {
> +    uint64_t fb_cookie;
> +};
> +
> +struct xendispl_req {
> +    uint16_t id;
> +    uint8_t operation;
> +    uint8_t reserved[5];
> +    union {
> +        struct xendispl_dbuf_create_req dbuf_create;
> +        struct xendispl_dbuf_destroy_req dbuf_destroy;
> +        struct xendispl_fb_attach_req fb_attach;
> +        struct xendispl_fb_detach_req fb_detach;
> +        struct xendispl_set_config_req set_config;
> +        struct xendispl_page_flip_req pg_flip;
> +        uint8_t reserved[56];
> +    } op;
> +};
> +
> +struct xendispl_resp {
> +    uint16_t id;
> +    uint8_t operation;
> +    uint8_t reserved;
> +    int32_t status;
> +    uint8_t reserved1[56];
> +};
> +
> +struct xendispl_evt {
> +    uint16_t id;
> +    uint8_t type;
> +    uint8_t reserved[5];
> +    union {
> +        struct xendispl_pg_flip_evt pg_flip;
> +        uint8_t reserved[56];
> +    } op;
> +};
> +
> +DEFINE_RING_TYPES(xen_displif, struct xendispl_req, struct xendispl_resp);
> +
> +/*
> + 
> ******************************************************************************
> + *                        Back to front events delivery
> + 
> ******************************************************************************
> + * In order to deliver asynchronous events from back to front a shared page 
> is
> + * allocated by front and its granted reference propagated to back via
> + * XenStore entries (evt-ring-ref/evt-event-channel).
> + * This page has a common header used by both front and back to synchronize
> + * access and control event's ring buffer, while back being a producer of the
> + * events and front being a consumer. The rest of the page after the header
> + * is used for event packets.
> + *
> + * Upon reception of an event(s) front may confirm its reception
> + * for either each event, group of events or none.
> + */
> +
> +struct xendispl_event_page {
> +    uint32_t in_cons;
> +    uint32_t in_prod;
> +    uint8_t reserved[56];
> +};
> +
> +#define XENDISPL_EVENT_PAGE_SIZE 4096
> +#define XENDISPL_IN_RING_OFFS (sizeof(struct xendispl_event_page))
> +#define XENDISPL_IN_RING_SIZE (XENDISPL_EVENT_PAGE_SIZE - 
> XENDISPL_IN_RING_OFFS)
> +#define XENDISPL_IN_RING_LEN (XENDISPL_IN_RING_SIZE / sizeof(struct 
> xendispl_evt))
> +#define XENDISPL_IN_RING(page) \
> +     ((struct xendispl_evt *)((char *)(page) + XENDISPL_IN_RING_OFFS))
> +#define XENDISPL_IN_RING_REF(page, idx) \
> +     (XENDISPL_IN_RING((page))[(idx) % XENDISPL_IN_RING_LEN])
> +
> +#endif /* __XEN_PUBLIC_IO_DISPLIF_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/io/kbdif.h b/include/xen/io/kbdif.h
> index 2d2aebd..3ce54e9 100644
> --- a/include/xen/io/kbdif.h
> +++ b/include/xen/io/kbdif.h
> @@ -26,46 +26,449 @@
>  #ifndef __XEN_PUBLIC_IO_KBDIF_H__
>  #define __XEN_PUBLIC_IO_KBDIF_H__
>  
> -/* In events (backend -> frontend) */
> +/*
> + 
> *****************************************************************************
> + *                     Feature and Parameter Negotiation
> + 
> *****************************************************************************
> + *
> + * The two halves of a para-virtual driver utilize nodes within
> + * XenStore to communicate capabilities and to negotiate operating 
> parameters.
> + * This section enumerates these nodes which reside in the respective front 
> and
> + * backend portions of XenStore, following XenBus convention.
> + *
> + * All data in XenStore is stored as strings.  Nodes specifying numeric
> + * values are encoded in decimal. Integer value ranges listed below are
> + * expressed as fixed sized integer types capable of storing the conversion
> + * of a properly formatted node string, without loss of information.
> + *
> + 
> *****************************************************************************
> + *                            Backend XenBus Nodes
> + 
> *****************************************************************************
> + *
> + *---------------------------- Features supported 
> ----------------------------
> + *
> + * Capable backend advertises supported features by publishing
> + * corresponding entries in XenStore and puts 1 as the value of the entry.
> + * If a feature is not supported then 0 must be set or feature entry omitted.
> + *
> + * feature-abs-pointer
> + *      Values:         <uint>
> + *
> + *      Backends, which support reporting of absolute coordinates for pointer
> + *      device should set this to 1.
> + *
> + * feature-multi-touch
> + *      Values:         <uint>
> + *
> + *      Backends, which support reporting of multi-touch events
> + *      should set this to 1.
> + *
> + * feature-raw-pointer
> + *      Values:        <uint>
> + *
> + *      Backends, which support reporting raw (unscaled) absolute coordinates
> + *      for pointer devices should set this to 1. Raw (unscaled) values have
> + *      a range of [0, 0x7fff].
> + *
> + *------------------------- Pointer Device Parameters 
> ------------------------
> + *
> + * width
> + *      Values:         <uint>
> + *
> + *      Maximum X coordinate (width) to be used by the frontend
> + *      while reporting input events, pixels, [0; UINT32_MAX].
> + *
> + * height
> + *      Values:         <uint>
> + *
> + *      Maximum Y coordinate (height) to be used by the frontend
> + *      while reporting input events, pixels, [0; UINT32_MAX].
> + *
> + 
> *****************************************************************************
> + *                            Frontend XenBus Nodes
> + 
> *****************************************************************************
> + *
> + *------------------------------ Feature request 
> -----------------------------
> + *
> + * Capable frontend requests features from backend via setting corresponding
> + * entries to 1 in XenStore. Requests for features not advertised as 
> supported
> + * by the backend have no effect.
> + *
> + * request-abs-pointer
> + *      Values:         <uint>
> + *
> + *      Request backend to report absolute pointer coordinates
> + *      (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
> + *
> + * request-multi-touch
> + *      Values:         <uint>
> + *
> + *      Request backend to report multi-touch events.
> + *
> + * request-raw-pointer
> + *      Values:         <uint>
> + *
> + *      Request backend to report raw unscaled absolute pointer coordinates.
> + *      This option is only valid if request-abs-pointer is also set.
> + *      Raw unscaled coordinates have the range [0, 0x7fff]
> + *
> + *----------------------- Request Transport Parameters 
> -----------------------
> + *
> + * event-channel
> + *      Values:         <uint>
> + *
> + *      The identifier of the Xen event channel used to signal activity
> + *      in the ring buffer.
> + *
> + * page-gref
> + *      Values:         <uint>
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      a sole page in a single page sized event ring buffer.
> + *
> + * page-ref
> + *      Values:         <uint>
> + *
> + *      OBSOLETE, not recommended for use.
> + *      PFN of the shared page.
> + *
> + *----------------------- Multi-touch Device Parameters 
> -----------------------
> + *
> + * multi-touch-num-contacts
> + *      Values:         <uint>
> + *
> + *      Number of simultaneous touches reported.
> + *
> + * multi-touch-width
> + *      Values:         <uint>
> + *
> + *      Width of the touch area to be used by the frontend
> + *      while reporting input events, pixels, [0; UINT32_MAX].
> + *
> + * multi-touch-height
> + *      Values:         <uint>
> + *
> + *      Height of the touch area to be used by the frontend
> + *      while reporting input events, pixels, [0; UINT32_MAX].
> + */
>  
>  /*
> - * Frontends should ignore unknown in events.
> + * EVENT CODES.
> + */
> +
> +#define XENKBD_TYPE_MOTION             1
> +#define XENKBD_TYPE_RESERVED           2
> +#define XENKBD_TYPE_KEY                3
> +#define XENKBD_TYPE_POS                4
> +#define XENKBD_TYPE_MTOUCH             5
> +
> +/* Multi-touch event sub-codes */
> +
> +#define XENKBD_MT_EV_DOWN              0
> +#define XENKBD_MT_EV_UP                1
> +#define XENKBD_MT_EV_MOTION            2
> +#define XENKBD_MT_EV_SYN               3
> +#define XENKBD_MT_EV_SHAPE             4
> +#define XENKBD_MT_EV_ORIENT            5
> +
> +/*
> + * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
>   */
>  
> -/* Pointer movement event */
> -#define XENKBD_TYPE_MOTION  1
> -/* Event type 2 currently not used */
> -/* Key event (includes pointer buttons) */
> -#define XENKBD_TYPE_KEY     3
> +#define XENKBD_DRIVER_NAME             "vkbd"
> +
> +#define XENKBD_FIELD_FEAT_ABS_POINTER  "feature-abs-pointer"
> +#define XENKBD_FIELD_FEAT_MTOUCH       "feature-multi-touch"
> +#define XENKBD_FIELD_REQ_ABS_POINTER   "request-abs-pointer"
> +#define XENKBD_FIELD_REQ_MTOUCH        "request-multi-touch"
> +#define XENKBD_FIELD_RING_GREF         "page-gref"
> +#define XENKBD_FIELD_EVT_CHANNEL       "event-channel"
> +#define XENKBD_FIELD_WIDTH             "width"
> +#define XENKBD_FIELD_HEIGHT            "height"
> +#define XENKBD_FIELD_MT_WIDTH          "multi-touch-width"
> +#define XENKBD_FIELD_MT_HEIGHT         "multi-touch-height"
> +#define XENKBD_FIELD_MT_NUM_CONTACTS   "multi-touch-num-contacts"
> +
> +/* OBSOLETE, not recommended for use */
> +#define XENKBD_FIELD_RING_REF          "page-ref"
> +
>  /*
> - * Pointer position event
> - * Capable backend sets feature-abs-pointer in xenstore.
> - * Frontend requests ot instead of XENKBD_TYPE_MOTION by setting
> - * request-abs-update in xenstore.
> + 
> *****************************************************************************
> + * Description of the protocol between frontend and backend driver.
> + 
> *****************************************************************************
> + *
> + * The two halves of a Para-virtual driver communicate with
> + * each other using a shared page and an event channel.
> + * Shared page contains a ring with event structures.
> + *
> + * All reserved fields in the structures below must be 0.
> + *
> + 
> *****************************************************************************
> + *                           Backend to frontend events
> + 
> *****************************************************************************
> + *
> + * Frontends should ignore unknown in events.
> + * All event packets have the same length (40 octets)
> + * All event packets have common header:
> + *
> + *          0         octet
> + * +-----------------+
> + * |       type      |
> + * +-----------------+
> + * type - uint8_t, event code, XENKBD_TYPE_???
> + *
> + *
> + * Pointer relative movement event
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MOTION  |                     reserved                     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                               rel_x                               | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                               rel_y                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                               rel_z                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * rel_x - int32_t, relative X motion
> + * rel_y - int32_t, relative Y motion
> + * rel_z - int32_t, relative Z motion (wheel)
>   */
> -#define XENKBD_TYPE_POS     4
>  
>  struct xenkbd_motion
>  {
> -    uint8_t type;        /* XENKBD_TYPE_MOTION */
> -    int32_t rel_x;       /* relative X motion */
> -    int32_t rel_y;       /* relative Y motion */
> -    int32_t rel_z;       /* relative Z motion (wheel) */
> +    uint8_t type;
> +    int32_t rel_x;
> +    int32_t rel_y;
> +    int32_t rel_z;
>  };
>  
> +/*
> + * Key event (includes pointer buttons)
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_KEY     |     pressed    |            reserved             | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                              keycode                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 12
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * pressed - uint8_t, 1 if pressed; 0 otherwise
> + * keycode - uint32_t, KEY_* from linux/input.h
> + */
> +
>  struct xenkbd_key
>  {
> -    uint8_t type;         /* XENKBD_TYPE_KEY */
> -    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
> -    uint32_t keycode;     /* KEY_* from linux/input.h */
> +    uint8_t type;
> +    uint8_t pressed;
> +    uint32_t keycode;
>  };
>  
> +/*
> + * Pointer absolute position event
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_POS     |                     reserved                     | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_x                               | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_y                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                               rel_z                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * abs_x - int32_t, absolute X position (in FB pixels)
> + * abs_y - int32_t, absolute Y position (in FB pixels)
> + * rel_z - int32_t, relative Z motion (wheel)
> + */
> +
>  struct xenkbd_position
>  {
> -    uint8_t type;        /* XENKBD_TYPE_POS */
> -    int32_t abs_x;       /* absolute X position (in FB pixels) */
> -    int32_t abs_y;       /* absolute Y position (in FB pixels) */
> -    int32_t rel_z;       /* relative Z motion (wheel) */
> +    uint8_t type;
> +    int32_t abs_x;
> +    int32_t abs_y;
> +    int32_t rel_z;
> +};
> +
> +/*
> + * Multi-touch event and its sub-types
> + *
> + * All multi-touch event packets have common header:
> + *
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |   event_type   |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + *
> + * event_type - uint8_t, multi-touch event sub-type, XENKBD_MT_EV_???
> + * contact_id - uint8_t, ID of the contact
> + *
> + * Touch interactions can consist of one or more contacts.
> + * For each contact, a series of events is generated, starting
> + * with a down event, followed by zero or more motion events,
> + * and ending with an up event. Events relating to the same
> + * contact point can be identified by the ID of the sequence: contact ID.
> + * Contact ID may be reused after XENKBD_MT_EV_UP event and
> + * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range.
> + *
> + * For further information please refer to documentation on Wayland [1],
> + * Linux [2] and Windows [3] multi-touch support.
> + *
> + * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
> + * [2] 
> https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
> + * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
> + *
> + *
> + * Multi-touch down event - sent when a new touch is made: touch is assigned
> + * a unique contact ID, sent with this and subsequent events related
> + * to this touch.
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |   _MT_EV_DOWN  |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_x                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_y                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * abs_x - int32_t, absolute X position, in pixels
> + * abs_y - int32_t, absolute Y position, in pixels
> + *
> + * Multi-touch contact release event
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |  _MT_EV_UP     |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Multi-touch motion event
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |  _MT_EV_MOTION |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_x                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                               abs_y                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * abs_x - int32_t, absolute X position, in pixels
> + * abs_y - int32_t, absolute Y position, in pixels
> + *
> + * Multi-touch input synchronization event - shows end of a set of events
> + * which logically belong together.
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |  _MT_EV_SYN    |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Multi-touch shape event - touch point has changed its shape.
> + * Shape is approximated by an ellipse through the major and minor axis
> + * lengths: major is the longer diameter of the ellipse and minor is the
> + * shorter one. Center of the ellipse is reported via
> + * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |  _MT_EV_SHAPE  |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                               major                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                               minor                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * major - uint32_t, length of the major axis, pixels
> + * minor - uint32_t, length of the minor axis, pixels
> + *
> + * Multi-touch orientation event - touch point's shape has changed
> + * its orientation: calculated as a clockwise angle between the major axis
> + * of the ellipse and positive Y axis in degrees, [-180; +180].
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |  _TYPE_MTOUCH  |  _MT_EV_ORIENT |   contact_id   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |           orientation           |            reserved             | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 16
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 40
> + * +----------------+----------------+----------------+----------------+
> + *
> + * orientation - int16_t, clockwise angle of the major axis
> + */
> +
> +struct xenkbd_mtouch {
> +    uint8_t type;            /* XENKBD_TYPE_MTOUCH */
> +    uint8_t event_type;      /* XENKBD_MT_EV_??? */
> +    uint8_t contact_id;
> +    uint8_t reserved[5];     /* reserved for the future use */
> +    union {
> +        struct {
> +            int32_t abs_x;   /* absolute X position, pixels */
> +            int32_t abs_y;   /* absolute Y position, pixels */
> +        } pos;
> +        struct {
> +            uint32_t major;  /* length of the major axis, pixels */
> +            uint32_t minor;  /* length of the minor axis, pixels */
> +        } shape;
> +        int16_t orientation; /* clockwise angle of the major axis */
> +    } u;
>  };
>  
>  #define XENKBD_IN_EVENT_SIZE 40
> @@ -76,15 +479,26 @@ union xenkbd_in_event
>      struct xenkbd_motion motion;
>      struct xenkbd_key key;
>      struct xenkbd_position pos;
> +    struct xenkbd_mtouch mtouch;
>      char pad[XENKBD_IN_EVENT_SIZE];
>  };
>  
> -/* Out events (frontend -> backend) */
> -
>  /*
> + 
> *****************************************************************************
> + *                            Frontend to backend events
> + 
> *****************************************************************************
> + *
>   * Out events may be sent only when requested by backend, and receipt
>   * of an unknown out event is an error.
>   * No out events currently defined.
> + *
> + * All event packets have the same length (40 octets)
> + * All event packets have common header:
> + *          0         octet
> + * +-----------------+
> + * |       type      |
> + * +-----------------+
> + * type - uint8_t, event code
>   */
>  
>  #define XENKBD_OUT_EVENT_SIZE 40
> @@ -95,7 +509,11 @@ union xenkbd_out_event
>      char pad[XENKBD_OUT_EVENT_SIZE];
>  };
>  
> -/* shared page */
> +/*
> + 
> *****************************************************************************
> + *                            Shared page
> + 
> *****************************************************************************
> + */
>  
>  #define XENKBD_IN_RING_SIZE 2048
>  #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
> @@ -119,7 +537,7 @@ struct xenkbd_page
>      uint32_t out_cons, out_prod;
>  };
>  
> -#endif
> +#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
>  
>  /*
>   * Local variables:
> diff --git a/include/xen/io/libxenvchan.h b/include/xen/io/libxenvchan.h
> index 5c3d3d4..44284f4 100644
> --- a/include/xen/io/libxenvchan.h
> +++ b/include/xen/io/libxenvchan.h
> @@ -10,19 +10,23 @@
>   *
>   * @section LICENSE
>   *
> - *  This library is free software; you can redistribute it and/or
> - *  modify it under the terms of the GNU Lesser General Public
> - *  License as published by the Free Software Foundation; either
> - *  version 2.1 of the License, or (at your option) any later version.
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
>   *
> - *  This library is distributed in the hope that it will be useful,
> - *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - *  Lesser General Public License for more details.
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
>   *
> - *  You should have received a copy of the GNU Lesser General Public
> - *  License along with this library; if not, write to the Free Software
> - *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  
> 02110-1301 USA
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
>   *
>   * @section DESCRIPTION
>   *
> diff --git a/include/xen/io/netif.h b/include/xen/io/netif.h
> index 61e9aea..ca00614 100644
> --- a/include/xen/io/netif.h
> +++ b/include/xen/io/netif.h
> @@ -1,8 +1,8 @@
>  
> /******************************************************************************
>   * netif.h
> - * 
> + *
>   * Unified network-device I/O interface for Xen guest OSes.
> - * 
> + *
>   * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
>   * of this software and associated documentation files (the "Software"), to
>   * deal in the Software without restriction, including without limitation the
> @@ -136,14 +136,684 @@
>   */
>  
>  /*
> - * This is the 'wire' format for packets:
> - *  Request 1: netif_tx_request -- NETTXF_* (any flags)
> - * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info)
> - * [Request 3: netif_tx_extra]  (only if request 2 has XEN_NETIF_EXTRA_MORE)
> - *  Request 4: netif_tx_request -- NETTXF_more_data
> - *  Request 5: netif_tx_request -- NETTXF_more_data
> + * "feature-multicast-control" and "feature-dynamic-multicast-control"
> + * advertise the capability to filter ethernet multicast packets in the
> + * backend. If the frontend wishes to take advantage of this feature then
> + * it may set "request-multicast-control". If the backend only advertises
> + * "feature-multicast-control" then "request-multicast-control" must be set
> + * before the frontend moves into the connected state. The backend will
> + * sample the value on this state transition and any subsequent change in
> + * value will have no effect. However, if the backend also advertises
> + * "feature-dynamic-multicast-control" then "request-multicast-control"
> + * may be set by the frontend at any time. In this case, the backend will
> + * watch the value and re-sample on watch events.
> + *
> + * If the sampled value of "request-multicast-control" is set then the
> + * backend transmit side should no longer flood multicast packets to the
> + * frontend, it should instead drop any multicast packet that does not
> + * match in a filter list.
> + * The list is amended by the frontend by sending dummy transmit requests
> + * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
> + * specified below.
> + * Note that the filter list may be amended even if the sampled value of
> + * "request-multicast-control" is not set, however the filter should only
> + * be applied if it is set.
> + */
> +
> +/*
> + * Control ring
> + * ============
> + *
> + * Some features, such as hashing (detailed below), require a
> + * significant amount of out-of-band data to be passed from frontend to
> + * backend. Use of xenstore is not suitable for large quantities of data
> + * because of quota limitations and so a dedicated 'control ring' is used.
> + * The ability of the backend to use a control ring is advertised by
> + * setting:
> + *
> + * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1"
> + *
> + * The frontend provides a control ring to the backend by setting:
> + *
> + * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
> + * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
> + *
> + * where <gref> is the grant reference of the shared page used to
> + * implement the control ring and <port> is an event channel to be used
> + * as a mailbox interrupt. These keys must be set before the frontend
> + * moves into the connected state.
> + *
> + * The control ring uses a fixed request/response message size and is
> + * balanced (i.e. one request to one response), so operationally it is much
> + * the same as a transmit or receive ring.
> + * Note that there is no requirement that responses are issued in the same
> + * order as requests.
> + */
> +
> +/*
> + * Hash types
> + * ==========
> + *
> + * For the purposes of the definitions below, 'Packet[]' is an array of
> + * octets containing an IP packet without options, 'Array[X..Y]' means a
> + * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
> + * used to indicate concatenation of arrays.
> + */
> +
> +/*
> + * A hash calculated over an IP version 4 header as follows:
> + *
> + * Buffer[0..7] = Packet[12..15] (source address) +
> + *                Packet[16..19] (destination address)
> + *
> + * Result = Hash(Buffer, 8)
> + */
> +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
> +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
> +    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
> +
> +/*
> + * A hash calculated over an IP version 4 header and TCP header as
> + * follows:
> + *
> + * Buffer[0..11] = Packet[12..15] (source address) +
> + *                 Packet[16..19] (destination address) +
> + *                 Packet[20..21] (source port) +
> + *                 Packet[22..23] (destination port)
> + *
> + * Result = Hash(Buffer, 12)
> + */
> +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
> +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
> +    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
> +
> +/*
> + * A hash calculated over an IP version 6 header as follows:
> + *
> + * Buffer[0..31] = Packet[8..23]  (source address) +
> + *                 Packet[24..39] (destination address)
> + *
> + * Result = Hash(Buffer, 32)
> + */
> +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
> +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
> +    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
> +
> +/*
> + * A hash calculated over an IP version 6 header and TCP header as
> + * follows:
> + *
> + * Buffer[0..35] = Packet[8..23]  (source address) +
> + *                 Packet[24..39] (destination address) +
> + *                 Packet[40..41] (source port) +
> + *                 Packet[42..43] (destination port)
> + *
> + * Result = Hash(Buffer, 36)
> + */
> +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
> +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
> +    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
> +
> +/*
> + * Hash algorithms
> + * ===============
> + */
> +
> +#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0
> +
> +/*
> + * Toeplitz hash:
> + */
> +
> +#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
> +
> +/*
> + * This algorithm uses a 'key' as well as the data buffer itself.
> + * (Buffer[] and Key[] are treated as shift-registers where the MSB of
> + * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1]
> + * is the 'right-most').
> + *
> + * Value = 0
> + * For number of bits in Buffer[]
> + *    If (left-most bit of Buffer[] is 1)
> + *        Value ^= left-most 32 bits of Key[]
> + *    Key[] << 1
> + *    Buffer[] << 1
> + *
> + * The code below is provided for convenience where an operating system
> + * does not already provide an implementation.
> + */
> +#ifdef XEN_NETIF_DEFINE_TOEPLITZ
> +static uint32_t xen_netif_toeplitz_hash(const uint8_t *key,
> +                                        unsigned int keylen,
> +                                        const uint8_t *buf,
> +                                        unsigned int buflen)
> +{
> +    unsigned int keyi, bufi;
> +    uint64_t prefix = 0;
> +    uint64_t hash = 0;
> +
> +    /* Pre-load prefix with the first 8 bytes of the key */
> +    for (keyi = 0; keyi < 8; keyi++) {
> +        prefix <<= 8;
> +        prefix |= (keyi < keylen) ? key[keyi] : 0;
> +    }
> +
> +    for (bufi = 0; bufi < buflen; bufi++) {
> +        uint8_t byte = buf[bufi];
> +        unsigned int bit;
> +
> +        for (bit = 0; bit < 8; bit++) {
> +            if (byte & 0x80)
> +                hash ^= prefix;
> +            prefix <<= 1;
> +            byte <<=1;
> +        }
> +
> +        /*
> +         * 'prefix' has now been left-shifted by 8, so
> +         * OR in the next byte.
> +         */
> +        prefix |= (keyi < keylen) ? key[keyi] : 0;
> +        keyi++;
> +    }
> +
> +    /* The valid part of the hash is in the upper 32 bits. */
> +    return hash >> 32;
> +}
> +#endif /* XEN_NETIF_DEFINE_TOEPLITZ */
> +
> +/*
> + * Control requests (struct xen_netif_ctrl_request)
> + * ================================================
> + *
> + * All requests have the following format:
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |    id     |   type    |         data[0]       |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |         data[1]       |         data[2]       |
> + * +-----+-----+-----+-----+-----------------------+
> + *
> + * id: the request identifier, echoed in response.
> + * type: the type of request (see below)
> + * data[]: any data associated with the request (determined by type)
> + */
> +
> +struct xen_netif_ctrl_request {
> +    uint16_t id;
> +    uint16_t type;
> +
> +#define XEN_NETIF_CTRL_TYPE_INVALID               0
> +#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS        1
> +#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS        2
> +#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY          3
> +#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
> +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
> +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING      6
> +#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM    7
> +
> +    uint32_t data[3];
> +};
> +
> +/*
> + * Control responses (struct xen_netif_ctrl_response)
> + * ==================================================
> + *
> + * All responses have the following format:
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |    id     |   type    |         status        |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |         data          |
> + * +-----+-----+-----+-----+
> + *
> + * id: the corresponding request identifier
> + * type: the type of the corresponding request
> + * status: the status of request processing
> + * data: any data associated with the response (determined by type and
> + *       status)
> + */
> +
> +struct xen_netif_ctrl_response {
> +    uint16_t id;
> +    uint16_t type;
> +    uint32_t status;
> +
> +#define XEN_NETIF_CTRL_STATUS_SUCCESS           0
> +#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     1
> +#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
> +#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   3
> +
> +    uint32_t data;
> +};
> +
> +/*
> + * Control messages
> + * ================
> + *
> + * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
> + * --------------------------------------
> + *
> + * This is sent by the frontend to set the desired hash algorithm.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
> + *  data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
> + *  data[1] = 0
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
> + *
> + * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
> + *       hashing and the backend is free to choose how it steers packets
> + *       to queues (which is the default behaviour).
> + *
> + * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
> + * ----------------------------------
> + *
> + * This is sent by the frontend to query the types of hash supported by
> + * the backend.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
> + *  data[0] = 0
> + *  data[1] = 0
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
> + *  data   = supported hash types (if operation was successful)
> + *
> + * NOTE: A valid hash algorithm must be selected before this operation can
> + *       succeed.
> + *
> + * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
> + * ----------------------------------
> + *
> + * This is sent by the frontend to set the types of hash that the backend
> + * should calculate. (See above for hash type definitions).
> + * Note that the 'maximal' type of hash should always be chosen. For
> + * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
> + * the latter hash type should be calculated for any TCP packet and the
> + * former only calculated for non-TCP packets.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
> + *  data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
> + *  data[1] = 0
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
> + *                                                     value is invalid or
> + *                                                     unsupported
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
> + *  data   = 0
> + *
> + * NOTE: A valid hash algorithm must be selected before this operation can
> + *       succeed.
> + *       Also, setting data[0] to zero disables hashing and the backend
> + *       is free to choose how it steers packets to queues.
> + *
> + * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
> + * --------------------------------
> + *
> + * This is sent by the frontend to set the key of the hash if the algorithm
> + * requires it. (See hash algorithms above).
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
> + *  data[0] = grant reference of page containing the key (assumed to
> + *            start at beginning of grant)
> + *  data[1] = size of key in octets
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
> + *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Key size is larger
> + *                                                     than the backend
> + *                                                     supports
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
> + *  data   = 0
> + *
> + * NOTE: Any key octets not specified are assumed to be zero (the key
> + *       is assumed to be empty by default) and specifying a new key
> + *       invalidates any previous key, hence specifying a key size of
> + *       zero will clear the key (which ensures that the calculated hash
> + *       will always be zero).
> + *       The maximum size of key is algorithm and backend specific, but
> + *       is also limited by the single grant reference.
> + *       The grant reference may be read-only and must remain valid until
> + *       the response has been processed.
> + *
> + * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
> + * -----------------------------------------
> + *
> + * This is sent by the frontend to query the maximum size of mapping
> + * table supported by the backend. The size is specified in terms of
> + * table entries.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
> + *  data[0] = 0
> + *  data[1] = 0
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
> + *  data   = maximum number of entries allowed in the mapping table
> + *           (if operation was successful) or zero if a mapping table is
> + *           not supported (i.e. hash mapping is done only by modular
> + *           arithmetic).
> + *
> + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
> + * -----------------------------------------
> + *
> + * This is sent by the frontend to set the actual size of the mapping
> + * table to be used by the backend. The size is specified in terms of
> + * table entries.
> + * Any previous table is invalidated by this message and any new table
> + * is assumed to be zero filled.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
> + *  data[0] = number of entries in mapping table
> + *  data[1] = 0
> + *  data[2] = 0
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
> + *  data   = 0
> + *
> + * NOTE: Setting data[0] to 0 means that hash mapping should be done
> + *       using modular arithmetic.
> + *
> + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
> + * ------------------------------------
> + *
> + * This is sent by the frontend to set the content of the table mapping
> + * hash value to queue number. The backend should calculate the hash from
> + * the packet header, use it as an index into the table (modulo the size
> + * of the table) and then steer the packet to the queue number found at
> + * that index.
> + *
> + * Request:
> + *
> + *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
> + *  data[0] = grant reference of page containing the mapping (sub-)table
> + *            (assumed to start at beginning of grant)
> + *  data[1] = size of (sub-)table in entries
> + *  data[2] = offset, in entries, of sub-table within overall table
> + *
> + * Response:
> + *
> + *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
> + *                                                     supported
> + *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
> + *                                                     is invalid
> + *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Table size is larger
> + *                                                     than the backend
> + *                                                     supports
> + *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
> + *  data   = 0
> + *
> + * NOTE: The overall table has the following format:
> + *
> + *          0     1     2     3     4     5     6     7  octet
> + *       +-----+-----+-----+-----+-----+-----+-----+-----+
> + *       |       mapping[0]      |       mapping[1]      |
> + *       +-----+-----+-----+-----+-----+-----+-----+-----+
> + *       |                       .                       |
> + *       |                       .                       |
> + *       |                       .                       |
> + *       +-----+-----+-----+-----+-----+-----+-----+-----+
> + *       |      mapping[N-2]     |      mapping[N-1]     |
> + *       +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + *       where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
> + *       message and each mapping must specify a queue between 0 and
> + *       "multi-queue-num-queues" (see above).
> + *       The backend may support a mapping table larger than can be
> + *       mapped by a single grant reference. Thus sub-tables within a
> + *       larger table can be individually set by sending multiple messages
> + *       with differing offset values. Specifying a new sub-table does not
> + *       invalidate any table data outside that range.
> + *       The grant reference may be read-only and must remain valid until
> + *       the response has been processed.
> + */
> +
> +DEFINE_RING_TYPES(xen_netif_ctrl,
> +                  struct xen_netif_ctrl_request,
> +                  struct xen_netif_ctrl_response);
> +
> +/*
> + * Guest transmit
> + * ==============
> + *
> + * This is the 'wire' format for transmit (frontend -> backend) packets:
> + *
> + *  Fragment 1: netif_tx_request_t  - flags = NETTXF_*
> + *                                    size = total packet size
> + * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
> + *                                     NETTXF_extra_info)
> + *  ...
> + * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
> + *                                     XEN_NETIF_EXTRA_MORE)
> + *  ...
> + *  Fragment N: netif_tx_request_t  - (only if fragment N-1 flags include
> + *                                     NETTXF_more_data - flags on preceding
> + *                                     extras are not relevant here)
> + *                                    flags = 0
> + *                                    size = fragment size
> + *
> + * NOTE:
> + *
> + * This format is slightly different from that used for receive
> + * (backend -> frontend) packets. Specifically, in a multi-fragment
> + * packet the actual size of fragment 1 can only be determined by
> + * subtracting the sizes of fragments 2..N from the total packet size.
> + *
> + * Ring slot size is 12 octets, however not all request/response
> + * structs use the full size.
> + *
> + * tx request data (netif_tx_request_t)
> + * ------------------------------------
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | grant ref             | offset    | flags     |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | id        | size      |
> + * +-----+-----+-----+-----+
> + *
> + * grant ref: Reference to buffer page.
> + * offset: Offset within buffer page.
> + * flags: NETTXF_*.
> + * id: request identifier, echoed in response.
> + * size: packet size in bytes.
> + *
> + * tx response (netif_tx_response_t)
> + * ---------------------------------
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | id        | status    | unused                |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | unused                |
> + * +-----+-----+-----+-----+
> + *
> + * id: reflects id in transmit request
> + * status: NETIF_RSP_*
> + *
> + * Guest receive
> + * =============
> + *
> + * This is the 'wire' format for receive (backend -> frontend) packets:
> + *
> + *  Fragment 1: netif_rx_response_t - flags = NETRXF_*
> + *                                    size = fragment size
> + * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
> + *                                     NETRXF_extra_info)
> + *  ...
> + * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
> + *                                     XEN_NETIF_EXTRA_MORE)
>   *  ...
> - *  Request N: netif_tx_request -- 0
> + *  Fragment N: netif_rx_response_t - (only if fragment N-1 flags include
> + *                                     NETRXF_more_data - flags on preceding
> + *                                     extras are not relevant here)
> + *                                    flags = 0
> + *                                    size = fragment size
> + *
> + * NOTE:
> + *
> + * This format is slightly different from that used for transmit
> + * (frontend -> backend) packets. Specifically, in a multi-fragment
> + * packet the size of the packet can only be determined by summing the
> + * sizes of fragments 1..N.
> + *
> + * Ring slot size is 8 octets.
> + *
> + * rx request (netif_rx_request_t)
> + * -------------------------------
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | id        | pad       | gref                  |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + * id: request identifier, echoed in response.
> + * gref: reference to incoming granted frame.
> + *
> + * rx response (netif_rx_response_t)
> + * ---------------------------------
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | id        | offset    | flags     | status    |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + * id: reflects id in receive request
> + * offset: offset in page of start of received packet
> + * flags: NETRXF_*
> + * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
> + *
> + * NOTE: Historically, to support GSO on the frontend receive side, Linux
> + *       netfront does not make use of the rx response id (because, as
> + *       described below, extra info structures overlay the id field).
> + *       Instead it assumes that responses always appear in the same ring
> + *       slot as their corresponding request. Thus, to maintain
> + *       compatibility, backends must make sure this is the case.
> + *
> + * Extra Info
> + * ==========
> + *
> + * Can be present if initial request or response has NET{T,R}XF_extra_info,
> + * or previous extra request has XEN_NETIF_EXTRA_MORE.
> + *
> + * The struct therefore needs to fit into either a tx or rx slot and
> + * is therefore limited to 8 octets.
> + *
> + * NOTE: Because extra info data overlays the usual request/response
> + *       structures, there is no id information in the opposite direction.
> + *       So, if an extra info overlays an rx response the frontend can
> + *       assume that it is in the same ring slot as the request that was
> + *       consumed to make the slot available, and the backend must ensure
> + *       this assumption is true.
> + *
> + * extra info (netif_extra_info_t)
> + * -------------------------------
> + *
> + * General format:
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |type |flags| type specific data                |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * | padding for tx        |
> + * +-----+-----+-----+-----+
> + *
> + * type: XEN_NETIF_EXTRA_TYPE_*
> + * flags: XEN_NETIF_EXTRA_FLAG_*
> + * padding for tx: present only in the tx case due to 8 octet limit
> + *                 from rx case. Not shown in type specific entries
> + *                 below.
> + *
> + * XEN_NETIF_EXTRA_TYPE_GSO:
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |type |flags| size      |type | pad | features  |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
> + * flags: XEN_NETIF_EXTRA_FLAG_*
> + * size: Maximum payload size of each segment. For example,
> + *       for TCP this is just the path MSS.
> + * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
> + *       the packet and any extra features required to segment the
> + *       packet properly.
> + * features: XEN_NETIF_GSO_FEAT_*: This specifies any extra GSO
> + *           features required to process this packet, such as ECN
> + *           support for TCPv4.
> + *
> + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |type |flags| addr                              |
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
> + * flags: XEN_NETIF_EXTRA_FLAG_*
> + * addr: address to add/remove
> + *
> + * XEN_NETIF_EXTRA_TYPE_HASH:
> + *
> + * A backend that supports Toeplitz hashing is assumed to accept
> + * this type of extra info in transmit packets.
> + * A frontend that enables hashing is assumed to accept
> + * this type of extra info in receive packets.
> + *
> + *    0     1     2     3     4     5     6     7  octet
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + * |type |flags|htype| alg |LSB ---- value ---- MSB|
> + * +-----+-----+-----+-----+-----+-----+-----+-----+
> + *
> + * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
> + * flags: XEN_NETIF_EXTRA_FLAG_*
> + * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
> + * alg: The algorithm used to calculate the hash (one of
> + *      XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
> + * value: Hash value
>   */
>  
>  /* Protocol checksum field is blank in the packet (hardware offload)? */
> @@ -164,11 +834,11 @@
>  
>  #define XEN_NETIF_MAX_TX_SIZE 0xFFFF
>  struct netif_tx_request {
> -    grant_ref_t gref;      /* Reference to buffer page */
> -    uint16_t offset;       /* Offset within buffer page */
> -    uint16_t flags;        /* NETTXF_* */
> -    uint16_t id;           /* Echoed in response message. */
> -    uint16_t size;         /* Packet size in bytes.       */
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t flags;
> +    uint16_t id;
> +    uint16_t size;
>  };
>  typedef struct netif_tx_request netif_tx_request_t;
>  
> @@ -177,9 +847,10 @@ typedef struct netif_tx_request netif_tx_request_t;
>  #define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
>  #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
>  #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
> -#define XEN_NETIF_EXTRA_TYPE_MAX       (4)
> +#define XEN_NETIF_EXTRA_TYPE_HASH      (4)  /* u.hash */
> +#define XEN_NETIF_EXTRA_TYPE_MAX       (5)
>  
> -/* netif_extra_info flags. */
> +/* netif_extra_info_t flags. */
>  #define _XEN_NETIF_EXTRA_FLAG_MORE (0)
>  #define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
>  
> @@ -189,55 +860,27 @@ typedef struct netif_tx_request netif_tx_request_t;
>  #define XEN_NETIF_GSO_TYPE_TCPV6        (2)
>  
>  /*
> - * This structure needs to fit within both netif_tx_request and
> - * netif_rx_response for compatibility.
> + * This structure needs to fit within both netif_tx_request_t and
> + * netif_rx_response_t for compatibility.
>   */
>  struct netif_extra_info {
> -    uint8_t type;  /* XEN_NETIF_EXTRA_TYPE_* */
> -    uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
> -
> +    uint8_t type;
> +    uint8_t flags;
>      union {
> -        /*
> -         * XEN_NETIF_EXTRA_TYPE_GSO:
> -         */
>          struct {
> -            /*
> -             * Maximum payload size of each segment. For example, for TCP 
> this
> -             * is just the path MSS.
> -             */
>              uint16_t size;
> -
> -            /*
> -             * GSO type. This determines the protocol of the packet and any
> -             * extra features required to segment the packet properly.
> -             */
> -            uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
> -
> -            /* Future expansion. */
> +            uint8_t type;
>              uint8_t pad;
> -
> -            /*
> -             * GSO features. This specifies any extra GSO features required
> -             * to process this packet, such as ECN support for TCPv4.
> -             */
> -            uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
> +            uint16_t features;
>          } gso;
> -
> -        /*
> -         * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
> -         * Backend advertises availability via 'feature-multicast-control'
> -         * xenbus node containing value '1'.
> -         * Frontend requests this feature by advertising
> -         * 'request-multicast-control' xenbus node containing value '1'.
> -         * If multicast control is requested then multicast flooding is
> -         * disabled and the frontend must explicitly register its interest
> -         * in multicast groups using dummy transmit requests containing
> -         * MCAST_{ADD,DEL} extra-info fragments.
> -         */
>          struct {
> -            uint8_t addr[6]; /* Address to add/remove. */
> +            uint8_t addr[6];
>          } mcast;
> -
> +        struct {
> +            uint8_t type;
> +            uint8_t algorithm;
> +            uint8_t value[4];
> +        } hash;
>          uint16_t pad[3];
>      } u;
>  };
> @@ -245,13 +888,14 @@ typedef struct netif_extra_info netif_extra_info_t;
>  
>  struct netif_tx_response {
>      uint16_t id;
> -    int16_t  status;       /* NETIF_RSP_* */
> +    int16_t  status;
>  };
>  typedef struct netif_tx_response netif_tx_response_t;
>  
>  struct netif_rx_request {
>      uint16_t    id;        /* Echoed in response message.        */
> -    grant_ref_t gref;      /* Reference to incoming granted frame */
> +    uint16_t    pad;
> +    grant_ref_t gref;
>  };
>  typedef struct netif_rx_request netif_rx_request_t;
>  
> @@ -271,11 +915,15 @@ typedef struct netif_rx_request netif_rx_request_t;
>  #define _NETRXF_extra_info     (3)
>  #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
>  
> +/* Packet has GSO prefix. Deprecated but included for compatibility */
> +#define _NETRXF_gso_prefix     (4)
> +#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
> +
>  struct netif_rx_response {
>      uint16_t id;
> -    uint16_t offset;       /* Offset in page of start of received packet  */
> -    uint16_t flags;        /* NETRXF_* */
> -    int16_t  status;       /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
> +    uint16_t offset;
> +    uint16_t flags;
> +    int16_t  status;
>  };
>  typedef struct netif_rx_response netif_rx_response_t;
>  
> @@ -289,7 +937,7 @@ DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, 
> struct netif_rx_response);
>  #define NETIF_RSP_DROPPED         -2
>  #define NETIF_RSP_ERROR           -1
>  #define NETIF_RSP_OKAY             0
> -/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
> +/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
>  #define NETIF_RSP_NULL             1
>  
>  #endif
> diff --git a/include/xen/io/protocols.h b/include/xen/io/protocols.h
> index 80b196b..40a9b30 100644
> --- a/include/xen/io/protocols.h
> +++ b/include/xen/io/protocols.h
> @@ -18,6 +18,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2008, Keir Fraser
>   */
>  
>  #ifndef __XEN_PROTOCOLS_H__
> diff --git a/include/xen/io/pvcalls.h b/include/xen/io/pvcalls.h
> new file mode 100644
> index 0000000..cb81712
> --- /dev/null
> +++ b/include/xen/io/pvcalls.h
> @@ -0,0 +1,153 @@
> +/*
> + * pvcalls.h -- Xen PV Calls Protocol
> + *
> + * Refer to docs/misc/pvcalls.markdown for the specification
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2017 Stefano Stabellini <stefano@xxxxxxxxxxx>
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_PVCALLS_H__
> +#define __XEN_PUBLIC_IO_PVCALLS_H__
> +
> +#include "../grant_table.h"
> +#include "ring.h"
> +
> +/*
> + * See docs/misc/pvcalls.markdown in xen.git for the full specification:
> + * https://xenbits.xen.org/docs/unstable/misc/pvcalls.html
> + */
> +struct pvcalls_data_intf {
> +    RING_IDX in_cons, in_prod, in_error;
> +
> +    uint8_t pad1[52];
> +
> +    RING_IDX out_cons, out_prod, out_error;
> +
> +    uint8_t pad2[52];
> +
> +    RING_IDX ring_order;
> +    grant_ref_t ref[];
> +};
> +DEFINE_XEN_FLEX_RING(pvcalls);
> +
> +#define PVCALLS_SOCKET         0
> +#define PVCALLS_CONNECT        1
> +#define PVCALLS_RELEASE        2
> +#define PVCALLS_BIND           3
> +#define PVCALLS_LISTEN         4
> +#define PVCALLS_ACCEPT         5
> +#define PVCALLS_POLL           6
> +
> +struct xen_pvcalls_request {
> +    uint32_t req_id; /* private to guest, echoed in response */
> +    uint32_t cmd;    /* command to execute */
> +    union {
> +        struct xen_pvcalls_socket {
> +            uint64_t id;
> +            uint32_t domain;
> +            uint32_t type;
> +            uint32_t protocol;
> +        } socket;
> +        struct xen_pvcalls_connect {
> +            uint64_t id;
> +            uint8_t addr[28];
> +            uint32_t len;
> +            uint32_t flags;
> +            grant_ref_t ref;
> +            uint32_t evtchn;
> +        } connect;
> +        struct xen_pvcalls_release {
> +            uint64_t id;
> +            uint8_t reuse;
> +        } release;
> +        struct xen_pvcalls_bind {
> +            uint64_t id;
> +            uint8_t addr[28];
> +            uint32_t len;
> +        } bind;
> +        struct xen_pvcalls_listen {
> +            uint64_t id;
> +            uint32_t backlog;
> +        } listen;
> +        struct xen_pvcalls_accept {
> +            uint64_t id;
> +            uint64_t id_new;
> +            grant_ref_t ref;
> +            uint32_t evtchn;
> +        } accept;
> +        struct xen_pvcalls_poll {
> +            uint64_t id;
> +        } poll;
> +        /* dummy member to force sizeof(struct xen_pvcalls_request)
> +         * to match across archs */
> +        struct xen_pvcalls_dummy {
> +            uint8_t dummy[56];
> +        } dummy;
> +    } u;
> +};
> +
> +struct xen_pvcalls_response {
> +    uint32_t req_id;
> +    uint32_t cmd;
> +    int32_t ret;
> +    uint32_t pad;
> +    union {
> +        struct _xen_pvcalls_socket {
> +            uint64_t id;
> +        } socket;
> +        struct _xen_pvcalls_connect {
> +            uint64_t id;
> +        } connect;
> +        struct _xen_pvcalls_release {
> +            uint64_t id;
> +        } release;
> +        struct _xen_pvcalls_bind {
> +            uint64_t id;
> +        } bind;
> +        struct _xen_pvcalls_listen {
> +            uint64_t id;
> +        } listen;
> +        struct _xen_pvcalls_accept {
> +            uint64_t id;
> +        } accept;
> +        struct _xen_pvcalls_poll {
> +            uint64_t id;
> +        } poll;
> +        struct _xen_pvcalls_dummy {
> +            uint8_t dummy[8];
> +        } dummy;
> +    } u;
> +};
> +
> +DEFINE_RING_TYPES(xen_pvcalls, struct xen_pvcalls_request,
> +                  struct xen_pvcalls_response);
> +
> +#endif
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/io/ring.h b/include/xen/io/ring.h
> index 73e13d7..30342fc 100644
> --- a/include/xen/io/ring.h
> +++ b/include/xen/io/ring.h
> @@ -27,6 +27,21 @@
>  #ifndef __XEN_PUBLIC_IO_RING_H__
>  #define __XEN_PUBLIC_IO_RING_H__
>  
> +/*
> + * When #include'ing this header, you need to provide the following
> + * declaration upfront:
> + * - standard integer types (uint8_t, uint16_t, etc)
> + * They are provided by stdint.h of the standard headers.
> + *
> + * In addition, if you intend to use the FLEX macros, you also need to
> + * provide the following, before invoking the FLEX macros:
> + * - size_t
> + * - memcpy
> + * - grant_ref_t
> + * These declarations are provided by string.h of the standard headers,
> + * and grant_table.h from the Xen public headers.
> + */
> +
>  #include "../xen-compat.h"
>  
>  #if __XEN_INTERFACE_VERSION__ < 0x00030208
> @@ -111,7 +126,7 @@ struct __name##_sring {                                   
>               \
>              uint8_t msg;                                                \
>          } tapif_user;                                                   \
>          uint8_t pvt_pad[4];                                             \
> -    } private;                                                          \
> +    } pvt;                                                              \
>      uint8_t __pad[44];                                                  \
>      union __name##_sring_entry ring[1]; /* variable-length */           \
>  };                                                                      \
> @@ -156,7 +171,7 @@ typedef struct __name##_back_ring __name##_back_ring_t
>  #define SHARED_RING_INIT(_s) do {                                       \
>      (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
>      (_s)->req_event = (_s)->rsp_event = 1;                              \
> -    (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
> +    (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));      \
>      (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
>  } while(0)
>  
> @@ -212,6 +227,20 @@ typedef struct __name##_back_ring __name##_back_ring_t
>  #define RING_GET_REQUEST(_r, _idx)                                      \
>      (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
>  
> +/*
> + * Get a local copy of a request.
> + *
> + * Use this in preference to RING_GET_REQUEST() so all processing is
> + * done on a local copy that cannot be modified by the other end.
> + *
> + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause 
> this
> + * to be ineffective where _req is a struct which consists of only bitfields.
> + */
> +#define RING_COPY_REQUEST(_r, _idx, _req) do {                               
> \
> +     /* Use volatile to force the copy into _req. */                 \
> +     *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);   \
> +} while (0)
> +
>  #define RING_GET_RESPONSE(_r, _idx)                                     \
>      (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
>  
> @@ -299,6 +328,149 @@ typedef struct __name##_back_ring __name##_back_ring_t
>      (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
>  } while (0)
>  
> +
> +/*
> + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
> + * functions to check if there is data on the ring, and to read and
> + * write to them.
> + *
> + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
> + * does not define the indexes page. As different protocols can have
> + * extensions to the basic format, this macro allows them to define their
> + * own struct.
> + *
> + * XEN_FLEX_RING_SIZE
> + *   Convenience macro to calculate the size of one of the two rings
> + *   from the overall order.
> + *
> + * $NAME_mask
> + *   Function to apply the size mask to an index, to reduce the index
> + *   within the range [0, size - 1].
> + *
> + * $NAME_read_packet
> + *   Function to read data from the ring. The amount of data to read is
> + *   specified by the "size" argument.
> + *
> + * $NAME_write_packet
> + *   Function to write data to the ring. The amount of data to write is
> + *   specified by the "size" argument.
> + *
> + * $NAME_get_ring_ptr
> + *   Convenience function that returns a pointer to read/write to the
> + *   ring at the right location.
> + *
> + * $NAME_data_intf
> + *   Indexes page, shared between frontend and backend. It also
> + *   contains the array of grant refs.
> + *
> + * $NAME_queued
> + *   Function to calculate how many bytes are currently on the ring,
> + *   ready to be read. It can also be used to calculate how much free
> + *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
> + *   $NAME_queued()).
> + */
> +
> +#ifndef XEN_PAGE_SHIFT
> +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
> + * 4K, regardless of the architecture, and page granularity chosen by
> + * operating systems.
> + */
> +#define XEN_PAGE_SHIFT 12
> +#endif
> +#define XEN_FLEX_RING_SIZE(order)                                            
>  \
> +    (1UL << ((order) + XEN_PAGE_SHIFT - 1))
> +
> +#define DEFINE_XEN_FLEX_RING(name)                                           
>  \
> +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)         
>  \
> +{                                                                            
>  \
> +    return idx & (ring_size - 1);                                            
>  \
> +}                                                                            
>  \
> +                                                                             
>  \
> +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,         
>  \
> +                                                 RING_IDX idx,               
>  \
> +                                                 RING_IDX ring_size)         
>  \
> +{                                                                            
>  \
> +    return buf + name##_mask(idx, ring_size);                                
>  \
> +}                                                                            
>  \
> +                                                                             
>  \
> +static inline void name##_read_packet(void *opaque,                          
>  \
> +                                      const unsigned char *buf,              
>  \
> +                                      size_t size,                           
>  \
> +                                      RING_IDX masked_prod,                  
>  \
> +                                      RING_IDX *masked_cons,                 
>  \
> +                                      RING_IDX ring_size)                    
>  \
> +{                                                                            
>  \
> +    if (*masked_cons < masked_prod ||                                        
>  \
> +        size <= ring_size - *masked_cons) {                                  
>  \
> +        memcpy(opaque, buf + *masked_cons, size);                            
>  \
> +    } else {                                                                 
>  \
> +        memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons);        
>  \
> +        memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf,      
>  \
> +               size - (ring_size - *masked_cons));                           
>  \
> +    }                                                                        
>  \
> +    *masked_cons = name##_mask(*masked_cons + size, ring_size);              
>  \
> +}                                                                            
>  \
> +                                                                             
>  \
> +static inline void name##_write_packet(unsigned char *buf,                   
>  \
> +                                       const void *opaque,                   
>  \
> +                                       size_t size,                          
>  \
> +                                       RING_IDX *masked_prod,                
>  \
> +                                       RING_IDX masked_cons,                 
>  \
> +                                       RING_IDX ring_size)                   
>  \
> +{                                                                            
>  \
> +    if (*masked_prod < masked_cons ||                                        
>  \
> +        size <= ring_size - *masked_prod) {                                  
>  \
> +        memcpy(buf + *masked_prod, opaque, size);                            
>  \
> +    } else {                                                                 
>  \
> +        memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod);        
>  \
> +        memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod),    
>  \
> +               size - (ring_size - *masked_prod));                           
>  \
> +    }                                                                        
>  \
> +    *masked_prod = name##_mask(*masked_prod + size, ring_size);              
>  \
> +}                                                                            
>  \
> +                                                                             
>  \
> +static inline RING_IDX name##_queued(RING_IDX prod,                          
>  \
> +                                     RING_IDX cons,                          
>  \
> +                                     RING_IDX ring_size)                     
>  \
> +{                                                                            
>  \
> +    RING_IDX size;                                                           
>  \
> +                                                                             
>  \
> +    if (prod == cons)                                                        
>  \
> +        return 0;                                                            
>  \
> +                                                                             
>  \
> +    prod = name##_mask(prod, ring_size);                                     
>  \
> +    cons = name##_mask(cons, ring_size);                                     
>  \
> +                                                                             
>  \
> +    if (prod == cons)                                                        
>  \
> +        return ring_size;                                                    
>  \
> +                                                                             
>  \
> +    if (prod > cons)                                                         
>  \
> +        size = prod - cons;                                                  
>  \
> +    else                                                                     
>  \
> +        size = ring_size - (cons - prod);                                    
>  \
> +    return size;                                                             
>  \
> +}                                                                            
>  \
> +                                                                             
>  \
> +struct name##_data {                                                         
>  \
> +    unsigned char *in; /* half of the allocation */                          
>  \
> +    unsigned char *out; /* half of the allocation */                         
>  \
> +}
> +
> +#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                                  
>  \
> +struct name##_data_intf {                                                    
>  \
> +    RING_IDX in_cons, in_prod;                                               
>  \
> +                                                                             
>  \
> +    uint8_t pad1[56];                                                        
>  \
> +                                                                             
>  \
> +    RING_IDX out_cons, out_prod;                                             
>  \
> +                                                                             
>  \
> +    uint8_t pad2[56];                                                        
>  \
> +                                                                             
>  \
> +    RING_IDX ring_order;                                                     
>  \
> +    grant_ref_t ref[];                                                       
>  \
> +};                                                                           
>  \
> +DEFINE_XEN_FLEX_RING(name)
> +
>  #endif /* __XEN_PUBLIC_IO_RING_H__ */
>  
>  /*
> diff --git a/include/xen/io/sndif.h b/include/xen/io/sndif.h
> new file mode 100644
> index 0000000..c5c1978
> --- /dev/null
> +++ b/include/xen/io/sndif.h
> @@ -0,0 +1,803 @@
> +/******************************************************************************
> + * sndif.h
> + *
> + * Unified sound-device I/O interface for Xen guest OSes.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2013-2015 GlobalLogic Inc.
> + * Copyright (C) 2016-2017 EPAM Systems Inc.
> + *
> + * Authors: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> + *          Oleksandr Grytsov <oleksandr_grytsov@xxxxxxxx>
> + *          Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
> + *          Iurii Konovalenko <iurii.konovalenko@xxxxxxxxxxxxxxx>
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_SNDIF_H__
> +#define __XEN_PUBLIC_IO_SNDIF_H__
> +
> +#include "ring.h"
> +#include "../grant_table.h"
> +
> +/*
> + 
> ******************************************************************************
> + *                  Feature and Parameter Negotiation
> + 
> ******************************************************************************
> + *
> + * Front->back notifications: when enqueuing a new request, sending a
> + * notification can be made conditional on xensnd_req (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Backends must set
> + * xensnd_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
> + *
> + * Back->front notifications: when enqueuing a new response, sending a
> + * notification can be made conditional on xensnd_resp (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Frontends must set
> + * xensnd_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
> + *
> + * The two halves of a para-virtual sound card driver utilize nodes within
> + * XenStore to communicate capabilities and to negotiate operating 
> parameters.
> + * This section enumerates these nodes which reside in the respective front 
> and
> + * backend portions of XenStore, following the XenBus convention.
> + *
> + * All data in XenStore is stored as strings. Nodes specifying numeric
> + * values are encoded in decimal. Integer value ranges listed below are
> + * expressed as fixed sized integer types capable of storing the conversion
> + * of a properly formatted node string, without loss of information.
> + *
> + 
> ******************************************************************************
> + *                        Example configuration
> + 
> ******************************************************************************
> + *
> + * Note: depending on the use-case backend can expose more sound cards and
> + * PCM devices/streams than the underlying HW physically has by employing
> + * SW mixers, configuring virtual sound streams, channels etc.
> + *
> + * This is an example of backend and frontend configuration:
> + *
> + *--------------------------------- Backend 
> -----------------------------------
> + *
> + * /local/domain/0/backend/vsnd/1/0/frontend-id = "1"
> + * /local/domain/0/backend/vsnd/1/0/frontend = 
> "/local/domain/1/device/vsnd/0"
> + * /local/domain/0/backend/vsnd/1/0/state = "4"
> + * /local/domain/0/backend/vsnd/1/0/versions = "1,2"
> + *
> + *--------------------------------- Frontend 
> ----------------------------------
> + *
> + * /local/domain/1/device/vsnd/0/backend-id = "0"
> + * /local/domain/1/device/vsnd/0/backend = "/local/domain/0/backend/vsnd/1/0"
> + * /local/domain/1/device/vsnd/0/state = "4"
> + * /local/domain/1/device/vsnd/0/version = "1"
> + *
> + *----------------------------- Card configuration 
> ----------------------------
> + *
> + * /local/domain/1/device/vsnd/0/short-name = "Card short name"
> + * /local/domain/1/device/vsnd/0/long-name = "Card long name"
> + * /local/domain/1/device/vsnd/0/sample-rates = 
> "8000,32000,44100,48000,96000"
> + * /local/domain/1/device/vsnd/0/sample-formats = "s8,u8,s16_le,s16_be"
> + * /local/domain/1/device/vsnd/0/buffer-size = "262144"
> + *
> + *------------------------------- PCM device 0 
> --------------------------------
> + *
> + * /local/domain/1/device/vsnd/0/0/name = "General analog"
> + * /local/domain/1/device/vsnd/0/0/channels-max = "5"
> + *
> + *----------------------------- Stream 0, playback 
> ----------------------------
> + *
> + * /local/domain/1/device/vsnd/0/0/0/type = "p"
> + * /local/domain/1/device/vsnd/0/0/0/sample-formats = "s8,u8"
> + * /local/domain/1/device/vsnd/0/0/0/unique-id = "0"
> + *
> + * /local/domain/1/device/vsnd/0/0/0/ring-ref = "386"
> + * /local/domain/1/device/vsnd/0/0/0/event-channel = "15"
> + *
> + *------------------------------ Stream 1, capture 
> ----------------------------
> + *
> + * /local/domain/1/device/vsnd/0/0/1/type = "c"
> + * /local/domain/1/device/vsnd/0/0/1/channels-max = "2"
> + * /local/domain/1/device/vsnd/0/0/1/unique-id = "1"
> + *
> + * /local/domain/1/device/vsnd/0/0/1/ring-ref = "384"
> + * /local/domain/1/device/vsnd/0/0/1/event-channel = "13"
> + *
> + *------------------------------- PCM device 1 
> --------------------------------
> + *
> + * /local/domain/1/device/vsnd/0/1/name = "HDMI-0"
> + * /local/domain/1/device/vsnd/0/1/sample-rates = "8000,32000,44100"
> + *
> + *------------------------------ Stream 0, capture 
> ----------------------------
> + *
> + * /local/domain/1/device/vsnd/0/1/0/type = "c"
> + * /local/domain/1/device/vsnd/0/1/0/unique-id = "2"
> + *
> + * /local/domain/1/device/vsnd/0/1/0/ring-ref = "387"
> + * /local/domain/1/device/vsnd/0/1/0/event-channel = "151"
> + *
> + *------------------------------- PCM device 2 
> --------------------------------
> + *
> + * /local/domain/1/device/vsnd/0/2/name = "SPDIF"
> + *
> + *----------------------------- Stream 0, playback 
> ----------------------------
> + *
> + * /local/domain/1/device/vsnd/0/2/0/type = "p"
> + * /local/domain/1/device/vsnd/0/2/0/unique-id = "3"
> + *
> + * /local/domain/1/device/vsnd/0/2/0/ring-ref = "389"
> + * /local/domain/1/device/vsnd/0/2/0/event-channel = "152"
> + *
> + 
> ******************************************************************************
> + *                            Backend XenBus Nodes
> + 
> ******************************************************************************
> + *
> + *----------------------------- Protocol version 
> ------------------------------
> + *
> + * versions
> + *      Values:         <string>
> + *
> + *      List of XENSND_LIST_SEPARATOR separated protocol versions supported
> + *      by the backend. For example "1,2,3".
> + *
> + 
> ******************************************************************************
> + *                            Frontend XenBus Nodes
> + 
> ******************************************************************************
> + *
> + *-------------------------------- Addressing 
> ---------------------------------
> + *
> + * dom-id
> + *      Values:         <uint16_t>
> + *
> + *      Domain identifier.
> + *
> + * dev-id
> + *      Values:         <uint16_t>
> + *
> + *      Device identifier.
> + *
> + * pcm-dev-idx
> + *      Values:         <uint8_t>
> + *
> + *      Zero based contiguous index of the PCM device.
> + *
> + * stream-idx
> + *      Values:         <uint8_t>
> + *
> + *      Zero based contiguous index of the stream of the PCM device.
> + *
> + * The following pattern is used for addressing:
> + *   
> /local/domain/<dom-id>/device/vsnd/<dev-id>/<pcm-dev-idx>/<stream-idx>/...
> + *
> + *----------------------------- Protocol version 
> ------------------------------
> + *
> + * version
> + *      Values:         <string>
> + *
> + *      Protocol version, chosen among the ones supported by the backend.
> + *
> + *------------------------------- PCM settings 
> --------------------------------
> + *
> + * Every virtualized sound frontend has a set of PCM devices and streams, 
> each
> + * could be individually configured. Part of the PCM configuration can be
> + * defined at higher level of the hierarchy and be fully or partially re-used
> + * by the underlying layers. These configuration values are:
> + *  o number of channels (min/max)
> + *  o supported sample rates
> + *  o supported sample formats.
> + * E.g. one can define these values for the whole card, device or stream.
> + * Every underlying layer in turn can re-define some or all of them to better
> + * fit its needs. For example, card may define number of channels to be
> + * in [1; 8] range, and some particular stream may be limited to [1; 2] only.
> + * The rule is that the underlying layer must be a subset of the upper layer
> + * range.
> + *
> + * channels-min
> + *      Values:         <uint8_t>
> + *
> + *      The minimum amount of channels that is supported, [1; channels-max].
> + *      Optional, if not set or omitted a value of 1 is used.
> + *
> + * channels-max
> + *      Values:         <uint8_t>
> + *
> + *      The maximum amount of channels that is supported.
> + *      Must be at least <channels-min>.
> + *
> + * sample-rates
> + *      Values:         <list of uint32_t>
> + *
> + *      List of supported sample rates separated by XENSND_LIST_SEPARATOR.
> + *      Sample rates are expressed as a list of decimal values w/o any
> + *      ordering requirement.
> + *
> + * sample-formats
> + *      Values:         <list of XENSND_PCM_FORMAT_XXX_STR>
> + *
> + *      List of supported sample formats separated by XENSND_LIST_SEPARATOR.
> + *      Items must not exceed XENSND_SAMPLE_FORMAT_MAX_LEN length.
> + *
> + * buffer-size
> + *      Values:         <uint32_t>
> + *
> + *      The maximum size in octets of the buffer to allocate per stream.
> + *
> + *----------------------- Virtual sound card settings 
> -------------------------
> + * short-name
> + *      Values:         <char[32]>
> + *
> + *      Short name of the virtual sound card. Optional.
> + *
> + * long-name
> + *      Values:         <char[80]>
> + *
> + *      Long name of the virtual sound card. Optional.
> + *
> + *----------------------------- Device settings 
> -------------------------------
> + * name
> + *      Values:         <char[80]>
> + *
> + *      Name of the sound device within the virtual sound card. Optional.
> + *
> + *----------------------------- Stream settings 
> -------------------------------
> + *
> + * type
> + *      Values:         "p", "c"
> + *
> + *      Stream type: "p" - playback stream, "c" - capture stream
> + *
> + *      If both capture and playback are needed then two streams need to be
> + *      defined under the same device.
> + *
> + * unique-id
> + *      Values:         <uint32_t>
> + *
> + *      After stream initialization it is assigned a unique ID (within the 
> front
> + *      driver), so every stream of the frontend can be identified by the
> + *      backend by this ID. This is not equal to stream-idx as the latter is
> + *      zero based within the device, but this index is contiguous within the
> + *      driver.
> + *
> + *-------------------- Stream Request Transport Parameters 
> --------------------
> + *
> + * event-channel
> + *      Values:         <uint32_t>
> + *
> + *      The identifier of the Xen event channel used to signal activity
> + *      in the ring buffer.
> + *
> + * ring-ref
> + *      Values:         <uint32_t>
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      a sole page in a single page sized ring buffer.
> + *
> + 
> ******************************************************************************
> + *                               STATE DIAGRAMS
> + 
> ******************************************************************************
> + *
> + * Tool stack creates front and back state nodes with initial state
> + * XenbusStateInitialising.
> + * Tool stack creates and sets up frontend sound configuration nodes per 
> domain.
> + *
> + * Front                                Back
> + * =================================    =====================================
> + * XenbusStateInitialising              XenbusStateInitialising
> + *                                       o Query backend device 
> identification
> + *                                         data.
> + *                                       o Open and validate backend device.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateInitWait
> + *
> + * o Query frontend configuration
> + * o Allocate and initialize
> + *   event channels per configured
> + *   playback/capture stream.
> + * o Publish transport parameters
> + *   that will be in effect during
> + *   this connection.
> + *              |
> + *              |
> + *              V
> + * XenbusStateInitialised
> + *
> + *                                       o Query frontend transport 
> parameters.
> + *                                       o Connect to the event channels.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateConnected
> + *
> + *  o Create and initialize OS
> + *    virtual sound device instances
> + *    as per configuration.
> + *              |
> + *              |
> + *              V
> + * XenbusStateConnected
> + *
> + *                                      XenbusStateUnknown
> + *                                      XenbusStateClosed
> + *                                      XenbusStateClosing
> + * o Remove virtual sound device
> + * o Remove event channels
> + *              |
> + *              |
> + *              V
> + * XenbusStateClosed
> + *
> + *------------------------------- Recovery flow 
> -------------------------------
> + *
> + * In case of frontend unrecoverable errors backend handles that as
> + * if frontend goes into the XenbusStateClosed state.
> + *
> + * In case of backend unrecoverable errors frontend tries removing
> + * the virtualized device. If this is possible at the moment of error,
> + * then frontend goes into the XenbusStateInitialising state and is ready for
> + * new connection with backend. If the virtualized device is still in use and
> + * cannot be removed, then frontend goes into the XenbusStateReconfiguring 
> state
> + * until either the virtualized device is removed or backend initiates a new
> + * connection. On the virtualized device removal frontend goes into the
> + * XenbusStateInitialising state.
> + *
> + * Note on XenbusStateReconfiguring state of the frontend: if backend has
> + * unrecoverable errors then frontend cannot send requests to the backend
> + * and thus cannot provide functionality of the virtualized device anymore.
> + * After backend is back to normal the virtualized device may still hold some
> + * state: configuration in use, allocated buffers, client application state 
> etc.
> + * So, in most cases, this will require frontend to implement complex 
> recovery
> + * reconnect logic. Instead, by going into XenbusStateReconfiguring state,
> + * frontend will make sure no new clients of the virtualized device are
> + * accepted, allow existing client(s) to exit gracefully by signaling error
> + * state etc.
> + * Once all the clients are gone frontend can reinitialize the virtualized
> + * device and get into XenbusStateInitialising state again signaling the
> + * backend that a new connection can be made.
> + *
> + * There are multiple conditions possible under which frontend will go from
> + * XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS
> + * specific. For example:
> + * 1. The underlying OS framework may provide callbacks to signal that the 
> last
> + *    client of the virtualized device has gone and the device can be removed
> + * 2. Frontend can schedule a deferred work (timer/tasklet/workqueue)
> + *    to periodically check if this is the right time to re-try removal of
> + *    the virtualized device.
> + * 3. By any other means.
> + *
> + 
> ******************************************************************************
> + *                             PCM FORMATS
> + 
> ******************************************************************************
> + *
> + * XENSND_PCM_FORMAT_<format>[_<endian>]
> + *
> + * format: <S/U/F><bits> or <name>
> + *     S - signed, U - unsigned, F - float
> + *     bits - 8, 16, 24, 32
> + *     name - MU_LAW, GSM, etc.
> + *
> + * endian: <LE/BE>, may be absent
> + *     LE - Little endian, BE - Big endian
> + */
> +#define XENSND_PCM_FORMAT_S8            0
> +#define XENSND_PCM_FORMAT_U8            1
> +#define XENSND_PCM_FORMAT_S16_LE        2
> +#define XENSND_PCM_FORMAT_S16_BE        3
> +#define XENSND_PCM_FORMAT_U16_LE        4
> +#define XENSND_PCM_FORMAT_U16_BE        5
> +#define XENSND_PCM_FORMAT_S24_LE        6
> +#define XENSND_PCM_FORMAT_S24_BE        7
> +#define XENSND_PCM_FORMAT_U24_LE        8
> +#define XENSND_PCM_FORMAT_U24_BE        9
> +#define XENSND_PCM_FORMAT_S32_LE        10
> +#define XENSND_PCM_FORMAT_S32_BE        11
> +#define XENSND_PCM_FORMAT_U32_LE        12
> +#define XENSND_PCM_FORMAT_U32_BE        13
> +#define XENSND_PCM_FORMAT_F32_LE        14 /* 4-byte float, IEEE-754 32-bit, 
> */
> +#define XENSND_PCM_FORMAT_F32_BE        15 /* range -1.0 to 1.0              
> */
> +#define XENSND_PCM_FORMAT_F64_LE        16 /* 8-byte float, IEEE-754 64-bit, 
> */
> +#define XENSND_PCM_FORMAT_F64_BE        17 /* range -1.0 to 1.0              
> */
> +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE 18
> +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE 19
> +#define XENSND_PCM_FORMAT_MU_LAW        20
> +#define XENSND_PCM_FORMAT_A_LAW         21
> +#define XENSND_PCM_FORMAT_IMA_ADPCM     22
> +#define XENSND_PCM_FORMAT_MPEG          23
> +#define XENSND_PCM_FORMAT_GSM           24
> +
> +/*
> + 
> ******************************************************************************
> + *                             REQUEST CODES
> + 
> ******************************************************************************
> + */
> +#define XENSND_OP_OPEN                  0
> +#define XENSND_OP_CLOSE                 1
> +#define XENSND_OP_READ                  2
> +#define XENSND_OP_WRITE                 3
> +#define XENSND_OP_SET_VOLUME            4
> +#define XENSND_OP_GET_VOLUME            5
> +#define XENSND_OP_MUTE                  6
> +#define XENSND_OP_UNMUTE                7
> +
> +/*
> + 
> ******************************************************************************
> + *               XENSTORE FIELD AND PATH NAME STRINGS, HELPERS
> + 
> ******************************************************************************
> + */
> +#define XENSND_DRIVER_NAME              "vsnd"
> +
> +#define XENSND_LIST_SEPARATOR           ","
> +/* Field names */
> +#define XENSND_FIELD_BE_VERSIONS        "versions"
> +#define XENSND_FIELD_FE_VERSION         "version"
> +#define XENSND_FIELD_VCARD_SHORT_NAME   "short-name"
> +#define XENSND_FIELD_VCARD_LONG_NAME    "long-name"
> +#define XENSND_FIELD_RING_REF           "ring-ref"
> +#define XENSND_FIELD_EVT_CHNL           "event-channel"
> +#define XENSND_FIELD_DEVICE_NAME        "name"
> +#define XENSND_FIELD_TYPE               "type"
> +#define XENSND_FIELD_STREAM_UNIQUE_ID   "unique-id"
> +#define XENSND_FIELD_CHANNELS_MIN       "channels-min"
> +#define XENSND_FIELD_CHANNELS_MAX       "channels-max"
> +#define XENSND_FIELD_SAMPLE_RATES       "sample-rates"
> +#define XENSND_FIELD_SAMPLE_FORMATS     "sample-formats"
> +#define XENSND_FIELD_BUFFER_SIZE        "buffer-size"
> +
> +/* Stream type field values. */
> +#define XENSND_STREAM_TYPE_PLAYBACK     "p"
> +#define XENSND_STREAM_TYPE_CAPTURE      "c"
> +/* Sample rate max string length */
> +#define XENSND_SAMPLE_RATE_MAX_LEN      11
> +/* Sample format field values */
> +#define XENSND_SAMPLE_FORMAT_MAX_LEN    24
> +
> +#define XENSND_PCM_FORMAT_S8_STR        "s8"
> +#define XENSND_PCM_FORMAT_U8_STR        "u8"
> +#define XENSND_PCM_FORMAT_S16_LE_STR    "s16_le"
> +#define XENSND_PCM_FORMAT_S16_BE_STR    "s16_be"
> +#define XENSND_PCM_FORMAT_U16_LE_STR    "u16_le"
> +#define XENSND_PCM_FORMAT_U16_BE_STR    "u16_be"
> +#define XENSND_PCM_FORMAT_S24_LE_STR    "s24_le"
> +#define XENSND_PCM_FORMAT_S24_BE_STR    "s24_be"
> +#define XENSND_PCM_FORMAT_U24_LE_STR    "u24_le"
> +#define XENSND_PCM_FORMAT_U24_BE_STR    "u24_be"
> +#define XENSND_PCM_FORMAT_S32_LE_STR    "s32_le"
> +#define XENSND_PCM_FORMAT_S32_BE_STR    "s32_be"
> +#define XENSND_PCM_FORMAT_U32_LE_STR    "u32_le"
> +#define XENSND_PCM_FORMAT_U32_BE_STR    "u32_be"
> +#define XENSND_PCM_FORMAT_F32_LE_STR    "float_le"
> +#define XENSND_PCM_FORMAT_F32_BE_STR    "float_be"
> +#define XENSND_PCM_FORMAT_F64_LE_STR    "float64_le"
> +#define XENSND_PCM_FORMAT_F64_BE_STR    "float64_be"
> +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE_STR "iec958_subframe_le"
> +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE_STR "iec958_subframe_be"
> +#define XENSND_PCM_FORMAT_MU_LAW_STR    "mu_law"
> +#define XENSND_PCM_FORMAT_A_LAW_STR     "a_law"
> +#define XENSND_PCM_FORMAT_IMA_ADPCM_STR "ima_adpcm"
> +#define XENSND_PCM_FORMAT_MPEG_STR      "mpeg"
> +#define XENSND_PCM_FORMAT_GSM_STR       "gsm"
> +
> +
> +/*
> + 
> ******************************************************************************
> + *                          STATUS RETURN CODES
> + 
> ******************************************************************************
> + *
> + * Status return code is zero on success and -XEN_EXX on failure.
> + *
> + 
> ******************************************************************************
> + *                              Assumptions
> + 
> ******************************************************************************
> + * o usage of grant reference 0 as invalid grant reference:
> + *   grant reference 0 is valid, but never exposed to a PV driver,
> + *   because of the fact it is already in use/reserved by the PV console.
> + * o all references in this document to page sizes must be treated
> + *   as pages of size XEN_PAGE_SIZE unless otherwise noted.
> + *
> + 
> ******************************************************************************
> + *       Description of the protocol between frontend and backend driver
> + 
> ******************************************************************************
> + *
> + * The two halves of a Para-virtual sound driver communicate with
> + * each other using shared pages and event channels.
> + * Shared page contains a ring with request/response packets.
> + *
> + * Packets, used for input/output operations, e.g. read/write, set/get 
> volume,
> + * etc., provide offset/length fields in order to allow asynchronous protocol
> + * operation with buffer space sharing: part of the buffer allocated at
> + * XENSND_OP_OPEN can be used for audio samples and part, for example,
> + * for volume control.
> + *
> + * All reserved fields in the structures below must be 0.
> + *
> + *---------------------------------- Requests 
> ---------------------------------
> + *
> + * All request packets have the same length (32 octets)
> + * All request packets have common header:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |    operation   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + *   id - uint16_t, private guest value, echoed in response
> + *   operation - uint8_t, operation code, XENSND_OP_???
> + *
> + * For all packets which use offset and length:
> + *   offset - uint32_t, read or write data offset within the shared buffer,
> + *     passed with XENSND_OP_OPEN request, octets,
> + *     [0; XENSND_OP_OPEN.buffer_sz - 1].
> + *   length - uint32_t, read or write data length, octets
> + *
> + * Request open - open a PCM stream for playback or capture:
> + *
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                | XENSND_OP_OPEN |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                             pcm_rate                              | 12
> + * +----------------+----------------+----------------+----------------+
> + * |  pcm_format    |  pcm_channels  |             reserved            | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             buffer_sz                             | 20
> + * +----------------+----------------+----------------+----------------+
> + * |                           gref_directory                          | 24
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 28
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * pcm_rate - uint32_t, stream data rate, Hz
> + * pcm_format - uint8_t, XENSND_PCM_FORMAT_XXX value
> + * pcm_channels - uint8_t, number of channels of this stream,
> + *   [channels-min; channels-max]
> + * buffer_sz - uint32_t, buffer size to be allocated, octets
> + * gref_directory - grant_ref_t, a reference to the first shared page
> + *   describing shared buffer references. At least one page exists. If shared
> + *   buffer size  (buffer_sz) exceeds what can be addressed by this single page,
> + *   then reference to the next page must be supplied (see gref_dir_next_page
> + *   below)
> + */
> +
> +struct xensnd_open_req {
> +    uint32_t pcm_rate;
> +    uint8_t pcm_format;
> +    uint8_t pcm_channels;
> +    uint16_t reserved;
> +    uint32_t buffer_sz;
> +    grant_ref_t gref_directory;
> +};
> +
> +/*
> + * Shared page for XENSND_OP_OPEN buffer descriptor (gref_directory in the
> + *   request) employs a list of pages, describing all pages of the shared data
> + *   buffer:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |                        gref_dir_next_page                         | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                              gref[0]                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                              gref[i]                              | i*4+8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             gref[N - 1]                           | N*4+8
> + * +----------------+----------------+----------------+----------------+
> + *
> + * gref_dir_next_page - grant_ref_t, reference to the next page describing
> + *   page directory. Must be 0 if there are no more pages in the list.
> + * gref[i] - grant_ref_t, reference to a shared page of the buffer
> + *   allocated at XENSND_OP_OPEN
> + *
> + * Number of grant_ref_t entries in the whole page directory is not
> + * passed, but instead can be calculated as:
> + *   num_grefs_total = (XENSND_OP_OPEN.buffer_sz + XEN_PAGE_SIZE - 1) /
> + *       XEN_PAGE_SIZE
> + */
> +
> +struct xensnd_page_directory {
> +    grant_ref_t gref_dir_next_page;
> +    grant_ref_t gref[1]; /* Variable length */
> +};
> +
> +/*
> + *  Request close - close an opened pcm stream:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                | XENSND_OP_CLOSE|    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * Request read/write - used for read (for capture) or write (for playback):
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |   operation    |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                              offset                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                              length                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 20
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * operation - XENSND_OP_READ for read or XENSND_OP_WRITE for write
> + */
> +
> +struct xensnd_rw_req {
> +    uint32_t offset;
> +    uint32_t length;
> +};
> +
> +/*
> + * Request set/get volume - set/get channels' volume of the stream given:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |   operation    |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                              offset                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                              length                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * operation - XENSND_OP_SET_VOLUME for volume set
> + *   or XENSND_OP_GET_VOLUME for volume get
> + * Buffer passed with XENSND_OP_OPEN is used to exchange volume
> + * values:
> + *
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |                             channel[0]                            | 4
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             channel[i]                            | i*4
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                           channel[N - 1]                          | (N-1)*4
> + * +----------------+----------------+----------------+----------------+
> + *
> + * N = XENSND_OP_OPEN.pcm_channels
> + * i - uint8_t, index of a channel
> + * channel[i] - sint32_t, volume of i-th channel
> + * Volume is expressed as a signed value in steps of 0.001 dB,
> + * while 0 being 0 dB.
> + *
> + * Request mute/unmute - mute/unmute stream:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |   operation    |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                              offset                               | 12
> + * +----------------+----------------+----------------+----------------+
> + * |                              length                               | 16
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * operation - XENSND_OP_MUTE for mute or XENSND_OP_UNMUTE for unmute
> + * Buffer passed with XENSND_OP_OPEN is used to exchange mute/unmute
> + * values:
> + *
> + *                                   0                                 octet
> + * +----------------+----------------+----------------+----------------+
> + * |                             channel[0]                            | 4
> + * +----------------+----------------+----------------+----------------+
> + * +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             channel[i]                            | i*4
> + * +----------------+----------------+----------------+----------------+
> + * +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                           channel[N - 1]                          | (N-1)*4
> + * +----------------+----------------+----------------+----------------+
> + *
> + * N = XENSND_OP_OPEN.pcm_channels
> + * i - uint8_t, index of a channel
> + * channel[i] - uint8_t, non-zero if i-th channel needs to be muted/unmuted
> + *
> + *------------------------------------ N.B. -----------------------------------
> + *
> + * The 'struct xensnd_rw_req' is also used for XENSND_OP_SET_VOLUME,
> + * XENSND_OP_GET_VOLUME, XENSND_OP_MUTE, XENSND_OP_UNMUTE.
> + */
> +
> +/*
> + *---------------------------------- Responses --------------------------------
> + *
> + * All response packets have the same length (32 octets)
> + *
> + * Response for all requests:
> + *         0                1                 2               3        octet
> + * +----------------+----------------+----------------+----------------+
> + * |               id                |    operation   |    reserved    | 4
> + * +----------------+----------------+----------------+----------------+
> + * |                              status                               | 8
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 12
> + * +----------------+----------------+----------------+----------------+
> + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
> + * +----------------+----------------+----------------+----------------+
> + * |                             reserved                              | 32
> + * +----------------+----------------+----------------+----------------+
> + *
> + * id - uint16_t, copied from the request
> + * operation - uint8_t, XENSND_OP_* - copied from request
> + * status - int32_t, response status, zero on success and -XEN_EXX on failure
> + */
> +
> +struct xensnd_req {
> +    uint16_t id;
> +    uint8_t operation;
> +    uint8_t reserved[5];
> +    union {
> +        struct xensnd_open_req open;
> +        struct xensnd_rw_req rw;
> +        uint8_t reserved[24];
> +    } op;
> +};
> +
> +struct xensnd_resp {
> +    uint16_t id;
> +    uint8_t operation;
> +    uint8_t reserved;
> +    int32_t status;
> +    uint8_t reserved1[24];
> +};
> +
> +DEFINE_RING_TYPES(xen_sndif, struct xensnd_req, struct xensnd_resp);
> +
> +#endif /* __XEN_PUBLIC_IO_SNDIF_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/io/usbif.h b/include/xen/io/usbif.h
> index 0af2a38..c6a5863 100644
> --- a/include/xen/io/usbif.h
> +++ b/include/xen/io/usbif.h
> @@ -31,6 +31,76 @@
>  #include "ring.h"
>  #include "../grant_table.h"
>  
> +/*
> + * Feature and Parameter Negotiation
> + * =================================
> + * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
> + * communicate capabilities and to negotiate operating parameters. This
> + * section enumerates these nodes which reside in the respective front and
> + * backend portions of the XenStore, following the XenBus convention.
> + *
> + * Any specified default value is in effect if the corresponding XenBus node
> + * is not present in the XenStore.
> + *
> + * XenStore nodes in sections marked "PRIVATE" are solely for use by the
> + * driver side whose XenBus tree contains them.
> + *
> + *****************************************************************************
> + *                            Backend XenBus Nodes
> + *****************************************************************************
> + *
> + *------------------ Backend Device Identification (PRIVATE) ------------------
> + *
> + * num-ports
> + *      Values:         unsigned [1...31]
> + *
> + *      Number of ports for this (virtual) USB host connector.
> + *
> + * usb-ver
> + *      Values:         unsigned [1...2]
> + *
> + *      USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
> + *
> + * port/[1...31]
> + *      Values:         string
> + *
> + *      Physical USB device connected to the given port, e.g. "3-1.5".
> + *
> + *****************************************************************************
> + *                            Frontend XenBus Nodes
> + *****************************************************************************
> + *
> + *----------------------- Request Transport Parameters -----------------------
> + *
> + * event-channel
> + *      Values:         unsigned
> + *
> + *      The identifier of the Xen event channel used to signal activity
> + *      in the ring buffer.
> + *
> + * urb-ring-ref
> + *      Values:         unsigned
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      the sole page in a single page sized ring buffer. This is the ring
> + *      buffer for urb requests.
> + *
> + * conn-ring-ref
> + *      Values:         unsigned
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      the sole page in a single page sized ring buffer. This is the ring
> + *      buffer for connection/disconnection requests.
> + *
> + * protocol
> + *      Values:         string (XEN_IO_PROTO_ABI_*)
> + *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
> + *
> + *      The machine ABI rules governing the format of all ring request and
> + *      response structures.
> + *
> + */
> +
>  enum usb_spec_version {
>       USB_VER_UNKNOWN = 0,
>       USB_VER_USB11,
> @@ -41,37 +111,66 @@ enum usb_spec_version {
>  /*
>   *  USB pipe in usbif_request
>   *
> - *  bits 0-5 are specific bits for virtual USB driver.
> - *  bits 7-31 are standard urb pipe.
> - *
> - *  - port number(NEW):      bits 0-4
> - *                           (USB_MAXCHILDREN is 31)
> + *  - port number:   bits 0-4
> + *                           (USB_MAXCHILDREN is 31)
>   *
> - *  - operation flag(NEW):   bit 5
> - *                           (0 = submit urb,
> - *                            1 = unlink urb)
> + *  - operation flag:        bit 5
> + *                           (0 = submit urb,
> + *                            1 = unlink urb)
>   *
>   *  - direction:             bit 7
> - *                           (0 = Host-to-Device [Out]
> - *                           1 = Device-to-Host [In])
> + *                           (0 = Host-to-Device [Out]
> + *                            1 = Device-to-Host [In])
>   *
>   *  - device address:        bits 8-14
>   *
>   *  - endpoint:              bits 15-18
>   *
> - *  - pipe type:             bits 30-31
> - *                           (00 = isochronous, 01 = interrupt,
> - *                           10 = control, 11 = bulk)
> + *  - pipe type:     bits 30-31
> + *                           (00 = isochronous, 01 = interrupt,
> + *                            10 = control, 11 = bulk)
>   */
> -#define usbif_pipeportnum(pipe) ((pipe) & 0x1f)
> -#define usbif_setportnum_pipe(pipe, portnum) \
> -     ((pipe)|(portnum))
>  
> -#define usbif_pipeunlink(pipe) ((pipe) & 0x20)
> -#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe))
> -#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20))
> +#define USBIF_PIPE_PORT_MASK 0x0000001f
> +#define USBIF_PIPE_UNLINK    0x00000020
> +#define USBIF_PIPE_DIR               0x00000080
> +#define USBIF_PIPE_DEV_MASK  0x0000007f
> +#define USBIF_PIPE_DEV_SHIFT 8
> +#define USBIF_PIPE_EP_MASK   0x0000000f
> +#define USBIF_PIPE_EP_SHIFT  15
> +#define USBIF_PIPE_TYPE_MASK 0x00000003
> +#define USBIF_PIPE_TYPE_SHIFT        30
> +#define USBIF_PIPE_TYPE_ISOC 0
> +#define USBIF_PIPE_TYPE_INT  1
> +#define USBIF_PIPE_TYPE_CTRL 2
> +#define USBIF_PIPE_TYPE_BULK 3
> +
> +#define usbif_pipeportnum(pipe)                      ((pipe) & USBIF_PIPE_PORT_MASK)
> +#define usbif_setportnum_pipe(pipe, portnum) ((pipe) | (portnum))
> +
> +#define usbif_pipeunlink(pipe)                       ((pipe) & USBIF_PIPE_UNLINK)
> +#define usbif_pipesubmit(pipe)                       (!usbif_pipeunlink(pipe))
> +#define usbif_setunlink_pipe(pipe)           ((pipe) | USBIF_PIPE_UNLINK)
> +
> +#define usbif_pipein(pipe)                   ((pipe) & USBIF_PIPE_DIR)
> +#define usbif_pipeout(pipe)                  (!usbif_pipein(pipe))
> +
> +#define usbif_pipedevice(pipe)                       \
> +             (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
> +
> +#define usbif_pipeendpoint(pipe)             \
> +             (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
> +
> +#define usbif_pipetype(pipe)                 \
> +             (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
> +#define usbif_pipeisoc(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
> +#define usbif_pipeint(pipe)  (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
> +#define usbif_pipectrl(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
> +#define usbif_pipebulk(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
>  
>  #define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
> +#define USBIF_MAX_PORTNR     31
> +#define USBIF_RING_SIZE      4096
>  
>  /*
>   * RING for transferring urbs.
> @@ -89,6 +188,7 @@ struct usbif_urb_request {
>       /* basic urb parameter */
>       uint32_t pipe;
>       uint16_t transfer_flags;
> +#define USBIF_SHORT_NOT_OK   0x0001
>       uint16_t buffer_length;
>       union {
>               uint8_t ctrl[8]; /* setup_packet (Ctrl) */
> @@ -127,7 +227,7 @@ struct usbif_urb_response {
>  typedef struct usbif_urb_response usbif_urb_response_t;
>  
>  DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
> -#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE)
> +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
>  
>  /*
>   * RING for notifying connect/disconnect events to frontend
> @@ -141,10 +241,14 @@ struct usbif_conn_response {
>       uint16_t id; /* request id */
>       uint8_t portnum; /* port number */
>       uint8_t speed; /* usb_device_speed */
> +#define USBIF_SPEED_NONE     0
> +#define USBIF_SPEED_LOW              1
> +#define USBIF_SPEED_FULL     2
> +#define USBIF_SPEED_HIGH     3
>  };
>  typedef struct usbif_conn_response usbif_conn_response_t;
>  
>  DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
> -#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE)
> +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
>  
>  #endif /* __XEN_PUBLIC_IO_USBIF_H__ */
> diff --git a/include/xen/io/vscsiif.h b/include/xen/io/vscsiif.h
> index 7a1db05..d0bd3b5 100644
> --- a/include/xen/io/vscsiif.h
> +++ b/include/xen/io/vscsiif.h
> @@ -60,7 +60,7 @@
>   *
>   *      A string specifying the backend device: either a 4-tuple "h:c:t:l"
>   *      (host, controller, target, lun, all integers), or a WWN (e.g.
> - *      "naa.60014054ac780582").
> + *      "naa.60014054ac780582:0").
>   *
>   * v-dev
>   *      Values:         string
> @@ -104,6 +104,75 @@
>   *      response structures.
>   */
>  
> +/*
> + * Xenstore format in practice
> + * ===========================
> + * 
> + * The backend driver uses a single_host:many_devices notation to manage domU
> + * devices. Everything is stored in /local/domain/<backend_domid>/backend/vscsi/.
> + * The xenstore layout looks like this (dom0 is assumed to be the backend_domid):
> + * 
> + *     <domid>/<vhost>/feature-host = "0"
> + *     <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0"
> + *     <domid>/<vhost>/frontend-id = "<domid>"
> + *     <domid>/<vhost>/online = "1"
> + *     <domid>/<vhost>/state = "4"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1" or "naa.wwn:lun"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/state = "4"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/state = "4"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0"
> + * 
> + * The frontend driver maintains its state in
> + * /local/domain/<domid>/device/vscsi/.
> + * 
> + *     <vhost>/backend = "/local/domain/0/backend/vscsi/<domid>/<vhost>"
> + *     <vhost>/backend-id = "0"
> + *     <vhost>/event-channel = "20"
> + *     <vhost>/ring-ref = "43"
> + *     <vhost>/state = "4"
> + *     <vhost>/vscsi-devs/dev-0/state = "4"
> + *     <vhost>/vscsi-devs/dev-1/state = "4"
> + * 
> + * In addition to the entries for backend and frontend these flags are stored
> + * for the toolstack:
> + * 
> + *     <domid>/<vhost>/vscsi-devs/dev-1/p-devname = "/dev/$device"
> + *     <domid>/<vhost>/libxl_ctrl_index = "0"
> + * 
> + * 
> + * Backend/frontend protocol
> + * =========================
> + * 
> + * To create a vhost along with a device:
> + *     <domid>/<vhost>/feature-host = "0"
> + *     <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0"
> + *     <domid>/<vhost>/frontend-id = "<domid>"
> + *     <domid>/<vhost>/online = "1"
> + *     <domid>/<vhost>/state = "1"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/state = "1"
> + *     <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0"
> + * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-0/state become 4
> + * 
> + * To add another device to a vhost:
> + *     <domid>/<vhost>/state = "7"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/state = "1"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0"
> + * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-1/state become 4
> + * 
> + * To remove a device from a vhost:
> + *     <domid>/<vhost>/state = "7"
> + *     <domid>/<vhost>/vscsi-devs/dev-1/state = "5"
> + * Wait for <domid>/<vhost>/state to become 4
> + * Wait for <domid>/<vhost>/vscsi-devs/dev-1/state become 6
> + * Remove <domid>/<vhost>/vscsi-devs/dev-1/{state,p-dev,v-dev,p-devname}
> + * Remove <domid>/<vhost>/vscsi-devs/dev-1/
> + *
> + */
> +
>  /* Requests from the frontend to the backend */
>  
>  /*
> @@ -179,6 +248,7 @@
>   */
>  #define VSCSIIF_MAX_COMMAND_SIZE         16
>  #define VSCSIIF_SENSE_BUFFERSIZE         96
> +#define VSCSIIF_PAGE_SIZE              4096
>  
>  struct scsiif_request_segment {
>      grant_ref_t gref;
> @@ -187,7 +257,7 @@ struct scsiif_request_segment {
>  };
>  typedef struct scsiif_request_segment vscsiif_segment_t;
>  
> -#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
> +#define VSCSIIF_SG_PER_PAGE (VSCSIIF_PAGE_SIZE / sizeof(struct scsiif_request_segment))
>  
>  /* Size of one request is 252 bytes */
>  struct vscsiif_request {
> diff --git a/include/xen/io/xs_wire.h b/include/xen/io/xs_wire.h
> index 0a0cdbc..4dd6632 100644
> --- a/include/xen/io/xs_wire.h
> +++ b/include/xen/io/xs_wire.h
> @@ -28,7 +28,8 @@
>  
>  enum xsd_sockmsg_type
>  {
> -    XS_DEBUG,
> +    XS_CONTROL,
> +#define XS_DEBUG XS_CONTROL
>      XS_DIRECTORY,
>      XS_READ,
>      XS_GET_PERMS,
> @@ -48,8 +49,11 @@ enum xsd_sockmsg_type
>      XS_IS_DOMAIN_INTRODUCED,
>      XS_RESUME,
>      XS_SET_TARGET,
> -    XS_RESTRICT,
> -    XS_RESET_WATCHES,
> +    /* XS_RESTRICT has been removed */
> +    XS_RESET_WATCHES = XS_SET_TARGET + 2,
> +    XS_DIRECTORY_PART,
> +
> +    XS_TYPE_COUNT,      /* Number of valid types. */
>  
>      XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
>  };
> diff --git a/include/xen/kexec.h b/include/xen/kexec.h
> index a6a0a88..74ea981 100644
> --- a/include/xen/kexec.h
> +++ b/include/xen/kexec.h
> @@ -227,6 +227,20 @@ typedef struct xen_kexec_unload {
>  } xen_kexec_unload_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_kexec_unload_t);
>  
> +/*
> + * Figure out whether we have an image loaded. A return value of
> + * zero indicates no image loaded. A return value of one
> + * indicates an image is loaded. A negative return value
> + * indicates an error.
> + *
> + * Type must be one of KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH.
> + */
> +#define KEXEC_CMD_kexec_status 6
> +typedef struct xen_kexec_status {
> +    uint8_t type;
> +} xen_kexec_status_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_kexec_status_t);
> +
>  #else /* __XEN_INTERFACE_VERSION__ < 0x00040400 */
>  
>  #define KEXEC_CMD_kexec_load KEXEC_CMD_kexec_load_v1
> diff --git a/include/xen/mem_event.h b/include/xen/mem_event.h
> deleted file mode 100644
> index 599f9e8..0000000
> --- a/include/xen/mem_event.h
> +++ /dev/null
> @@ -1,134 +0,0 @@
> -/******************************************************************************
> - * mem_event.h
> - *
> - * Memory event common structures.
> - *
> - * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a copy
> - * of this software and associated documentation files (the "Software"), to
> - * deal in the Software without restriction, including without limitation the
> - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> - * sell copies of the Software, and to permit persons to whom the Software is
> - * furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice shall be included in
> - * all copies or substantial portions of the Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - */
> -
> -#ifndef _XEN_PUBLIC_MEM_EVENT_H
> -#define _XEN_PUBLIC_MEM_EVENT_H
> -
> -#include "xen.h"
> -#include "io/ring.h"
> -
> -/* Memory event flags */
> -#define MEM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
> -#define MEM_EVENT_FLAG_DROP_PAGE       (1 << 1)
> -#define MEM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
> -#define MEM_EVENT_FLAG_FOREIGN         (1 << 3)
> -#define MEM_EVENT_FLAG_DUMMY           (1 << 4)
> -/*
> - * Emulate the fault-causing instruction (if set in the event response flags).
> - * This will allow the guest to continue execution without lifting the page
> - * access restrictions.
> - */
> -#define MEM_EVENT_FLAG_EMULATE         (1 << 5)
> -/*
> - * Same as MEM_EVENT_FLAG_EMULATE, but with write operations or operations
> - * potentially having side effects (like memory mapped or port I/O) disabled.
> - */
> -#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
> -
> -/* Reasons for the memory event request */
> -#define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
> -#define MEM_EVENT_REASON_VIOLATION   1    /* access violation, GFN is address */
> -#define MEM_EVENT_REASON_CR0         2    /* CR0 was hit: gfn is new CR0 value, gla is previous */
> -#define MEM_EVENT_REASON_CR3         3    /* CR3 was hit: gfn is new CR3 value, gla is previous */
> -#define MEM_EVENT_REASON_CR4         4    /* CR4 was hit: gfn is new CR4 value, gla is previous */
> -#define MEM_EVENT_REASON_INT3        5    /* int3 was hit: gla/gfn are RIP */
> -#define MEM_EVENT_REASON_SINGLESTEP  6    /* single step was invoked: gla/gfn are RIP */
> -#define MEM_EVENT_REASON_MSR         7    /* MSR was hit: gfn is MSR value, gla is MSR address;
> -                                             does NOT honour HVMPME_onchangeonly */
> -
> -/* Using a custom struct (not hvm_hw_cpu) so as to not fill
> - * the mem_event ring buffer too quickly. */
> -struct mem_event_regs_x86 {
> -    uint64_t rax;
> -    uint64_t rcx;
> -    uint64_t rdx;
> -    uint64_t rbx;
> -    uint64_t rsp;
> -    uint64_t rbp;
> -    uint64_t rsi;
> -    uint64_t rdi;
> -    uint64_t r8;
> -    uint64_t r9;
> -    uint64_t r10;
> -    uint64_t r11;
> -    uint64_t r12;
> -    uint64_t r13;
> -    uint64_t r14;
> -    uint64_t r15;
> -    uint64_t rflags;
> -    uint64_t dr7;
> -    uint64_t rip;
> -    uint64_t cr0;
> -    uint64_t cr2;
> -    uint64_t cr3;
> -    uint64_t cr4;
> -    uint64_t sysenter_cs;
> -    uint64_t sysenter_esp;
> -    uint64_t sysenter_eip;
> -    uint64_t msr_efer;
> -    uint64_t msr_star;
> -    uint64_t msr_lstar;
> -    uint64_t fs_base;
> -    uint64_t gs_base;
> -    uint32_t cs_arbytes;
> -    uint32_t _pad;
> -};
> -
> -typedef struct mem_event_st {
> -    uint32_t flags;
> -    uint32_t vcpu_id;
> -
> -    uint64_t gfn;
> -    uint64_t offset;
> -    uint64_t gla; /* if gla_valid */
> -
> -    uint32_t p2mt;
> -
> -    uint16_t access_r:1;
> -    uint16_t access_w:1;
> -    uint16_t access_x:1;
> -    uint16_t gla_valid:1;
> -    uint16_t fault_with_gla:1;
> -    uint16_t fault_in_gpt:1;
> -    uint16_t available:10;
> -
> -    uint16_t reason;
> -    struct mem_event_regs_x86 x86_regs;
> -} mem_event_request_t, mem_event_response_t;
> -
> -DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
> -
> -#endif
> -
> -/*
> - * Local variables:
> - * mode: C
> - * c-file-style: "BSD"
> - * c-basic-offset: 4
> - * tab-width: 4
> - * indent-tabs-mode: nil
> - * End:
> - */
> diff --git a/include/xen/memory.h b/include/xen/memory.h
> index 595f953..29386df 100644
> --- a/include/xen/memory.h
> +++ b/include/xen/memory.h
> @@ -28,6 +28,7 @@
>  #define __XEN_PUBLIC_MEMORY_H__
>  
>  #include "xen.h"
> +#include "physdev.h"
>  
>  /*
>   * Increase or decrease the specified domain's memory reservation. Returns the
> @@ -55,6 +56,8 @@
>  /* Flag to request allocation only from the node specified */
>  #define XENMEMF_exact_node_request  (1<<17)
>  #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
> +/* Flag to indicate the node specified is virtual node */
> +#define XENMEMF_vnode  (1<<18)
>  #endif
>  
>  struct xen_memory_reservation {
> @@ -99,6 +102,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
>   * Returns zero on complete success, otherwise a negative error code.
>   * On complete success then always @nr_exchanged == @in.nr_extents.
>   * On partial success @nr_exchanged indicates how much work was done.
> + *
> + * Note that only PV guests can use this operation.
>   */
>  #define XENMEM_exchange             11
>  struct xen_memory_exchange {
> @@ -217,6 +222,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
>  #define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
>  #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
>                                      * XENMEM_add_to_physmap_batch only. */
> +#define XENMAPSPACE_dev_mmio     5 /* device mmio region
> +                                      ARM only; the region is mapped in
> +                                      Stage-2 using the Normal Memory
> +                                      Inner/Outer Write-Back Cacheable
> +                                      memory attribute. */
>  /* ` } */
>  
>  /*
> @@ -255,7 +265,15 @@ struct xen_add_to_physmap_batch {
>  
>      /* Number of pages to go through */
>      uint16_t size;
> -    domid_t foreign_domid; /* IFF gmfn_foreign */
> +
> +#if __XEN_INTERFACE_VERSION__ < 0x00040700
> +    domid_t foreign_domid; /* IFF gmfn_foreign. Should be 0 for other spaces. */
> +#else
> +    union xen_add_to_physmap_batch_extra {
> +        domid_t foreign_domid; /* gmfn_foreign */
> +        uint16_t res0;  /* All the other spaces. Should be 0 */
> +    } u;
> +#endif
>  
>      /* Indexes into space being mapped. */
>      XEN_GUEST_HANDLE(xen_ulong_t) idxs;
> @@ -323,6 +341,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
>  /*
>   * Returns the real physical memory map. Passes the same structure as
>   * XENMEM_memory_map.
> + * Specifying buffer as NULL will return the number of entries required
> + * to store the complete memory map.
>   * arg == addr of xen_memory_map_t.
>   */
>  #define XENMEM_machine_memory_map   10
> @@ -372,23 +392,29 @@ typedef struct xen_pod_target xen_pod_target_t;
>  #define XENMEM_paging_op_evict              1
>  #define XENMEM_paging_op_prep               2
>  
> -struct xen_mem_event_op {
> -    uint8_t     op;         /* XENMEM_*_op_* */
> +struct xen_mem_paging_op {
> +    uint8_t     op;         /* XENMEM_paging_op_* */
>      domid_t     domain;
> -    
>  
>      /* PAGING_PREP IN: buffer to immediately fill page in */
>      uint64_aligned_t    buffer;
>      /* Other OPs */
>      uint64_aligned_t    gfn;           /* IN:  gfn of page being operated on */
>  };
> -typedef struct xen_mem_event_op xen_mem_event_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
> +typedef struct xen_mem_paging_op xen_mem_paging_op_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_op_t);
>  
>  #define XENMEM_access_op                    21
> -#define XENMEM_access_op_resume             0
> -#define XENMEM_access_op_set_access         1
> -#define XENMEM_access_op_get_access         2
> +#define XENMEM_access_op_set_access         0
> +#define XENMEM_access_op_get_access         1
> +/*
> + * XENMEM_access_op_enable_emulate and XENMEM_access_op_disable_emulate are
> + * currently unused, but since they have been in use please do not reuse them.
> + *
> + * #define XENMEM_access_op_enable_emulate     2
> + * #define XENMEM_access_op_disable_emulate    3
> + */
> +#define XENMEM_access_op_set_access_multi   4
>  
>  typedef enum {
>      XENMEM_access_n,
> @@ -421,7 +447,8 @@ struct xen_mem_access_op {
>      uint8_t access;
>      domid_t domid;
>      /*
> -     * Number of pages for set op
> +     * Number of pages for set op (or size of pfn_list for
> +     * XENMEM_access_op_set_access_multi)
>       * Ignored on setting default access and other ops
>       */
>      uint32_t nr;
> @@ -431,6 +458,16 @@ struct xen_mem_access_op {
>       * ~0ull is used to set and get the default access for pages
>       */
>      uint64_aligned_t pfn;
> +    /*
> +     * List of pfns to set access for
> +     * Used only with XENMEM_access_op_set_access_multi
> +     */
> +    XEN_GUEST_HANDLE(const_uint64) pfn_list;
> +    /*
> +     * Corresponding list of access settings for pfn_list
> +     * Used only with XENMEM_access_op_set_access_multi
> +     */
> +    XEN_GUEST_HANDLE(const_uint8) access_list;
>  };
>  typedef struct xen_mem_access_op xen_mem_access_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
> @@ -439,12 +476,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
>  #define XENMEM_sharing_op_nominate_gfn      0
>  #define XENMEM_sharing_op_nominate_gref     1
>  #define XENMEM_sharing_op_share             2
> -#define XENMEM_sharing_op_resume            3
> -#define XENMEM_sharing_op_debug_gfn         4
> -#define XENMEM_sharing_op_debug_mfn         5
> -#define XENMEM_sharing_op_debug_gref        6
> -#define XENMEM_sharing_op_add_physmap       7
> -#define XENMEM_sharing_op_audit             8
> +#define XENMEM_sharing_op_debug_gfn         3
> +#define XENMEM_sharing_op_debug_mfn         4
> +#define XENMEM_sharing_op_debug_gref        5
> +#define XENMEM_sharing_op_add_physmap       6
> +#define XENMEM_sharing_op_audit             7
> +#define XENMEM_sharing_op_range_share       8
>  
>  #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
>  #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
> @@ -453,7 +490,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
>   * for sharing utilities sitting as "filters" in IO backends
>   * (e.g. memshr + blktap(2)). The IO backend is only exposed 
>   * to grant references, and this allows sharing of the grefs */
> -#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (1ULL << 62)
> +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (xen_mk_ullong(1) << 62)
>  
>  #define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val)  \
>      (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
> @@ -480,7 +517,14 @@ struct xen_mem_sharing_op {
>              uint64_aligned_t client_gfn;    /* IN: the client gfn */
>              uint64_aligned_t client_handle; /* IN: handle to the client page */
>              domid_t  client_domain; /* IN: the client domain id */
> -        } share; 
> +        } share;
> +        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */
> +            uint64_aligned_t first_gfn;      /* IN: the first gfn */
> +            uint64_aligned_t last_gfn;       /* IN: the last gfn */
> +            uint64_aligned_t opaque;         /* Must be set to 0 */
> +            domid_t client_domain;           /* IN: the client domain id */
> +            uint16_t _pad[3];                /* Must be set to 0 */
> +        } range;
>          struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
>              union {
>                  uint64_aligned_t gfn;      /* IN: gfn to debug          */
> @@ -518,9 +562,43 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>  
>  /*
>   * XENMEM_claim_pages flags - the are no flags at this time.
> - * The zero value is appropiate.
> + * The zero value is appropriate.
>   */
>  
> +/*
> + * With some legacy devices, certain guest-physical addresses cannot safely
> + * be used for other purposes, e.g. to map guest RAM.  This hypercall
> + * enumerates those regions so the toolstack can avoid using them.
> + */
> +#define XENMEM_reserved_device_memory_map   27
> +struct xen_reserved_device_memory {
> +    xen_pfn_t start_pfn;
> +    xen_ulong_t nr_pages;
> +};
> +typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
> +
> +struct xen_reserved_device_memory_map {
> +#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
> +    /* IN */
> +    uint32_t flags;
> +    /*
> +     * IN/OUT
> +     *
> +     * Gets set to the required number of entries when too low,
> +     * signaled by error code -ERANGE.
> +     */
> +    unsigned int nr_entries;
> +    /* OUT */
> +    XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
> +    /* IN */
> +    union {
> +        struct physdev_pci_device pci;
> +    } dev;
> +};
> +typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
>  /*
> @@ -572,7 +650,7 @@ struct xen_vnuma_topology_info {
>  typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
>  
> -/* Next available subop number is 27 */
> +/* Next available subop number is 28 */
>  
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
>  
> diff --git a/include/xen/physdev.h b/include/xen/physdev.h
> index 2683719..0e54635 100644
> --- a/include/xen/physdev.h
> +++ b/include/xen/physdev.h
> @@ -16,6 +16,8 @@
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2006, Keir Fraser
>   */
>  
>  #ifndef __XEN_PUBLIC_PHYSDEV_H__
> @@ -293,6 +295,11 @@ struct physdev_pci_device_add {
>          uint8_t bus;
>          uint8_t devfn;
>      } physfn;
> +    /*
> +     * Optional parameters array.
> +     * First element ([0]) is PXM domain associated with the device (if
> +     * XEN_PCI_DEV_PXM is set)
> +     */
>  #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
>      uint32_t optarr[];
>  #elif defined(__GNUC__)
> diff --git a/include/xen/platform.h b/include/xen/platform.h
> index 5c57615..94dbc3f 100644
> --- a/include/xen/platform.h
> +++ b/include/xen/platform.h
> @@ -35,13 +35,28 @@
>   * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
>   * 1 January, 1970 if the current system time was <system_time>.
>   */
> -#define XENPF_settime             17
> -struct xenpf_settime {
> +#define XENPF_settime32           17
> +struct xenpf_settime32 {
>      /* IN variables. */
>      uint32_t secs;
>      uint32_t nsecs;
>      uint64_t system_time;
>  };
> +#define XENPF_settime64           62
> +struct xenpf_settime64 {
> +    /* IN variables. */
> +    uint64_t secs;
> +    uint32_t nsecs;
> +    uint32_t mbz;
> +    uint64_t system_time;
> +};
> +#if __XEN_INTERFACE_VERSION__ < 0x00040600
> +#define XENPF_settime XENPF_settime32
> +#define xenpf_settime xenpf_settime32
> +#else
> +#define XENPF_settime XENPF_settime64
> +#define xenpf_settime xenpf_settime64
> +#endif
>  typedef struct xenpf_settime xenpf_settime_t;
>  DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
>  
> @@ -126,6 +141,26 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
>  #define XEN_EFI_query_variable_info           9
>  #define XEN_EFI_query_capsule_capabilities   10
>  #define XEN_EFI_update_capsule               11
> +
> +struct xenpf_efi_time {
> +    uint16_t year;
> +    uint8_t month;
> +    uint8_t day;
> +    uint8_t hour;
> +    uint8_t min;
> +    uint8_t sec;
> +    uint32_t ns;
> +    int16_t tz;
> +    uint8_t daylight;
> +};
> +
> +struct xenpf_efi_guid {
> +    uint32_t data1;
> +    uint16_t data2;
> +    uint16_t data3;
> +    uint8_t data4[8];
> +};
> +
>  struct xenpf_efi_runtime_call {
>      uint32_t function;
>      /*
> @@ -138,17 +173,7 @@ struct xenpf_efi_runtime_call {
>      union {
>  #define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001
>          struct {
> -            struct xenpf_efi_time {
> -                uint16_t year;
> -                uint8_t month;
> -                uint8_t day;
> -                uint8_t hour;
> -                uint8_t min;
> -                uint8_t sec;
> -                uint32_t ns;
> -                int16_t tz;
> -                uint8_t daylight;
> -            } time;
> +            struct xenpf_efi_time time;
>              uint32_t resolution;
>              uint32_t accuracy;
>          } get_time;
> @@ -170,12 +195,7 @@ struct xenpf_efi_runtime_call {
>              XEN_GUEST_HANDLE(void) name;  /* UCS-2/UTF-16 string */
>              xen_ulong_t size;
>              XEN_GUEST_HANDLE(void) data;
> -            struct xenpf_efi_guid {
> -                uint32_t data1;
> -                uint16_t data2;
> -                uint16_t data3;
> -                uint8_t data4[8];
> -            } vendor_guid;
> +            struct xenpf_efi_guid vendor_guid;
>          } get_variable, set_variable;
>  
>          struct {
> @@ -220,6 +240,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t);
>  #define  XEN_FW_EFI_MEM_INFO       3
>  #define  XEN_FW_EFI_RT_VERSION     4
>  #define  XEN_FW_EFI_PCI_ROM        5
> +#define  XEN_FW_EFI_APPLE_PROPERTIES 6
>  #define XEN_FW_KBD_SHIFT_FLAGS    5
>  struct xenpf_firmware_info {
>      /* IN variables. */
> @@ -279,6 +300,11 @@ struct xenpf_firmware_info {
>                  uint64_t address;
>                  xen_ulong_t size;
>              } pci_rom;
> +            struct {
> +                /* OUT variables */
> +                uint64_t address;
> +                xen_ulong_t size;
> +            } apple_properties;
>          } efi_info; /* XEN_FW_EFI_INFO */
>  
>          /* Int16, Fn02: Get keyboard shift flags. */
> @@ -540,6 +566,16 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);
>  #define XEN_RESOURCE_OP_MSR_READ  0
>  #define XEN_RESOURCE_OP_MSR_WRITE 1
>  
> +/*
> + * Specially handled MSRs:
> + * - MSR_IA32_TSC
> + * READ: Returns the scaled system time(ns) instead of raw timestamp. In
> + *       multiple entry case, if other MSR read is followed by a MSR_IA32_TSC
> + *       read, then both reads are guaranteed to be performed atomically (with
> + *       IRQ disabled). The return time indicates the point of reading that MSR.
> + * WRITE: Not supported.
> + */
> +
>  struct xenpf_resource_entry {
>      union {
>          uint32_t cmd;   /* IN: XEN_RESOURCE_OP_* */
> @@ -560,6 +596,24 @@ struct xenpf_resource_op {
>  typedef struct xenpf_resource_op xenpf_resource_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xenpf_resource_op_t);
>  
> +#define XENPF_get_symbol   63
> +struct xenpf_symdata {
> +    /* IN/OUT variables */
> +    uint32_t namelen; /* IN:  size of name buffer                       */
> +                      /* OUT: strlen(name) of hypervisor symbol (may be */
> +                      /*      larger than what's been copied to guest)  */
> +    uint32_t symnum;  /* IN:  Symbol to read                            */
> +                      /* OUT: Next available symbol. If same as IN then */
> +                      /*      we reached the end                        */
> +
> +    /* OUT variables */
> +    XEN_GUEST_HANDLE(char) name;
> +    uint64_t address;
> +    char type;
> +};
> +typedef struct xenpf_symdata xenpf_symdata_t;
> +DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
> +
>  /*
>   * ` enum neg_errnoval
>   * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
> @@ -569,6 +623,8 @@ struct xen_platform_op {
>      uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
>      union {
>          struct xenpf_settime           settime;
> +        struct xenpf_settime32         settime32;
> +        struct xenpf_settime64         settime64;
>          struct xenpf_add_memtype       add_memtype;
>          struct xenpf_del_memtype       del_memtype;
>          struct xenpf_read_memtype      read_memtype;
> @@ -587,6 +643,7 @@ struct xen_platform_op {
>          struct xenpf_mem_hotadd        mem_add;
>          struct xenpf_core_parking      core_parking;
>          struct xenpf_resource_op       resource_op;
> +        struct xenpf_symdata           symdata;
>          uint8_t                        pad[128];
>      } u;
>  };
> diff --git a/include/xen/pmu.h b/include/xen/pmu.h
> new file mode 100644
> index 0000000..0e1312c
> --- /dev/null
> +++ b/include/xen/pmu.h
> @@ -0,0 +1,143 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +#ifndef __XEN_PUBLIC_PMU_H__
> +#define __XEN_PUBLIC_PMU_H__
> +
> +#include "xen.h"
> +#if defined(__i386__) || defined(__x86_64__)
> +#include "arch-x86/pmu.h"
> +#elif defined (__arm__) || defined (__aarch64__)
> +#include "arch-arm.h"
> +#else
> +#error "Unsupported architecture"
> +#endif
> +
> +#define XENPMU_VER_MAJ    0
> +#define XENPMU_VER_MIN    1
> +
> +/*
> + * ` enum neg_errnoval
> + * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args);
> + *
> + * @cmd  == XENPMU_* (PMU operation)
> + * @args == struct xenpmu_params
> + */
> +/* ` enum xenpmu_op { */
> +#define XENPMU_mode_get        0 /* Also used for getting PMU version */
> +#define XENPMU_mode_set        1
> +#define XENPMU_feature_get     2
> +#define XENPMU_feature_set     3
> +#define XENPMU_init            4
> +#define XENPMU_finish          5
> +#define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
> +/* ` } */
> +
> +/* Parameters structure for HYPERVISOR_xenpmu_op call */
> +struct xen_pmu_params {
> +    /* IN/OUT parameters */
> +    struct {
> +        uint32_t maj;
> +        uint32_t min;
> +    } version;
> +    uint64_t val;
> +
> +    /* IN parameters */
> +    uint32_t vcpu;
> +    uint32_t pad;
> +};
> +typedef struct xen_pmu_params xen_pmu_params_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
> +
> +/* PMU modes:
> + * - XENPMU_MODE_OFF:   No PMU virtualization
> + * - XENPMU_MODE_SELF:  Guests can profile themselves
> + * - XENPMU_MODE_HV:    Guests can profile themselves, dom0 profiles
> + *                      itself and Xen
> + * - XENPMU_MODE_ALL:   Only dom0 has access to VPMU and it profiles
> + *                      everyone: itself, the hypervisor and the guests.
> + */
> +#define XENPMU_MODE_OFF           0
> +#define XENPMU_MODE_SELF          (1<<0)
> +#define XENPMU_MODE_HV            (1<<1)
> +#define XENPMU_MODE_ALL           (1<<2)
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS:  Intel BTS support (ignored on AMD)
> + * - XENPMU_FEATURE_IPC_ONLY:   Restrict PMCs to the most minimum set possible.
> + *                              Instructions, cycles, and ref cycles. Can be
> + *                              used to calculate instructions-per-cycle (IPC)
> + *                              (ignored on AMD).
> + * - XENPMU_FEATURE_ARCH_ONLY:  Restrict PMCs to the Intel Pre-Defined
> + *                              Architectural Performance Events exposed by
> + *                              cpuid and listed in the Intel developer's manual
> + *                              (ignored on AMD).
> + */
> +#define XENPMU_FEATURE_INTEL_BTS  (1<<0)
> +#define XENPMU_FEATURE_IPC_ONLY   (1<<1)
> +#define XENPMU_FEATURE_ARCH_ONLY  (1<<2)
> +
> +/*
> + * Shared PMU data between hypervisor and PV(H) domains.
> + *
> + * The hypervisor fills out this structure during PMU interrupt and sends an
> + * interrupt to appropriate VCPU.
> + * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
> + * and RO for the guest but some fields in xen_pmu_arch can be writable
> + * by both the hypervisor and the guest (see arch-$arch/pmu.h).
> + */
> +struct xen_pmu_data {
> +    /* Interrupted VCPU */
> +    uint32_t vcpu_id;
> +
> +    /*
> +     * Physical processor on which the interrupt occurred. On non-privileged
> +     * guests set to vcpu_id;
> +     */
> +    uint32_t pcpu_id;
> +
> +    /*
> +     * Domain that was interrupted. On non-privileged guests set to DOMID_SELF.
> +     * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
> +     * XENPMU_MODE_ALL mode, domain ID of another domain.
> +     */
> +    domid_t  domain_id;
> +
> +    uint8_t pad[6];
> +
> +    /* Architecture-specific information */
> +    struct xen_pmu_arch pmu;
> +};
> +
> +#endif /* __XEN_PUBLIC_PMU_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/sched.h b/include/xen/sched.h
> index 4000ac9..811bd87 100644
> --- a/include/xen/sched.h
> +++ b/include/xen/sched.h
> @@ -118,6 +118,18 @@
>   * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
>   */
>  #define SCHEDOP_watchdog    6
> +
> +/*
> + * Override the current vcpu affinity by pinning it to one physical cpu or
> + * undo this override restoring the previous affinity.
> + * @arg == pointer to sched_pin_override_t structure.
> + *
> + * A negative pcpu value will undo a previous pin override and restore the
> + * previous cpu affinity.
> + * This call is allowed for the hardware domain only and requires the cpu
> + * to be part of the domain's cpupool.
> + */
> +#define SCHEDOP_pin_override 7
>  /* ` } */
>  
>  struct sched_shutdown {
> @@ -148,6 +160,12 @@ struct sched_watchdog {
>  typedef struct sched_watchdog sched_watchdog_t;
>  DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
>  
> +struct sched_pin_override {
> +    int32_t pcpu;
> +};
> +typedef struct sched_pin_override sched_pin_override_t;
> +DEFINE_XEN_GUEST_HANDLE(sched_pin_override_t);
> +
>  /*
>   * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
>   * software to determine the appropriate action. For the most part, Xen does
> @@ -159,7 +177,16 @@ DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
>  #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
>  #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
>  #define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
> -#define SHUTDOWN_MAX        4  /* Maximum valid shutdown reason.             */
> +
> +/*
> + * Domain asked to perform 'soft reset' for it. The expected behavior is to
> + * reset internal Xen state for the domain returning it to the point where it
> + * was created but leaving the domain's memory contents and vCPU contexts
> + * intact. This will allow the domain to start over and set up all Xen specific
> + * interfaces again.
> + */
> +#define SHUTDOWN_soft_reset 5
> +#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
>  /* ` } */
>  
>  #endif /* __XEN_PUBLIC_SCHED_H__ */
> diff --git a/include/xen/sysctl.h b/include/xen/sysctl.h
> index 8552dc6..6140f1a 100644
> --- a/include/xen/sysctl.h
> +++ b/include/xen/sysctl.h
> @@ -33,8 +33,10 @@
>  
>  #include "xen.h"
>  #include "domctl.h"
> +#include "physdev.h"
> +#include "tmem.h"
>  
> -#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B
> +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000010
>  
>  /*
>   * Read console content from Xen buffer ring.
> @@ -56,8 +58,6 @@ struct xen_sysctl_readconsole {
>      /* IN: Size of buffer; OUT: Bytes written to buffer. */
>      uint32_t count;
>  };
> -typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
>  
>  /* Get trace buffers machine base address */
>  /* XEN_SYSCTL_tbuf_op */
> @@ -77,8 +77,6 @@ struct xen_sysctl_tbuf_op {
>      uint64_aligned_t buffer_mfn;
>      uint32_t size;  /* Also an IN variable! */
>  };
> -typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
>  
>  /*
>   * Get physical information about the host machine
> @@ -98,17 +96,14 @@ struct xen_sysctl_physinfo {
>      uint32_t nr_nodes;    /* # nodes currently online */
>      uint32_t max_node_id; /* Largest possible node ID on this host */
>      uint32_t cpu_khz;
> +    uint32_t capabilities;/* XEN_SYSCTL_PHYSCAP_??? */
>      uint64_aligned_t total_pages;
>      uint64_aligned_t free_pages;
>      uint64_aligned_t scrub_pages;
>      uint64_aligned_t outstanding_pages;
> +    uint64_aligned_t max_mfn; /* Largest possible MFN on this host */
>      uint32_t hw_cap[8];
> -
> -    /* XEN_SYSCTL_PHYSCAP_??? */
> -    uint32_t capabilities;
>  };
> -typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
>  
>  /*
>   * Get the ID of the current scheduler.
> @@ -118,8 +113,6 @@ struct xen_sysctl_sched_id {
>      /* OUT variable */
>      uint32_t sched_id;
>  };
> -typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t);
>  
>  /* Interface for controlling Xen software performance counters. */
>  /* XEN_SYSCTL_perfc_op */
> @@ -146,8 +139,6 @@ struct xen_sysctl_perfc_op {
>      /* counter values (or NULL) */
>      XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
>  };
> -typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
>  
>  /* XEN_SYSCTL_getdomaininfolist */
>  struct xen_sysctl_getdomaininfolist {
> @@ -158,8 +149,6 @@ struct xen_sysctl_getdomaininfolist {
>      /* OUT variables. */
>      uint32_t              num_domains;
>  };
> -typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
>  
>  /* Inject debug keys into Xen. */
>  /* XEN_SYSCTL_debug_keys */
> @@ -168,8 +157,6 @@ struct xen_sysctl_debug_keys {
>      XEN_GUEST_HANDLE_64(char) keys;
>      uint32_t nr_keys;
>  };
> -typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
>  
>  /* Get physical CPU information. */
>  /* XEN_SYSCTL_getcpuinfo */
> @@ -185,8 +172,6 @@ struct xen_sysctl_getcpuinfo {
>      /* OUT variables. */
>      uint32_t nr_cpus;
>  }; 
> -typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
>  
>  /* XEN_SYSCTL_availheap */
>  struct xen_sysctl_availheap {
> @@ -197,8 +182,6 @@ struct xen_sysctl_availheap {
>      /* OUT variables. */
>      uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */
>  };
> -typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
>  
>  /* XEN_SYSCTL_get_pmstat */
>  struct pm_px_val {
> @@ -217,8 +200,6 @@ struct pm_px_stat {
>      XEN_GUEST_HANDLE_64(uint64) trans_pt;   /* Px transition table */
>      XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
>  };
> -typedef struct pm_px_stat pm_px_stat_t;
> -DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
>  
>  struct pm_cx_stat {
>      uint32_t nr;    /* entry nr in triggers & residencies, including C0 */
> @@ -257,8 +238,6 @@ struct xen_sysctl_get_pmstat {
>          /* other struct for tx, etc */
>      } u;
>  };
> -typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
>  
>  /* XEN_SYSCTL_cpu_hotplug */
>  struct xen_sysctl_cpu_hotplug {
> @@ -268,8 +247,6 @@ struct xen_sysctl_cpu_hotplug {
>  #define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
>      uint32_t op;    /* hotplug opcode */
>  };
> -typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
>  
>  /*
>   * Get/set xen power management, include 
> @@ -279,7 +256,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
>  struct xen_userspace {
>      uint32_t scaling_setspeed;
>  };
> -typedef struct xen_userspace xen_userspace_t;
>  
>  struct xen_ondemand {
>      uint32_t sampling_rate_max;
> @@ -288,7 +264,6 @@ struct xen_ondemand {
>      uint32_t sampling_rate;
>      uint32_t up_threshold;
>  };
> -typedef struct xen_ondemand xen_ondemand_t;
>  
>  /* 
>   * cpufreq para name of this structure named 
> @@ -459,67 +434,76 @@ struct xen_sysctl_lockprof_op {
>      /* profile information (or NULL) */
>      XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data;
>  };
> -typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
>  
> -/* XEN_SYSCTL_topologyinfo */
> -#define INVALID_TOPOLOGY_ID  (~0U)
> -struct xen_sysctl_topologyinfo {
> -    /*
> -     * IN: maximum addressable entry in the caller-provided arrays.
> -     * OUT: largest cpu identifier in the system.
> -     * If OUT is greater than IN then the arrays are truncated!
> -     * If OUT is leass than IN then the array tails are not written by sysctl.
> -     */
> -    uint32_t max_cpu_index;
> +/* XEN_SYSCTL_cputopoinfo */
> +#define XEN_INVALID_CORE_ID     (~0U)
> +#define XEN_INVALID_SOCKET_ID   (~0U)
> +#define XEN_INVALID_NODE_ID     (~0U)
>  
> -    /*
> -     * If not NULL, these arrays are filled with core/socket/node identifier
> -     * for each cpu.
> -     * If a cpu has no core/socket/node information (e.g., cpu not present) 
> -     * then the sentinel value ~0u is written to each array.
> -     * The number of array elements written by the sysctl is:
> -     *   min(@max_cpu_index_IN,@max_cpu_index_OUT)+1
> -     */
> -    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
> -    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
> -    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
> +struct xen_sysctl_cputopo {
> +    uint32_t core;
> +    uint32_t socket;
> +    uint32_t node;
> +};
> +typedef struct xen_sysctl_cputopo xen_sysctl_cputopo_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopo_t);
> +
> +/*
> + * IN:
> + *  - a NULL 'cputopo' handle is a request for maximun 'num_cpus'.
> + *  - otherwise it's the number of entries in 'cputopo'
> + *
> + * OUT:
> + *  - If 'num_cpus' is less than the number Xen wants to write but the handle
> + *    handle is not a NULL one, partial data gets returned and 'num_cpus' gets
> + *    updated to reflect the intended number.
> + *  - Otherwise, 'num_cpus' shall indicate the number of entries written, which
> + *    may be less than the input value.
> + */
> +struct xen_sysctl_cputopoinfo {
> +    uint32_t num_cpus;
> +    XEN_GUEST_HANDLE_64(xen_sysctl_cputopo_t) cputopo;
>  };
> -typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
>  
>  /* XEN_SYSCTL_numainfo */
> -#define INVALID_NUMAINFO_ID (~0U)
> +#define XEN_INVALID_MEM_SZ     (~0U)
> +#define XEN_INVALID_NODE_DIST  (~0U)
> +
> +struct xen_sysctl_meminfo {
> +    uint64_t memsize;
> +    uint64_t memfree;
> +};
> +typedef struct xen_sysctl_meminfo xen_sysctl_meminfo_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_meminfo_t);
> +
> +/*
> + * IN:
> + *  - Both 'meminfo' and 'distance' handles being null is a request
> + *    for maximum value of 'num_nodes'.
> + *  - Otherwise it's the number of entries in 'meminfo' and square root
> + *    of number of entries in 'distance' (when corresponding handle is
> + *    non-null)
> + *
> + * OUT:
> + *  - If 'num_nodes' is less than the number Xen wants to write but either
> + *    handle is not a NULL one, partial data gets returned and 'num_nodes'
> + *    gets updated to reflect the intended number.
> + *  - Otherwise, 'num_nodes' shall indicate the number of entries written, 
> which
> + *    may be less than the input value.
> + */
> +
>  struct xen_sysctl_numainfo {
> -    /*
> -     * IN: maximum addressable entry in the caller-provided arrays.
> -     * OUT: largest node identifier in the system.
> -     * If OUT is greater than IN then the arrays are truncated!
> -     */
> -    uint32_t max_node_index;
> +    uint32_t num_nodes;
>  
> -    /* NB. Entries are 0 if node is not present. */
> -    XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
> -    XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
> +    XEN_GUEST_HANDLE_64(xen_sysctl_meminfo_t) meminfo;
>  
>      /*
> -     * Array, of size (max_node_index+1)^2, listing memory access distances
> -     * between nodes. If an entry has no node distance information (e.g., 
> node 
> -     * not present) then the value ~0u is written.
> -     * 
> -     * Note that the array rows must be indexed by multiplying by the 
> minimum 
> -     * of the caller-provided max_node_index and the returned value of
> -     * max_node_index. That is, if the largest node index in the system is
> -     * smaller than the caller can handle, a smaller 2-d array is constructed
> -     * within the space provided by the caller. When this occurs, trailing
> -     * space provided by the caller is not modified. If the largest node 
> index
> -     * in the system is larger than the caller can handle, then a 2-d array 
> of
> -     * the maximum size handleable by the caller is constructed.
> +     * Distance between nodes 'i' and 'j' is stored in index 'i*N + j',
> +     * where N is the number of nodes that will be returned in 'num_nodes'
> +     * (i.e. not 'num_nodes' provided by the caller)
>       */
> -    XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
> +    XEN_GUEST_HANDLE_64(uint32) distance;
>  };
> -typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
>  
>  /* XEN_SYSCTL_cpupool_op */
>  #define XEN_SYSCTL_CPUPOOL_OP_CREATE                1  /* C */
> @@ -539,8 +523,42 @@ struct xen_sysctl_cpupool_op {
>      uint32_t n_dom;       /*            OUT: I  */
>      struct xenctl_bitmap cpumap; /*     OUT: IF */
>  };
> -typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t);
> +
> +/*
> + * Error return values of cpupool operations:
> + *
> + * -EADDRINUSE:
> + *  XEN_SYSCTL_CPUPOOL_OP_RMCPU: A vcpu is temporarily pinned to the cpu
> + *    which is to be removed from a cpupool.
> + * -EADDRNOTAVAIL:
> + *  XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: A previous
> + *    request to remove a cpu from a cpupool was terminated with -EAGAIN
> + *    and has not been retried using the same parameters.
> + * -EAGAIN:
> + *  XEN_SYSCTL_CPUPOOL_OP_RMCPU: The cpu can't be removed from the cpupool
> + *    as it is active in the hypervisor. A retry will succeed soon.
> + * -EBUSY:
> + *  XEN_SYSCTL_CPUPOOL_OP_DESTROY, XEN_SYSCTL_CPUPOOL_OP_RMCPU: A cpupool
> + *    can't be destroyed or the last cpu can't be removed as there is still
> + *    a running domain in that cpupool.
> + * -EEXIST:
> + *  XEN_SYSCTL_CPUPOOL_OP_CREATE: A cpupool_id was specified and is already
> + *    existing.
> + * -EINVAL:
> + *  XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: An illegal
> + *    cpu was specified (cpu does not exist).
> + *  XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN: An illegal domain was specified
> + *    (domain id illegal or not suitable for operation).
> + * -ENODEV:
> + *  XEN_SYSCTL_CPUPOOL_OP_ADDCPU, XEN_SYSCTL_CPUPOOL_OP_RMCPU: The specified
> + *    cpu is either not free (add) or not member of the specified cpupool
> + *    (remove).
> + * -ENOENT:
> + *  all: The cpupool with the specified cpupool_id doesn't exist.
> + *
> + * Some common error return values like -ENOMEM and -EFAULT are possible for
> + * all the operations.
> + */
>  
>  #define ARINC653_MAX_DOMAINS_PER_SCHEDULE   64
>  /*
> @@ -570,18 +588,24 @@ struct xen_sysctl_arinc653_schedule {
>  typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t);
>  
> +/*
> + * Valid range for context switch rate limit (in microseconds).
> + * Applicable to Credit and Credit2 schedulers.
> + */
> +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000
> +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100
> +
>  struct xen_sysctl_credit_schedule {
>      /* Length of timeslice in milliseconds */
>  #define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000
>  #define XEN_SYSCTL_CSCHED_TSLICE_MIN 1
>      unsigned tslice_ms;
> -    /* Rate limit (minimum timeslice) in microseconds */
> -#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000
> -#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100
>      unsigned ratelimit_us;
>  };
> -typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t);
> +
> +struct xen_sysctl_credit2_schedule {
> +    unsigned ratelimit_us;
> +};
>  
>  /* XEN_SYSCTL_scheduler_op */
>  /* Set or get info? */
> @@ -596,45 +620,41 @@ struct xen_sysctl_scheduler_op {
>              XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule;
>          } sched_arinc653;
>          struct xen_sysctl_credit_schedule sched_credit;
> +        struct xen_sysctl_credit2_schedule sched_credit2;
>      } u;
>  };
> -typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t);
> -
> -/* XEN_SYSCTL_coverage_op */
> -/*
> - * Get total size of information, to help allocate
> - * the buffer. The pointer points to a 32 bit value.
> - */
> -#define XEN_SYSCTL_COVERAGE_get_total_size 0
>  
>  /*
> - * Read coverage information in a single run
> - * You must use a tool to split them.
> + * Output format of gcov data:
> + *
> + * XEN_GCOV_FORMAT_MAGIC XEN_GCOV_RECORD ... XEN_GCOV_RECORD
> + *
> + * That is, one magic number followed by 0 or more records.
> + *
> + * The magic number is stored as an uint32_t field.
> + *
> + * The record is packed and variable in length. It has the form:
> + *
> + *  filename: a NULL terminated path name extracted from gcov, used to
> + *            create the name of gcda file.
> + *  size:     a uint32_t field indicating the size of the payload, the
> + *            unit is byte.
> + *  payload:  the actual payload, length is `size' bytes.
> + *
> + * Userspace tool will split the record to different files.
>   */
> -#define XEN_SYSCTL_COVERAGE_read           1
>  
> -/*
> - * Reset all the coverage counters to 0
> - * No parameters.
> - */
> -#define XEN_SYSCTL_COVERAGE_reset          2
> +#define XEN_GCOV_FORMAT_MAGIC    0x58434f56 /* XCOV */
>  
> -/*
> - * Like XEN_SYSCTL_COVERAGE_read but reset also
> - * counters to 0 in a single call.
> - */
> -#define XEN_SYSCTL_COVERAGE_read_and_reset 3
> +#define XEN_SYSCTL_GCOV_get_size 0 /* Get total size of output data */
> +#define XEN_SYSCTL_GCOV_read     1 /* Read output data */
> +#define XEN_SYSCTL_GCOV_reset    2 /* Reset all counters */
>  
> -struct xen_sysctl_coverage_op {
> -    uint32_t cmd;        /* XEN_SYSCTL_COVERAGE_* */
> -    union {
> -        uint32_t total_size; /* OUT */
> -        XEN_GUEST_HANDLE_64(uint8)  raw_info;   /* OUT */
> -    } u;
> +struct xen_sysctl_gcov_op {
> +    uint32_t cmd;
> +    uint32_t size; /* IN/OUT: size of the buffer  */
> +    XEN_GUEST_HANDLE_64(char) buffer; /* OUT */
>  };
> -typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
>  
>  #define XEN_SYSCTL_PSR_CMT_get_total_rmid            0
>  #define XEN_SYSCTL_PSR_CMT_get_l3_upscaling_factor   1
> @@ -653,8 +673,377 @@ struct xen_sysctl_psr_cmt_op {
>          } l3_cache;
>      } u;
>  };
> -typedef struct xen_sysctl_psr_cmt_op xen_sysctl_psr_cmt_op_t;
> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cmt_op_t);
> +
> +/* XEN_SYSCTL_pcitopoinfo */
> +#define XEN_INVALID_DEV (XEN_INVALID_NODE_ID - 1)
> +struct xen_sysctl_pcitopoinfo {
> +    /*
> +     * IN: Number of elements in 'pcitopo' and 'nodes' arrays.
> +     * OUT: Number of processed elements of those arrays.
> +     */
> +    uint32_t num_devs;
> +
> +    /* IN: list of devices for which node IDs are requested. */
> +    XEN_GUEST_HANDLE_64(physdev_pci_device_t) devs;
> +
> +    /*
> +     * OUT: node identifier for each device.
> +     * If information for a particular device is not available then
> +     * corresponding entry will be set to XEN_INVALID_NODE_ID. If
> +     * device is not known to the hypervisor then XEN_INVALID_DEV
> +     * will be provided.
> +     */
> +    XEN_GUEST_HANDLE_64(uint32) nodes;
> +};
> +
> +#define XEN_SYSCTL_PSR_CAT_get_l3_info               0
> +#define XEN_SYSCTL_PSR_CAT_get_l2_info               1
> +struct xen_sysctl_psr_cat_op {
> +    uint32_t cmd;       /* IN: XEN_SYSCTL_PSR_CAT_* */
> +    uint32_t target;    /* IN */
> +    union {
> +        struct {
> +            uint32_t cbm_len;   /* OUT: CBM length */
> +            uint32_t cos_max;   /* OUT: Maximum COS */
> +#define XEN_SYSCTL_PSR_CAT_L3_CDP       (1u << 0)
> +            uint32_t flags;     /* OUT: CAT flags */
> +        } cat_info;
> +    } u;
> +};
> +
> +#define XEN_SYSCTL_TMEM_OP_ALL_CLIENTS 0xFFFFU
> +
> +#define XEN_SYSCTL_TMEM_OP_THAW                   0
> +#define XEN_SYSCTL_TMEM_OP_FREEZE                 1
> +#define XEN_SYSCTL_TMEM_OP_FLUSH                  2
> +#define XEN_SYSCTL_TMEM_OP_DESTROY                3
> +#define XEN_SYSCTL_TMEM_OP_LIST                   4
> +#define XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO        5
> +#define XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO        6
> +#define XEN_SYSCTL_TMEM_OP_GET_POOLS              7
> +#define XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB      8
> +#define XEN_SYSCTL_TMEM_OP_SET_POOLS              9
> +#define XEN_SYSCTL_TMEM_OP_SAVE_BEGIN             10
> +#define XEN_SYSCTL_TMEM_OP_SET_AUTH               11
> +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE     19
> +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV      20
> +#define XEN_SYSCTL_TMEM_OP_SAVE_END               21
> +#define XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN          30
> +#define XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE       32
> +#define XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE     33
> +
> +/*
> + * XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_[PAGE|INV] override the 'buf' in
> + * xen_sysctl_tmem_op with this structure - sometimes with an extra
> + * page tacked on.
> + */
> +struct tmem_handle {
> +    uint32_t pool_id;
> +    uint32_t index;
> +    xen_tmem_oid_t oid;
> +};
> +
> +/*
> + * XEN_SYSCTL_TMEM_OP_[GET|SET]_CLIENT_INFO uses the 'client' in
> + * xen_sysctl_tmem_op with this structure, which is mostly used during migration.
> + */
> +struct xen_tmem_client {
> +    uint32_t version;   /* If mismatched we will get XEN_EOPNOTSUPP. */
> +    uint32_t maxpools;  /* If greater than what hypervisor supports, will get
> +                           XEN_ERANGE. */
> +    uint32_t nr_pools;  /* Current amount of pools. Ignored on SET*/
> +    union {             /* See TMEM_CLIENT_[COMPRESS,FROZEN] */
> +        uint32_t raw;
> +        struct {
> +            uint8_t frozen:1,
> +                    compress:1,
> +                    migrating:1;
> +        } u;
> +    } flags;
> +    uint32_t weight;
> +};
> +typedef struct xen_tmem_client xen_tmem_client_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_tmem_client_t);
> +
> +/*
> + * XEN_SYSCTL_TMEM_OP_[GET|SET]_POOLS or XEN_SYSCTL_TMEM_OP_SET_AUTH
> + * uses the 'pool' array in * xen_sysctl_tmem_op with this structure.
> + * The XEN_SYSCTL_TMEM_OP_GET_POOLS hypercall will
> + * return the number of entries in 'pool' or a negative value
> + * if an error was encountered.
> + * The XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS] will return the number of
> + * entries in 'pool' processed or a negative value if an error
> + * was encountered.
> + */
> +struct xen_tmem_pool_info {
> +    union {
> +        uint32_t raw;
> +        struct {
> +            uint32_t persist:1,    /* See TMEM_POOL_PERSIST. */
> +                     shared:1,     /* See TMEM_POOL_SHARED. */
> +                     auth:1,       /* See TMEM_POOL_AUTH. */
> +                     rsv1:1,
> +                     pagebits:8,   /* TMEM_POOL_PAGESIZE_[SHIFT,MASK]. */
> +                     rsv2:12,
> +                     version:8;    /* TMEM_POOL_VERSION_[SHIFT,MASK]. */
> +        } u;
> +    } flags;
> +    uint32_t id;                  /* Less than tmem_client.maxpools. */
> +    uint64_t n_pages;             /* Zero on 
> XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS]. */
> +    uint64_aligned_t uuid[2];
> +};
> +typedef struct xen_tmem_pool_info xen_tmem_pool_info_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_tmem_pool_info_t);
> +
> +struct xen_sysctl_tmem_op {
> +    uint32_t cmd;       /* IN: XEN_SYSCTL_TMEM_OP_* . */
> +    int32_t pool_id;    /* IN: 0 by default unless _SAVE_*, RESTORE_* .*/
> +    uint32_t cli_id;    /* IN: client id, 0 for 
> XEN_SYSCTL_TMEM_QUERY_FREEABLE_MB
> +                           for all others can be the domain id or
> +                           XEN_SYSCTL_TMEM_OP_ALL_CLIENTS for all. */
> +    uint32_t len;       /* IN: length of 'buf'. If not applicable to use 0. 
> */
> +    uint32_t arg;       /* IN: If not applicable to command use 0. */
> +    uint32_t pad;       /* Padding so structure is the same under 32 and 64. 
> */
> +    xen_tmem_oid_t oid; /* IN: If not applicable to command use 0s. */
> +    union {
> +        XEN_GUEST_HANDLE_64(char) buf; /* IN/OUT: Buffer to save/restore */
> +        XEN_GUEST_HANDLE_64(xen_tmem_client_t) client; /* IN/OUT for */
> +                        /*  XEN_SYSCTL_TMEM_OP_[GET,SAVE]_CLIENT. */
> +        XEN_GUEST_HANDLE_64(xen_tmem_pool_info_t) pool; /* OUT for */
> +                        /* XEN_SYSCTL_TMEM_OP_GET_POOLS. Must have 'len' */
> +                        /* of them. */
> +    } u;
> +};
> +
> +/*
> + * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific)
> + *
> + * Return hardware capabilities concerning masking or faulting of the cpuid
> + * instruction for PV guests.
> + */
> +struct xen_sysctl_cpu_levelling_caps {
> +#define XEN_SYSCTL_CPU_LEVELCAP_faulting    (1ul <<  0) /* CPUID faulting    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_ecx         (1ul <<  1) /* 0x00000001.ecx    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_edx         (1ul <<  2) /* 0x00000001.edx    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_ecx    (1ul <<  3) /* 0x80000001.ecx    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_edx    (1ul <<  4) /* 0x80000001.edx    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_xsave_eax   (1ul <<  5) /* 0x0000000D:1.eax  
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_thermal_ecx (1ul <<  6) /* 0x00000006.ecx    
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax    (1ul <<  7) /* 0x00000007:0.eax  
> */
> +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_ebx    (1ul <<  8) /* 0x00000007:0.ebx  
> */
> +    uint32_t caps;
> +};
> +
> +/*
> + * XEN_SYSCTL_get_cpu_featureset (x86 specific)
> + *
> + * Return information about featuresets available on this host.
> + *  -  Raw: The real cpuid values.
> + *  - Host: The values Xen is using, (after command line overrides, etc).
> + *  -   PV: Maximum set of features which can be given to a PV guest.
> + *  -  HVM: Maximum set of features which can be given to a HVM guest.
> + */
> +struct xen_sysctl_cpu_featureset {
> +#define XEN_SYSCTL_cpu_featureset_raw      0
> +#define XEN_SYSCTL_cpu_featureset_host     1
> +#define XEN_SYSCTL_cpu_featureset_pv       2
> +#define XEN_SYSCTL_cpu_featureset_hvm      3
> +    uint32_t index;       /* IN: Which featureset to query? */
> +    uint32_t nr_features; /* IN/OUT: Number of entries in/written to
> +                           * 'features', or the maximum number of features if
> +                           * the guest handle is NULL.  NB. All featuresets
> +                           * come from the same numberspace, so have the same
> +                           * maximum length. */
> +    XEN_GUEST_HANDLE_64(uint32) features; /* OUT: */
> +};
> +
> +/*
> + * XEN_SYSCTL_LIVEPATCH_op
> + *
> + * Refer to the docs/unstable/misc/livepatch.markdown
> + * for the design details of this hypercall.
> + *
> + * There are four sub-ops:
> + *  XEN_SYSCTL_LIVEPATCH_UPLOAD (0)
> + *  XEN_SYSCTL_LIVEPATCH_GET (1)
> + *  XEN_SYSCTL_LIVEPATCH_LIST (2)
> + *  XEN_SYSCTL_LIVEPATCH_ACTION (3)
> + *
> + * The normal sequence of sub-ops is to:
> + *  1) XEN_SYSCTL_LIVEPATCH_UPLOAD to upload the payload. If errors STOP.
> + *  2) XEN_SYSCTL_LIVEPATCH_GET to check the `->rc`. If -XEN_EAGAIN spin.
> + *     If zero go to next step.
> + *  3) XEN_SYSCTL_LIVEPATCH_ACTION with LIVEPATCH_ACTION_APPLY to apply the 
> patch.
> + *  4) XEN_SYSCTL_LIVEPATCH_GET to check the `->rc`. If in -XEN_EAGAIN spin.
> + *     If zero exit with success.
> + */
> +
> +#define LIVEPATCH_PAYLOAD_VERSION 1
> +/*
> + * .livepatch.funcs structure layout defined in the `Payload format`
> + * section in the Live Patch design document.
> + *
> + * We guard this with __XEN__ as toolstacks SHOULD not use it.
> + */
> +#ifdef __XEN__
> +struct livepatch_func {
> +    const char *name;       /* Name of function to be patched. */
> +    void *new_addr;
> +    void *old_addr;
> +    uint32_t new_size;
> +    uint32_t old_size;
> +    uint8_t version;        /* MUST be LIVEPATCH_PAYLOAD_VERSION. */
> +    uint8_t opaque[31];
> +};
> +typedef struct livepatch_func livepatch_func_t;
> +#endif
> +
> +/*
> + * Structure describing an ELF payload. Uniquely identifies the
> + * payload. Should be human readable.
> + * Recommended length is upto XEN_LIVEPATCH_NAME_SIZE.
> + * Includes the NUL terminator.
> + */
> +#define XEN_LIVEPATCH_NAME_SIZE 128
> +struct xen_livepatch_name {
> +    XEN_GUEST_HANDLE_64(char) name;         /* IN: pointer to name. */
> +    uint16_t size;                          /* IN: size of name. May be upto
> +                                               XEN_LIVEPATCH_NAME_SIZE. */
> +    uint16_t pad[3];                        /* IN: MUST be zero. */
> +};
> +
> +/*
> + * Upload a payload to the hypervisor. The payload is verified
> + * against basic checks and if there are any issues the proper return code
> + * will be returned. The payload is not applied at this time - that is
> + * controlled by XEN_SYSCTL_LIVEPATCH_ACTION.
> + *
> + * The return value is zero if the payload was successfully uploaded.
> + * Otherwise an EXX return value is provided. Duplicate `name` are not
> + * supported.
> + *
> + * The payload at this point is verified against basic checks.
> + *
> + * The `payload` is the ELF payload as mentioned in the `Payload format`
> + * section in the Live Patch design document.
> + */
> +#define XEN_SYSCTL_LIVEPATCH_UPLOAD 0
> +struct xen_sysctl_livepatch_upload {
> +    struct xen_livepatch_name name;         /* IN, name of the patch. */
> +    uint64_t size;                          /* IN, size of the ELF file. */
> +    XEN_GUEST_HANDLE_64(uint8) payload;     /* IN, the ELF file. */
> +};
> +
> +/*
> + * Retrieve the status of a specific payload.
> + *
> + * Upon completion the `struct xen_livepatch_status` is updated.
> + *
> + * The return value is zero on success and XEN_EXX on failure. This operation
> + * is synchronous and does not require preemption.
> + */
> +#define XEN_SYSCTL_LIVEPATCH_GET 1
> +
> +struct xen_livepatch_status {
> +#define LIVEPATCH_STATE_CHECKED      1
> +#define LIVEPATCH_STATE_APPLIED      2
> +    uint32_t state;                /* OUT: LIVEPATCH_STATE_*. */
> +    int32_t rc;                    /* OUT: 0 if no error, otherwise 
> -XEN_EXX. */
> +};
> +typedef struct xen_livepatch_status xen_livepatch_status_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_livepatch_status_t);
> +
> +struct xen_sysctl_livepatch_get {
> +    struct xen_livepatch_name name;         /* IN, name of the payload. */
> +    struct xen_livepatch_status status;     /* IN/OUT, state of it. */
> +};
> +
> +/*
> + * Retrieve an array of abbreviated status and names of payloads that are
> + * loaded in the hypervisor.
> + *
> + * If the hypercall returns a positive number, it is the number (up to `nr`)
> + * of the payloads returned, along with `nr` updated with the number of 
> remaining
> + * payloads, `version` updated (it may be the same across hypercalls. If it
> + * varies the data is stale and further calls could fail). The `status`,
> + * `name`, and `len`' are updated at their designed index value (`idx`) with
> + * the returned value of data.
> + *
> + * If the hypercall returns E2BIG the `nr` is too big and should be
> + * lowered. The upper limit of `nr` is left to the implementation.
> + *
> + * Note that due to the asynchronous nature of hypercalls the domain might 
> have
> + * added or removed the number of payloads making this information stale. It 
> is
> + * the responsibility of the toolstack to use the `version` field to check
> + * between each invocation. if the version differs it should discard the 
> stale
> + * data and start from scratch. It is OK for the toolstack to use the new
> + * `version` field.
> + */
> +#define XEN_SYSCTL_LIVEPATCH_LIST 2
> +struct xen_sysctl_livepatch_list {
> +    uint32_t version;                       /* OUT: Hypervisor stamps value.
> +                                               If varies between calls, we 
> are
> +                                             * getting stale data. */
> +    uint32_t idx;                           /* IN: Index into hypervisor 
> list. */
> +    uint32_t nr;                            /* IN: How many status, name, 
> and len
> +                                               should fill out. Can be zero 
> to get
> +                                               amount of payloads and 
> version.
> +                                               OUT: How many payloads left. 
> */
> +    uint32_t pad;                           /* IN: Must be zero. */
> +    XEN_GUEST_HANDLE_64(xen_livepatch_status_t) status;  /* OUT. Must have 
> enough
> +                                               space allocate for nr of 
> them. */
> +    XEN_GUEST_HANDLE_64(char) name;         /* OUT: Array of names. Each 
> member
> +                                               MUST XEN_LIVEPATCH_NAME_SIZE 
> in size.
> +                                               Must have nr of them. */
> +    XEN_GUEST_HANDLE_64(uint32) len;        /* OUT: Array of lengths of 
> name's.
> +                                               Must have nr of them. */
> +};
> +
> +/*
> + * Perform an operation on the payload structure referenced by the `name` 
> field.
> + * The operation request is asynchronous and the status should be retrieved
> + * by using either XEN_SYSCTL_LIVEPATCH_GET or XEN_SYSCTL_LIVEPATCH_LIST 
> hypercall.
> + */
> +#define XEN_SYSCTL_LIVEPATCH_ACTION 3
> +struct xen_sysctl_livepatch_action {
> +    struct xen_livepatch_name name;         /* IN, name of the patch. */
> +#define LIVEPATCH_ACTION_UNLOAD       1
> +#define LIVEPATCH_ACTION_REVERT       2
> +#define LIVEPATCH_ACTION_APPLY        3
> +#define LIVEPATCH_ACTION_REPLACE      4
> +    uint32_t cmd;                           /* IN: LIVEPATCH_ACTION_*. */
> +    uint32_t timeout;                       /* IN: If zero then uses */
> +                                            /* hypervisor default. */
> +                                            /* Or upper bound of time (ns) */
> +                                            /* for operation to take. */
> +};
> +
> +struct xen_sysctl_livepatch_op {
> +    uint32_t cmd;                           /* IN: XEN_SYSCTL_LIVEPATCH_*. */
> +    uint32_t pad;                           /* IN: Always zero. */
> +    union {
> +        struct xen_sysctl_livepatch_upload upload;
> +        struct xen_sysctl_livepatch_list list;
> +        struct xen_sysctl_livepatch_get get;
> +        struct xen_sysctl_livepatch_action action;
> +    } u;
> +};
> +
> +/*
> + * XEN_SYSCTL_set_parameter
> + *
> + * Change hypervisor parameters at runtime.
> + * The input string is parsed similar to the boot parameters.
> + * Parameters are a single string terminated by a NUL byte of max. size
> + * characters. Multiple settings can be specified by separating them
> + * with blanks.
> + */
> +
> +struct xen_sysctl_set_parameter {
> +    XEN_GUEST_HANDLE_64(char) params;       /* IN: pointer to parameters. */
> +    uint16_t size;                          /* IN: size of parameters. */
> +    uint16_t pad[3];                        /* IN: MUST be zero. */
> +};
>  
>  struct xen_sysctl {
>      uint32_t cmd;
> @@ -672,18 +1061,26 @@ struct xen_sysctl {
>  #define XEN_SYSCTL_pm_op                         12
>  #define XEN_SYSCTL_page_offline_op               14
>  #define XEN_SYSCTL_lockprof_op                   15
> -#define XEN_SYSCTL_topologyinfo                  16 
> +#define XEN_SYSCTL_cputopoinfo                   16
>  #define XEN_SYSCTL_numainfo                      17
>  #define XEN_SYSCTL_cpupool_op                    18
>  #define XEN_SYSCTL_scheduler_op                  19
> -#define XEN_SYSCTL_coverage_op                   20
> +#define XEN_SYSCTL_gcov_op                       20
>  #define XEN_SYSCTL_psr_cmt_op                    21
> +#define XEN_SYSCTL_pcitopoinfo                   22
> +#define XEN_SYSCTL_psr_cat_op                    23
> +#define XEN_SYSCTL_tmem_op                       24
> +#define XEN_SYSCTL_get_cpu_levelling_caps        25
> +#define XEN_SYSCTL_get_cpu_featureset            26
> +#define XEN_SYSCTL_livepatch_op                  27
> +#define XEN_SYSCTL_set_parameter                 28
>      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
>      union {
>          struct xen_sysctl_readconsole       readconsole;
>          struct xen_sysctl_tbuf_op           tbuf_op;
>          struct xen_sysctl_physinfo          physinfo;
> -        struct xen_sysctl_topologyinfo      topologyinfo;
> +        struct xen_sysctl_cputopoinfo       cputopoinfo;
> +        struct xen_sysctl_pcitopoinfo       pcitopoinfo;
>          struct xen_sysctl_numainfo          numainfo;
>          struct xen_sysctl_sched_id          sched_id;
>          struct xen_sysctl_perfc_op          perfc_op;
> @@ -698,8 +1095,14 @@ struct xen_sysctl {
>          struct xen_sysctl_lockprof_op       lockprof_op;
>          struct xen_sysctl_cpupool_op        cpupool_op;
>          struct xen_sysctl_scheduler_op      scheduler_op;
> -        struct xen_sysctl_coverage_op       coverage_op;
> +        struct xen_sysctl_gcov_op           gcov_op;
>          struct xen_sysctl_psr_cmt_op        psr_cmt_op;
> +        struct xen_sysctl_psr_cat_op        psr_cat_op;
> +        struct xen_sysctl_tmem_op           tmem_op;
> +        struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps;
> +        struct xen_sysctl_cpu_featureset    cpu_featureset;
> +        struct xen_sysctl_livepatch_op      livepatch;
> +        struct xen_sysctl_set_parameter     set_parameter;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff --git a/include/xen/tmem.h b/include/xen/tmem.h
> index 4fd2fc6..aa0aafa 100644
> --- a/include/xen/tmem.h
> +++ b/include/xen/tmem.h
> @@ -33,7 +33,11 @@
>  #define TMEM_SPEC_VERSION          1
>  
>  /* Commands to HYPERVISOR_tmem_op() */
> -#define TMEM_CONTROL               0
> +#ifdef __XEN__
> +#define TMEM_CONTROL               0 /* Now called XEN_SYSCTL_tmem_op */
> +#else
> +#undef TMEM_CONTROL
> +#endif
>  #define TMEM_NEW_POOL              1
>  #define TMEM_DESTROY_POOL          2
>  #define TMEM_PUT_PAGE              4
> @@ -47,35 +51,9 @@
>  #define TMEM_XCHG                 10
>  #endif
>  
> -/* Privileged commands to HYPERVISOR_tmem_op() */
> -#define TMEM_AUTH                 101 
> -#define TMEM_RESTORE_NEW          102
> -
> -/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */
> -#define TMEMC_THAW                   0
> -#define TMEMC_FREEZE                 1
> -#define TMEMC_FLUSH                  2
> -#define TMEMC_DESTROY                3
> -#define TMEMC_LIST                   4
> -#define TMEMC_SET_WEIGHT             5
> -#define TMEMC_SET_CAP                6
> -#define TMEMC_SET_COMPRESS           7
> -#define TMEMC_QUERY_FREEABLE_MB      8
> -#define TMEMC_SAVE_BEGIN             10
> -#define TMEMC_SAVE_GET_VERSION       11
> -#define TMEMC_SAVE_GET_MAXPOOLS      12
> -#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13
> -#define TMEMC_SAVE_GET_CLIENT_CAP    14
> -#define TMEMC_SAVE_GET_CLIENT_FLAGS  15
> -#define TMEMC_SAVE_GET_POOL_FLAGS    16
> -#define TMEMC_SAVE_GET_POOL_NPAGES   17
> -#define TMEMC_SAVE_GET_POOL_UUID     18
> -#define TMEMC_SAVE_GET_NEXT_PAGE     19
> -#define TMEMC_SAVE_GET_NEXT_INV      20
> -#define TMEMC_SAVE_END               21
> -#define TMEMC_RESTORE_BEGIN          30
> -#define TMEMC_RESTORE_PUT_PAGE       32
> -#define TMEMC_RESTORE_FLUSH_PAGE     33
> +/* Privileged commands now called via XEN_SYSCTL_tmem_op. */
> +#define TMEM_AUTH                 101 /* as XEN_SYSCTL_TMEM_OP_SET_AUTH. */
> +#define TMEM_RESTORE_NEW          102 /* as XEN_SYSCTL_TMEM_OP_SET_POOLS. */
>  
>  /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
>  #define TMEM_POOL_PERSIST          1
> @@ -95,6 +73,11 @@
>  #define EFROZEN                 1000
>  #define EEMPTY                  1001
>  
> +struct xen_tmem_oid {
> +    uint64_t oid[3];
> +};
> +typedef struct xen_tmem_oid xen_tmem_oid_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_tmem_oid_t);
>  
>  #ifndef __ASSEMBLY__
>  #if __XEN_INTERFACE_VERSION__ < 0x00040400
> @@ -109,18 +92,13 @@ struct tmem_op {
>              uint64_t uuid[2];
>              uint32_t flags;
>              uint32_t arg1;
> -        } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */
> -        struct { 
> -            uint32_t subop;
> -            uint32_t cli_id;
> -            uint32_t arg1;
> -            uint32_t arg2;
> -            uint64_t oid[3];
> -            tmem_cli_va_t buf;
> -        } ctrl; /* for cmd == TMEM_CONTROL */
> +        } creat; /* for cmd == TMEM_NEW_POOL. */
>          struct {
> -            
> +#if __XEN_INTERFACE_VERSION__ < 0x00040600
>              uint64_t oid[3];
> +#else
> +            xen_tmem_oid_t oid;
> +#endif
>              uint32_t index;
>              uint32_t tmem_offset;
>              uint32_t pfn_offset;
> @@ -131,12 +109,6 @@ struct tmem_op {
>  };
>  typedef struct tmem_op tmem_op_t;
>  DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
> -
> -struct tmem_handle {
> -    uint32_t pool_id;
> -    uint32_t index;
> -    uint64_t oid[3];
> -};
>  #endif
>  
>  #endif /* __XEN_PUBLIC_TMEM_H__ */
> diff --git a/include/xen/trace.h b/include/xen/trace.h
> index 5211ae7..3746bff 100644
> --- a/include/xen/trace.h
> +++ b/include/xen/trace.h
> @@ -75,9 +75,10 @@
>  /* Per-scheduler IDs, to identify scheduler specific events */
>  #define TRC_SCHED_CSCHED   0
>  #define TRC_SCHED_CSCHED2  1
> -#define TRC_SCHED_SEDF     2
> +/* #define XEN_SCHEDULER_SEDF 2 (Removed) */
>  #define TRC_SCHED_ARINC653 3
>  #define TRC_SCHED_RTDS     4
> +#define TRC_SCHED_SNULL    5
>  
>  /* Per-scheduler tracing */
>  #define TRC_SCHED_CLASS_EVT(_c, _e) \
> @@ -85,6 +86,9 @@
>        ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \
>      (_e & TRC_SCHED_EVT_MASK) )
>  
> +/* Trace classes for DOM0 operations */
> +#define TRC_DOM0_DOMOPS     0x00041000   /* Domains manipulations */
> +
>  /* Trace classes for Hardware */
>  #define TRC_HW_PM           0x00801000   /* Power management traces */
>  #define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
> @@ -112,6 +116,10 @@
>  #define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
>  #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
>  #define TRC_SCHED_SHUTDOWN_CODE  (TRC_SCHED_VERBOSE + 16)
> +#define TRC_SCHED_SWITCH_INFCONT (TRC_SCHED_VERBOSE + 17)
> +
> +#define TRC_DOM0_DOM_ADD         (TRC_DOM0_DOMOPS + 1)
> +#define TRC_DOM0_DOM_REM         (TRC_DOM0_DOMOPS + 2)
>  
>  #define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
>  #define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
> diff --git a/include/xen/vcpu.h b/include/xen/vcpu.h
> index 898b89f..8a9e30d 100644
> --- a/include/xen/vcpu.h
> +++ b/include/xen/vcpu.h
> @@ -41,8 +41,10 @@
>   * Initialise a VCPU. Each VCPU can be initialised only once. A 
>   * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
>   * 
> - * @extra_arg == pointer to vcpu_guest_context structure containing initial
> - *               state for the VCPU.
> + * @extra_arg == For PV or ARM guests this is a pointer to a vcpu_guest_context
> + *               structure containing the initial state for the VCPU. For x86
> + *               HVM based guests this is a pointer to a vcpu_hvm_context
> + *               structure.
>   */
>  #define VCPUOP_initialise            0
>  
> @@ -82,6 +84,12 @@ struct vcpu_runstate_info {
>      /* When was current state entered (system time, ns)? */
>      uint64_t state_entry_time;
>      /*
> +     * Update indicator set in state_entry_time:
> +     * When activated via VMASST_TYPE_runstate_update_flag, set during
> +     * updates in guest memory mapped copy of vcpu_runstate_info.
> +     */
> +#define XEN_RUNSTATE_UPDATE          (xen_mk_ullong(1) << 63)
> +    /*
>       * Time spent in each RUNSTATE_* (ns). The sum of these times is
>       * guaranteed not to drift from system time.
>       */
> diff --git a/include/xen/version.h b/include/xen/version.h
> index 44f26b0..cb84565 100644
> --- a/include/xen/version.h
> +++ b/include/xen/version.h
> @@ -30,7 +30,8 @@
>  
>  #include "xen.h"
>  
> -/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
> +/* NB. All ops return zero on success, except
> + * XENVER_{version,pagesize,build_id} */
>  
>  /* arg == NULL; returns major:minor (16:16). */
>  #define XENVER_version      0
> @@ -77,12 +78,31 @@ typedef struct xen_feature_info xen_feature_info_t;
>  /* arg == NULL; returns host memory page size. */
>  #define XENVER_pagesize 7
>  
> -/* arg == xen_domain_handle_t. */
> +/* arg == xen_domain_handle_t.
> + *
> + * The toolstack fills it out for guest consumption. It is intended to hold
> + * the UUID of the guest.
> + */
>  #define XENVER_guest_handle 8
>  
>  #define XENVER_commandline 9
>  typedef char xen_commandline_t[1024];
>  
> +/*
> + * Return value is the number of bytes written, or XEN_Exx on error.
> + * Calling with empty parameter returns the size of build_id.
> + */
> +#define XENVER_build_id 10
> +struct xen_build_id {
> +        uint32_t        len; /* IN: size of buf[]. */
> +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
> +        unsigned char   buf[];
> +#elif defined(__GNUC__)
> +        unsigned char   buf[1]; /* OUT: Variable length buffer with build_id. */
> +#endif
> +};
> +typedef struct xen_build_id xen_build_id_t;
> +
>  #endif /* __XEN_PUBLIC_VERSION_H__ */
>  
>  /*
> diff --git a/include/xen/vm_event.h b/include/xen/vm_event.h
> new file mode 100644
> index 0000000..b531f71
> --- /dev/null
> +++ b/include/xen/vm_event.h
> @@ -0,0 +1,378 @@
> +/******************************************************************************
> + * vm_event.h
> + *
> + * Memory event common structures.
> + *
> + * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef _XEN_PUBLIC_VM_EVENT_H
> +#define _XEN_PUBLIC_VM_EVENT_H
> +
> +#include "xen.h"
> +
> +#define VM_EVENT_INTERFACE_VERSION 0x00000002
> +
> +#if defined(__XEN__) || defined(__XEN_TOOLS__)
> +
> +#include "io/ring.h"
> +
> +/*
> + * Memory event flags
> + */
> +
> +/*
> + * VCPU_PAUSED in a request signals that the vCPU triggering the event has been
> + *  paused
> + * VCPU_PAUSED in a response signals to unpause the vCPU
> + */
> +#define VM_EVENT_FLAG_VCPU_PAUSED        (1 << 0)
> +/* Flags to aid debugging vm_event */
> +#define VM_EVENT_FLAG_FOREIGN            (1 << 1)
> +/*
> + * The following flags can be set in response to a mem_access event.
> + *
> + * Emulate the fault-causing instruction (if set in the event response flags).
> + * This will allow the guest to continue execution without lifting the page
> + * access restrictions.
> + */
> +#define VM_EVENT_FLAG_EMULATE            (1 << 2)
> +/*
> + * Same as VM_EVENT_FLAG_EMULATE, but with write operations or operations
> + * potentially having side effects (like memory mapped or port I/O) disabled.
> + */
> +#define VM_EVENT_FLAG_EMULATE_NOWRITE    (1 << 3)
> +/*
> + * Toggle singlestepping on vm_event response.
> + * Requires the vCPU to be paused already (synchronous events only).
> + */
> +#define VM_EVENT_FLAG_TOGGLE_SINGLESTEP  (1 << 4)
> +/*
> + * Data is being sent back to the hypervisor in the event response, to be
> + * returned by the read function when emulating an instruction.
> + * This flag is only useful when combined with VM_EVENT_FLAG_EMULATE
> + * and takes precedence if combined with VM_EVENT_FLAG_EMULATE_NOWRITE
> + * (i.e. if both VM_EVENT_FLAG_EMULATE_NOWRITE and
> + * VM_EVENT_FLAG_SET_EMUL_READ_DATA are set, only the latter will be honored).
> + */
> +#define VM_EVENT_FLAG_SET_EMUL_READ_DATA (1 << 5)
> +/*
> + * Deny completion of the operation that triggered the event.
> + * Currently only useful for MSR and control-register write events.
> + * Requires the vCPU to be paused already (synchronous events only).
> + */
> +#define VM_EVENT_FLAG_DENY               (1 << 6)
> +/*
> + * This flag can be set in a request or a response
> + *
> + * On a request, indicates that the event occurred in the alternate p2m
> + * specified by the altp2m_idx request field.
> + *
> + * On a response, indicates that the VCPU should resume in the alternate p2m
> + * specified by the altp2m_idx response field if possible.
> + */
> +#define VM_EVENT_FLAG_ALTERNATE_P2M      (1 << 7)
> +/*
> + * Set the vCPU registers to the values in the  vm_event response.
> + * At the moment x86-only, applies to EAX-EDX, ESP, EBP, ESI, EDI, R8-R15,
> + * EFLAGS, and EIP.
> + * Requires the vCPU to be paused already (synchronous events only).
> + */
> +#define VM_EVENT_FLAG_SET_REGISTERS      (1 << 8)
> +/*
> + * Instruction cache is being sent back to the hypervisor in the event response
> + * to be used by the emulator. This flag is only useful when combined with
> + * VM_EVENT_FLAG_EMULATE and does not take precedence if combined with
> + * VM_EVENT_FLAG_EMULATE_NOWRITE or VM_EVENT_FLAG_SET_EMUL_READ_DATA, (i.e.
> + * if any of those flags are set, only those will be honored).
> + */
> +#define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
> +/*
> + * Have a one-shot VM_EVENT_REASON_INTERRUPT event sent for the first
> + * interrupt pending after resuming the VCPU.
> + */
> +#define VM_EVENT_FLAG_GET_NEXT_INTERRUPT (1 << 10)
> +
> +/*
> + * Reasons for the vm event request
> + */
> +
> +/* Default case */
> +#define VM_EVENT_REASON_UNKNOWN                 0
> +/* Memory access violation */
> +#define VM_EVENT_REASON_MEM_ACCESS              1
> +/* Memory sharing event */
> +#define VM_EVENT_REASON_MEM_SHARING             2
> +/* Memory paging event */
> +#define VM_EVENT_REASON_MEM_PAGING              3
> +/* A control register was updated */
> +#define VM_EVENT_REASON_WRITE_CTRLREG           4
> +/* An MSR was updated. */
> +#define VM_EVENT_REASON_MOV_TO_MSR              5
> +/* Debug operation executed (e.g. int3) */
> +#define VM_EVENT_REASON_SOFTWARE_BREAKPOINT     6
> +/* Single-step (e.g. MTF) */
> +#define VM_EVENT_REASON_SINGLESTEP              7
> +/* An event has been requested via HVMOP_guest_request_vm_event. */
> +#define VM_EVENT_REASON_GUEST_REQUEST           8
> +/* A debug exception was caught */
> +#define VM_EVENT_REASON_DEBUG_EXCEPTION         9
> +/* CPUID executed */
> +#define VM_EVENT_REASON_CPUID                   10
> +/*
> + * Privileged call executed (e.g. SMC).
> + * Note: event may be generated even if SMC condition check fails on some CPUs.
> + *       As this behavior is CPU-specific, users are advised to not rely on it.
> + *       These kinds of events will be filtered out in future versions.
> + */
> +#define VM_EVENT_REASON_PRIVILEGED_CALL         11
> +/* An interrupt has been delivered. */
> +#define VM_EVENT_REASON_INTERRUPT               12
> +/* A descriptor table register was accessed. */
> +#define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
> +/* Current instruction is not implemented by the emulator */
> +#define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
> +
> +/* Supported values for the vm_event_write_ctrlreg index. */
> +#define VM_EVENT_X86_CR0    0
> +#define VM_EVENT_X86_CR3    1
> +#define VM_EVENT_X86_CR4    2
> +#define VM_EVENT_X86_XCR0   3
> +
> +/*
> + * Using custom vCPU structs (i.e. not hvm_hw_cpu) for both x86 and ARM
> + * so as to not fill the vm_event ring buffer too quickly.
> + */
> +struct vm_event_regs_x86 {
> +    uint64_t rax;
> +    uint64_t rcx;
> +    uint64_t rdx;
> +    uint64_t rbx;
> +    uint64_t rsp;
> +    uint64_t rbp;
> +    uint64_t rsi;
> +    uint64_t rdi;
> +    uint64_t r8;
> +    uint64_t r9;
> +    uint64_t r10;
> +    uint64_t r11;
> +    uint64_t r12;
> +    uint64_t r13;
> +    uint64_t r14;
> +    uint64_t r15;
> +    uint64_t rflags;
> +    uint64_t dr7;
> +    uint64_t rip;
> +    uint64_t cr0;
> +    uint64_t cr2;
> +    uint64_t cr3;
> +    uint64_t cr4;
> +    uint64_t sysenter_cs;
> +    uint64_t sysenter_esp;
> +    uint64_t sysenter_eip;
> +    uint64_t msr_efer;
> +    uint64_t msr_star;
> +    uint64_t msr_lstar;
> +    uint64_t fs_base;
> +    uint64_t gs_base;
> +    uint32_t cs_arbytes;
> +    uint32_t _pad;
> +};
> +
> +/*
> + * Only the register 'pc' can be set on a vm_event response using the
> + * VM_EVENT_FLAG_SET_REGISTERS flag.
> + */
> +struct vm_event_regs_arm {
> +    uint64_t ttbr0;
> +    uint64_t ttbr1;
> +    uint64_t ttbcr;
> +    uint64_t pc;
> +    uint32_t cpsr;
> +    uint32_t _pad;
> +};
> +
> +/*
> + * mem_access flag definitions
> + *
> + * These flags are set only as part of a mem_event request.
> + *
> + * R/W/X: Defines the type of violation that has triggered the event
> + *        Multiple types can be set in a single violation!
> + * GLA_VALID: If the gla field holds a guest VA associated with the event
> + * FAULT_WITH_GLA: If the violation was triggered by accessing gla
> + * FAULT_IN_GPT: If the violation was triggered during translating gla
> + */
> +#define MEM_ACCESS_R                (1 << 0)
> +#define MEM_ACCESS_W                (1 << 1)
> +#define MEM_ACCESS_X                (1 << 2)
> +#define MEM_ACCESS_RWX              (MEM_ACCESS_R | MEM_ACCESS_W | MEM_ACCESS_X)
> +#define MEM_ACCESS_RW               (MEM_ACCESS_R | MEM_ACCESS_W)
> +#define MEM_ACCESS_RX               (MEM_ACCESS_R | MEM_ACCESS_X)
> +#define MEM_ACCESS_WX               (MEM_ACCESS_W | MEM_ACCESS_X)
> +#define MEM_ACCESS_GLA_VALID        (1 << 3)
> +#define MEM_ACCESS_FAULT_WITH_GLA   (1 << 4)
> +#define MEM_ACCESS_FAULT_IN_GPT     (1 << 5)
> +
> +struct vm_event_mem_access {
> +    uint64_t gfn;
> +    uint64_t offset;
> +    uint64_t gla;   /* if flags has MEM_ACCESS_GLA_VALID set */
> +    uint32_t flags; /* MEM_ACCESS_* */
> +    uint32_t _pad;
> +};
> +
> +struct vm_event_write_ctrlreg {
> +    uint32_t index;
> +    uint32_t _pad;
> +    uint64_t new_value;
> +    uint64_t old_value;
> +};
> +
> +struct vm_event_singlestep {
> +    uint64_t gfn;
> +};
> +
> +struct vm_event_debug {
> +    uint64_t gfn;
> +    uint32_t insn_length;
> +    uint8_t type;        /* HVMOP_TRAP_* */
> +    uint8_t _pad[3];
> +};
> +
> +struct vm_event_mov_to_msr {
> +    uint64_t msr;
> +    uint64_t value;
> +};
> +
> +#define VM_EVENT_DESC_IDTR           1
> +#define VM_EVENT_DESC_GDTR           2
> +#define VM_EVENT_DESC_LDTR           3
> +#define VM_EVENT_DESC_TR             4
> +
> +struct vm_event_desc_access {
> +    union {
> +        struct {
> +            uint32_t instr_info;         /* VMX: VMCS Instruction-Information */
> +            uint32_t _pad1;
> +            uint64_t exit_qualification; /* VMX: VMCS Exit Qualification */
> +        } vmx;
> +        struct {
> +            uint64_t exitinfo;           /* SVM: VMCB EXITINFO */
> +            uint64_t _pad2;
> +        } svm;
> +    } arch;
> +    uint8_t descriptor;                  /* VM_EVENT_DESC_* */
> +    uint8_t is_write;
> +    uint8_t _pad[6];
> +};
> +
> +struct vm_event_cpuid {
> +    uint32_t insn_length;
> +    uint32_t leaf;
> +    uint32_t subleaf;
> +    uint32_t _pad;
> +};
> +
> +struct vm_event_interrupt_x86 {
> +    uint32_t vector;
> +    uint32_t type;
> +    uint32_t error_code;
> +    uint32_t _pad;
> +    uint64_t cr2;
> +};
> +
> +#define MEM_PAGING_DROP_PAGE       (1 << 0)
> +#define MEM_PAGING_EVICT_FAIL      (1 << 1)
> +
> +struct vm_event_paging {
> +    uint64_t gfn;
> +    uint32_t p2mt;
> +    uint32_t flags;
> +};
> +
> +struct vm_event_sharing {
> +    uint64_t gfn;
> +    uint32_t p2mt;
> +    uint32_t _pad;
> +};
> +
> +struct vm_event_emul_read_data {
> +    uint32_t size;
> +    /* The struct is used in a union with vm_event_regs_x86. */
> +    uint8_t  data[sizeof(struct vm_event_regs_x86) - sizeof(uint32_t)];
> +};
> +
> +struct vm_event_emul_insn_data {
> +    uint8_t data[16]; /* Has to be completely filled */
> +};
> +
> +typedef struct vm_event_st {
> +    uint32_t version;   /* VM_EVENT_INTERFACE_VERSION */
> +    uint32_t flags;     /* VM_EVENT_FLAG_* */
> +    uint32_t reason;    /* VM_EVENT_REASON_* */
> +    uint32_t vcpu_id;
> +    uint16_t altp2m_idx; /* may be used during request and response */
> +    uint16_t _pad[3];
> +
> +    union {
> +        struct vm_event_paging                mem_paging;
> +        struct vm_event_sharing               mem_sharing;
> +        struct vm_event_mem_access            mem_access;
> +        struct vm_event_write_ctrlreg         write_ctrlreg;
> +        struct vm_event_mov_to_msr            mov_to_msr;
> +        struct vm_event_desc_access           desc_access;
> +        struct vm_event_singlestep            singlestep;
> +        struct vm_event_debug                 software_breakpoint;
> +        struct vm_event_debug                 debug_exception;
> +        struct vm_event_cpuid                 cpuid;
> +        union {
> +            struct vm_event_interrupt_x86     x86;
> +        } interrupt;
> +    } u;
> +
> +    union {
> +        union {
> +            struct vm_event_regs_x86 x86;
> +            struct vm_event_regs_arm arm;
> +        } regs;
> +
> +        union {
> +            struct vm_event_emul_read_data read;
> +            struct vm_event_emul_insn_data insn;
> +        } emul;
> +    } data;
> +} vm_event_request_t, vm_event_response_t;
> +
> +DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
> +
> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> +#endif /* _XEN_PUBLIC_VM_EVENT_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/xen-compat.h b/include/xen/xen-compat.h
> index 1e62dc1..b673653 100644
> --- a/include/xen/xen-compat.h
> +++ b/include/xen/xen-compat.h
> @@ -27,7 +27,7 @@
>  #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
>  #define __XEN_PUBLIC_XEN_COMPAT_H__
>  
> -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600
> +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040900
>  
>  #if defined(__XEN__) || defined(__XEN_TOOLS__)
>  /* Xen is built with matching headers and implements the latest interface. */
> diff --git a/include/xen/xen.h b/include/xen/xen.h
> index a6a2092..2ac6b1e 100644
> --- a/include/xen/xen.h
> +++ b/include/xen/xen.h
> @@ -52,6 +52,24 @@ DEFINE_XEN_GUEST_HANDLE(void);
>  DEFINE_XEN_GUEST_HANDLE(uint64_t);
>  DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
>  DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
> +
> +/* Turn a plain number into a C unsigned (long (long)) constant. */
> +#define __xen_mk_uint(x)  x ## U
> +#define __xen_mk_ulong(x) x ## UL
> +#ifndef __xen_mk_ullong
> +# define __xen_mk_ullong(x) x ## ULL
> +#endif
> +#define xen_mk_uint(x)    __xen_mk_uint(x)
> +#define xen_mk_ulong(x)   __xen_mk_ulong(x)
> +#define xen_mk_ullong(x)  __xen_mk_ullong(x)
> +
> +#else
> +
> +/* In assembly code we cannot use C numeric constant suffixes. */
> +#define xen_mk_uint(x)   x
> +#define xen_mk_ulong(x)  x
> +#define xen_mk_ullong(x) x
> +
>  #endif
>  
>  /*
> @@ -101,6 +119,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define __HYPERVISOR_kexec_op             37
>  #define __HYPERVISOR_tmem_op              38
>  #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
> +#define __HYPERVISOR_xenpmu_op            40
> +#define __HYPERVISOR_dm_op                41
>  
>  /* Architecture-specific hypercall definitions. */
>  #define __HYPERVISOR_arch_0               48
> @@ -160,6 +180,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           */
>  #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     */
>  #define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
> +#define VIRQ_XENPMU     13 /* V.  PMC interrupt                              */
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16
> @@ -449,13 +470,13 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
>  /* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
>  /* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
>  /* ` enum uvm_flags { */
> -#define UVMF_NONE               (0UL<<0) /* No flushing at all.   */
> -#define UVMF_TLB_FLUSH          (1UL<<0) /* Flush entire TLB(s).  */
> -#define UVMF_INVLPG             (2UL<<0) /* Flush only one entry. */
> -#define UVMF_FLUSHTYPE_MASK     (3UL<<0)
> -#define UVMF_MULTI              (0UL<<2) /* Flush subset of TLBs. */
> -#define UVMF_LOCAL              (0UL<<2) /* Flush local TLB.      */
> -#define UVMF_ALL                (1UL<<2) /* Flush all TLBs.       */
> +#define UVMF_NONE           (xen_mk_ulong(0)<<0) /* No flushing at all.   */
> +#define UVMF_TLB_FLUSH      (xen_mk_ulong(1)<<0) /* Flush entire TLB(s).  */
> +#define UVMF_INVLPG         (xen_mk_ulong(2)<<0) /* Flush only one entry. */
> +#define UVMF_FLUSHTYPE_MASK (xen_mk_ulong(3)<<0)
> +#define UVMF_MULTI          (xen_mk_ulong(0)<<2) /* Flush subset of TLBs. */
> +#define UVMF_LOCAL          (xen_mk_ulong(0)<<2) /* Flush local TLB.      */
> +#define UVMF_ALL            (xen_mk_ulong(1)<<2) /* Flush all TLBs.       */
>  /* ` } */
>  
>  /*
> @@ -486,17 +507,42 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
>  /* x86/PAE guests: support PDPTs above 4GB. */
>  #define VMASST_TYPE_pae_extended_cr3     3
>  
> -#define MAX_VMASST_TYPE                  3
> +/*
> + * x86 guests: Sane behaviour for virtual iopl
> + *  - virtual iopl updated from do_iret() hypercalls.
> + *  - virtual iopl reported in bounce frames.
> + *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
> + */
> +#define VMASST_TYPE_architectural_iopl   4
>  
> -#ifndef __ASSEMBLY__
> +/*
> + * All guests: activate update indicator in vcpu_runstate_info
> + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
> + * vcpu_runstate_info during updates of the runstate information.
> + */
> +#define VMASST_TYPE_runstate_update_flag 5
>  
> -typedef uint16_t domid_t;
> +/*
> + * x86/64 guests: strictly hide M2P from user mode.
> + * This allows the guest to control respective hypervisor behavior:
> + * - when not set, L4 tables get created with the respective slot blank,
> + *   and whenever the L4 table gets used as a kernel one the missing
> + *   mapping gets inserted,
> + * - when set, L4 tables get created with the respective slot initialized
> + *   as before, and whenever the L4 table gets used as a user one the
> + *   mapping gets zapped.
> + */
> +#define VMASST_TYPE_m2p_strict           32
> +
> +#if __XEN_INTERFACE_VERSION__ < 0x00040600
> +#define MAX_VMASST_TYPE                  3
> +#endif
>  
>  /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
> -#define DOMID_FIRST_RESERVED (0x7FF0U)
> +#define DOMID_FIRST_RESERVED xen_mk_uint(0x7FF0)
>  
>  /* DOMID_SELF is used in certain contexts to refer to oneself. */
> -#define DOMID_SELF (0x7FF0U)
> +#define DOMID_SELF           xen_mk_uint(0x7FF0)
>  
>  /*
>   * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
> @@ -504,28 +550,37 @@ typedef uint16_t domid_t;
>   * is useful to ensure that no mappings to the OS's own heap are accidentally
>   * installed. (e.g., in Linux this could cause havoc as reference counts
>   * aren't adjusted on the I/O-mapping code path).
> - * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
> - * be specified by any calling domain.
> + * This only makes sense as HYPERVISOR_mmu_update()'s and
> + * HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument. For
> + * HYPERVISOR_mmu_update() context it can be specified by any calling domain,
> + * otherwise it's only permitted if the caller is privileged.
>   */
> -#define DOMID_IO   (0x7FF1U)
> +#define DOMID_IO             xen_mk_uint(0x7FF1)
>  
>  /*
>   * DOMID_XEN is used to allow privileged domains to map restricted parts of
>   * Xen's heap space (e.g., the machine_to_phys table).
> - * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
> - * the caller is privileged.
> + * This only makes sense as
> + * - HYPERVISOR_mmu_update()'s, HYPERVISOR_mmuext_op()'s, or
> + *   HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument,
> + * - with XENMAPSPACE_gmfn_foreign,
> + * and is only permitted if the caller is privileged.
>   */
> -#define DOMID_XEN  (0x7FF2U)
> +#define DOMID_XEN            xen_mk_uint(0x7FF2)
>  
>  /*
>   * DOMID_COW is used as the owner of sharable pages */
> -#define DOMID_COW  (0x7FF3U)
> +#define DOMID_COW            xen_mk_uint(0x7FF3)
>  
>  /* DOMID_INVALID is used to identify pages with unknown owner. */
> -#define DOMID_INVALID (0x7FF4U)
> +#define DOMID_INVALID        xen_mk_uint(0x7FF4)
>  
>  /* Idle domain. */
> -#define DOMID_IDLE (0x7FFFU)
> +#define DOMID_IDLE           xen_mk_uint(0x7FFF)
> +
> +#ifndef __ASSEMBLY__
> +
> +typedef uint16_t domid_t;
>  
>  /*
>   * Send an array of these to HYPERVISOR_mmu_update().
> @@ -585,10 +640,18 @@ struct vcpu_time_info {
>       */
>      uint32_t tsc_to_system_mul;
>      int8_t   tsc_shift;
> +#if __XEN_INTERFACE_VERSION__ > 0x040600
> +    uint8_t  flags;
> +    uint8_t  pad1[2];
> +#else
>      int8_t   pad1[3];
> +#endif
>  }; /* 32 bytes */
>  typedef struct vcpu_time_info vcpu_time_info_t;
>  
> +#define XEN_PVCLOCK_TSC_STABLE_BIT     (1 << 0)
> +#define XEN_PVCLOCK_GUEST_STOPPED      (1 << 1)
> +
>  struct vcpu_info {
>      /*
>       * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
> @@ -682,6 +745,12 @@ struct shared_info {
>      uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
>      uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
>      uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
> +#if !defined(__i386__)
> +    uint32_t wc_sec_hi;
> +# define xen_wc_sec_hi wc_sec_hi
> +#elif !defined(__XEN__) && !defined(__XEN_TOOLS__)
> +# define xen_wc_sec_hi arch.wc_sec_hi
> +#endif
>  
>      struct arch_shared_info arch;
>  
> @@ -698,24 +767,27 @@ typedef struct shared_info shared_info_t;
>   *  3. This the order of bootstrap elements in the initial virtual region:
>   *      a. relocated kernel image
>   *      b. initial ram disk              [mod_start, mod_len]
> + *         (may be omitted)
>   *      c. list of allocated page frames [mfn_list, nr_pages]
>   *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
> - *      d. start_info_t structure        [register ESI (x86)]
> - *      e. bootstrap page tables         [pt_base and CR3 (x86)]
> - *      f. bootstrap stack               [register ESP (x86)]
> + *      d. start_info_t structure        [register rSI (x86)]
> + *         in case of dom0 this page contains the console info, too
> + *      e. unless dom0: xenstore ring page
> + *      f. unless dom0: console ring page
> + *      g. bootstrap page tables         [pt_base and CR3 (x86)]
> + *      h. bootstrap stack               [register ESP (x86)]
>   *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
> - *  5. The initial ram disk may be omitted.
> - *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
> + *  5. The list of page frames forms a contiguous 'pseudo-physical' memory
>   *     layout for the domain. In particular, the bootstrap virtual-memory
>   *     region is a 1:1 mapping to the first section of the pseudo-physical map.
> - *  7. All bootstrap elements are mapped read-writable for the guest OS. The
> + *  6. All bootstrap elements are mapped read-writable for the guest OS. The
>   *     only exception is the bootstrap page table, which is mapped read-only.
> - *  8. There is guaranteed to be at least 512kB padding after the final
> + *  7. There is guaranteed to be at least 512kB padding after the final
>   *     bootstrap element. If necessary, the bootstrap virtual region is
>   *     extended by an extra 4MB to ensure this.
>   *
>   * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page
> - * table layout") a bug caused the pt_base (3.e above) and cr3 to not point
> + * table layout") a bug caused the pt_base (3.g above) and cr3 to not point
>   * to the start of the guest page tables (it was offset by two pages).
>   * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU
>   * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got
> @@ -771,6 +843,8 @@ typedef struct start_info start_info_t;
>  #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
>  #define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
>  #define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
> +#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */
> +                                   /* P->M making the 3 level tree obsolete? */
>  #define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
>  
>  /*
> @@ -851,25 +925,19 @@ typedef struct dom0_vga_console_info {
>  
>  typedef uint8_t xen_domain_handle_t[16];
>  
> -/* Turn a plain number into a C unsigned long constant. */
> -#define __mk_unsigned_long(x) x ## UL
> -#define mk_unsigned_long(x) __mk_unsigned_long(x)
> -
>  __DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
>  __DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
>  __DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
>  __DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
>  
> -#else /* __ASSEMBLY__ */
> -
> -/* In assembly code we cannot use C numeric constant suffixes. */
> -#define mk_unsigned_long(x) x
> -
>  #endif /* !__ASSEMBLY__ */
>  
>  /* Default definitions for macros used by domctl/sysctl. */
>  #if defined(__XEN__) || defined(__XEN_TOOLS__)
>  
> +#ifndef int64_aligned_t
> +#define int64_aligned_t int64_t
> +#endif
>  #ifndef uint64_aligned_t
>  #define uint64_aligned_t uint64_t
>  #endif
> diff --git a/include/xen/xenoprof.h b/include/xen/xenoprof.h
> index 1c305c4..1955db1 100644
> --- a/include/xen/xenoprof.h
> +++ b/include/xen/xenoprof.h
> @@ -68,7 +68,7 @@ struct event_log {
>  };
>  
>  /* PC value that indicates a special code */
> -#define XENOPROF_ESCAPE_CODE (~0ULL)
> +#define XENOPROF_ESCAPE_CODE (~xen_mk_ullong(0))
>  /* Transient events for the xenoprof->oprofile cpu buf */
>  #define XENOPROF_TRACE_BEGIN 1
>  
> diff --git a/include/xen/xsm/flask_op.h b/include/xen/xsm/flask_op.h
> index 233de81..970ec07 100644
> --- a/include/xen/xsm/flask_op.h
> +++ b/include/xen/xsm/flask_op.h
> @@ -25,6 +25,8 @@
>  #ifndef __FLASK_OP_H__
>  #define __FLASK_OP_H__
>  
> +#include "../event_channel.h"
> +
>  #define XEN_FLASK_INTERFACE_VERSION 1
>  
>  struct xen_flask_load {
> @@ -68,6 +70,7 @@ struct xen_flask_transition {
>      uint32_t newsid;
>  };
>  
> +#if __XEN_INTERFACE_VERSION__ < 0x00040800
>  struct xen_flask_userlist {
>      /* IN: starting SID for list */
>      uint32_t start_sid;
> @@ -81,6 +84,7 @@ struct xen_flask_userlist {
>          XEN_GUEST_HANDLE(uint32) sids;
>      } u;
>  };
> +#endif
>  
>  struct xen_flask_boolean {
>      /* IN/OUT: numeric identifier for boolean [GET/SET]
> @@ -148,6 +152,13 @@ struct xen_flask_relabel {
>      uint32_t sid;
>  };
>  
> +struct xen_flask_devicetree_label {
> +    /* IN */
> +    uint32_t sid;
> +    uint32_t length;
> +    XEN_GUEST_HANDLE(char) path;
> +};
> +
>  struct xen_flask_op {
>      uint32_t cmd;
>  #define FLASK_LOAD              1
> @@ -158,7 +169,7 @@ struct xen_flask_op {
>  #define FLASK_ACCESS            6
>  #define FLASK_CREATE            7
>  #define FLASK_RELABEL           8
> -#define FLASK_USER              9
> +#define FLASK_USER              9  /* No longer implemented */
>  #define FLASK_POLICYVERS        10
>  #define FLASK_GETBOOL           11
>  #define FLASK_SETBOOL           12
> @@ -174,6 +185,7 @@ struct xen_flask_op {
>  #define FLASK_DEL_OCONTEXT      22
>  #define FLASK_GET_PEER_SID      23
>  #define FLASK_RELABEL_DOMAIN    24
> +#define FLASK_DEVICETREE_LABEL  25
>      uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */
>      union {
>          struct xen_flask_load load;
> @@ -183,7 +195,9 @@ struct xen_flask_op {
>          struct xen_flask_access access;
>          /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */
>          struct xen_flask_transition transition;
> +#if __XEN_INTERFACE_VERSION__ < 0x00040800
>          struct xen_flask_userlist userlist;
> +#endif
>          /* FLASK_GETBOOL, FLASK_SETBOOL */
>          struct xen_flask_boolean boolean;
>          struct xen_flask_setavc_threshold setavc_threshold;
> @@ -193,6 +207,7 @@ struct xen_flask_op {
>          struct xen_flask_ocontext ocontext;
>          struct xen_flask_peersid peersid;
>          struct xen_flask_relabel relabel;
> +        struct xen_flask_devicetree_label devicetree_label;
>      } u;
>  };
>  typedef struct xen_flask_op xen_flask_op_t;
> -- 
> 2.11.0
> 

-- 
Samuel
Now I know someone out there is going to claim, "Well then, UNIX is intuitive,
because you only need to learn 5000 commands, and then everything else follows
from that! Har har har!"
(Andy Bates in comp.os.linux.misc, on "intuitive interfaces", slightly
defending Macs.)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.