x86: PV SMAP for 64-bit guests

TODO: Apart from TOGGLE_MODE(), should we enforce SMAP mode for other
implied-supervisor guest memory accesses?

TODO: MMUEXT_SET_SMAP_MODE may be better replaced by a standalone
hypercall (taking just a single parameter), perhaps by extending
fpu_taskswitch.

Note that the new state isn't being saved/restored. That's mainly because
a capable kernel, when migrated from an incapable hypervisor to a capable
one, would likely want to take advantage of the capability, and hence
would need to set up all of the state anyway. This also implies that a
capable kernel ought to be prepared to be migrated to an incapable
hypervisor (the loss of functionality isn't critical; it merely weakens
security).

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1149,6 +1149,7 @@ static void load_segments(struct vcpu *n
             (unsigned long *)regs->rsp :
             (unsigned long *)pv->kernel_sp;
         unsigned long cs_and_mask, rflags;
+        int smap_mode = -1;
 
         if ( is_pv_32on64_domain(n->domain) )
         {
@@ -1199,9 +1200,17 @@ static void load_segments(struct vcpu *n
         }
 
         if ( !(n->arch.flags & TF_kernel_mode) )
+        {
+            n->arch.flags |= TF_smap_mode;
             toggle_guest_mode(n);
+        }
         else
+        {
             regs->cs &= ~3;
+            smap_mode = guest_smap_mode(n);
+            if ( !set_smap_mode(n, 1) )
+                smap_mode = -1;
+        }
 
         /* CS longword also contains full evtchn_upcall_mask. */
         cs_and_mask = (unsigned long)regs->cs |
@@ -1210,6 +1219,11 @@ static void load_segments(struct vcpu *n
         /* Fold upcall mask into RFLAGS.IF. */
         rflags  = regs->rflags & ~X86_EFLAGS_IF;
         rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+        if ( smap_mode >= 0 )
+        {
+            rflags &= ~X86_EFLAGS_AC;
+            rflags |= !smap_mode << 18;
+        }
 
         if ( put_user(regs->ss,            rsp- 1) |
              put_user(regs->rsp,           rsp- 2) |
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -827,7 +827,7 @@ long arch_do_domctl(
             evc->sysenter_callback_eip =
                 v->arch.pv_vcpu.sysenter_callback_eip;
             evc->sysenter_disables_events =
-                v->arch.pv_vcpu.sysenter_disables_events;
+                !!(v->arch.pv_vcpu.sysenter_tbf & TBF_INTERRUPT);
             evc->syscall32_callback_cs =
                 v->arch.pv_vcpu.syscall32_callback_cs;
             evc->syscall32_callback_eip =
@@ -863,8 +863,9 @@ long arch_do_domctl(
                 evc->sysenter_callback_cs;
             v->arch.pv_vcpu.sysenter_callback_eip =
                 evc->sysenter_callback_eip;
-            v->arch.pv_vcpu.sysenter_disables_events =
-                evc->sysenter_disables_events;
+            v->arch.pv_vcpu.sysenter_tbf = 0;
+            if ( evc->sysenter_disables_events )
+                v->arch.pv_vcpu.sysenter_tbf |= TBF_INTERRUPT;
             fixup_guest_code_selector(d, evc->syscall32_callback_cs);
             v->arch.pv_vcpu.syscall32_callback_cs =
                 evc->syscall32_callback_cs;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -488,7 +488,7 @@ void write_ptbase(struct vcpu *v)
 /*
  * Should be called after CR3 is updated.
  *
- * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * Uses values found in vcpu->arch.guest_table{,_user,_kernel}, and
  * for HVM guests, arch.monitor_table and hvm's guest CR3.
  *
  * Update ref counts to shadow tables appropriately.
@@ -505,8 +505,10 @@ void update_cr3(struct vcpu *v)
 
     if ( !(v->arch.flags & TF_kernel_mode) )
         cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
-    else
+    else if ( !guest_smap_mode(v) )
         cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+    else
+        cr3_mfn = pagetable_get_pfn(v->arch.pv_vcpu.guest_table_smap);
 
     make_cr3(v, cr3_mfn);
 }
@@ -2687,7 +2689,22 @@ int vcpu_destroy_pagetables(struct vcpu
                 rc = put_page_and_type_preemptible(page);
         }
         if ( !rc )
+        {
             v->arch.guest_table_user = pagetable_null();
+
+            /* Drop ref to guest_table_smap (from MMUEXT_NEW_SMAP_BASEPTR). */
+            mfn = pagetable_get_pfn(v->arch.pv_vcpu.guest_table_smap);
+            if ( mfn )
+            {
+                page = mfn_to_page(mfn);
+                if ( paging_mode_refcounts(v->domain) )
+                    put_page(page);
+                else
+                    rc = put_page_and_type_preemptible(page);
+            }
+        }
+        if ( !rc )
+            v->arch.pv_vcpu.guest_table_smap = pagetable_null();
     }
 
     v->arch.cr3 = 0;
@@ -3086,7 +3103,11 @@ long do_mmuext_op(
            }
            break;
 
-        case MMUEXT_NEW_USER_BASEPTR: {
+        case MMUEXT_NEW_USER_BASEPTR:
+        case MMUEXT_NEW_SMAP_BASEPTR: {
+            pagetable_t *ppt = op.cmd == MMUEXT_NEW_USER_BASEPTR
+                               ? &curr->arch.guest_table_user
+                               : &curr->arch.pv_vcpu.guest_table_smap;
             unsigned long old_mfn;
 
             if ( paging_mode_translate(current->domain) )
@@ -3095,7 +3116,7 @@ long do_mmuext_op(
                 break;
             }
 
-            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+            old_mfn = pagetable_get_pfn(*ppt);
             /*
              * This is particularly important when getting restarted after the
              * previous attempt got preempted in the put-old-MFN phase.
@@ -3124,7 +3145,7 @@ long do_mmuext_op(
                 }
             }
 
-            curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
+            *ppt = pagetable_from_pfn(op.arg1.mfn);
 
             if ( old_mfn != 0 )
             {
@@ -3249,6 +3270,15 @@ long do_mmuext_op(
            break;
        }
 
+        case MMUEXT_SET_SMAP_MODE:
+            if ( unlikely(is_pv_32bit_domain(d)) )
+                rc = -ENOSYS, okay = 0;
+            else if ( unlikely(op.arg1.val & ~1) )
+                okay = 0;
+            else if ( unlikely(!set_smap_mode(curr, op.arg1.val)) )
+                rc = -EOPNOTSUPP, okay = 0;
+            break;
+
         case MMUEXT_CLEAR_PAGE: {
             struct page_info *page;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -451,6 +451,8 @@ static void do_guest_trap(
 
     if ( TI_GET_IF(ti) )
         tb->flags |= TBF_INTERRUPT;
+    if ( !TI_GET_AC(ti) )
+        tb->flags |= TBF_SMAP;
 
     if ( unlikely(null_trap_bounce(v, tb)) )
         gdprintk(XENLOG_WARNING, "Unhandled %s fault/trap [#%d] "
@@ -1089,6 +1091,8 @@ struct trap_bounce *propagate_page_fault
     tb->eip = ti->address;
     if ( TI_GET_IF(ti) )
         tb->flags |= TBF_INTERRUPT;
+    if ( !TI_GET_AC(ti) )
+        tb->flags |= TBF_SMAP;
 
     return tb;
 }
@@ -1109,6 +1113,8 @@ struct trap_bounce *propagate_page_fault
     tb->eip = ti->address;
     if ( TI_GET_IF(ti) )
         tb->flags |= TBF_INTERRUPT;
+    if ( !TI_GET_AC(ti) )
+        tb->flags |= TBF_SMAP;
     if ( unlikely(null_trap_bounce(v, tb)) )
     {
         printk("d%d:v%d: unhandled page fault (ec=%04X)\n",
@@ -1598,23 +1604,21 @@ static int guest_io_okay(
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
-    /* If in user mode, switch to kernel mode just to read I/O bitmap. */
-    int user_mode = !(v->arch.flags & TF_kernel_mode);
-#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
-
     if ( !vm86_mode(regs) &&
          (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
         return 1;
 
     if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
     {
+        unsigned int mode;
         union { uint8_t bytes[2]; uint16_t mask; } x;
 
         /*
         * Grab permission bytes from guest space. Inaccessible bytes are
         * read as 0xff (no access allowed).
+        * If in user mode, switch to kernel mode just to read I/O bitmap.
         */
-        TOGGLE_MODE();
+        TOGGLE_MODE(v, mode, 1);
         switch ( __copy_from_guest_offset(x.bytes, v->arch.pv_vcpu.iobmp,
                                           port>>3, 2) )
         {
@@ -1622,7 +1626,7 @@ static int guest_io_okay(
         case 1: x.bytes[1] = ~0;
         case 0: break;
         }
-        TOGGLE_MODE();
+        TOGGLE_MODE(v, mode, 0);
         if ( (x.mask & (((1<arch.pv_vcpu.sysenter_callback_cs = reg->address.cs;
         v->arch.pv_vcpu.sysenter_callback_eip = reg->address.eip;
-        v->arch.pv_vcpu.sysenter_disables_events =
-            (reg->flags & CALLBACKF_mask_events) != 0;
+        v->arch.pv_vcpu.sysenter_tbf =
+            (reg->flags & CALLBACKF_mask_events ? TBF_INTERRUPT : 0);
         break;
 
     case CALLBACKTYPE_nmi:
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -28,7 +28,12 @@ switch_to_kernel:
         /* TB_flags = VGCF_syscall_disables_events ? TBF_INTERRUPT : 0 */
         btl   $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
         setc  %cl
+        /* TB_flags |= VGCF_syscall_clac ? TBF_SMAP : 0 */
+        btl   $_VGCF_syscall_clac,VCPU_guest_context_flags(%rbx)
+        setc  %al
         leal  (,%rcx,TBF_INTERRUPT),%ecx
+        leal  (,%rax,TBF_SMAP),%eax
+        orl   %eax,%ecx
         movb  %cl,TRAPBOUNCE_flags(%rdx)
         call  create_bounce_frame
         andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
@@ -87,7 +92,7 @@ failsafe_callback:
         leaq  VCPU_trap_bounce(%rbx),%rdx
         movq  VCPU_failsafe_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
-        movb  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
+        movb  $TBF_FAILSAFE|TBF_SMAP,TRAPBOUNCE_flags(%rdx)
         bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
         jnc   1f
         orb   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
@@ -215,7 +220,7 @@ test_guest_events:
         leaq  VCPU_trap_bounce(%rbx),%rdx
         movq  VCPU_event_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
-        movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+        movb  $TBF_INTERRUPT|TBF_SMAP,TRAPBOUNCE_flags(%rdx)
         call  create_bounce_frame
         jmp   test_all_events
@@ -278,9 +283,8 @@ GLOBAL(sysenter_eflags_saved)
         pushq $0
         SAVE_VOLATILE TRAP_syscall
         GET_CURRENT(%rbx)
-        cmpb  $0,VCPU_sysenter_disables_events(%rbx)
+        movzbl VCPU_sysenter_tbf(%rbx),%ecx
         movq  VCPU_sysenter_addr(%rbx),%rax
-        setne %cl
         testl $X86_EFLAGS_NT,UREGS_eflags(%rsp)
         leaq  VCPU_trap_bounce(%rbx),%rdx
         UNLIKELY_START(nz, sysenter_nt_set)
@@ -290,7 +294,6 @@ UNLIKELY_START(nz, sysenter_nt_set)
         xorl  %eax,%eax
         UNLIKELY_END(sysenter_nt_set)
         testq %rax,%rax
-        leal  (,%rcx,TBF_INTERRUPT),%ecx
         UNLIKELY_START(z, sysenter_gpf)
         movq  VCPU_trap_ctxt(%rbx),%rsi
         SAVE_PRESERVED
@@ -299,7 +302,11 @@ UNLIKELY_START(z, sysenter_gpf)
         movq  TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
         testb $4,TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_flags(%rsi)
         setnz %cl
+        testb $8,TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_flags(%rsi)
+        setnz %sil
         leal  TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE(,%rcx,TBF_INTERRUPT),%ecx
+        leal  (,%rsi,TBF_SMAP),%esi
+        orl   %esi,%ecx
         UNLIKELY_END(sysenter_gpf)
         movq  VCPU_domain(%rbx),%rdi
         movq  %rax,TRAPBOUNCE_eip(%rdx)
@@ -351,19 +358,38 @@ int80_slow_path:
         /* On return only %rbx and %rdx are guaranteed non-clobbered. */
 create_bounce_frame:
         ASSERT_INTERRUPTS_ENABLED
-        testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
-        jnz   1f
-        /* Push new frame at registered guest-OS stack base. */
+        xorl  %esi,%esi
+        testb $TBF_SMAP,TRAPBOUNCE_flags(%rdx)
+        movl  VCPU_thread_flags(%rbx),%eax
+        setnz %sil
+        testb $TF_kernel_mode,%al
         pushq %rdx
         movq  %rbx,%rdi
+        jnz   1f
+        /* Push new frame at registered guest-OS stack base. */
+        andl  $~TF_smap_mode,VCPU_thread_flags(%rbx)
+        shll  $_TF_smap_mode,%esi
+        orl   %esi,VCPU_thread_flags(%rbx)
         call  toggle_guest_mode
-        popq  %rdx
         movq  VCPU_kernel_sp(%rbx),%rsi
+        movl  $~0,%edi
         jmp   2f
 1:      /* In kernel context already: push new frame at existing %rsp. */
-        movq  UREGS_rsp+8(%rsp),%rsi
-        andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
+        pushq %rax
+        call  set_smap_mode
+        test  %al,%al
+        movl  $~0,%edi
+        popq  %rax                      # old VCPU_thread_flags(%rbx)
+UNLIKELY_START(nz, cbf_smap)
+        movl  $~X86_EFLAGS_AC,%edi
+        testb $TF_smap_mode,%al
+        UNLIKELY_DONE(nz, cbf_smap)
+        btsq  $18+32,%rdi               # LOG2(X86_EFLAGS_AC)+32
+UNLIKELY_END(cbf_smap)
+        movq  UREGS_rsp+2*8(%rsp),%rsi
+        andl  $~3,UREGS_cs+2*8(%rsp)    # Indicate kernel context to guest.
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
+        popq  %rdx
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
         movq  $HYPERVISOR_VIRT_END+60,%rax
@@ -394,7 +420,10 @@ __UNLIKELY_END(create_bounce_frame_bad_s
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
         andl  $~X86_EFLAGS_IF,%eax
+        andl  %edi,%eax                 # Clear EFLAGS.AC if needed
+        shrq  $32,%rdi
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
+        orl   %edi,%eax                 # Set EFLAGS.AC if needed
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
.Lft5:  movq  %rax,16(%rsi)             # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -153,9 +153,11 @@ void vcpu_show_registers(const struct vc
     crs[0] = v->arch.pv_vcpu.ctrlreg[0];
     crs[2] = arch_get_cr2(v);
-    crs[3] = pagetable_get_paddr(guest_kernel_mode(v, regs) ?
+    crs[3] = pagetable_get_paddr(!guest_kernel_mode(v, regs) ?
+                                 v->arch.guest_table_user :
+                                 !guest_smap_enabled(v) || !guest_smap_mode(v) ?
                                  v->arch.guest_table :
-                                 v->arch.guest_table_user);
+                                 v->arch.pv_vcpu.guest_table_smap);
     crs[4] = v->arch.pv_vcpu.ctrlreg[4];
 
     _show_registers(regs, crs, CTXT_pv_guest, v);
@@ -258,14 +260,19 @@ void toggle_guest_mode(struct vcpu *v)
     if ( is_pv_32bit_vcpu(v) )
         return;
     v->arch.flags ^= TF_kernel_mode;
+    if ( !guest_smap_enabled(v) )
+        v->arch.flags &= ~TF_smap_mode;
     asm volatile ( "swapgs" );
     update_cr3(v);
 #ifdef USER_MAPPINGS_ARE_GLOBAL
-    /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
-    asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
-#else
-    write_ptbase(v);
+    if ( !(v->arch.flags & TF_kernel_mode) || !guest_smap_mode(v) )
+    {
+        /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
+        asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+    }
+    else
 #endif
+    write_ptbase(v);
 
     if ( !(v->arch.flags & TF_kernel_mode) )
         return;
@@ -280,6 +287,35 @@
     v->arch.pv_vcpu.pending_system_time.version = 0;
 }
 
+bool_t set_smap_mode(struct vcpu *v, bool_t on)
+{
+    ASSERT(!is_pv_32bit_vcpu(v));
+    ASSERT(v->arch.flags & TF_kernel_mode);
+
+    if ( !guest_smap_enabled(v) )
+        return 0;
+    if ( !on == !guest_smap_mode(v) )
+        return 1;
+
+    if ( on )
+        v->arch.flags |= TF_smap_mode;
+    else
+        v->arch.flags &= ~TF_smap_mode;
+
+    update_cr3(v);
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+    if ( !guest_smap_mode(v) )
+    {
+        /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
+        asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+    }
+    else
+#endif
+    write_ptbase(v);
+
+    return 1;
+}
+
 unsigned long do_iret(void)
 {
     struct cpu_user_regs *regs = guest_cpu_user_regs();
@@ -305,6 +341,8 @@ unsigned long do_iret(void)
         }
         toggle_guest_mode(v);
     }
+    else if ( set_smap_mode(v, !(iret_saved.rflags & X86_EFLAGS_AC)) )
+        iret_saved.rflags &= ~X86_EFLAGS_AC;
 
     regs->rip = iret_saved.rip;
     regs->cs  = iret_saved.cs | 3; /* force guest privilege */
@@ -480,6 +518,10 @@ static long register_guest_callback(stru
         else
             clear_bit(_VGCF_syscall_disables_events,
                       &v->arch.vgc_flags);
+        if ( reg->flags & CALLBACKF_clac )
+            set_bit(_VGCF_syscall_clac, &v->arch.vgc_flags);
+        else
+            clear_bit(_VGCF_syscall_clac, &v->arch.vgc_flags);
         break;
 
     case CALLBACKTYPE_syscall32:
@@ -490,8 +532,9 @@ static long register_guest_callback(stru
 
     case CALLBACKTYPE_sysenter:
         v->arch.pv_vcpu.sysenter_callback_eip = reg->address;
-        v->arch.pv_vcpu.sysenter_disables_events =
-            !!(reg->flags & CALLBACKF_mask_events);
+        v->arch.pv_vcpu.sysenter_tbf =
+            (reg->flags & CALLBACKF_mask_events ? TBF_INTERRUPT : 0) |
+            (reg->flags & CALLBACKF_clac ? TBF_SMAP : 0);
         break;
 
     case CALLBACKTYPE_nmi:
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -75,6 +75,8 @@ void mapcache_override_current(struct vc
 
 /* x86/64: toggle guest between kernel and user modes. */
 void toggle_guest_mode(struct vcpu *);
+/* x86/64: switch guest between SMAP and "normal" modes. */
+bool_t set_smap_mode(struct vcpu *, bool_t);
 
 /*
  * Initialise a hypercall-transfer page. The given pointer must be mapped
@@ -354,13 +356,16 @@ struct pv_vcpu
     unsigned short syscall32_callback_cs;
     unsigned short sysenter_callback_cs;
     bool_t syscall32_disables_events;
-    bool_t sysenter_disables_events;
+    u8 sysenter_tbf;
 
     /* Segment base addresses. */
     unsigned long fs_base;
     unsigned long gs_base_kernel;
     unsigned long gs_base_user;
 
+    /* x86/64 kernel-only (SMAP) pagetable */
+    pagetable_t guest_table_smap;
+
     /* Bounce information for propagating an exception to guest OS. */
     struct trap_bounce trap_bounce;
     struct trap_bounce int80_bounce;
@@ -471,6 +476,10 @@ unsigned long pv_guest_cr4_fixup(const s
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |          \
              X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
 
+#define guest_smap_enabled(v) \
+    (!pagetable_is_null((v)->arch.pv_vcpu.guest_table_smap))
+#define guest_smap_mode(v) ((v)->arch.flags & TF_smap_mode)
+
 void domain_cpuid(struct domain *d,
                   unsigned int input,
                   unsigned int sub_input,
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -405,17 +405,34 @@ guest_get_eff_l1e(struct vcpu *v, unsign
     paging_get_hostmode(v)->guest_get_eff_l1e(v, addr, eff_l1e);
 }
 
+#define TOGGLE_MODE(v, m, in) do {                       \
+    if ( in )                                            \
+        (m) = (v)->arch.flags;                           \
+    if ( (m) & TF_kernel_mode )                          \
+    {                                                    \
+        set_smap_mode(v, (in) || ((m) & TF_smap_mode) ); \
+        break;                                           \
+    }                                                    \
+    if ( in )                                            \
+        (v)->arch.flags |= TF_smap_mode;                 \
+    else                                                 \
+    {                                                    \
+        (v)->arch.flags &= ~TF_smap_mode;                \
+        (v)->arch.flags |= (m) & TF_smap_mode;           \
+    }                                                    \
+    toggle_guest_mode(v);                                \
+} while ( 0 )
+
 /* Read the guest's l1e that maps this address, from the kernel-mode
  * pagetables. */
 static inline void
 guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
 {
-    int user_mode = !(v->arch.flags & TF_kernel_mode);
-#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
+    unsigned int mode;
 
-    TOGGLE_MODE();
+    TOGGLE_MODE(v, mode, 1);
     guest_get_eff_l1e(v, addr, eff_l1e);
-    TOGGLE_MODE();
+    TOGGLE_MODE(v, mode, 0);
 }
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -125,12 +125,15 @@
 /* 'trap_bounce' flags values */
 #define TBF_EXCEPTION          1
 #define TBF_EXCEPTION_ERRCODE  2
+#define TBF_SMAP               4
 #define TBF_INTERRUPT          8
 #define TBF_FAILSAFE          16
 
 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0
 #define TF_kernel_mode         (1<<_TF_kernel_mode)
+#define _TF_smap_mode          1
+#define TF_smap_mode           (1<<_TF_smap_mode)
 
 /* #PF error code values. */
 #define PFEC_page_present      (1U<<0)
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -138,6 +138,7 @@ typedef unsigned long xen_ulong_t;
  */
 #define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
+#define TI_GET_AC(_ti)       ((_ti)->flags & 8)
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
 struct trap_info {
@@ -179,6 +180,8 @@ struct vcpu_guest_context {
 #define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
 #define _VGCF_online                 5
 #define VGCF_online                  (1<<_VGCF_online)
+#define _VGCF_syscall_clac           6
+#define VGCF_syscall_clac            (1<<_VGCF_syscall_clac)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
--- a/xen/include/public/callback.h
+++ b/xen/include/public/callback.h
@@ -76,6 +76,13 @@
  */
 #define _CALLBACKF_mask_events     0
 #define CALLBACKF_mask_events      (1U << _CALLBACKF_mask_events)
+/*
+ * Effect CLAC upon callback entry? This flag is ignored for event,
+ * failsafe, and NMI callbacks: user space gets unconditionally hidden if
+ * the respective functionality was enabled by the kernel.
+ */
+#define _CALLBACKF_clac            1
+#define CALLBACKF_clac             (1U << _CALLBACKF_clac)
 
 /*
  * Register a callback.
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -341,6 +341,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
  * mfn: Machine frame number of new page-table base to install in MMU
  *      when in user space.
  *
+ * cmd: MMUEXT_NEW_SMAP_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ *      when in kernel-only (SMAP) mode.
+ *
  * cmd: MMUEXT_TLB_FLUSH_LOCAL
  * No additional arguments. Flushes local TLB.
  *
@@ -371,6 +375,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
  * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  * nr_ents: Number of entries in LDT.
  *
+ * cmd: MMUEXT_SET_SMAP_MODE
+ * val: 0 - disable, 1 - enable (other values reserved)
+ *
  * cmd: MMUEXT_CLEAR_PAGE
  * mfn: Machine frame number to be cleared.
 *
@@ -402,17 +409,21 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
 #define MMUEXT_FLUSH_CACHE_GLOBAL 18
 #define MMUEXT_MARK_SUPER         19
 #define MMUEXT_UNMARK_SUPER       20
+#define MMUEXT_NEW_SMAP_BASEPTR   21
+#define MMUEXT_SET_SMAP_MODE      22
 /* ` } */
 
 #ifndef __ASSEMBLY__
 struct mmuext_op {
     unsigned int cmd; /* => enum mmuext_cmd */
     union {
-        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, NEW_SMAP_BASEPTR
         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
         xen_pfn_t mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
         unsigned long linear_addr;
+        /* SET_SMAP_MODE */
+        unsigned int val;
     } arg1;
     union {
        /* SET_LDT */
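
Guest-side usage sketch (not part of the patch): how a 64-bit PV kernel might wire up
the proposed interface. The MMUEXT_* values and SET_SMAP_MODE semantics are the ones
introduced above; HYPERVISOR_mmuext_op(), pv_smap_enable() and smap_pgd_mfn are
illustrative names only, and the mmuext_op layout is abbreviated.

    /* Hypothetical guest code, assuming the interface proposed in this patch. */
    #include <stdint.h>

    #define MMUEXT_NEW_SMAP_BASEPTR 21          /* proposed in public/xen.h above */
    #define MMUEXT_SET_SMAP_MODE    22
    #define DOMID_SELF              0x7ff0U

    struct mmuext_op {
        unsigned int cmd;                       /* => enum mmuext_cmd */
        union {
            uint64_t mfn;                       /* NEW_SMAP_BASEPTR */
            unsigned long linear_addr;
            unsigned int val;                   /* SET_SMAP_MODE: 0 - disable, 1 - enable */
        } arg1;
        union {
            unsigned int nr_ents;
            const void *vcpumask;
        } arg2;
    };

    /* Assumed to be provided by the guest's hypercall layer. */
    extern int HYPERVISOR_mmuext_op(struct mmuext_op *ops, unsigned int count,
                                    unsigned int *pdone, unsigned int foreigndom);

    /* Register the kernel-only page tables and switch this vCPU into SMAP mode. */
    static int pv_smap_enable(uint64_t smap_pgd_mfn)
    {
        struct mmuext_op op[2] = {
            { .cmd = MMUEXT_NEW_SMAP_BASEPTR, .arg1.mfn = smap_pgd_mfn },
            { .cmd = MMUEXT_SET_SMAP_MODE,    .arg1.val = 1            },
        };

        return HYPERVISOR_mmuext_op(op, 2, NULL, DOMID_SELF);
    }

Once SMAP mode is active, such a kernel would presumably also register its callbacks
with CALLBACKF_clac (or set VGCF_syscall_clac for the syscall path) so that entries
from user context start with EFLAGS.AC clear, and would toggle EFLAGS.AC around its
user-access primitives, relying on the EFLAGS.AC handling added to create_bounce_frame
and do_iret above to switch between the two kernel page-table views.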