x86/EFI: meet further spec requirements for runtime calls So far we didn't guarantee 16-byte alignment of the stack: While (so far) we don't tell the compiler to use smaller alignment, we also don't guarantee 16-byte alignment when establishing stack pointers for new vCPUs. Runtime service functions using SSE instructions may end with #GP(0) without that. Note that making use of -mpreferred-stack-boundary=3, as mentioned in the comment, wouldn't help to reduce the needed alignment: The compiler would then be free to align the stack of the function with the aligned object, but would be permitted to place an odd number of 8-byte objects there, resulting in the callee still running on an unaligned stack. (The only working alternative to the approach chosen here would be to use -mincoming-stack-boundary=3, but that would affect all functions in runtime.c, not just the ones actually making runtime services calls. And it would still require the manual alignment logic here to be used with gcc 5.2 and earlier - which don't permit that command line option - except that the alignment amount would then become conditional.) Hence enforce the needed alignment by making efi_rs_enter() return a suitably aligned structure, which the caller then necessarily has to store in a suitably aligned local variable, the address of which then gets passed to efi_rs_leave(). Also (to limit exposure) move the function declarations to where they belong: They're local to runtime.c, and shared only with compat.c (by the latter including the former). Furthermore we should avoid #MF being raised by the FLDCW we execute. Signed-off-by: Jan Beulich --- v2: Almost complete re-work. --- a/xen/common/efi/runtime.c +++ b/xen/common/efi/runtime.c @@ -8,6 +8,25 @@ DEFINE_XEN_GUEST_HANDLE(CHAR16); +struct efi_rs_state { +#ifdef CONFIG_X86 + /* + * The way stacks get set up leads to them always being on an 8-byte + * boundary not evenly divisible by 16 (see asm-x86/current.h). 
The EFI ABI, + * just like the CPU one, however requires stacks to be 16-byte aligned + * before every function call. Since the compiler assumes this (unless + * passing it -mpreferred-stack-boundary=3), it wouldn't generate code to + * align the stack to 16 bytes even if putting a 16-byte aligned object + * there. Hence we need to force larger than 16-byte alignment, even if we + * don't strictly need that. + */ + unsigned long __aligned(32) cr3; +#endif +}; + +struct efi_rs_state efi_rs_enter(void); +void efi_rs_leave(struct efi_rs_state *); + #ifndef COMPAT /* @@ -54,17 +73,19 @@ struct efi __read_mostly efi = { const struct efi_pci_rom *__read_mostly efi_pci_roms; #ifndef CONFIG_ARM /* TODO - disabled until implemented on ARM */ -unsigned long efi_rs_enter(void) + +struct efi_rs_state efi_rs_enter(void) { static const u16 fcw = FCW_DEFAULT; static const u32 mxcsr = MXCSR_DEFAULT; - unsigned long cr3 = read_cr3(); + struct efi_rs_state state = { .cr3 = 0 }; if ( !efi_l4_pgtable ) - return 0; + return state; + state.cr3 = read_cr3(); save_fpu_enable(); - asm volatile ( "fldcw %0" :: "m" (fcw) ); + asm volatile ( "fnclex; fldcw %0" :: "m" (fcw) ); asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) ); spin_lock(&efi_rs_lock); @@ -87,14 +108,14 @@ unsigned long efi_rs_enter(void) write_cr3(virt_to_maddr(efi_l4_pgtable)); - return cr3; + return state; } -void efi_rs_leave(unsigned long cr3) +void efi_rs_leave(struct efi_rs_state *state) { - if ( !cr3 ) + if ( !state->cr3 ) return; - write_cr3(cr3); + write_cr3(state->cr3); if ( is_pv_vcpu(current) && !is_idle_vcpu(current) ) { struct desc_ptr gdt_desc = { @@ -121,14 +142,15 @@ unsigned long efi_get_time(void) { EFI_TIME time; EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(), flags; + struct efi_rs_state state = efi_rs_enter(); + unsigned long flags; - if ( !cr3 ) + if ( !state.cr3 ) return 0; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(&time, NULL); spin_unlock_irqrestore(&rtc_lock, flags); - 
efi_rs_leave(cr3); + efi_rs_leave(&state); if ( EFI_ERROR(status) ) return 0; @@ -140,12 +162,12 @@ unsigned long efi_get_time(void) void efi_halt_system(void) { EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return; status = efi_rs->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); - efi_rs_leave(cr3); + efi_rs_leave(&state); printk(XENLOG_WARNING "EFI: could not halt system (%#lx)\n", status); } @@ -153,13 +175,13 @@ void efi_halt_system(void) void efi_reset_system(bool_t warm) { EFI_STATUS status; - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return; status = efi_rs->ResetSystem(warm ? EfiResetWarm : EfiResetCold, EFI_SUCCESS, 0, NULL); - efi_rs_leave(cr3); + efi_rs_leave(&state); printk(XENLOG_WARNING "EFI: could not reset system (%#lx)\n", status); } @@ -179,12 +201,12 @@ int efi_get_info(uint32_t idx, union xen break; case XEN_FW_EFI_RT_VERSION: { - unsigned long cr3 = efi_rs_enter(); + struct efi_rs_state state = efi_rs_enter(); - if ( !cr3 ) + if ( !state.cr3 ) return -EOPNOTSUPP; info->version = efi_rs->Hdr.Revision; - efi_rs_leave(cr3); + efi_rs_leave(&state); break; } case XEN_FW_EFI_CONFIG_TABLE: @@ -302,7 +324,8 @@ static inline EFI_GUID *cast_guid(struct int efi_runtime_call(struct xenpf_efi_runtime_call *op) { - unsigned long cr3, flags; + struct efi_rs_state state; + unsigned long flags; EFI_STATUS status = EFI_NOT_STARTED; int rc = 0; @@ -315,13 +338,13 @@ int efi_runtime_call(struct xenpf_efi_ru if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( !EFI_ERROR(status) ) { @@ -337,13 +360,13 @@ int 
efi_runtime_call(struct xenpf_efi_ru if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetTime(cast_time(&op->u.set_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_get_wakeup_time: @@ -353,14 +376,14 @@ int efi_runtime_call(struct xenpf_efi_ru if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetWakeupTime(&enabled, &pending, cast_time(&op->u.get_wakeup_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( !EFI_ERROR(status) ) { @@ -377,8 +400,8 @@ int efi_runtime_call(struct xenpf_efi_ru XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY) ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 ) + state = efi_rs_enter(); + if ( !state.cr3 ) return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetWakeupTime(!!(op->misc & @@ -388,7 +411,7 @@ int efi_runtime_call(struct xenpf_efi_ru NULL : cast_time(&op->u.set_wakeup_time)); spin_unlock_irqrestore(&rtc_lock, flags); - efi_rs_leave(cr3); + efi_rs_leave(&state); op->misc = 0; break; @@ -397,12 +420,12 @@ int efi_runtime_call(struct xenpf_efi_ru if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) status = efi_rs->GetNextHighMonotonicCount(&op->misc); else rc = -EOPNOTSUPP; - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_get_variable: @@ -436,13 +459,13 @@ int efi_runtime_call(struct xenpf_efi_ru else data = NULL; - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) { status = efi_rs->GetVariable( name, cast_guid(&op->u.get_variable.vendor_guid), &op->misc, &size, data); - efi_rs_leave(cr3); + efi_rs_leave(&state); if ( 
!EFI_ERROR(status) && copy_to_guest(op->u.get_variable.data, data, size) ) @@ -479,14 +502,14 @@ int efi_runtime_call(struct xenpf_efi_ru rc = -EFAULT; else { - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) status = efi_rs->SetVariable( name, cast_guid(&op->u.set_variable.vendor_guid), op->misc, op->u.set_variable.size, data); else rc = -EOPNOTSUPP; - efi_rs_leave(cr3); + efi_rs_leave(&state); } xfree(data); @@ -516,13 +539,13 @@ int efi_runtime_call(struct xenpf_efi_ru return -EFAULT; } - cr3 = efi_rs_enter(); - if ( cr3 ) + state = efi_rs_enter(); + if ( state.cr3 ) { status = efi_rs->GetNextVariableName( &size, name.str, cast_guid(&op->u.get_next_variable_name.vendor_guid)); - efi_rs_leave(cr3); + efi_rs_leave(&state); /* * Copy the variable name if necessary. The caller provided size @@ -571,10 +594,10 @@ int efi_runtime_call(struct xenpf_efi_ru break; } - cr3 = efi_rs_enter(); - if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) + state = efi_rs_enter(); + if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { - efi_rs_leave(cr3); + efi_rs_leave(&state); return -EOPNOTSUPP; } status = efi_rs->QueryVariableInfo( @@ -582,7 +605,7 @@ int efi_runtime_call(struct xenpf_efi_ru &op->u.query_variable_info.max_store_size, &op->u.query_variable_info.remain_store_size, &op->u.query_variable_info.max_size); - efi_rs_leave(cr3); + efi_rs_leave(&state); break; case XEN_EFI_query_capsule_capabilities: @@ -590,13 +613,13 @@ int efi_runtime_call(struct xenpf_efi_ru if ( op->misc ) return -EINVAL; - cr3 = efi_rs_enter(); - if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) + state = efi_rs_enter(); + if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { - efi_rs_leave(cr3); + efi_rs_leave(&state); return -EOPNOTSUPP; } - efi_rs_leave(cr3); + efi_rs_leave(&state); /* XXX fall through for now */ default: return -ENOSYS; --- a/xen/common/efi/efi.h +++ b/xen/common/efi/efi.h @@ -36,6 +36,3 @@ extern const struct efi_pci_rom *efi_pci extern UINT64 
efi_boot_max_var_store_size, efi_boot_remain_var_store_size, efi_boot_max_var_size; - -unsigned long efi_rs_enter(void); -void efi_rs_leave(unsigned long);