x86/PV: don't wrongly hide/expose CPUID.OSXSAVE from/to user mode

User mode code generally cannot be expected to invoke the PV-enabled
CPUID Xen supports, and prior to the CPUID levelling changes for 4.7
(as well as even nowadays on levelling incapable hardware) such CPUID
invocations actually saw the host CR4.OSXSAVE value, whereas prior to
this patch
- on Intel guest user mode always saw the flag clear,
- on AMD guest user mode saw the flag set even when the guest kernel
  didn't enable use of XSAVE/XRSTOR.

Fold in the guest view of CR4.OSXSAVE when setting the levelling MSRs,
just like we do in other CPUID handling.

To make guest CR4 changes immediately visible via CPUID, also invoke
ctxt_switch_levelling() from the CR4 write path.

Signed-off-by: Jan Beulich
---
v2: Invert operation on AMD (from OR-ing in to AND-ing out), adjust
    title, and extend description.

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -206,17 +206,30 @@ static void __init noinline probe_maskin
 static void amd_ctxt_switch_levelling(const struct domain *nextd)
 {
     struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
-    const struct cpuidmasks *masks =
-        (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
-        ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
+    const struct cpuidmasks *masks = NULL;
+    unsigned long cr4;
+    uint64_t val__1cd = 0, val_e1cd = 0, val__7ab0 = 0, val__6c = 0;
+
+    if (nextd && is_pv_domain(nextd) && !is_idle_domain(nextd)) {
+        cr4 = current->arch.pv_vcpu.ctrlreg[4];
+        masks = nextd->arch.pv_domain.cpuidmasks;
+    } else
+        cr4 = read_cr4();
+
+    if (!(cr4 & X86_CR4_OSXSAVE))
+        val__1cd |= (uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32;
+
+    if (!masks)
+        masks = &cpuidmask_defaults;
 
 #define LAZY(cap, msr, field)                                       \
     ({                                                              \
-        if (unlikely(these_masks->field != masks->field) &&         \
+        val_##field = ~val_##field & masks->field;                  \
+        if (unlikely(these_masks->field != val_##field) &&          \
             ((levelling_caps & cap) == cap))                        \
         {                                                           \
-            wrmsr_amd(msr, masks->field);                           \
-            these_masks->field = masks->field;                      \
+            wrmsr_amd(msr, val_##field);                            \
+            these_masks->field = val_##field;                       \
         }                                                           \
     })
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -154,7 +154,9 @@ static void __init probe_masking_msrs(vo
 static void intel_ctxt_switch_levelling(const struct domain *nextd)
 {
     struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
-    const struct cpuidmasks *masks;
+    const struct cpuidmasks *masks = NULL;
+    unsigned long cr4;
+    uint64_t val__1cd = 0, val_e1cd = 0, val_Da1 = 0;
 
     if (cpu_has_cpuid_faulting) {
         /*
@@ -178,16 +180,27 @@ static void intel_ctxt_switch_levelling(
         return;
     }
 
-    masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
-        ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
+    if (nextd && is_pv_domain(nextd) && !is_idle_domain(nextd)) {
+        cr4 = current->arch.pv_vcpu.ctrlreg[4];
+        masks = nextd->arch.pv_domain.cpuidmasks;
+    } else
+        cr4 = read_cr4();
+
+    /* OSXSAVE cleared by pv_featureset. Fast-forward CR4 back in. */
+    if (cr4 & X86_CR4_OSXSAVE)
+        val__1cd |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+    if (!masks)
+        masks = &cpuidmask_defaults;
 
 #define LAZY(msr, field)                                            \
     ({                                                              \
-        if (unlikely(these_masks->field != masks->field) &&         \
+        val_##field |= masks->field;                                \
+        if (unlikely(these_masks->field != val_##field) &&          \
             (msr))                                                  \
         {                                                           \
-            wrmsrl((msr), masks->field);                            \
-            these_masks->field = masks->field;                      \
+            wrmsrl((msr), val_##field);                             \
+            these_masks->field = val_##field;                       \
         }                                                           \
     })
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2733,6 +2733,7 @@ static int emulate_privileged_op(struct
         case 4: /* Write CR4 */
             v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg);
             write_cr4(pv_guest_cr4_to_real_cr4(v));
+            ctxt_switch_levelling(currd);
             break;
 
         default:
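
For illustration only, not part of the patch: a minimal standalone sketch of
the resulting rule for the CPUID.1:ECX levelling word, assuming a plain 32-bit
view of that word. The function and parameter names (intel_ecx_level,
amd_ecx_level, base_mask, guest_cr4) are invented for this example; only the
bit positions are architectural. The Intel path ORs the dynamic OSXSAVE bit
back into the static mask when the guest's virtual CR4 has it set, while the
AMD path (per v2) ANDs it out while the guest kernel has CR4.OSXSAVE clear:

/* Illustrative only: the identifiers below are not Xen symbols. */
#include <stdint.h>

#define X86_CR4_OSXSAVE     (1ul << 18)  /* CR4.OSXSAVE */
#define CPUID1_ECX_OSXSAVE  (1u << 27)   /* CPUID.1:ECX.OSXSAVE */

/* Intel-style masking: OR the dynamic bit into the static mask. */
static uint32_t intel_ecx_level(uint32_t base_mask, unsigned long guest_cr4)
{
    return base_mask |
           ((guest_cr4 & X86_CR4_OSXSAVE) ? CPUID1_ECX_OSXSAVE : 0);
}

/* AMD-style masking (v2 approach): AND the bit out while the guest
 * kernel has not enabled XSAVE. */
static uint32_t amd_ecx_level(uint32_t base_mask, unsigned long guest_cr4)
{
    return base_mask &
           ~((guest_cr4 & X86_CR4_OSXSAVE) ? 0u : CPUID1_ECX_OSXSAVE);
}

Either way, guest user mode's direct CPUID then reflects whether the guest
kernel actually enabled XSAVE, which is what the added ctxt_switch_levelling()
call on the CR4 write path keeps up to date.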